{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 3798,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007898894154818325,
"grad_norm": 40.71173454733523,
"learning_rate": 0.0,
"loss": 3.673168182373047,
"step": 1
},
{
"epoch": 0.001579778830963665,
"grad_norm": 33.74250956215495,
"learning_rate": 2.631578947368421e-08,
"loss": 4.129596710205078,
"step": 2
},
{
"epoch": 0.002369668246445498,
"grad_norm": 36.11546455389956,
"learning_rate": 5.263157894736842e-08,
"loss": 4.184542655944824,
"step": 3
},
{
"epoch": 0.00315955766192733,
"grad_norm": 35.383571366831234,
"learning_rate": 7.894736842105264e-08,
"loss": 4.1142401695251465,
"step": 4
},
{
"epoch": 0.0039494470774091624,
"grad_norm": 34.37559125944977,
"learning_rate": 1.0526315789473685e-07,
"loss": 3.7817375659942627,
"step": 5
},
{
"epoch": 0.004739336492890996,
"grad_norm": 36.14685215620937,
"learning_rate": 1.3157894736842107e-07,
"loss": 4.114397048950195,
"step": 6
},
{
"epoch": 0.005529225908372828,
"grad_norm": 41.482542925342734,
"learning_rate": 1.5789473684210527e-07,
"loss": 4.3803019523620605,
"step": 7
},
{
"epoch": 0.00631911532385466,
"grad_norm": 33.58108993777,
"learning_rate": 1.8421052631578948e-07,
"loss": 4.10243558883667,
"step": 8
},
{
"epoch": 0.0071090047393364926,
"grad_norm": 39.33271362667731,
"learning_rate": 2.105263157894737e-07,
"loss": 3.9596402645111084,
"step": 9
},
{
"epoch": 0.007898894154818325,
"grad_norm": 50.03237601031588,
"learning_rate": 2.3684210526315792e-07,
"loss": 4.745785713195801,
"step": 10
},
{
|
|
"epoch": 0.008688783570300158,
|
|
"grad_norm": 35.67111148226347,
|
|
"learning_rate": 2.6315789473684213e-07,
|
|
"loss": 3.8755834102630615,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.009478672985781991,
|
|
"grad_norm": 32.70142008970876,
|
|
"learning_rate": 2.894736842105263e-07,
|
|
"loss": 4.783450603485107,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.010268562401263823,
|
|
"grad_norm": 52.049082104276714,
|
|
"learning_rate": 3.1578947368421055e-07,
|
|
"loss": 4.5102057456970215,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.011058451816745656,
|
|
"grad_norm": 29.064967370756015,
|
|
"learning_rate": 3.421052631578948e-07,
|
|
"loss": 3.072397470474243,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.011848341232227487,
|
|
"grad_norm": 22.10944679220599,
|
|
"learning_rate": 3.6842105263157896e-07,
|
|
"loss": 2.7109313011169434,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.01263823064770932,
|
|
"grad_norm": 40.53408902245728,
|
|
"learning_rate": 3.9473684210526315e-07,
|
|
"loss": 4.520854949951172,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.013428120063191154,
|
|
"grad_norm": 36.60530673657224,
|
|
"learning_rate": 4.210526315789474e-07,
|
|
"loss": 4.224055290222168,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.014218009478672985,
|
|
"grad_norm": 33.336265071789754,
|
|
"learning_rate": 4.473684210526316e-07,
|
|
"loss": 4.239147186279297,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.015007898894154818,
|
|
"grad_norm": 43.94432106792913,
|
|
"learning_rate": 4.7368421052631585e-07,
|
|
"loss": 4.043759346008301,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.01579778830963665,
|
|
"grad_norm": 33.99185039525682,
|
|
"learning_rate": 5.000000000000001e-07,
|
|
"loss": 4.236106872558594,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.016587677725118485,
|
|
"grad_norm": 38.29448839626639,
|
|
"learning_rate": 5.263157894736843e-07,
|
|
"loss": 4.180179595947266,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.017377567140600316,
|
|
"grad_norm": 30.15666884397043,
|
|
"learning_rate": 5.526315789473684e-07,
|
|
"loss": 3.893503189086914,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.018167456556082148,
|
|
"grad_norm": 36.85491373097027,
|
|
"learning_rate": 5.789473684210526e-07,
|
|
"loss": 4.568385124206543,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.018957345971563982,
|
|
"grad_norm": 32.16948864627544,
|
|
"learning_rate": 6.052631578947369e-07,
|
|
"loss": 3.1852474212646484,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.019747235387045814,
|
|
"grad_norm": 40.73483389215707,
|
|
"learning_rate": 6.315789473684211e-07,
|
|
"loss": 5.036445617675781,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.020537124802527645,
|
|
"grad_norm": 37.103608164895796,
|
|
"learning_rate": 6.578947368421053e-07,
|
|
"loss": 4.371845245361328,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.02132701421800948,
|
|
"grad_norm": 24.05172035473979,
|
|
"learning_rate": 6.842105263157896e-07,
|
|
"loss": 3.7923712730407715,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.022116903633491312,
|
|
"grad_norm": 35.28854412414826,
|
|
"learning_rate": 7.105263157894737e-07,
|
|
"loss": 4.174007415771484,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.022906793048973143,
|
|
"grad_norm": 18.614769000709014,
|
|
"learning_rate": 7.368421052631579e-07,
|
|
"loss": 2.7547430992126465,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.023696682464454975,
|
|
"grad_norm": 28.426674964913303,
|
|
"learning_rate": 7.631578947368422e-07,
|
|
"loss": 4.140813827514648,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.02448657187993681,
|
|
"grad_norm": 22.844379088367365,
|
|
"learning_rate": 7.894736842105263e-07,
|
|
"loss": 3.9789390563964844,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.02527646129541864,
|
|
"grad_norm": 16.324910957553573,
|
|
"learning_rate": 8.157894736842106e-07,
|
|
"loss": 3.408233165740967,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.026066350710900472,
|
|
"grad_norm": 17.84036406675527,
|
|
"learning_rate": 8.421052631578948e-07,
|
|
"loss": 3.152615547180176,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.026856240126382307,
|
|
"grad_norm": 37.04828879151497,
|
|
"learning_rate": 8.68421052631579e-07,
|
|
"loss": 4.3278489112854,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.02764612954186414,
|
|
"grad_norm": 23.57795025738247,
|
|
"learning_rate": 8.947368421052632e-07,
|
|
"loss": 3.982863426208496,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.02843601895734597,
|
|
"grad_norm": 21.626842144038697,
|
|
"learning_rate": 9.210526315789474e-07,
|
|
"loss": 3.7231547832489014,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.029225908372827805,
|
|
"grad_norm": 16.16330549232114,
|
|
"learning_rate": 9.473684210526317e-07,
|
|
"loss": 3.6238391399383545,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.030015797788309637,
|
|
"grad_norm": 19.328825464830587,
|
|
"learning_rate": 9.736842105263158e-07,
|
|
"loss": 3.8130602836608887,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.030805687203791468,
|
|
"grad_norm": 13.0720498885057,
|
|
"learning_rate": 1.0000000000000002e-06,
|
|
"loss": 3.765305519104004,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.0315955766192733,
|
|
"grad_norm": 20.81732653658493,
|
|
"learning_rate": 1.0263157894736843e-06,
|
|
"loss": 3.808793783187866,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.03238546603475513,
|
|
"grad_norm": 16.11869827988803,
|
|
"learning_rate": 1.0526315789473685e-06,
|
|
"loss": 3.3212504386901855,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.03317535545023697,
|
|
"grad_norm": 19.85916750032169,
|
|
"learning_rate": 1.0789473684210527e-06,
|
|
"loss": 3.4793872833251953,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.0339652448657188,
|
|
"grad_norm": 14.5384162049997,
|
|
"learning_rate": 1.1052631578947369e-06,
|
|
"loss": 3.4185662269592285,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.03475513428120063,
|
|
"grad_norm": 13.037105709248864,
|
|
"learning_rate": 1.1315789473684213e-06,
|
|
"loss": 3.05875563621521,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.035545023696682464,
|
|
"grad_norm": 28.094177440990883,
|
|
"learning_rate": 1.1578947368421053e-06,
|
|
"loss": 4.200315475463867,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.036334913112164295,
|
|
"grad_norm": 15.298203417574237,
|
|
"learning_rate": 1.1842105263157894e-06,
|
|
"loss": 3.230691909790039,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.03712480252764613,
|
|
"grad_norm": 20.703338617496122,
|
|
"learning_rate": 1.2105263157894738e-06,
|
|
"loss": 3.226253032684326,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.037914691943127965,
|
|
"grad_norm": 19.766902333430476,
|
|
"learning_rate": 1.236842105263158e-06,
|
|
"loss": 3.6443753242492676,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.038704581358609796,
|
|
"grad_norm": 12.540974533970843,
|
|
"learning_rate": 1.2631578947368422e-06,
|
|
"loss": 3.3909823894500732,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.03949447077409163,
|
|
"grad_norm": 8.67596125382628,
|
|
"learning_rate": 1.2894736842105266e-06,
|
|
"loss": 3.0414187908172607,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.04028436018957346,
|
|
"grad_norm": 14.600880901536001,
|
|
"learning_rate": 1.3157894736842106e-06,
|
|
"loss": 3.7503838539123535,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.04107424960505529,
|
|
"grad_norm": 17.076909526004943,
|
|
"learning_rate": 1.342105263157895e-06,
|
|
"loss": 3.5003442764282227,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.04186413902053712,
|
|
"grad_norm": 9.236759947184684,
|
|
"learning_rate": 1.3684210526315791e-06,
|
|
"loss": 2.874293088912964,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.04265402843601896,
|
|
"grad_norm": 11.84849423549626,
|
|
"learning_rate": 1.394736842105263e-06,
|
|
"loss": 3.1183371543884277,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.04344391785150079,
|
|
"grad_norm": 12.90432487886042,
|
|
"learning_rate": 1.4210526315789475e-06,
|
|
"loss": 3.250241756439209,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.044233807266982623,
|
|
"grad_norm": 16.869043357282678,
|
|
"learning_rate": 1.4473684210526317e-06,
|
|
"loss": 3.1247291564941406,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.045023696682464455,
|
|
"grad_norm": 9.282453097796106,
|
|
"learning_rate": 1.4736842105263159e-06,
|
|
"loss": 2.924187421798706,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.045813586097946286,
|
|
"grad_norm": 8.648636973287344,
|
|
"learning_rate": 1.5e-06,
|
|
"loss": 3.1763153076171875,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.04660347551342812,
|
|
"grad_norm": 8.629843477816483,
|
|
"learning_rate": 1.5263157894736844e-06,
|
|
"loss": 3.0944461822509766,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.04739336492890995,
|
|
"grad_norm": 52.54991212893474,
|
|
"learning_rate": 1.5526315789473686e-06,
|
|
"loss": 2.84696888923645,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.04818325434439179,
|
|
"grad_norm": 10.192033588061753,
|
|
"learning_rate": 1.5789473684210526e-06,
|
|
"loss": 3.2654457092285156,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.04897314375987362,
|
|
"grad_norm": 19.884719085034156,
|
|
"learning_rate": 1.605263157894737e-06,
|
|
"loss": 3.7629897594451904,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.04976303317535545,
|
|
"grad_norm": 12.313693728331911,
|
|
"learning_rate": 1.6315789473684212e-06,
|
|
"loss": 3.5302507877349854,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.05055292259083728,
|
|
"grad_norm": 17.01311845510386,
|
|
"learning_rate": 1.6578947368421053e-06,
|
|
"loss": 3.5614633560180664,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.05134281200631911,
|
|
"grad_norm": 16.95840885870688,
|
|
"learning_rate": 1.6842105263157895e-06,
|
|
"loss": 3.264988660812378,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.052132701421800945,
|
|
"grad_norm": 14.308550297948006,
|
|
"learning_rate": 1.710526315789474e-06,
|
|
"loss": 3.265235424041748,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.05292259083728278,
|
|
"grad_norm": 14.519207172660034,
|
|
"learning_rate": 1.736842105263158e-06,
|
|
"loss": 3.188286542892456,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.053712480252764615,
|
|
"grad_norm": 17.603652475188834,
|
|
"learning_rate": 1.7631578947368423e-06,
|
|
"loss": 2.9039247035980225,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.054502369668246446,
|
|
"grad_norm": 11.285868937730449,
|
|
"learning_rate": 1.7894736842105265e-06,
|
|
"loss": 3.108914375305176,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.05529225908372828,
|
|
"grad_norm": 9.21536421673824,
|
|
"learning_rate": 1.8157894736842109e-06,
|
|
"loss": 2.8382675647735596,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.05608214849921011,
|
|
"grad_norm": 13.008599403705528,
|
|
"learning_rate": 1.8421052631578948e-06,
|
|
"loss": 3.444577217102051,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.05687203791469194,
|
|
"grad_norm": 18.071771458815004,
|
|
"learning_rate": 1.868421052631579e-06,
|
|
"loss": 3.67020320892334,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.05766192733017378,
|
|
"grad_norm": 10.064836586829417,
|
|
"learning_rate": 1.8947368421052634e-06,
|
|
"loss": 3.3886473178863525,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.05845181674565561,
|
|
"grad_norm": 10.757354094621407,
|
|
"learning_rate": 1.9210526315789474e-06,
|
|
"loss": 2.9087605476379395,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.05924170616113744,
|
|
"grad_norm": 11.450459481352778,
|
|
"learning_rate": 1.9473684210526315e-06,
|
|
"loss": 3.3567676544189453,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.06003159557661927,
|
|
"grad_norm": 13.789083297248077,
|
|
"learning_rate": 1.973684210526316e-06,
|
|
"loss": 3.567488431930542,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.060821484992101105,
|
|
"grad_norm": 10.972976083153702,
|
|
"learning_rate": 2.0000000000000003e-06,
|
|
"loss": 3.606407642364502,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.061611374407582936,
|
|
"grad_norm": 12.31068362309533,
|
|
"learning_rate": 2.026315789473684e-06,
|
|
"loss": 3.5690627098083496,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.06240126382306477,
|
|
"grad_norm": 10.473991872514576,
|
|
"learning_rate": 2.0526315789473687e-06,
|
|
"loss": 3.2413203716278076,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.0631911532385466,
|
|
"grad_norm": 11.476517559025107,
|
|
"learning_rate": 2.078947368421053e-06,
|
|
"loss": 3.541959524154663,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.06398104265402843,
|
|
"grad_norm": 10.327083091837444,
|
|
"learning_rate": 2.105263157894737e-06,
|
|
"loss": 2.6203503608703613,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.06477093206951026,
|
|
"grad_norm": 8.585567685933643,
|
|
"learning_rate": 2.1315789473684212e-06,
|
|
"loss": 3.0848870277404785,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.06556082148499211,
|
|
"grad_norm": 13.504858685023217,
|
|
"learning_rate": 2.1578947368421054e-06,
|
|
"loss": 2.950331687927246,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.06635071090047394,
|
|
"grad_norm": 12.928368968515905,
|
|
"learning_rate": 2.1842105263157896e-06,
|
|
"loss": 3.3477673530578613,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.06714060031595577,
|
|
"grad_norm": 34.5955948056376,
|
|
"learning_rate": 2.2105263157894738e-06,
|
|
"loss": 3.6285767555236816,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.0679304897314376,
|
|
"grad_norm": 21.770696431108018,
|
|
"learning_rate": 2.236842105263158e-06,
|
|
"loss": 2.9111273288726807,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.06872037914691943,
|
|
"grad_norm": 10.760518044408816,
|
|
"learning_rate": 2.2631578947368426e-06,
|
|
"loss": 3.0999932289123535,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.06951026856240126,
|
|
"grad_norm": 14.808381283086948,
|
|
"learning_rate": 2.2894736842105263e-06,
|
|
"loss": 2.990964412689209,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.0703001579778831,
|
|
"grad_norm": 10.068849907277835,
|
|
"learning_rate": 2.3157894736842105e-06,
|
|
"loss": 3.190941095352173,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.07109004739336493,
|
|
"grad_norm": 13.214233489035319,
|
|
"learning_rate": 2.342105263157895e-06,
|
|
"loss": 3.2512447834014893,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.07187993680884676,
|
|
"grad_norm": 9.43746117081832,
|
|
"learning_rate": 2.368421052631579e-06,
|
|
"loss": 2.6215569972991943,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.07266982622432859,
|
|
"grad_norm": 6.790489861990664,
|
|
"learning_rate": 2.3947368421052635e-06,
|
|
"loss": 2.6833393573760986,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.07345971563981042,
|
|
"grad_norm": 10.046189419343667,
|
|
"learning_rate": 2.4210526315789477e-06,
|
|
"loss": 3.4258813858032227,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.07424960505529225,
|
|
"grad_norm": 10.439120097914849,
|
|
"learning_rate": 2.447368421052632e-06,
|
|
"loss": 3.059093952178955,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.0750394944707741,
|
|
"grad_norm": 15.352463342964036,
|
|
"learning_rate": 2.473684210526316e-06,
|
|
"loss": 3.1503472328186035,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.07582938388625593,
|
|
"grad_norm": 10.372976525025615,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 3.1993770599365234,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.07661927330173776,
|
|
"grad_norm": 9.496881055859197,
|
|
"learning_rate": 2.5263157894736844e-06,
|
|
"loss": 3.2442855834960938,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.07740916271721959,
|
|
"grad_norm": 18.476512661393848,
|
|
"learning_rate": 2.552631578947369e-06,
|
|
"loss": 3.0400185585021973,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.07819905213270142,
|
|
"grad_norm": 9.034794663205666,
|
|
"learning_rate": 2.578947368421053e-06,
|
|
"loss": 3.134880542755127,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.07898894154818326,
|
|
"grad_norm": 9.677932164121641,
|
|
"learning_rate": 2.605263157894737e-06,
|
|
"loss": 3.2033190727233887,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.07977883096366509,
|
|
"grad_norm": 11.430338365698852,
|
|
"learning_rate": 2.631578947368421e-06,
|
|
"loss": 2.273861885070801,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.08056872037914692,
|
|
"grad_norm": 11.80829320514626,
|
|
"learning_rate": 2.6578947368421053e-06,
|
|
"loss": 2.6837069988250732,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.08135860979462875,
|
|
"grad_norm": 11.605993978422056,
|
|
"learning_rate": 2.68421052631579e-06,
|
|
"loss": 3.128217935562134,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.08214849921011058,
|
|
"grad_norm": 11.686211118151036,
|
|
"learning_rate": 2.710526315789474e-06,
|
|
"loss": 3.3786544799804688,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.08293838862559241,
|
|
"grad_norm": 11.970591219069323,
|
|
"learning_rate": 2.7368421052631583e-06,
|
|
"loss": 3.7821977138519287,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.08372827804107424,
|
|
"grad_norm": 8.789149918197234,
|
|
"learning_rate": 2.7631578947368424e-06,
|
|
"loss": 2.573795795440674,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.08451816745655608,
|
|
"grad_norm": 12.34914621357682,
|
|
"learning_rate": 2.789473684210526e-06,
|
|
"loss": 3.3090810775756836,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.08530805687203792,
|
|
"grad_norm": 13.080241716157126,
|
|
"learning_rate": 2.815789473684211e-06,
|
|
"loss": 2.717519998550415,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.08609794628751975,
|
|
"grad_norm": 10.317983708712724,
|
|
"learning_rate": 2.842105263157895e-06,
|
|
"loss": 2.5964150428771973,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.08688783570300158,
|
|
"grad_norm": 8.933831846542349,
|
|
"learning_rate": 2.868421052631579e-06,
|
|
"loss": 3.3069779872894287,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.08767772511848342,
|
|
"grad_norm": 9.117041629342554,
|
|
"learning_rate": 2.8947368421052634e-06,
|
|
"loss": 2.860931396484375,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.08846761453396525,
|
|
"grad_norm": 12.283222755341374,
|
|
"learning_rate": 2.921052631578948e-06,
|
|
"loss": 3.619509220123291,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.08925750394944708,
|
|
"grad_norm": 9.277584709531098,
|
|
"learning_rate": 2.9473684210526317e-06,
|
|
"loss": 2.366508960723877,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.09004739336492891,
|
|
"grad_norm": 7.894805274949022,
|
|
"learning_rate": 2.973684210526316e-06,
|
|
"loss": 3.2010068893432617,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.09083728278041074,
|
|
"grad_norm": 7.815315629294605,
|
|
"learning_rate": 3e-06,
|
|
"loss": 3.1761326789855957,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.09162717219589257,
|
|
"grad_norm": 12.929613818899176,
|
|
"learning_rate": 3.0263157894736843e-06,
|
|
"loss": 3.074592351913452,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.0924170616113744,
|
|
"grad_norm": 10.211678960918519,
|
|
"learning_rate": 3.052631578947369e-06,
|
|
"loss": 2.9478042125701904,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.09320695102685624,
|
|
"grad_norm": 14.765610562991277,
|
|
"learning_rate": 3.078947368421053e-06,
|
|
"loss": 2.716470718383789,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.09399684044233807,
|
|
"grad_norm": 13.058271020031325,
|
|
"learning_rate": 3.1052631578947372e-06,
|
|
"loss": 2.669914722442627,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.0947867298578199,
|
|
"grad_norm": 10.81941062693438,
|
|
"learning_rate": 3.131578947368421e-06,
|
|
"loss": 2.9119224548339844,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.09557661927330174,
|
|
"grad_norm": 10.596711287978934,
|
|
"learning_rate": 3.157894736842105e-06,
|
|
"loss": 2.921963930130005,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.09636650868878358,
|
|
"grad_norm": 15.574644209065054,
|
|
"learning_rate": 3.1842105263157898e-06,
|
|
"loss": 3.0262293815612793,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.0971563981042654,
|
|
"grad_norm": 10.64778680746815,
|
|
"learning_rate": 3.210526315789474e-06,
|
|
"loss": 3.186441421508789,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.09794628751974724,
|
|
"grad_norm": 14.63351232300644,
|
|
"learning_rate": 3.236842105263158e-06,
|
|
"loss": 3.451972246170044,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.09873617693522907,
|
|
"grad_norm": 8.585310462971533,
|
|
"learning_rate": 3.2631578947368423e-06,
|
|
"loss": 2.5213232040405273,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.0995260663507109,
|
|
"grad_norm": 7.39672659605034,
|
|
"learning_rate": 3.289473684210527e-06,
|
|
"loss": 2.5365099906921387,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.10031595576619273,
|
|
"grad_norm": 11.157374472691357,
|
|
"learning_rate": 3.3157894736842107e-06,
|
|
"loss": 2.8651609420776367,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.10110584518167456,
|
|
"grad_norm": 10.596959610695333,
|
|
"learning_rate": 3.342105263157895e-06,
|
|
"loss": 3.034381866455078,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.1018957345971564,
|
|
"grad_norm": 14.277741979207777,
|
|
"learning_rate": 3.368421052631579e-06,
|
|
"loss": 2.627697467803955,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.10268562401263823,
|
|
"grad_norm": 7.246920014548371,
|
|
"learning_rate": 3.3947368421052636e-06,
|
|
"loss": 2.9030356407165527,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.10347551342812006,
|
|
"grad_norm": 7.2011210077095775,
|
|
"learning_rate": 3.421052631578948e-06,
|
|
"loss": 2.7330222129821777,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.10426540284360189,
|
|
"grad_norm": 9.54971680440488,
|
|
"learning_rate": 3.447368421052632e-06,
|
|
"loss": 2.8853511810302734,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.10505529225908374,
|
|
"grad_norm": 18.526435046610747,
|
|
"learning_rate": 3.473684210526316e-06,
|
|
"loss": 3.570380449295044,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.10584518167456557,
|
|
"grad_norm": 34.71437679140294,
|
|
"learning_rate": 3.5e-06,
|
|
"loss": 3.5759708881378174,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.1066350710900474,
|
|
"grad_norm": 13.769311251161728,
|
|
"learning_rate": 3.5263157894736846e-06,
|
|
"loss": 2.9467697143554688,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.10742496050552923,
|
|
"grad_norm": 8.108715220223203,
|
|
"learning_rate": 3.5526315789473687e-06,
|
|
"loss": 2.982203483581543,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.10821484992101106,
|
|
"grad_norm": 9.835029796798992,
|
|
"learning_rate": 3.578947368421053e-06,
|
|
"loss": 2.9849791526794434,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.10900473933649289,
|
|
"grad_norm": 10.17747887266375,
|
|
"learning_rate": 3.605263157894737e-06,
|
|
"loss": 2.5947208404541016,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.10979462875197472,
|
|
"grad_norm": 30.956825233562924,
|
|
"learning_rate": 3.6315789473684217e-06,
|
|
"loss": 2.478144407272339,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.11058451816745656,
|
|
"grad_norm": 14.433139764637794,
|
|
"learning_rate": 3.657894736842106e-06,
|
|
"loss": 3.3270864486694336,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.11137440758293839,
|
|
"grad_norm": 9.114525691597905,
|
|
"learning_rate": 3.6842105263157896e-06,
|
|
"loss": 3.3300728797912598,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.11216429699842022,
|
|
"grad_norm": 10.962185488198799,
|
|
"learning_rate": 3.710526315789474e-06,
|
|
"loss": 2.858724594116211,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.11295418641390205,
|
|
"grad_norm": 9.002282231003035,
|
|
"learning_rate": 3.736842105263158e-06,
|
|
"loss": 3.2307381629943848,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.11374407582938388,
|
|
"grad_norm": 9.727066408665603,
|
|
"learning_rate": 3.7631578947368426e-06,
|
|
"loss": 3.1068058013916016,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.11453396524486571,
|
|
"grad_norm": 11.203620625302497,
|
|
"learning_rate": 3.789473684210527e-06,
|
|
"loss": 3.130736827850342,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.11532385466034756,
|
|
"grad_norm": 9.522046772312253,
|
|
"learning_rate": 3.815789473684211e-06,
|
|
"loss": 3.020020008087158,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.11611374407582939,
|
|
"grad_norm": 8.938687629412906,
|
|
"learning_rate": 3.842105263157895e-06,
|
|
"loss": 3.1917128562927246,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.11690363349131122,
|
|
"grad_norm": 12.372597800255045,
|
|
"learning_rate": 3.868421052631579e-06,
|
|
"loss": 3.393472671508789,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.11769352290679305,
|
|
"grad_norm": 11.917832534808403,
|
|
"learning_rate": 3.894736842105263e-06,
|
|
"loss": 2.8924148082733154,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.11848341232227488,
|
|
"grad_norm": 8.376978563766828,
|
|
"learning_rate": 3.921052631578947e-06,
|
|
"loss": 3.18516206741333,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.11927330173775672,
|
|
"grad_norm": 10.751606207100387,
|
|
"learning_rate": 3.947368421052632e-06,
|
|
"loss": 3.0006637573242188,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.12006319115323855,
|
|
"grad_norm": 10.601401419025592,
|
|
"learning_rate": 3.9736842105263165e-06,
|
|
"loss": 2.7774577140808105,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.12085308056872038,
|
|
"grad_norm": 16.707393086077566,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 3.4506897926330566,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.12164296998420221,
|
|
"grad_norm": 11.964190828423138,
|
|
"learning_rate": 4.026315789473684e-06,
|
|
"loss": 2.7091317176818848,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.12243285939968404,
|
|
"grad_norm": 24.527932349156284,
|
|
"learning_rate": 4.052631578947368e-06,
|
|
"loss": 3.345675468444824,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.12322274881516587,
|
|
"grad_norm": 7.3190241803547895,
|
|
"learning_rate": 4.078947368421053e-06,
|
|
"loss": 2.6796462535858154,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.1240126382306477,
|
|
"grad_norm": 11.87038661241619,
|
|
"learning_rate": 4.105263157894737e-06,
|
|
"loss": 2.6081080436706543,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.12480252764612954,
|
|
"grad_norm": 11.72825788387354,
|
|
"learning_rate": 4.1315789473684216e-06,
|
|
"loss": 3.0451087951660156,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.12559241706161137,
|
|
"grad_norm": 11.390036203544065,
|
|
"learning_rate": 4.157894736842106e-06,
|
|
"loss": 2.8156795501708984,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.1263823064770932,
|
|
"grad_norm": 21.914947252886318,
|
|
"learning_rate": 4.18421052631579e-06,
|
|
"loss": 2.579136848449707,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.12717219589257503,
|
|
"grad_norm": 10.243517013753197,
|
|
"learning_rate": 4.210526315789474e-06,
|
|
"loss": 2.9866762161254883,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.12796208530805686,
|
|
"grad_norm": 14.2986733371629,
|
|
"learning_rate": 4.236842105263158e-06,
|
|
"loss": 2.317359685897827,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.1287519747235387,
|
|
"grad_norm": 7.467677343055879,
|
|
"learning_rate": 4.2631578947368425e-06,
|
|
"loss": 1.8712537288665771,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.12954186413902052,
|
|
"grad_norm": 13.673831526330856,
|
|
"learning_rate": 4.289473684210527e-06,
|
|
"loss": 3.0335092544555664,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.13033175355450238,
|
|
"grad_norm": 16.02571007766451,
|
|
"learning_rate": 4.315789473684211e-06,
|
|
"loss": 3.632401943206787,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.13112164296998421,
|
|
"grad_norm": 17.67212914050096,
|
|
"learning_rate": 4.342105263157895e-06,
|
|
"loss": 2.679199457168579,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.13191153238546605,
|
|
"grad_norm": 19.192049393351457,
|
|
"learning_rate": 4.368421052631579e-06,
|
|
"loss": 2.2504091262817383,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.13270142180094788,
|
|
"grad_norm": 15.388906369037276,
|
|
"learning_rate": 4.394736842105263e-06,
|
|
"loss": 2.8348331451416016,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.1334913112164297,
|
|
"grad_norm": 16.961630671510154,
|
|
"learning_rate": 4.4210526315789476e-06,
|
|
"loss": 3.5437369346618652,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.13428120063191154,
|
|
"grad_norm": 19.29309682197195,
|
|
"learning_rate": 4.447368421052632e-06,
|
|
"loss": 3.0312232971191406,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.13507109004739337,
|
|
"grad_norm": 12.4885877307498,
|
|
"learning_rate": 4.473684210526316e-06,
|
|
"loss": 2.930471658706665,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.1358609794628752,
|
|
"grad_norm": 9.308821835922053,
|
|
"learning_rate": 4.5e-06,
|
|
"loss": 2.809572696685791,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.13665086887835703,
|
|
"grad_norm": 22.10410295713834,
|
|
"learning_rate": 4.526315789473685e-06,
|
|
"loss": 2.782520294189453,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.13744075829383887,
|
|
"grad_norm": 9.424258451368976,
|
|
"learning_rate": 4.552631578947369e-06,
|
|
"loss": 3.311187267303467,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.1382306477093207,
|
|
"grad_norm": 11.81370027596634,
|
|
"learning_rate": 4.578947368421053e-06,
|
|
"loss": 2.8037075996398926,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.13902053712480253,
|
|
"grad_norm": 19.14018612073658,
|
|
"learning_rate": 4.605263157894737e-06,
|
|
"loss": 2.7607202529907227,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.13981042654028436,
|
|
"grad_norm": 9.985114785424416,
|
|
"learning_rate": 4.631578947368421e-06,
|
|
"loss": 3.2012553215026855,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.1406003159557662,
|
|
"grad_norm": 9.786086973550603,
|
|
"learning_rate": 4.657894736842106e-06,
|
|
"loss": 2.4347238540649414,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.14139020537124802,
|
|
"grad_norm": 8.769407464343628,
|
|
"learning_rate": 4.68421052631579e-06,
|
|
"loss": 2.9895355701446533,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.14218009478672985,
|
|
"grad_norm": 10.531181285243829,
|
|
"learning_rate": 4.710526315789474e-06,
|
|
"loss": 2.5453662872314453,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.1429699842022117,
|
|
"grad_norm": 11.460636592646704,
|
|
"learning_rate": 4.736842105263158e-06,
|
|
"loss": 2.439375400543213,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.14375987361769352,
|
|
"grad_norm": 11.905830261985201,
|
|
"learning_rate": 4.763157894736842e-06,
|
|
"loss": 4.2906694412231445,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.14454976303317535,
|
|
"grad_norm": 16.583174963846417,
|
|
"learning_rate": 4.789473684210527e-06,
|
|
"loss": 3.135786533355713,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.14533965244865718,
|
|
"grad_norm": 16.327103894696393,
|
|
"learning_rate": 4.815789473684211e-06,
|
|
"loss": 1.763169288635254,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.146129541864139,
|
|
"grad_norm": 9.224173810150578,
|
|
"learning_rate": 4.842105263157895e-06,
|
|
"loss": 2.444568157196045,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.14691943127962084,
|
|
"grad_norm": 13.627965372331012,
|
|
"learning_rate": 4.8684210526315795e-06,
|
|
"loss": 2.6934571266174316,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.14770932069510267,
|
|
"grad_norm": 11.077507415528911,
|
|
"learning_rate": 4.894736842105264e-06,
|
|
"loss": 2.9741287231445312,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.1484992101105845,
|
|
"grad_norm": 11.532032255217114,
|
|
"learning_rate": 4.921052631578948e-06,
|
|
"loss": 2.824622869491577,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.14928909952606634,
|
|
"grad_norm": 18.679360225668912,
|
|
"learning_rate": 4.947368421052632e-06,
|
|
"loss": 3.0964691638946533,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.1500789889415482,
|
|
"grad_norm": 9.620068127723037,
|
|
"learning_rate": 4.973684210526316e-06,
|
|
"loss": 2.981693744659424,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.15086887835703003,
|
|
"grad_norm": 7.772300956867857,
|
|
"learning_rate": 5e-06,
|
|
"loss": 2.799048662185669,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.15165876777251186,
|
|
"grad_norm": 12.414480771670288,
|
|
"learning_rate": 5.026315789473685e-06,
|
|
"loss": 3.1177570819854736,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.1524486571879937,
|
|
"grad_norm": 12.641305628552601,
|
|
"learning_rate": 5.052631578947369e-06,
|
|
"loss": 2.37302827835083,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.15323854660347552,
|
|
"grad_norm": 11.55906392820543,
|
|
"learning_rate": 5.078947368421053e-06,
|
|
"loss": 2.5041732788085938,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.15402843601895735,
|
|
"grad_norm": 7.7791945091120285,
|
|
"learning_rate": 5.105263157894738e-06,
|
|
"loss": 2.874569892883301,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.15481832543443919,
|
|
"grad_norm": 10.474120784603407,
|
|
"learning_rate": 5.131578947368422e-06,
|
|
"loss": 2.8549094200134277,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.15560821484992102,
|
|
"grad_norm": 7.993346143459158,
|
|
"learning_rate": 5.157894736842106e-06,
|
|
"loss": 2.993251085281372,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.15639810426540285,
|
|
"grad_norm": 20.941550789609742,
|
|
"learning_rate": 5.18421052631579e-06,
|
|
"loss": 4.168525695800781,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.15718799368088468,
|
|
"grad_norm": 12.528721144731515,
|
|
"learning_rate": 5.210526315789474e-06,
|
|
"loss": 2.4273793697357178,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.1579778830963665,
|
|
"grad_norm": 11.286529989353973,
|
|
"learning_rate": 5.236842105263158e-06,
|
|
"loss": 3.011016368865967,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.15876777251184834,
|
|
"grad_norm": 17.406854394281535,
|
|
"learning_rate": 5.263157894736842e-06,
|
|
"loss": 2.8892858028411865,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.15955766192733017,
|
|
"grad_norm": 12.78306492867607,
|
|
"learning_rate": 5.289473684210526e-06,
|
|
"loss": 2.4699714183807373,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.160347551342812,
|
|
"grad_norm": 12.969828387945576,
|
|
"learning_rate": 5.315789473684211e-06,
|
|
"loss": 2.7423977851867676,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.16113744075829384,
|
|
"grad_norm": 7.657893790006663,
|
|
"learning_rate": 5.342105263157895e-06,
|
|
"loss": 2.7218151092529297,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.16192733017377567,
|
|
"grad_norm": 9.492276008453782,
|
|
"learning_rate": 5.36842105263158e-06,
|
|
"loss": 2.555281639099121,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.1627172195892575,
|
|
"grad_norm": 13.463617124979155,
|
|
"learning_rate": 5.394736842105264e-06,
|
|
"loss": 3.284069538116455,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.16350710900473933,
|
|
"grad_norm": 12.35995760266546,
|
|
"learning_rate": 5.421052631578948e-06,
|
|
"loss": 2.8107781410217285,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.16429699842022116,
|
|
"grad_norm": 22.996410093914797,
|
|
"learning_rate": 5.447368421052632e-06,
|
|
"loss": 2.7325127124786377,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.165086887835703,
|
|
"grad_norm": 16.585705791801974,
|
|
"learning_rate": 5.4736842105263165e-06,
|
|
"loss": 2.7490532398223877,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.16587677725118483,
|
|
"grad_norm": 8.638152990987923,
|
|
"learning_rate": 5.500000000000001e-06,
|
|
"loss": 3.5097951889038086,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.16666666666666666,
|
|
"grad_norm": 13.734843553912885,
|
|
"learning_rate": 5.526315789473685e-06,
|
|
"loss": 3.340397834777832,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.1674565560821485,
|
|
"grad_norm": 32.571959419532426,
|
|
"learning_rate": 5.552631578947368e-06,
|
|
"loss": 3.0845370292663574,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.16824644549763032,
|
|
"grad_norm": 12.411694267674704,
|
|
"learning_rate": 5.578947368421052e-06,
|
|
"loss": 3.3935816287994385,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.16903633491311215,
|
|
"grad_norm": 8.554865869690868,
|
|
"learning_rate": 5.605263157894737e-06,
|
|
"loss": 2.5191092491149902,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.16982622432859398,
|
|
"grad_norm": 13.713331969172893,
|
|
"learning_rate": 5.631578947368422e-06,
|
|
"loss": 2.265045166015625,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.17061611374407584,
|
|
"grad_norm": 8.759424141686443,
|
|
"learning_rate": 5.657894736842106e-06,
|
|
"loss": 2.9024651050567627,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.17140600315955767,
|
|
"grad_norm": 20.192932719198154,
|
|
"learning_rate": 5.68421052631579e-06,
|
|
"loss": 3.174898624420166,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.1721958925750395,
|
|
"grad_norm": 9.300452899615632,
|
|
"learning_rate": 5.710526315789474e-06,
|
|
"loss": 2.203352212905884,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.17298578199052134,
|
|
"grad_norm": 16.964918499584563,
|
|
"learning_rate": 5.736842105263158e-06,
|
|
"loss": 2.399977684020996,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.17377567140600317,
|
|
"grad_norm": 20.569787010147447,
|
|
"learning_rate": 5.7631578947368425e-06,
|
|
"loss": 3.2228713035583496,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.174565560821485,
|
|
"grad_norm": 10.59360929404439,
|
|
"learning_rate": 5.789473684210527e-06,
|
|
"loss": 2.613041639328003,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.17535545023696683,
|
|
"grad_norm": 12.774907668976972,
|
|
"learning_rate": 5.815789473684212e-06,
|
|
"loss": 3.2169127464294434,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.17614533965244866,
|
|
"grad_norm": 38.185104925418536,
|
|
"learning_rate": 5.842105263157896e-06,
|
|
"loss": 3.227830648422241,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.1769352290679305,
|
|
"grad_norm": 15.337225837697051,
|
|
"learning_rate": 5.86842105263158e-06,
|
|
"loss": 2.9216299057006836,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.17772511848341233,
|
|
"grad_norm": 19.652283271220284,
|
|
"learning_rate": 5.8947368421052634e-06,
|
|
"loss": 3.0096397399902344,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.17851500789889416,
|
|
"grad_norm": 21.342277640312286,
|
|
"learning_rate": 5.921052631578948e-06,
|
|
"loss": 2.2839317321777344,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.179304897314376,
|
|
"grad_norm": 18.794368318684786,
|
|
"learning_rate": 5.947368421052632e-06,
|
|
"loss": 3.2453808784484863,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.18009478672985782,
|
|
"grad_norm": 20.372960047849478,
|
|
"learning_rate": 5.973684210526316e-06,
|
|
"loss": 3.73872447013855,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.18088467614533965,
|
|
"grad_norm": 7.477591538110839,
|
|
"learning_rate": 6e-06,
|
|
"loss": 2.9210574626922607,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.18167456556082148,
|
|
"grad_norm": 20.286752935347092,
|
|
"learning_rate": 6.026315789473684e-06,
|
|
"loss": 3.0264639854431152,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.18246445497630331,
|
|
"grad_norm": 7.752616268806402,
|
|
"learning_rate": 6.0526315789473685e-06,
|
|
"loss": 2.5452468395233154,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.18325434439178515,
|
|
"grad_norm": 14.021383263434027,
|
|
"learning_rate": 6.0789473684210535e-06,
|
|
"loss": 3.122269868850708,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.18404423380726698,
|
|
"grad_norm": 31.61505834794581,
|
|
"learning_rate": 6.105263157894738e-06,
|
|
"loss": 2.652003765106201,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.1848341232227488,
|
|
"grad_norm": 14.714502845759036,
|
|
"learning_rate": 6.131578947368422e-06,
|
|
"loss": 3.3281772136688232,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.18562401263823064,
|
|
"grad_norm": 8.281361641079902,
|
|
"learning_rate": 6.157894736842106e-06,
|
|
"loss": 2.793631076812744,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.18641390205371247,
|
|
"grad_norm": 18.19614005385017,
|
|
"learning_rate": 6.18421052631579e-06,
|
|
"loss": 2.75974178314209,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.1872037914691943,
|
|
"grad_norm": 14.062302050239529,
|
|
"learning_rate": 6.2105263157894745e-06,
|
|
"loss": 3.2623343467712402,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.18799368088467613,
|
|
"grad_norm": 11.273662081089574,
|
|
"learning_rate": 6.236842105263159e-06,
|
|
"loss": 3.017319440841675,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.18878357030015797,
|
|
"grad_norm": 12.442581117618769,
|
|
"learning_rate": 6.263157894736842e-06,
|
|
"loss": 2.9280037879943848,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.1895734597156398,
|
|
"grad_norm": 14.868405534686683,
|
|
"learning_rate": 6.289473684210526e-06,
|
|
"loss": 4.794824123382568,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.19036334913112166,
|
|
"grad_norm": 18.342383362134704,
|
|
"learning_rate": 6.31578947368421e-06,
|
|
"loss": 2.6316001415252686,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.1911532385466035,
|
|
"grad_norm": 12.903091182966538,
|
|
"learning_rate": 6.342105263157895e-06,
|
|
"loss": 3.190481185913086,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.19194312796208532,
|
|
"grad_norm": 13.060648808489958,
|
|
"learning_rate": 6.3684210526315795e-06,
|
|
"loss": 2.072887420654297,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.19273301737756715,
|
|
"grad_norm": 10.434484710036394,
|
|
"learning_rate": 6.394736842105264e-06,
|
|
"loss": 3.4056625366210938,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.19352290679304898,
|
|
"grad_norm": 17.07700555394061,
|
|
"learning_rate": 6.421052631578948e-06,
|
|
"loss": 3.1885697841644287,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.1943127962085308,
|
|
"grad_norm": 11.242377631499217,
|
|
"learning_rate": 6.447368421052632e-06,
|
|
"loss": 2.7972757816314697,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.19510268562401265,
|
|
"grad_norm": 10.122508148985222,
|
|
"learning_rate": 6.473684210526316e-06,
|
|
"loss": 2.625680923461914,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.19589257503949448,
|
|
"grad_norm": 15.194302817759452,
|
|
"learning_rate": 6.5000000000000004e-06,
|
|
"loss": 3.370196580886841,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.1966824644549763,
|
|
"grad_norm": 9.367476542559452,
|
|
"learning_rate": 6.526315789473685e-06,
|
|
"loss": 3.2335524559020996,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.19747235387045814,
|
|
"grad_norm": 13.638698209387572,
|
|
"learning_rate": 6.55263157894737e-06,
|
|
"loss": 3.060309410095215,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.19826224328593997,
|
|
"grad_norm": 21.504696461723928,
|
|
"learning_rate": 6.578947368421054e-06,
|
|
"loss": 2.5595827102661133,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.1990521327014218,
|
|
"grad_norm": 12.760844619202933,
|
|
"learning_rate": 6.605263157894738e-06,
|
|
"loss": 2.9294533729553223,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.19984202211690363,
|
|
"grad_norm": 16.14327478944924,
|
|
"learning_rate": 6.631578947368421e-06,
|
|
"loss": 3.1590025424957275,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.20063191153238547,
|
|
"grad_norm": 10.670002260991126,
|
|
"learning_rate": 6.6578947368421055e-06,
|
|
"loss": 2.868878126144409,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.2014218009478673,
|
|
"grad_norm": 7.410086829781492,
|
|
"learning_rate": 6.68421052631579e-06,
|
|
"loss": 2.524019479751587,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.20221169036334913,
|
|
"grad_norm": 9.676832864068576,
|
|
"learning_rate": 6.710526315789474e-06,
|
|
"loss": 2.7657065391540527,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.20300157977883096,
|
|
"grad_norm": 15.950580447163818,
|
|
"learning_rate": 6.736842105263158e-06,
|
|
"loss": 3.229970932006836,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.2037914691943128,
|
|
"grad_norm": 15.60324691981264,
|
|
"learning_rate": 6.763157894736842e-06,
|
|
"loss": 2.7795071601867676,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.20458135860979462,
|
|
"grad_norm": 7.647680832081348,
|
|
"learning_rate": 6.789473684210527e-06,
|
|
"loss": 2.202699661254883,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.20537124802527645,
|
|
"grad_norm": 11.113708735727473,
|
|
"learning_rate": 6.8157894736842115e-06,
|
|
"loss": 3.018834114074707,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.20616113744075829,
|
|
"grad_norm": 19.140215467090133,
|
|
"learning_rate": 6.842105263157896e-06,
|
|
"loss": 2.560054063796997,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.20695102685624012,
|
|
"grad_norm": 9.162828639443463,
|
|
"learning_rate": 6.86842105263158e-06,
|
|
"loss": 2.5356526374816895,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.20774091627172195,
|
|
"grad_norm": 13.818767936109861,
|
|
"learning_rate": 6.894736842105264e-06,
|
|
"loss": 3.1693482398986816,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.20853080568720378,
|
|
"grad_norm": 18.595765057532514,
|
|
"learning_rate": 6.921052631578948e-06,
|
|
"loss": 2.3949215412139893,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.2093206951026856,
|
|
"grad_norm": 17.318744899310534,
|
|
"learning_rate": 6.947368421052632e-06,
|
|
"loss": 3.075978994369507,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.21011058451816747,
|
|
"grad_norm": 24.655257168385077,
|
|
"learning_rate": 6.973684210526316e-06,
|
|
"loss": 3.427466869354248,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.2109004739336493,
|
|
"grad_norm": 15.241527713725562,
|
|
"learning_rate": 7e-06,
|
|
"loss": 2.109011650085449,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.21169036334913113,
|
|
"grad_norm": 8.501293757028371,
|
|
"learning_rate": 7.026315789473684e-06,
|
|
"loss": 3.112001419067383,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.21248025276461296,
|
|
"grad_norm": 15.448418886330026,
|
|
"learning_rate": 7.052631578947369e-06,
|
|
"loss": 1.7198365926742554,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.2132701421800948,
|
|
"grad_norm": 9.545915874496153,
|
|
"learning_rate": 7.078947368421053e-06,
|
|
"loss": 2.8806190490722656,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.21406003159557663,
|
|
"grad_norm": 18.727295292899043,
|
|
"learning_rate": 7.1052631578947375e-06,
|
|
"loss": 3.1914749145507812,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.21484992101105846,
|
|
"grad_norm": 19.609264443447998,
|
|
"learning_rate": 7.131578947368422e-06,
|
|
"loss": 3.506901741027832,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.2156398104265403,
|
|
"grad_norm": 13.136262515857457,
|
|
"learning_rate": 7.157894736842106e-06,
|
|
"loss": 3.197396755218506,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.21642969984202212,
|
|
"grad_norm": 23.821750570981514,
|
|
"learning_rate": 7.18421052631579e-06,
|
|
"loss": 3.0057897567749023,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.21721958925750395,
|
|
"grad_norm": 12.168381845745005,
|
|
"learning_rate": 7.210526315789474e-06,
|
|
"loss": 3.459364652633667,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.21800947867298578,
|
|
"grad_norm": 17.00612657277193,
|
|
"learning_rate": 7.236842105263158e-06,
|
|
"loss": 3.4900269508361816,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.21879936808846762,
|
|
"grad_norm": 10.283758953220772,
|
|
"learning_rate": 7.263157894736843e-06,
|
|
"loss": 3.3802132606506348,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.21958925750394945,
|
|
"grad_norm": 16.02360521858266,
|
|
"learning_rate": 7.289473684210528e-06,
|
|
"loss": 3.455819606781006,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.22037914691943128,
|
|
"grad_norm": 11.274076685112723,
|
|
"learning_rate": 7.315789473684212e-06,
|
|
"loss": 2.847879409790039,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.2211690363349131,
|
|
"grad_norm": 15.606199898777566,
|
|
"learning_rate": 7.342105263157895e-06,
|
|
"loss": 3.4169538021087646,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.22195892575039494,
|
|
"grad_norm": 12.898430910622961,
|
|
"learning_rate": 7.368421052631579e-06,
|
|
"loss": 3.030590772628784,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.22274881516587677,
|
|
"grad_norm": 15.500513401800355,
|
|
"learning_rate": 7.3947368421052635e-06,
|
|
"loss": 2.649216890335083,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.2235387045813586,
|
|
"grad_norm": 18.378896929038152,
|
|
"learning_rate": 7.421052631578948e-06,
|
|
"loss": 3.274688720703125,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.22432859399684044,
|
|
"grad_norm": 9.090747343971215,
|
|
"learning_rate": 7.447368421052632e-06,
|
|
"loss": 2.7677786350250244,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.22511848341232227,
|
|
"grad_norm": 11.906482510855422,
|
|
"learning_rate": 7.473684210526316e-06,
|
|
"loss": 3.239955186843872,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.2259083728278041,
|
|
"grad_norm": 11.474852988282166,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 3.009387969970703,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.22669826224328593,
|
|
"grad_norm": 12.575433605163134,
|
|
"learning_rate": 7.526315789473685e-06,
|
|
"loss": 2.9016571044921875,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.22748815165876776,
|
|
"grad_norm": 14.9975427549329,
|
|
"learning_rate": 7.552631578947369e-06,
|
|
"loss": 3.257054328918457,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.2282780410742496,
|
|
"grad_norm": 26.71508060406798,
|
|
"learning_rate": 7.578947368421054e-06,
|
|
"loss": 2.75146222114563,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.22906793048973143,
|
|
"grad_norm": 16.910026550475397,
|
|
"learning_rate": 7.605263157894738e-06,
|
|
"loss": 2.757063388824463,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.22985781990521326,
|
|
"grad_norm": 9.680373357034156,
|
|
"learning_rate": 7.631578947368423e-06,
|
|
"loss": 2.7959532737731934,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.23064770932069512,
|
|
"grad_norm": 12.18894296936391,
|
|
"learning_rate": 7.657894736842106e-06,
|
|
"loss": 2.466348171234131,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.23143759873617695,
|
|
"grad_norm": 10.475566667489755,
|
|
"learning_rate": 7.68421052631579e-06,
|
|
"loss": 3.3009557723999023,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.23222748815165878,
|
|
"grad_norm": 51.393448155359216,
|
|
"learning_rate": 7.710526315789474e-06,
|
|
"loss": 2.725738048553467,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.2330173775671406,
|
|
"grad_norm": 14.358238125160575,
|
|
"learning_rate": 7.736842105263158e-06,
|
|
"loss": 2.783334493637085,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.23380726698262244,
|
|
"grad_norm": 19.57667640214124,
|
|
"learning_rate": 7.763157894736843e-06,
|
|
"loss": 3.1590988636016846,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.23459715639810427,
|
|
"grad_norm": 12.972851406285965,
|
|
"learning_rate": 7.789473684210526e-06,
|
|
"loss": 3.1896674633026123,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.2353870458135861,
|
|
"grad_norm": 10.525192505195403,
|
|
"learning_rate": 7.815789473684211e-06,
|
|
"loss": 2.8256943225860596,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.23617693522906794,
|
|
"grad_norm": 11.099287801098866,
|
|
"learning_rate": 7.842105263157895e-06,
|
|
"loss": 3.2492432594299316,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.23696682464454977,
|
|
"grad_norm": 9.230404731264265,
|
|
"learning_rate": 7.86842105263158e-06,
|
|
"loss": 2.5488808155059814,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.2377567140600316,
|
|
"grad_norm": 15.341199592416597,
|
|
"learning_rate": 7.894736842105265e-06,
|
|
"loss": 2.4112818241119385,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.23854660347551343,
|
|
"grad_norm": 7.319063117780196,
|
|
"learning_rate": 7.921052631578948e-06,
|
|
"loss": 3.260639190673828,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.23933649289099526,
|
|
"grad_norm": 11.851958212642176,
|
|
"learning_rate": 7.947368421052633e-06,
|
|
"loss": 3.049391269683838,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.2401263823064771,
|
|
"grad_norm": 28.13791336194168,
|
|
"learning_rate": 7.973684210526316e-06,
|
|
"loss": 4.401567459106445,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.24091627172195892,
|
|
"grad_norm": 9.429106394158737,
|
|
"learning_rate": 8.000000000000001e-06,
|
|
"loss": 2.9501960277557373,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.24170616113744076,
|
|
"grad_norm": 7.158645831331756,
|
|
"learning_rate": 8.026315789473685e-06,
|
|
"loss": 2.5581390857696533,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.2424960505529226,
|
|
"grad_norm": 12.768920646366887,
|
|
"learning_rate": 8.052631578947368e-06,
|
|
"loss": 3.1385931968688965,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.24328593996840442,
|
|
"grad_norm": 12.913024370975956,
|
|
"learning_rate": 8.078947368421053e-06,
|
|
"loss": 2.7068610191345215,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.24407582938388625,
|
|
"grad_norm": 10.536088937809055,
|
|
"learning_rate": 8.105263157894736e-06,
|
|
"loss": 2.828160524368286,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.24486571879936808,
|
|
"grad_norm": 8.341704390314765,
|
|
"learning_rate": 8.131578947368421e-06,
|
|
"loss": 2.663266658782959,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.2456556082148499,
|
|
"grad_norm": 11.240401421047006,
|
|
"learning_rate": 8.157894736842106e-06,
|
|
"loss": 3.2981982231140137,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.24644549763033174,
|
|
"grad_norm": 11.187665628610933,
|
|
"learning_rate": 8.18421052631579e-06,
|
|
"loss": 2.5415968894958496,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.24723538704581358,
|
|
"grad_norm": 11.94500524279879,
|
|
"learning_rate": 8.210526315789475e-06,
|
|
"loss": 2.7816240787506104,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.2480252764612954,
|
|
"grad_norm": 6.355221325715025,
|
|
"learning_rate": 8.236842105263158e-06,
|
|
"loss": 2.686246395111084,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.24881516587677724,
|
|
"grad_norm": 17.979652703894846,
|
|
"learning_rate": 8.263157894736843e-06,
|
|
"loss": 3.970867395401001,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.24960505529225907,
|
|
"grad_norm": 11.300024279923099,
|
|
"learning_rate": 8.289473684210526e-06,
|
|
"loss": 2.6537764072418213,
|
|
"step": 316
|
|
},
{
"epoch": 0.25039494470774093,
"grad_norm": 14.279900232687705,
"learning_rate": 8.315789473684212e-06,
"loss": 2.353628396987915,
"step": 317
},
{
"epoch": 0.25118483412322273,
"grad_norm": 15.211272553278281,
"learning_rate": 8.342105263157897e-06,
"loss": 2.978910207748413,
"step": 318
},
{
"epoch": 0.2519747235387046,
"grad_norm": 10.672333315833844,
"learning_rate": 8.36842105263158e-06,
"loss": 3.0564768314361572,
"step": 319
},
{
"epoch": 0.2527646129541864,
"grad_norm": 13.177853507822194,
"learning_rate": 8.394736842105263e-06,
"loss": 3.3562185764312744,
"step": 320
},
{
"epoch": 0.25355450236966826,
"grad_norm": 13.208149326921594,
"learning_rate": 8.421052631578948e-06,
"loss": 2.7793097496032715,
"step": 321
},
{
"epoch": 0.25434439178515006,
"grad_norm": 7.862272858844339,
"learning_rate": 8.447368421052632e-06,
"loss": 2.6758689880371094,
"step": 322
},
{
"epoch": 0.2551342812006319,
"grad_norm": 8.599054067109956,
"learning_rate": 8.473684210526317e-06,
"loss": 2.71248722076416,
"step": 323
},
{
"epoch": 0.2559241706161137,
"grad_norm": 7.541371370024793,
"learning_rate": 8.5e-06,
"loss": 2.3310184478759766,
"step": 324
},
{
"epoch": 0.2567140600315956,
"grad_norm": 7.201546562450413,
"learning_rate": 8.526315789473685e-06,
"loss": 2.957345962524414,
"step": 325
},
{
"epoch": 0.2575039494470774,
"grad_norm": 12.601533616205508,
"learning_rate": 8.552631578947368e-06,
"loss": 3.0129475593566895,
"step": 326
},
{
"epoch": 0.25829383886255924,
"grad_norm": 11.854948826772384,
"learning_rate": 8.578947368421053e-06,
"loss": 2.7769176959991455,
"step": 327
},
{
"epoch": 0.25908372827804105,
"grad_norm": 10.669512492571942,
"learning_rate": 8.605263157894738e-06,
"loss": 2.550236940383911,
"step": 328
},
{
"epoch": 0.2598736176935229,
"grad_norm": 17.79545384444541,
"learning_rate": 8.631578947368422e-06,
"loss": 2.8490400314331055,
"step": 329
},
{
"epoch": 0.26066350710900477,
"grad_norm": 14.8135974727651,
"learning_rate": 8.657894736842107e-06,
"loss": 3.2657470703125,
"step": 330
},
{
"epoch": 0.26145339652448657,
"grad_norm": 6.186576343564378,
"learning_rate": 8.68421052631579e-06,
"loss": 2.884676456451416,
"step": 331
},
{
"epoch": 0.26224328593996843,
"grad_norm": 10.116320453707134,
"learning_rate": 8.710526315789475e-06,
"loss": 2.7804079055786133,
"step": 332
},
{
"epoch": 0.26303317535545023,
"grad_norm": 9.185373101458339,
"learning_rate": 8.736842105263158e-06,
"loss": 3.2659783363342285,
"step": 333
},
{
"epoch": 0.2638230647709321,
"grad_norm": 12.059802052206885,
"learning_rate": 8.763157894736842e-06,
"loss": 2.868699073791504,
"step": 334
},
{
"epoch": 0.2646129541864139,
"grad_norm": 9.397740276905777,
"learning_rate": 8.789473684210527e-06,
"loss": 2.720752716064453,
"step": 335
},
{
"epoch": 0.26540284360189575,
"grad_norm": 15.209731085255642,
"learning_rate": 8.81578947368421e-06,
"loss": 2.566018581390381,
"step": 336
},
{
"epoch": 0.26619273301737756,
"grad_norm": 9.966999191876049,
"learning_rate": 8.842105263157895e-06,
"loss": 3.5632197856903076,
"step": 337
},
{
"epoch": 0.2669826224328594,
"grad_norm": 13.910376272854188,
"learning_rate": 8.86842105263158e-06,
"loss": 2.7050907611846924,
"step": 338
},
{
"epoch": 0.2677725118483412,
"grad_norm": 16.554248620157857,
"learning_rate": 8.894736842105264e-06,
"loss": 2.687314987182617,
"step": 339
},
{
"epoch": 0.2685624012638231,
"grad_norm": 8.06865971019336,
"learning_rate": 8.921052631578949e-06,
"loss": 2.154885768890381,
"step": 340
},
{
"epoch": 0.2693522906793049,
"grad_norm": 11.43463991295312,
"learning_rate": 8.947368421052632e-06,
"loss": 3.1463260650634766,
"step": 341
},
{
"epoch": 0.27014218009478674,
"grad_norm": 8.94663093119559,
"learning_rate": 8.973684210526317e-06,
"loss": 2.7585976123809814,
"step": 342
},
{
"epoch": 0.27093206951026855,
"grad_norm": 12.607303575239936,
"learning_rate": 9e-06,
"loss": 2.725893974304199,
"step": 343
},
{
"epoch": 0.2717219589257504,
"grad_norm": 40.64523343780517,
"learning_rate": 9.026315789473685e-06,
"loss": 4.318365097045898,
"step": 344
},
{
"epoch": 0.2725118483412322,
"grad_norm": 13.253916224420975,
"learning_rate": 9.05263157894737e-06,
"loss": 3.0328493118286133,
"step": 345
},
{
"epoch": 0.27330173775671407,
"grad_norm": 10.67307265402947,
"learning_rate": 9.078947368421054e-06,
"loss": 3.1137566566467285,
"step": 346
},
{
"epoch": 0.2740916271721959,
"grad_norm": 21.521088237334375,
"learning_rate": 9.105263157894739e-06,
"loss": 2.418055295944214,
"step": 347
},
{
"epoch": 0.27488151658767773,
"grad_norm": 11.029078243075645,
"learning_rate": 9.131578947368422e-06,
"loss": 2.902218818664551,
"step": 348
},
{
"epoch": 0.27567140600315954,
"grad_norm": 9.383500534244467,
"learning_rate": 9.157894736842105e-06,
"loss": 3.282095432281494,
"step": 349
},
{
"epoch": 0.2764612954186414,
"grad_norm": 12.131725163736556,
"learning_rate": 9.18421052631579e-06,
"loss": 2.7594404220581055,
"step": 350
},
{
"epoch": 0.2772511848341232,
"grad_norm": 8.647890182750494,
"learning_rate": 9.210526315789474e-06,
"loss": 2.8441665172576904,
"step": 351
},
{
"epoch": 0.27804107424960506,
"grad_norm": 24.531984631457014,
"learning_rate": 9.236842105263159e-06,
"loss": 2.7135703563690186,
"step": 352
},
{
"epoch": 0.27883096366508686,
"grad_norm": 18.852163365198667,
"learning_rate": 9.263157894736842e-06,
"loss": 2.604950428009033,
"step": 353
},
{
"epoch": 0.2796208530805687,
"grad_norm": 8.864599334587579,
"learning_rate": 9.289473684210527e-06,
"loss": 2.4473190307617188,
"step": 354
},
{
"epoch": 0.2804107424960506,
"grad_norm": 12.76373613296727,
"learning_rate": 9.315789473684212e-06,
"loss": 2.7692112922668457,
"step": 355
},
{
"epoch": 0.2812006319115324,
"grad_norm": 13.999019935363314,
"learning_rate": 9.342105263157895e-06,
"loss": 2.699820041656494,
"step": 356
},
{
"epoch": 0.28199052132701424,
"grad_norm": 12.316515560670386,
"learning_rate": 9.36842105263158e-06,
"loss": 2.9352567195892334,
"step": 357
},
{
"epoch": 0.28278041074249605,
"grad_norm": 12.51002958910899,
"learning_rate": 9.394736842105264e-06,
"loss": 3.0598864555358887,
"step": 358
},
{
"epoch": 0.2835703001579779,
"grad_norm": 9.85811587507726,
"learning_rate": 9.421052631578949e-06,
"loss": 2.760265588760376,
"step": 359
},
{
"epoch": 0.2843601895734597,
"grad_norm": 6.097027548328658,
"learning_rate": 9.447368421052632e-06,
"loss": 2.914360761642456,
"step": 360
},
{
"epoch": 0.28515007898894157,
"grad_norm": 7.443161559538115,
"learning_rate": 9.473684210526315e-06,
"loss": 2.4574689865112305,
"step": 361
},
{
"epoch": 0.2859399684044234,
"grad_norm": 11.482751191563155,
"learning_rate": 9.5e-06,
"loss": 2.4862399101257324,
"step": 362
},
{
"epoch": 0.28672985781990523,
"grad_norm": 15.424578264173912,
"learning_rate": 9.526315789473684e-06,
"loss": 3.046543598175049,
"step": 363
},
{
"epoch": 0.28751974723538704,
"grad_norm": 9.948975453360674,
"learning_rate": 9.552631578947369e-06,
"loss": 2.4152588844299316,
"step": 364
},
{
"epoch": 0.2883096366508689,
"grad_norm": 13.617591686696079,
"learning_rate": 9.578947368421054e-06,
"loss": 2.736586332321167,
"step": 365
},
{
"epoch": 0.2890995260663507,
"grad_norm": 15.14504958752738,
"learning_rate": 9.605263157894737e-06,
"loss": 2.4989566802978516,
"step": 366
},
{
"epoch": 0.28988941548183256,
"grad_norm": 10.799521972148789,
"learning_rate": 9.631578947368422e-06,
"loss": 3.044905424118042,
"step": 367
},
{
"epoch": 0.29067930489731436,
"grad_norm": 12.761463042249291,
"learning_rate": 9.657894736842106e-06,
"loss": 3.2041683197021484,
"step": 368
},
{
"epoch": 0.2914691943127962,
"grad_norm": 12.283682401493921,
"learning_rate": 9.68421052631579e-06,
"loss": 2.799464225769043,
"step": 369
},
{
"epoch": 0.292259083728278,
"grad_norm": 8.71212269177479,
"learning_rate": 9.710526315789474e-06,
"loss": 2.7430920600891113,
"step": 370
},
{
"epoch": 0.2930489731437599,
"grad_norm": 9.849085667279017,
"learning_rate": 9.736842105263159e-06,
"loss": 2.5595669746398926,
"step": 371
},
{
"epoch": 0.2938388625592417,
"grad_norm": 10.009176803122426,
"learning_rate": 9.763157894736844e-06,
"loss": 2.3133273124694824,
"step": 372
},
{
"epoch": 0.29462875197472355,
"grad_norm": 12.273926407605984,
"learning_rate": 9.789473684210527e-06,
"loss": 3.021390438079834,
"step": 373
},
{
"epoch": 0.29541864139020535,
"grad_norm": 11.93736332674034,
"learning_rate": 9.815789473684212e-06,
"loss": 2.2160844802856445,
"step": 374
},
{
"epoch": 0.2962085308056872,
"grad_norm": 12.560377306481286,
"learning_rate": 9.842105263157896e-06,
"loss": 3.1752209663391113,
"step": 375
},
{
"epoch": 0.296998420221169,
"grad_norm": 11.075691497999232,
"learning_rate": 9.868421052631579e-06,
"loss": 3.2403969764709473,
"step": 376
},
|
|
{
|
|
"epoch": 0.29778830963665087,
|
|
"grad_norm": 9.127329916365918,
|
|
"learning_rate": 9.894736842105264e-06,
|
|
"loss": 2.920243501663208,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.2985781990521327,
|
|
"grad_norm": 20.657276819664517,
|
|
"learning_rate": 9.921052631578947e-06,
|
|
"loss": 2.298133134841919,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.29936808846761453,
|
|
"grad_norm": 11.374882419716124,
|
|
"learning_rate": 9.947368421052632e-06,
|
|
"loss": 3.370616912841797,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.3001579778830964,
|
|
"grad_norm": 62.541878662922215,
|
|
"learning_rate": 9.973684210526316e-06,
|
|
"loss": 2.8304061889648438,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.3009478672985782,
|
|
"grad_norm": 12.325577555726225,
|
|
"learning_rate": 1e-05,
|
|
"loss": 2.761497974395752,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.30173775671406006,
|
|
"grad_norm": 12.217449721945043,
|
|
"learning_rate": 9.999997887991768e-06,
|
|
"loss": 2.8636984825134277,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.30252764612954186,
|
|
"grad_norm": 14.74250753809572,
|
|
"learning_rate": 9.999991551968855e-06,
|
|
"loss": 3.0699851512908936,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.3033175355450237,
|
|
"grad_norm": 10.26093399476213,
|
|
"learning_rate": 9.999980991936614e-06,
|
|
"loss": 3.05659818649292,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.3041074249605055,
|
|
"grad_norm": 9.938077108356492,
|
|
"learning_rate": 9.999966207903965e-06,
|
|
"loss": 2.55549955368042,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.3048973143759874,
|
|
"grad_norm": 15.044911851031989,
|
|
"learning_rate": 9.999947199883402e-06,
|
|
"loss": 3.1818037033081055,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.3056872037914692,
|
|
"grad_norm": 10.290661955924978,
|
|
"learning_rate": 9.999923967890976e-06,
|
|
"loss": 2.8169567584991455,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.30647709320695105,
|
|
"grad_norm": 10.322447186873362,
|
|
"learning_rate": 9.999896511946318e-06,
|
|
"loss": 3.116442918777466,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.30726698262243285,
|
|
"grad_norm": 11.292349498728926,
|
|
"learning_rate": 9.999864832072623e-06,
|
|
"loss": 3.246173858642578,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.3080568720379147,
|
|
"grad_norm": 15.478127685526808,
|
|
"learning_rate": 9.999828928296653e-06,
|
|
"loss": 2.8075480461120605,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.3088467614533965,
|
|
"grad_norm": 11.742069475711396,
|
|
"learning_rate": 9.999788800648741e-06,
|
|
"loss": 2.6594908237457275,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.30963665086887837,
|
|
"grad_norm": 19.69321922275349,
|
|
"learning_rate": 9.999744449162785e-06,
|
|
"loss": 3.7354040145874023,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.3104265402843602,
|
|
"grad_norm": 29.415130395985084,
|
|
"learning_rate": 9.999695873876255e-06,
|
|
"loss": 3.1815226078033447,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.31121642969984203,
|
|
"grad_norm": 9.941742752266487,
|
|
"learning_rate": 9.999643074830185e-06,
|
|
"loss": 2.3940351009368896,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.31200631911532384,
|
|
"grad_norm": 9.210678237061227,
|
|
"learning_rate": 9.999586052069184e-06,
|
|
"loss": 2.998382568359375,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.3127962085308057,
|
|
"grad_norm": 10.901057718421931,
|
|
"learning_rate": 9.99952480564142e-06,
|
|
"loss": 3.067406177520752,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.3135860979462875,
|
|
"grad_norm": 9.78128771641425,
|
|
"learning_rate": 9.999459335598639e-06,
|
|
"loss": 2.52431058883667,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.31437598736176936,
|
|
"grad_norm": 14.851243022076401,
|
|
"learning_rate": 9.999389641996148e-06,
|
|
"loss": 3.1244137287139893,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.31516587677725116,
|
|
"grad_norm": 14.329659712124773,
|
|
"learning_rate": 9.999315724892824e-06,
|
|
"loss": 2.4937219619750977,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.315955766192733,
|
|
"grad_norm": 20.409817077031228,
|
|
"learning_rate": 9.999237584351112e-06,
|
|
"loss": 3.1036581993103027,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.3167456556082148,
|
|
"grad_norm": 14.805880939701334,
|
|
"learning_rate": 9.999155220437027e-06,
|
|
"loss": 3.108419418334961,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.3175355450236967,
|
|
"grad_norm": 14.542870401516955,
|
|
"learning_rate": 9.99906863322015e-06,
|
|
"loss": 2.667811632156372,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.3183254344391785,
|
|
"grad_norm": 12.987665849474283,
|
|
"learning_rate": 9.99897782277363e-06,
|
|
"loss": 2.661196708679199,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.31911532385466035,
|
|
"grad_norm": 6.198659599172178,
|
|
"learning_rate": 9.998882789174182e-06,
|
|
"loss": 2.734131336212158,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.31990521327014215,
|
|
"grad_norm": 9.512514147413063,
|
|
"learning_rate": 9.998783532502094e-06,
|
|
"loss": 3.1050682067871094,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.320695102685624,
|
|
"grad_norm": 12.163533280198134,
|
|
"learning_rate": 9.998680052841217e-06,
|
|
"loss": 3.1977107524871826,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.32148499210110587,
|
|
"grad_norm": 14.380824912116953,
|
|
"learning_rate": 9.99857235027897e-06,
|
|
"loss": 2.267914056777954,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.3222748815165877,
|
|
"grad_norm": 20.641763401030374,
|
|
"learning_rate": 9.998460424906343e-06,
|
|
"loss": 3.1761436462402344,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.32306477093206953,
|
|
"grad_norm": 13.912380925992013,
|
|
"learning_rate": 9.998344276817888e-06,
|
|
"loss": 2.7362403869628906,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.32385466034755134,
|
|
"grad_norm": 19.47995219931616,
|
|
"learning_rate": 9.998223906111728e-06,
|
|
"loss": 2.5798957347869873,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.3246445497630332,
|
|
"grad_norm": 9.554629411150461,
|
|
"learning_rate": 9.998099312889553e-06,
|
|
"loss": 3.0284502506256104,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.325434439178515,
|
|
"grad_norm": 7.846029378625489,
|
|
"learning_rate": 9.997970497256619e-06,
|
|
"loss": 3.201026201248169,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.32622432859399686,
|
|
"grad_norm": 14.900346297835531,
|
|
"learning_rate": 9.997837459321753e-06,
|
|
"loss": 2.7072229385375977,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.32701421800947866,
|
|
"grad_norm": 13.87299677542047,
|
|
"learning_rate": 9.997700199197342e-06,
|
|
"loss": 3.2912824153900146,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.3278041074249605,
|
|
"grad_norm": 9.946081688834646,
|
|
"learning_rate": 9.997558716999345e-06,
|
|
"loss": 2.7330331802368164,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.3285939968404423,
|
|
"grad_norm": 11.42849151295169,
|
|
"learning_rate": 9.997413012847288e-06,
|
|
"loss": 2.7119922637939453,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.3293838862559242,
|
|
"grad_norm": 7.619660611663469,
|
|
"learning_rate": 9.997263086864261e-06,
|
|
"loss": 2.9227335453033447,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.330173775671406,
|
|
"grad_norm": 8.897091541230195,
|
|
"learning_rate": 9.997108939176923e-06,
|
|
"loss": 2.9391653537750244,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.33096366508688785,
|
|
"grad_norm": 12.48321978059039,
|
|
"learning_rate": 9.996950569915496e-06,
|
|
"loss": 2.7242140769958496,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.33175355450236965,
|
|
"grad_norm": 7.856468794186075,
|
|
"learning_rate": 9.996787979213774e-06,
|
|
"loss": 2.9598379135131836,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.3325434439178515,
|
|
"grad_norm": 6.881298773384626,
|
|
"learning_rate": 9.996621167209112e-06,
|
|
"loss": 2.7800540924072266,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 8.554885683292087,
|
|
"learning_rate": 9.996450134042435e-06,
|
|
"loss": 3.363186836242676,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.3341232227488152,
|
|
"grad_norm": 17.067634709237346,
|
|
"learning_rate": 9.99627487985823e-06,
|
|
"loss": 2.5413265228271484,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.334913112164297,
|
|
"grad_norm": 7.715891787574891,
|
|
"learning_rate": 9.996095404804556e-06,
|
|
"loss": 2.6663031578063965,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.33570300157977884,
|
|
"grad_norm": 12.33434757520458,
|
|
"learning_rate": 9.99591170903303e-06,
|
|
"loss": 3.798855781555176,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.33649289099526064,
|
|
"grad_norm": 12.051508519336322,
|
|
"learning_rate": 9.995723792698841e-06,
|
|
"loss": 2.8377747535705566,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.3372827804107425,
|
|
"grad_norm": 15.639973760945276,
|
|
"learning_rate": 9.995531655960742e-06,
|
|
"loss": 2.916019916534424,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.3380726698262243,
|
|
"grad_norm": 19.40003707121016,
|
|
"learning_rate": 9.995335298981051e-06,
|
|
"loss": 3.0988028049468994,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.33886255924170616,
|
|
"grad_norm": 8.979641220891013,
|
|
"learning_rate": 9.995134721925647e-06,
|
|
"loss": 3.1492342948913574,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.33965244865718797,
|
|
"grad_norm": 21.875396313167574,
|
|
"learning_rate": 9.994929924963982e-06,
|
|
"loss": 2.8157858848571777,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.3404423380726698,
|
|
"grad_norm": 10.410663939939454,
|
|
"learning_rate": 9.99472090826907e-06,
|
|
"loss": 2.898981809616089,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.3412322274881517,
|
|
"grad_norm": 7.870464638075447,
|
|
"learning_rate": 9.994507672017483e-06,
|
|
"loss": 2.815958023071289,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.3420221169036335,
|
|
"grad_norm": 10.074834498515466,
|
|
"learning_rate": 9.99429021638937e-06,
|
|
"loss": 2.8587734699249268,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.34281200631911535,
|
|
"grad_norm": 9.479047537805325,
|
|
"learning_rate": 9.994068541568439e-06,
|
|
"loss": 2.758235216140747,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.34360189573459715,
|
|
"grad_norm": 11.805213339073592,
|
|
"learning_rate": 9.993842647741955e-06,
|
|
"loss": 2.754237174987793,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.344391785150079,
|
|
"grad_norm": 28.89113952525437,
|
|
"learning_rate": 9.993612535100759e-06,
|
|
"loss": 2.8837943077087402,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.3451816745655608,
|
|
"grad_norm": 11.902001596459298,
|
|
"learning_rate": 9.993378203839248e-06,
|
|
"loss": 2.9341726303100586,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.3459715639810427,
|
|
"grad_norm": 41.81687674626728,
|
|
"learning_rate": 9.99313965415539e-06,
|
|
"loss": 2.7734274864196777,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.3467614533965245,
|
|
"grad_norm": 24.406866447867205,
|
|
"learning_rate": 9.992896886250708e-06,
|
|
"loss": 3.654956817626953,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.34755134281200634,
|
|
"grad_norm": 9.751754855929297,
|
|
"learning_rate": 9.992649900330295e-06,
|
|
"loss": 2.4842309951782227,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.34834123222748814,
|
|
"grad_norm": 9.90576591780518,
|
|
"learning_rate": 9.992398696602805e-06,
|
|
"loss": 2.8286516666412354,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.34913112164297,
|
|
"grad_norm": 15.419437091625719,
|
|
"learning_rate": 9.992143275280458e-06,
|
|
"loss": 2.875483989715576,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.3499210110584518,
|
|
"grad_norm": 19.78870286772306,
|
|
"learning_rate": 9.991883636579031e-06,
|
|
"loss": 2.9084625244140625,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.35071090047393366,
|
|
"grad_norm": 12.586490948950289,
|
|
"learning_rate": 9.99161978071787e-06,
|
|
"loss": 3.2318763732910156,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.35150078988941547,
|
|
"grad_norm": 14.996840973011029,
|
|
"learning_rate": 9.991351707919881e-06,
|
|
"loss": 2.615654468536377,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.3522906793048973,
|
|
"grad_norm": 18.69675800078161,
|
|
"learning_rate": 9.991079418411534e-06,
|
|
"loss": 2.594451427459717,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.35308056872037913,
|
|
"grad_norm": 12.66669407216749,
|
|
"learning_rate": 9.990802912422857e-06,
|
|
"loss": 2.9762067794799805,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.353870458135861,
|
|
"grad_norm": 6.37331509167863,
|
|
"learning_rate": 9.990522190187446e-06,
|
|
"loss": 2.9721593856811523,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.3546603475513428,
|
|
"grad_norm": 12.731115361238068,
|
|
"learning_rate": 9.990237251942455e-06,
|
|
"loss": 3.0602238178253174,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.35545023696682465,
|
|
"grad_norm": 10.595337761744732,
|
|
"learning_rate": 9.9899480979286e-06,
|
|
"loss": 2.5146780014038086,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.35624012638230645,
|
|
"grad_norm": 9.131974465956034,
|
|
"learning_rate": 9.98965472839016e-06,
|
|
"loss": 2.864349842071533,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.3570300157977883,
|
|
"grad_norm": 9.313334991197689,
|
|
"learning_rate": 9.989357143574977e-06,
|
|
"loss": 3.1569554805755615,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.3578199052132701,
|
|
"grad_norm": 9.952415070876715,
|
|
"learning_rate": 9.989055343734446e-06,
|
|
"loss": 2.114063262939453,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.358609794628752,
|
|
"grad_norm": 8.442614576050127,
|
|
"learning_rate": 9.988749329123532e-06,
|
|
"loss": 2.6696736812591553,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.3593996840442338,
|
|
"grad_norm": 12.468144651927648,
|
|
"learning_rate": 9.988439100000758e-06,
|
|
"loss": 2.951082468032837,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.36018957345971564,
|
|
"grad_norm": 13.301749174192217,
|
|
"learning_rate": 9.988124656628205e-06,
|
|
"loss": 2.8643898963928223,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.3609794628751975,
|
|
"grad_norm": 51.815990840364165,
|
|
"learning_rate": 9.987805999271517e-06,
|
|
"loss": 2.082789421081543,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.3617693522906793,
|
|
"grad_norm": 14.907621177805696,
|
|
"learning_rate": 9.987483128199896e-06,
|
|
"loss": 2.7254719734191895,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.36255924170616116,
|
|
"grad_norm": 11.168139294706775,
|
|
"learning_rate": 9.987156043686103e-06,
|
|
"loss": 2.920536994934082,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.36334913112164297,
|
|
"grad_norm": 14.696725014468187,
|
|
"learning_rate": 9.986824746006463e-06,
|
|
"loss": 2.5195441246032715,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.3641390205371248,
|
|
"grad_norm": 13.759499455594858,
|
|
"learning_rate": 9.986489235440858e-06,
|
|
"loss": 3.0576400756835938,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.36492890995260663,
|
|
"grad_norm": 24.22268015695568,
|
|
"learning_rate": 9.986149512272723e-06,
|
|
"loss": 2.798210382461548,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.3657187993680885,
|
|
"grad_norm": 7.164355232959107,
|
|
"learning_rate": 9.985805576789061e-06,
|
|
"loss": 3.2072739601135254,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.3665086887835703,
|
|
"grad_norm": 21.200711435446056,
|
|
"learning_rate": 9.985457429280431e-06,
|
|
"loss": 2.2786130905151367,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.36729857819905215,
|
|
"grad_norm": 10.245055778605597,
|
|
"learning_rate": 9.985105070040948e-06,
|
|
"loss": 2.6123218536376953,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.36808846761453395,
|
|
"grad_norm": 11.013361642571633,
|
|
"learning_rate": 9.984748499368285e-06,
|
|
"loss": 2.8954989910125732,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.3688783570300158,
|
|
"grad_norm": 11.61804983156601,
|
|
"learning_rate": 9.984387717563675e-06,
|
|
"loss": 2.308267116546631,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.3696682464454976,
|
|
"grad_norm": 19.221327600533158,
|
|
"learning_rate": 9.984022724931908e-06,
|
|
"loss": 3.475597381591797,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.3704581358609795,
|
|
"grad_norm": 11.854231560542186,
|
|
"learning_rate": 9.98365352178133e-06,
|
|
"loss": 3.1217355728149414,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.3712480252764613,
|
|
"grad_norm": 6.403968704450684,
|
|
"learning_rate": 9.983280108423846e-06,
|
|
"loss": 3.116569995880127,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.37203791469194314,
|
|
"grad_norm": 9.683074091826104,
|
|
"learning_rate": 9.982902485174917e-06,
|
|
"loss": 2.748913526535034,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.37282780410742494,
|
|
"grad_norm": 45.29403837946451,
|
|
"learning_rate": 9.98252065235356e-06,
|
|
"loss": 2.734304428100586,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.3736176935229068,
|
|
"grad_norm": 8.629169803711216,
|
|
"learning_rate": 9.982134610282348e-06,
|
|
"loss": 2.9549429416656494,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.3744075829383886,
|
|
"grad_norm": 11.378526346658687,
|
|
"learning_rate": 9.98174435928741e-06,
|
|
"loss": 3.1600706577301025,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.37519747235387046,
|
|
"grad_norm": 13.877969372280013,
|
|
"learning_rate": 9.981349899698433e-06,
|
|
"loss": 3.0187835693359375,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.37598736176935227,
|
|
"grad_norm": 16.082987857555008,
|
|
"learning_rate": 9.98095123184866e-06,
|
|
"loss": 2.525953769683838,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.3767772511848341,
|
|
"grad_norm": 15.97178916944899,
|
|
"learning_rate": 9.980548356074882e-06,
|
|
"loss": 2.4600391387939453,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.37756714060031593,
|
|
"grad_norm": 11.222135044819899,
|
|
"learning_rate": 9.980141272717451e-06,
|
|
"loss": 3.0577778816223145,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.3783570300157978,
|
|
"grad_norm": 10.38398589216634,
|
|
"learning_rate": 9.979729982120274e-06,
|
|
"loss": 2.5604796409606934,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.3791469194312796,
|
|
"grad_norm": 28.714703496966944,
|
|
"learning_rate": 9.979314484630812e-06,
|
|
"loss": 2.344428300857544,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.37993680884676145,
|
|
"grad_norm": 8.421863202990965,
|
|
"learning_rate": 9.978894780600072e-06,
|
|
"loss": 2.434558391571045,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.3807266982622433,
|
|
"grad_norm": 8.802465948474383,
|
|
"learning_rate": 9.978470870382631e-06,
|
|
"loss": 2.9265592098236084,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.3815165876777251,
|
|
"grad_norm": 16.360909810195515,
|
|
"learning_rate": 9.9780427543366e-06,
|
|
"loss": 2.2389657497406006,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.382306477093207,
|
|
"grad_norm": 12.714530836347008,
|
|
"learning_rate": 9.977610432823661e-06,
|
|
"loss": 2.518057346343994,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.3830963665086888,
|
|
"grad_norm": 12.593833443055948,
|
|
"learning_rate": 9.977173906209035e-06,
|
|
"loss": 2.7258565425872803,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.38388625592417064,
|
|
"grad_norm": 12.935614043323996,
|
|
"learning_rate": 9.976733174861504e-06,
|
|
"loss": 2.57004451751709,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.38467614533965244,
|
|
"grad_norm": 6.953226645670275,
|
|
"learning_rate": 9.9762882391534e-06,
|
|
"loss": 2.808042526245117,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.3854660347551343,
|
|
"grad_norm": 7.174411200974892,
|
|
"learning_rate": 9.975839099460603e-06,
|
|
"loss": 2.4963083267211914,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.3862559241706161,
|
|
"grad_norm": 8.613913754322418,
|
|
"learning_rate": 9.97538575616255e-06,
|
|
"loss": 2.676271438598633,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.38704581358609796,
|
|
"grad_norm": 11.876597542420834,
|
|
"learning_rate": 9.974928209642225e-06,
|
|
"loss": 2.7614307403564453,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.38783570300157977,
|
|
"grad_norm": 11.093132079984253,
|
|
"learning_rate": 9.974466460286168e-06,
|
|
"loss": 2.624708414077759,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.3886255924170616,
|
|
"grad_norm": 10.27538264079467,
|
|
"learning_rate": 9.974000508484464e-06,
|
|
"loss": 2.5740928649902344,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.38941548183254343,
|
|
"grad_norm": 12.05113290396417,
|
|
"learning_rate": 9.97353035463075e-06,
|
|
"loss": 3.132807731628418,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.3902053712480253,
|
|
"grad_norm": 10.616071459580557,
|
|
"learning_rate": 9.973055999122217e-06,
|
|
"loss": 3.1886236667633057,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.3909952606635071,
|
|
"grad_norm": 7.166433394997447,
|
|
"learning_rate": 9.972577442359596e-06,
|
|
"loss": 2.458066463470459,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.39178515007898895,
|
|
"grad_norm": 18.273152599081556,
|
|
"learning_rate": 9.97209468474718e-06,
|
|
"loss": 2.686516761779785,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.39257503949447076,
|
|
"grad_norm": 18.046414595846677,
|
|
"learning_rate": 9.9716077266928e-06,
|
|
"loss": 3.268564224243164,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.3933649289099526,
|
|
"grad_norm": 20.306060211042794,
|
|
"learning_rate": 9.971116568607843e-06,
|
|
"loss": 2.7214527130126953,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.3941548183254344,
|
|
"grad_norm": 12.50174696099143,
|
|
"learning_rate": 9.970621210907236e-06,
|
|
"loss": 2.9584507942199707,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.3949447077409163,
|
|
"grad_norm": 12.497953323135684,
|
|
"learning_rate": 9.970121654009464e-06,
|
|
"loss": 2.7275800704956055,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.3957345971563981,
|
|
"grad_norm": 8.96830588462741,
|
|
"learning_rate": 9.969617898336552e-06,
|
|
"loss": 2.4311466217041016,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.39652448657187994,
|
|
"grad_norm": 13.616711842115256,
|
|
"learning_rate": 9.969109944314075e-06,
|
|
"loss": 2.9500246047973633,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.39731437598736175,
|
|
"grad_norm": 17.251138991228274,
|
|
"learning_rate": 9.968597792371151e-06,
|
|
"loss": 3.416146755218506,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.3981042654028436,
|
|
"grad_norm": 19.00639226186363,
|
|
"learning_rate": 9.968081442940454e-06,
|
|
"loss": 3.451007604598999,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.3988941548183254,
|
|
"grad_norm": 7.711636407435598,
|
|
"learning_rate": 9.967560896458192e-06,
|
|
"loss": 2.7228689193725586,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.39968404423380727,
|
|
"grad_norm": 12.583460827994319,
|
|
"learning_rate": 9.967036153364127e-06,
|
|
"loss": 2.8506970405578613,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.4004739336492891,
|
|
"grad_norm": 12.221260607836053,
|
|
"learning_rate": 9.966507214101565e-06,
|
|
"loss": 2.9692885875701904,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.40126382306477093,
|
|
"grad_norm": 12.905892162067822,
|
|
"learning_rate": 9.965974079117351e-06,
|
|
"loss": 3.444052219390869,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.4020537124802528,
|
|
"grad_norm": 11.169864619254174,
|
|
"learning_rate": 9.965436748861883e-06,
|
|
"loss": 3.00361967086792,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.4028436018957346,
|
|
"grad_norm": 18.923855282243036,
|
|
"learning_rate": 9.9648952237891e-06,
|
|
"loss": 2.179131507873535,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.40363349131121645,
|
|
"grad_norm": 17.18325282035086,
|
|
"learning_rate": 9.964349504356481e-06,
|
|
"loss": 2.724170446395874,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.40442338072669826,
|
|
"grad_norm": 9.348305791417955,
|
|
"learning_rate": 9.963799591025054e-06,
|
|
"loss": 2.658226490020752,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.4052132701421801,
|
|
"grad_norm": 14.084370825683834,
|
|
"learning_rate": 9.963245484259384e-06,
|
|
"loss": 3.301179885864258,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.4060031595576619,
|
|
"grad_norm": 9.131570579938595,
|
|
"learning_rate": 9.96268718452759e-06,
|
|
"loss": 2.7031455039978027,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.4067930489731438,
|
|
"grad_norm": 7.293530938243252,
|
|
"learning_rate": 9.962124692301315e-06,
|
|
"loss": 2.310668468475342,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.4075829383886256,
|
|
"grad_norm": 28.464034341123952,
|
|
"learning_rate": 9.961558008055764e-06,
|
|
"loss": 2.6063344478607178,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.40837282780410744,
|
|
"grad_norm": 7.628841921327853,
|
|
"learning_rate": 9.960987132269668e-06,
|
|
"loss": 2.0414226055145264,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.40916271721958924,
|
|
"grad_norm": 14.44432111132087,
|
|
"learning_rate": 9.960412065425308e-06,
|
|
"loss": 2.770200252532959,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.4099526066350711,
|
|
"grad_norm": 11.901956808416232,
|
|
"learning_rate": 9.959832808008498e-06,
|
|
"loss": 2.8997509479522705,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.4107424960505529,
|
|
"grad_norm": 14.40790924911134,
|
|
"learning_rate": 9.959249360508598e-06,
|
|
"loss": 2.9758782386779785,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.41153238546603477,
|
|
"grad_norm": 9.725811994478915,
|
|
"learning_rate": 9.95866172341851e-06,
|
|
"loss": 2.986323356628418,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.41232227488151657,
|
|
"grad_norm": 16.22979712148631,
|
|
"learning_rate": 9.95806989723467e-06,
|
|
"loss": 2.5895464420318604,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.41311216429699843,
|
|
"grad_norm": 13.923489867833995,
|
|
"learning_rate": 9.957473882457051e-06,
|
|
"loss": 2.687991142272949,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.41390205371248023,
|
|
"grad_norm": 26.95251995610371,
|
|
"learning_rate": 9.956873679589173e-06,
|
|
"loss": 2.4715166091918945,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.4146919431279621,
|
|
"grad_norm": 14.00634199685074,
|
|
"learning_rate": 9.956269289138088e-06,
|
|
"loss": 2.5624163150787354,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.4154818325434439,
|
|
"grad_norm": 29.26485590244888,
|
|
"learning_rate": 9.955660711614386e-06,
|
|
"loss": 2.6949751377105713,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.41627172195892576,
|
|
"grad_norm": 13.78447550231186,
|
|
"learning_rate": 9.955047947532194e-06,
|
|
"loss": 3.0492568016052246,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.41706161137440756,
|
|
"grad_norm": 9.027477437625507,
|
|
"learning_rate": 9.954430997409181e-06,
|
|
"loss": 3.8118910789489746,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.4178515007898894,
|
|
"grad_norm": 8.039786133247507,
|
|
"learning_rate": 9.953809861766547e-06,
|
|
"loss": 3.058897018432617,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.4186413902053712,
|
|
"grad_norm": 16.995294036547257,
|
|
"learning_rate": 9.953184541129029e-06,
|
|
"loss": 3.157442808151245,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.4194312796208531,
|
|
"grad_norm": 14.009340053986007,
|
|
"learning_rate": 9.952555036024898e-06,
|
|
"loss": 2.9258034229278564,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.42022116903633494,
|
|
"grad_norm": 12.583256188813682,
|
|
"learning_rate": 9.951921346985966e-06,
|
|
"loss": 2.772176742553711,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.42101105845181674,
|
|
"grad_norm": 14.778680678932332,
|
|
"learning_rate": 9.951283474547574e-06,
|
|
"loss": 3.1442911624908447,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.4218009478672986,
|
|
"grad_norm": 10.191686358833875,
|
|
"learning_rate": 9.950641419248595e-06,
|
|
"loss": 2.6074397563934326,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.4225908372827804,
|
|
"grad_norm": 13.596508027495249,
|
|
"learning_rate": 9.949995181631444e-06,
|
|
"loss": 2.861325740814209,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.42338072669826227,
|
|
"grad_norm": 16.87570821527581,
|
|
"learning_rate": 9.949344762242064e-06,
|
|
"loss": 2.9847991466522217,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.42417061611374407,
|
|
"grad_norm": 15.098978095560872,
|
|
"learning_rate": 9.94869016162993e-06,
|
|
"loss": 3.360105037689209,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.42496050552922593,
|
|
"grad_norm": 11.916401679971019,
|
|
"learning_rate": 9.948031380348051e-06,
|
|
"loss": 2.6311533451080322,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.42575039494470773,
|
|
"grad_norm": 15.393668192114527,
|
|
"learning_rate": 9.94736841895297e-06,
|
|
"loss": 2.572305202484131,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.4265402843601896,
|
|
"grad_norm": 21.88619737730292,
|
|
"learning_rate": 9.946701278004755e-06,
|
|
"loss": 2.6645431518554688,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.4273301737756714,
|
|
"grad_norm": 14.623832761629139,
|
|
"learning_rate": 9.946029958067012e-06,
|
|
"loss": 2.8375582695007324,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.42812006319115326,
|
|
"grad_norm": 11.959588905376497,
|
|
"learning_rate": 9.945354459706873e-06,
|
|
"loss": 2.8177828788757324,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.42890995260663506,
|
|
"grad_norm": 11.801706214734535,
|
|
"learning_rate": 9.944674783495e-06,
|
|
"loss": 3.4021530151367188,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.4296998420221169,
|
|
"grad_norm": 8.145218059279367,
|
|
"learning_rate": 9.94399093000559e-06,
|
|
"loss": 2.4974822998046875,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.4304897314375987,
|
|
"grad_norm": 9.708825005813761,
|
|
"learning_rate": 9.94330289981636e-06,
|
|
"loss": 2.775845527648926,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.4312796208530806,
|
|
"grad_norm": 22.963925015075688,
|
|
"learning_rate": 9.942610693508564e-06,
|
|
"loss": 2.5559940338134766,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.4320695102685624,
|
|
"grad_norm": 9.353349856037912,
|
|
"learning_rate": 9.941914311666976e-06,
|
|
"loss": 2.7999205589294434,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.43285939968404424,
|
|
"grad_norm": 13.107963046441109,
|
|
"learning_rate": 9.941213754879904e-06,
|
|
"loss": 2.6367478370666504,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.43364928909952605,
|
|
"grad_norm": 9.146316137469308,
|
|
"learning_rate": 9.940509023739181e-06,
|
|
"loss": 2.6994175910949707,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.4344391785150079,
|
|
"grad_norm": 6.883523518701926,
|
|
"learning_rate": 9.939800118840167e-06,
|
|
"loss": 2.807130813598633,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.4352290679304897,
|
|
"grad_norm": 9.612172104441717,
|
|
"learning_rate": 9.939087040781743e-06,
|
|
"loss": 2.729193687438965,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.43601895734597157,
|
|
"grad_norm": 24.67008747020927,
|
|
"learning_rate": 9.938369790166325e-06,
|
|
"loss": 2.557534694671631,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.4368088467614534,
|
|
"grad_norm": 8.054347498191312,
|
|
"learning_rate": 9.937648367599845e-06,
|
|
"loss": 2.8205268383026123,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.43759873617693523,
|
|
"grad_norm": 14.422963472461976,
|
|
"learning_rate": 9.936922773691764e-06,
|
|
"loss": 2.7715141773223877,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.43838862559241704,
|
|
"grad_norm": 7.484053892132038,
|
|
"learning_rate": 9.93619300905507e-06,
|
|
"loss": 2.254258155822754,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.4391785150078989,
|
|
"grad_norm": 11.183683079441906,
|
|
"learning_rate": 9.935459074306261e-06,
|
|
"loss": 2.682985544204712,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.4399684044233807,
|
|
"grad_norm": 9.16127826816926,
|
|
"learning_rate": 9.934720970065379e-06,
|
|
"loss": 2.539468288421631,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.44075829383886256,
|
|
"grad_norm": 7.141623413765421,
|
|
"learning_rate": 9.93397869695597e-06,
|
|
"loss": 2.5426435470581055,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.4415481832543444,
|
|
"grad_norm": 10.23954487534604,
|
|
"learning_rate": 9.93323225560511e-06,
|
|
"loss": 3.2652475833892822,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.4423380726698262,
|
|
"grad_norm": 16.167362326761815,
|
|
"learning_rate": 9.932481646643395e-06,
|
|
"loss": 2.5560061931610107,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.4431279620853081,
|
|
"grad_norm": 24.50508286675025,
|
|
"learning_rate": 9.931726870704943e-06,
|
|
"loss": 2.4910902976989746,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.4439178515007899,
|
|
"grad_norm": 13.272899493408493,
|
|
"learning_rate": 9.930967928427389e-06,
|
|
"loss": 3.1928012371063232,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.44470774091627174,
|
|
"grad_norm": 11.029176665335946,
|
|
"learning_rate": 9.930204820451892e-06,
|
|
"loss": 2.045280933380127,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.44549763033175355,
|
|
"grad_norm": 19.40400206586639,
|
|
"learning_rate": 9.92943754742313e-06,
|
|
"loss": 2.734166145324707,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.4462875197472354,
|
|
"grad_norm": 7.706073509198339,
|
|
"learning_rate": 9.928666109989294e-06,
|
|
"loss": 2.8022024631500244,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.4470774091627172,
|
|
"grad_norm": 14.62203101954205,
|
|
"learning_rate": 9.927890508802096e-06,
|
|
"loss": 2.7379016876220703,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.44786729857819907,
|
|
"grad_norm": 17.181389624231375,
|
|
"learning_rate": 9.92711074451677e-06,
|
|
"loss": 2.999567985534668,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.4486571879936809,
|
|
"grad_norm": 14.992846352483394,
|
|
"learning_rate": 9.926326817792065e-06,
|
|
"loss": 2.635314464569092,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.44944707740916273,
|
|
"grad_norm": 7.6279407925730816,
|
|
"learning_rate": 9.925538729290239e-06,
|
|
"loss": 2.3740317821502686,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.45023696682464454,
|
|
"grad_norm": 15.752498964379894,
|
|
"learning_rate": 9.924746479677075e-06,
|
|
"loss": 2.9476394653320312,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.4510268562401264,
|
|
"grad_norm": 7.93326356210501,
|
|
"learning_rate": 9.923950069621868e-06,
|
|
"loss": 3.3303630352020264,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.4518167456556082,
|
|
"grad_norm": 29.90614936882604,
|
|
"learning_rate": 9.923149499797429e-06,
|
|
"loss": 3.194509267807007,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.45260663507109006,
|
|
"grad_norm": 13.371064507421321,
|
|
"learning_rate": 9.92234477088008e-06,
|
|
"loss": 2.94869327545166,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.45339652448657186,
|
|
"grad_norm": 15.897418580785546,
|
|
"learning_rate": 9.921535883549658e-06,
|
|
"loss": 2.7056546211242676,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.4541864139020537,
|
|
"grad_norm": 26.124918845816804,
|
|
"learning_rate": 9.920722838489515e-06,
|
|
"loss": 3.060375452041626,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.4549763033175355,
|
|
"grad_norm": 18.882545145062025,
|
|
"learning_rate": 9.919905636386516e-06,
|
|
"loss": 3.0005345344543457,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.4557661927330174,
|
|
"grad_norm": 13.37328310128464,
|
|
"learning_rate": 9.919084277931033e-06,
|
|
"loss": 2.5772323608398438,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.4565560821484992,
|
|
"grad_norm": 17.390743534176387,
|
|
"learning_rate": 9.918258763816954e-06,
|
|
"loss": 3.439105749130249,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.45734597156398105,
|
|
"grad_norm": 13.795578608418804,
|
|
"learning_rate": 9.917429094741676e-06,
|
|
"loss": 2.9797146320343018,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.45813586097946285,
|
|
"grad_norm": 14.581579263670818,
|
|
"learning_rate": 9.916595271406104e-06,
|
|
"loss": 3.3291659355163574,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.4589257503949447,
|
|
"grad_norm": 23.055562889090734,
|
|
"learning_rate": 9.915757294514658e-06,
|
|
"loss": 3.4787819385528564,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.4597156398104265,
|
|
"grad_norm": 11.454956025296493,
|
|
"learning_rate": 9.91491516477526e-06,
|
|
"loss": 2.543201446533203,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.46050552922590837,
|
|
"grad_norm": 27.44348974944536,
|
|
"learning_rate": 9.91406888289935e-06,
|
|
"loss": 3.8477420806884766,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.46129541864139023,
|
|
"grad_norm": 13.748326155704126,
|
|
"learning_rate": 9.913218449601862e-06,
|
|
"loss": 3.204080581665039,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.46208530805687204,
|
|
"grad_norm": 14.852830560317845,
|
|
"learning_rate": 9.912363865601252e-06,
|
|
"loss": 2.0701780319213867,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.4628751974723539,
|
|
"grad_norm": 9.286607736299267,
|
|
"learning_rate": 9.911505131619467e-06,
|
|
"loss": 2.616168975830078,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.4636650868878357,
|
|
"grad_norm": 27.1312162902194,
|
|
"learning_rate": 9.910642248381978e-06,
|
|
"loss": 3.5931811332702637,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.46445497630331756,
|
|
"grad_norm": 32.41215406453979,
|
|
"learning_rate": 9.909775216617746e-06,
|
|
"loss": 2.6403136253356934,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.46524486571879936,
|
|
"grad_norm": 16.462651257709005,
|
|
"learning_rate": 9.908904037059242e-06,
|
|
"loss": 1.806509017944336,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.4660347551342812,
|
|
"grad_norm": 8.054217161807381,
|
|
"learning_rate": 9.908028710442443e-06,
|
|
"loss": 2.955305337905884,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.466824644549763,
|
|
"grad_norm": 13.079572202568693,
|
|
"learning_rate": 9.907149237506825e-06,
|
|
"loss": 3.071561098098755,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.4676145339652449,
|
|
"grad_norm": 17.039788808514484,
|
|
"learning_rate": 9.906265618995375e-06,
|
|
"loss": 3.0196356773376465,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.4684044233807267,
|
|
"grad_norm": 13.364691523127064,
|
|
"learning_rate": 9.905377855654574e-06,
|
|
"loss": 2.70352840423584,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.46919431279620855,
|
|
"grad_norm": 10.365123705943594,
|
|
"learning_rate": 9.904485948234406e-06,
|
|
"loss": 2.782586097717285,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.46998420221169035,
|
|
"grad_norm": 17.914445810601254,
|
|
"learning_rate": 9.903589897488358e-06,
|
|
"loss": 3.854835271835327,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.4707740916271722,
|
|
"grad_norm": 14.706414319504669,
|
|
"learning_rate": 9.902689704173418e-06,
|
|
"loss": 3.0983946323394775,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.471563981042654,
|
|
"grad_norm": 11.041528398876812,
|
|
"learning_rate": 9.901785369050073e-06,
|
|
"loss": 3.148883819580078,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.47235387045813587,
|
|
"grad_norm": 12.764570521775047,
|
|
"learning_rate": 9.900876892882303e-06,
|
|
"loss": 3.0621113777160645,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.4731437598736177,
|
|
"grad_norm": 12.917586958199212,
|
|
"learning_rate": 9.899964276437596e-06,
|
|
"loss": 2.7622828483581543,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.47393364928909953,
|
|
"grad_norm": 11.372886554785106,
|
|
"learning_rate": 9.899047520486935e-06,
|
|
"loss": 2.379685878753662,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.47472353870458134,
|
|
"grad_norm": 8.25416016120882,
|
|
"learning_rate": 9.898126625804796e-06,
|
|
"loss": 2.2554409503936768,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.4755134281200632,
|
|
"grad_norm": 13.238723315824029,
|
|
"learning_rate": 9.897201593169153e-06,
|
|
"loss": 2.7117209434509277,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.476303317535545,
|
|
"grad_norm": 11.785948359638766,
|
|
"learning_rate": 9.896272423361479e-06,
|
|
"loss": 2.219001531600952,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.47709320695102686,
|
|
"grad_norm": 16.426783052438104,
|
|
"learning_rate": 9.895339117166737e-06,
|
|
"loss": 3.105238199234009,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.47788309636650866,
|
|
"grad_norm": 9.983370358512682,
|
|
"learning_rate": 9.894401675373388e-06,
|
|
"loss": 2.7501213550567627,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.4786729857819905,
|
|
"grad_norm": 11.613745339741977,
|
|
"learning_rate": 9.89346009877339e-06,
|
|
"loss": 3.157028913497925,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.4794628751974723,
|
|
"grad_norm": 16.033920286391126,
|
|
"learning_rate": 9.892514388162183e-06,
|
|
"loss": 2.930591106414795,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.4802527646129542,
|
|
"grad_norm": 26.306838660431477,
|
|
"learning_rate": 9.89156454433871e-06,
|
|
"loss": 2.365173816680908,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.48104265402843605,
|
|
"grad_norm": 8.551782014277038,
|
|
"learning_rate": 9.890610568105401e-06,
|
|
"loss": 2.737978935241699,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.48183254344391785,
|
|
"grad_norm": 6.191554963386605,
|
|
"learning_rate": 9.889652460268183e-06,
|
|
"loss": 1.0541880130767822,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.4826224328593997,
|
|
"grad_norm": 10.343101459770976,
|
|
"learning_rate": 9.888690221636462e-06,
|
|
"loss": 3.482835054397583,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.4834123222748815,
|
|
"grad_norm": 7.536217244705736,
|
|
"learning_rate": 9.887723853023144e-06,
|
|
"loss": 2.714404582977295,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.48420221169036337,
|
|
"grad_norm": 11.909381848673933,
|
|
"learning_rate": 9.88675335524462e-06,
|
|
"loss": 2.518251895904541,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.4849921011058452,
|
|
"grad_norm": 19.55484435219003,
|
|
"learning_rate": 9.885778729120771e-06,
|
|
"loss": 3.3546159267425537,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.48578199052132703,
|
|
"grad_norm": 13.61585851649587,
|
|
"learning_rate": 9.884799975474961e-06,
|
|
"loss": 3.282747745513916,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.48657187993680884,
|
|
"grad_norm": 7.245085293733777,
|
|
"learning_rate": 9.883817095134048e-06,
|
|
"loss": 2.7314577102661133,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.4873617693522907,
|
|
"grad_norm": 9.753488601463083,
|
|
"learning_rate": 9.882830088928368e-06,
|
|
"loss": 2.8968541622161865,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.4881516587677725,
|
|
"grad_norm": 10.570209792818952,
|
|
"learning_rate": 9.881838957691752e-06,
|
|
"loss": 2.766514539718628,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.48894154818325436,
|
|
"grad_norm": 10.820407163130954,
|
|
"learning_rate": 9.880843702261506e-06,
|
|
"loss": 2.4016904830932617,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.48973143759873616,
|
|
"grad_norm": 11.038883967400233,
|
|
"learning_rate": 9.87984432347843e-06,
|
|
"loss": 2.8720149993896484,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.490521327014218,
|
|
"grad_norm": 11.813323160870127,
|
|
"learning_rate": 9.8788408221868e-06,
|
|
"loss": 2.9305214881896973,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.4913112164296998,
|
|
"grad_norm": 11.366049566856011,
|
|
"learning_rate": 9.877833199234378e-06,
|
|
"loss": 2.9653875827789307,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.4921011058451817,
|
|
"grad_norm": 12.438353507803086,
|
|
"learning_rate": 9.876821455472405e-06,
|
|
"loss": 2.3867058753967285,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.4928909952606635,
|
|
"grad_norm": 8.498213125601112,
|
|
"learning_rate": 9.875805591755608e-06,
|
|
"loss": 3.2036352157592773,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.49368088467614535,
|
|
"grad_norm": 16.30479309681846,
|
|
"learning_rate": 9.874785608942192e-06,
|
|
"loss": 3.305636167526245,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.49447077409162715,
|
|
"grad_norm": 10.081140854214283,
|
|
"learning_rate": 9.87376150789384e-06,
|
|
"loss": 3.041412353515625,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.495260663507109,
|
|
"grad_norm": 64.71900103986658,
|
|
"learning_rate": 9.872733289475717e-06,
|
|
"loss": 4.324435710906982,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.4960505529225908,
|
|
"grad_norm": 11.539415904080359,
|
|
"learning_rate": 9.871700954556464e-06,
|
|
"loss": 3.1219332218170166,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.4968404423380727,
|
|
"grad_norm": 16.008989800922443,
|
|
"learning_rate": 9.870664504008205e-06,
|
|
"loss": 2.5995893478393555,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.4976303317535545,
|
|
"grad_norm": 11.383945681171209,
|
|
"learning_rate": 9.869623938706531e-06,
|
|
"loss": 2.9473705291748047,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.49842022116903634,
|
|
"grad_norm": 14.730134044674989,
|
|
"learning_rate": 9.868579259530519e-06,
|
|
"loss": 3.243873357772827,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.49921011058451814,
|
|
"grad_norm": 8.390549227817651,
|
|
"learning_rate": 9.867530467362718e-06,
|
|
"loss": 2.4504904747009277,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 8.424703770575638,
|
|
"learning_rate": 9.866477563089148e-06,
|
|
"loss": 2.6318535804748535,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.5007898894154819,
|
|
"grad_norm": 9.38333524987037,
|
|
"learning_rate": 9.865420547599308e-06,
|
|
"loss": 2.7379918098449707,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.5015797788309637,
|
|
"grad_norm": 9.704149060081368,
|
|
"learning_rate": 9.864359421786168e-06,
|
|
"loss": 2.304293632507324,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.5023696682464455,
|
|
"grad_norm": 24.08311236031377,
|
|
"learning_rate": 9.863294186546168e-06,
|
|
"loss": 2.332653522491455,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.5031595576619273,
|
|
"grad_norm": 16.210594683251617,
|
|
"learning_rate": 9.862224842779225e-06,
|
|
"loss": 3.0899691581726074,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.5039494470774092,
|
|
"grad_norm": 14.171269286760387,
|
|
"learning_rate": 9.861151391388726e-06,
|
|
"loss": 2.624315023422241,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.504739336492891,
|
|
"grad_norm": 17.155749747348096,
|
|
"learning_rate": 9.86007383328152e-06,
|
|
"loss": 3.289152145385742,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.5055292259083728,
|
|
"grad_norm": 8.077042869666613,
|
|
"learning_rate": 9.858992169367939e-06,
|
|
"loss": 2.8146300315856934,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.5063191153238547,
|
|
"grad_norm": 14.900885791368967,
|
|
"learning_rate": 9.857906400561771e-06,
|
|
"loss": 3.192298173904419,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.5071090047393365,
|
|
"grad_norm": 9.42772244547937,
|
|
"learning_rate": 9.856816527780279e-06,
|
|
"loss": 2.958979845046997,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.5078988941548184,
|
|
"grad_norm": 19.45725727165277,
|
|
"learning_rate": 9.855722551944192e-06,
|
|
"loss": 2.5792651176452637,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.5086887835703001,
|
|
"grad_norm": 18.71191352944223,
|
|
"learning_rate": 9.854624473977702e-06,
|
|
"loss": 2.2687480449676514,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.509478672985782,
|
|
"grad_norm": 5.973081965855647,
|
|
"learning_rate": 9.85352229480847e-06,
|
|
"loss": 2.5880727767944336,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.5102685624012638,
|
|
"grad_norm": 7.964099363507532,
|
|
"learning_rate": 9.852416015367622e-06,
|
|
"loss": 3.0370497703552246,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.5110584518167457,
|
|
"grad_norm": 6.554920362859979,
|
|
"learning_rate": 9.851305636589745e-06,
|
|
"loss": 2.9287662506103516,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.5118483412322274,
|
|
"grad_norm": 8.324682375342665,
|
|
"learning_rate": 9.85019115941289e-06,
|
|
"loss": 2.7224721908569336,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.5126382306477093,
|
|
"grad_norm": 15.108696475255462,
|
|
"learning_rate": 9.849072584778572e-06,
|
|
"loss": 3.304979085922241,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.5134281200631912,
|
|
"grad_norm": 13.13698436229945,
|
|
"learning_rate": 9.847949913631767e-06,
|
|
"loss": 1.872714877128601,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.514218009478673,
|
|
"grad_norm": 14.549850333742206,
|
|
"learning_rate": 9.84682314692091e-06,
|
|
"loss": 3.086406707763672,
|
|
"step": 651
|
|
},
|
|
{
"epoch": 0.5150078988941548,
"grad_norm": 14.573017384190178,
"learning_rate": 9.845692285597898e-06,
"loss": 3.119309186935425,
"step": 652
},
{
"epoch": 0.5157977883096366,
"grad_norm": 14.70328848938873,
"learning_rate": 9.844557330618087e-06,
"loss": 3.3144378662109375,
"step": 653
},
{
"epoch": 0.5165876777251185,
"grad_norm": 13.577075929710624,
"learning_rate": 9.843418282940291e-06,
"loss": 3.093888282775879,
"step": 654
},
{
"epoch": 0.5173775671406003,
"grad_norm": 16.48466621944885,
"learning_rate": 9.842275143526779e-06,
"loss": 3.2132608890533447,
"step": 655
},
{
"epoch": 0.5181674565560821,
"grad_norm": 21.816699436754334,
"learning_rate": 9.841127913343281e-06,
"loss": 2.8770318031311035,
"step": 656
},
{
"epoch": 0.518957345971564,
"grad_norm": 18.527709516001913,
"learning_rate": 9.83997659335898e-06,
"loss": 3.0780622959136963,
"step": 657
},
{
"epoch": 0.5197472353870458,
"grad_norm": 12.06728106207623,
"learning_rate": 9.838821184546513e-06,
"loss": 2.7250850200653076,
"step": 658
},
{
"epoch": 0.5205371248025277,
"grad_norm": 24.07545499285517,
"learning_rate": 9.837661687881976e-06,
"loss": 2.6896378993988037,
"step": 659
},
{
"epoch": 0.5213270142180095,
"grad_norm": 14.021969692199526,
"learning_rate": 9.836498104344916e-06,
"loss": 2.549968719482422,
"step": 660
},
{
"epoch": 0.5221169036334913,
"grad_norm": 10.925462746063516,
"learning_rate": 9.835330434918329e-06,
"loss": 2.809274673461914,
"step": 661
},
{
"epoch": 0.5229067930489731,
"grad_norm": 13.630283976038868,
"learning_rate": 9.83415868058867e-06,
"loss": 3.4539241790771484,
"step": 662
},
{
"epoch": 0.523696682464455,
"grad_norm": 12.004271103191083,
"learning_rate": 9.832982842345838e-06,
"loss": 3.2775259017944336,
"step": 663
},
{
"epoch": 0.5244865718799369,
"grad_norm": 12.05995942930735,
"learning_rate": 9.831802921183184e-06,
"loss": 2.543905735015869,
"step": 664
},
{
"epoch": 0.5252764612954186,
"grad_norm": 8.986701835621098,
"learning_rate": 9.830618918097514e-06,
"loss": 2.7053022384643555,
"step": 665
},
{
"epoch": 0.5260663507109005,
"grad_norm": 9.733936760368861,
"learning_rate": 9.829430834089072e-06,
"loss": 2.9009079933166504,
"step": 666
},
{
"epoch": 0.5268562401263823,
"grad_norm": 9.705816192138302,
"learning_rate": 9.82823867016156e-06,
"loss": 2.320451259613037,
"step": 667
},
{
"epoch": 0.5276461295418642,
"grad_norm": 15.366368478511447,
"learning_rate": 9.82704242732212e-06,
"loss": 3.4952645301818848,
"step": 668
},
{
"epoch": 0.5284360189573459,
"grad_norm": 15.006524588925533,
"learning_rate": 9.825842106581343e-06,
"loss": 2.6732113361358643,
"step": 669
},
{
"epoch": 0.5292259083728278,
"grad_norm": 9.72530845064569,
"learning_rate": 9.824637708953262e-06,
"loss": 2.7073092460632324,
"step": 670
},
{
"epoch": 0.5300157977883097,
"grad_norm": 57.6393618147052,
"learning_rate": 9.823429235455357e-06,
"loss": 2.821194887161255,
"step": 671
},
{
"epoch": 0.5308056872037915,
"grad_norm": 15.587439690063317,
"learning_rate": 9.822216687108549e-06,
"loss": 3.2857871055603027,
"step": 672
},
{
"epoch": 0.5315955766192733,
"grad_norm": 5.803845483605398,
"learning_rate": 9.821000064937205e-06,
"loss": 2.699526309967041,
"step": 673
},
{
"epoch": 0.5323854660347551,
"grad_norm": 8.386330014964896,
"learning_rate": 9.81977936996913e-06,
"loss": 2.623192071914673,
"step": 674
},
{
"epoch": 0.533175355450237,
"grad_norm": 11.975531675577631,
"learning_rate": 9.818554603235574e-06,
"loss": 2.8475778102874756,
"step": 675
},
{
"epoch": 0.5339652448657188,
"grad_norm": 8.008863647269184,
"learning_rate": 9.81732576577122e-06,
"loss": 2.2658133506774902,
"step": 676
},
{
"epoch": 0.5347551342812006,
"grad_norm": 22.741549269978968,
"learning_rate": 9.816092858614197e-06,
"loss": 3.2006266117095947,
"step": 677
},
{
"epoch": 0.5355450236966824,
"grad_norm": 11.748680083922181,
"learning_rate": 9.814855882806068e-06,
"loss": 2.7071900367736816,
"step": 678
},
{
"epoch": 0.5363349131121643,
"grad_norm": 15.879587406556047,
"learning_rate": 9.813614839391831e-06,
"loss": 3.224722385406494,
"step": 679
},
{
"epoch": 0.5371248025276462,
"grad_norm": 9.796469130582556,
"learning_rate": 9.812369729419928e-06,
"loss": 3.0697150230407715,
"step": 680
},
{
|
|
"epoch": 0.5379146919431279,
|
|
"grad_norm": 14.580990160396306,
|
|
"learning_rate": 9.811120553942232e-06,
|
|
"loss": 3.0138320922851562,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.5387045813586098,
|
|
"grad_norm": 8.267187675857448,
|
|
"learning_rate": 9.809867314014047e-06,
|
|
"loss": 2.831322193145752,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.5394944707740916,
|
|
"grad_norm": 7.636617838042198,
|
|
"learning_rate": 9.808610010694118e-06,
|
|
"loss": 3.239677906036377,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.5402843601895735,
|
|
"grad_norm": 15.391979519107375,
|
|
"learning_rate": 9.807348645044617e-06,
|
|
"loss": 2.747056245803833,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.5410742496050553,
|
|
"grad_norm": 8.961485412404242,
|
|
"learning_rate": 9.806083218131148e-06,
|
|
"loss": 2.910431385040283,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.5418641390205371,
|
|
"grad_norm": 14.197136726081288,
|
|
"learning_rate": 9.804813731022753e-06,
|
|
"loss": 3.1174066066741943,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.542654028436019,
|
|
"grad_norm": 17.168792272156345,
|
|
"learning_rate": 9.803540184791894e-06,
|
|
"loss": 3.201021671295166,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.5434439178515008,
|
|
"grad_norm": 16.249151769463698,
|
|
"learning_rate": 9.80226258051447e-06,
|
|
"loss": 3.176429510116577,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.5442338072669827,
|
|
"grad_norm": 13.891045745287295,
|
|
"learning_rate": 9.800980919269803e-06,
|
|
"loss": 2.7803795337677,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.5450236966824644,
|
|
"grad_norm": 10.877077331774473,
|
|
"learning_rate": 9.799695202140647e-06,
|
|
"loss": 2.882291555404663,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.5458135860979463,
|
|
"grad_norm": 9.592760171635328,
|
|
"learning_rate": 9.798405430213177e-06,
|
|
"loss": 2.8458828926086426,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.5466034755134281,
|
|
"grad_norm": 7.884425647446356,
|
|
"learning_rate": 9.797111604577e-06,
|
|
"loss": 2.5656301975250244,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.54739336492891,
|
|
"grad_norm": 10.716521460356288,
|
|
"learning_rate": 9.795813726325142e-06,
|
|
"loss": 2.761523485183716,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.5481832543443917,
|
|
"grad_norm": 11.360091917539375,
|
|
"learning_rate": 9.794511796554055e-06,
|
|
"loss": 2.7499184608459473,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.5489731437598736,
|
|
"grad_norm": 23.055753815263646,
|
|
"learning_rate": 9.793205816363616e-06,
|
|
"loss": 2.517162799835205,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.5497630331753555,
|
|
"grad_norm": 7.78885839561945,
|
|
"learning_rate": 9.791895786857118e-06,
|
|
"loss": 2.723165273666382,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.5505529225908373,
|
|
"grad_norm": 17.967883170098016,
|
|
"learning_rate": 9.79058170914128e-06,
|
|
"loss": 2.6987109184265137,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.5513428120063191,
|
|
"grad_norm": 11.709595647250415,
|
|
"learning_rate": 9.789263584326238e-06,
|
|
"loss": 2.288817882537842,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.5521327014218009,
|
|
"grad_norm": 7.605046056867549,
|
|
"learning_rate": 9.78794141352555e-06,
|
|
"loss": 2.7125701904296875,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.5529225908372828,
|
|
"grad_norm": 10.47514502255061,
|
|
"learning_rate": 9.786615197856188e-06,
|
|
"loss": 2.7359495162963867,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.5537124802527646,
|
|
"grad_norm": 14.91063318894891,
|
|
"learning_rate": 9.785284938438545e-06,
|
|
"loss": 3.3785290718078613,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.5545023696682464,
|
|
"grad_norm": 8.501053428871433,
|
|
"learning_rate": 9.783950636396429e-06,
|
|
"loss": 2.2032179832458496,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.5552922590837283,
|
|
"grad_norm": 8.382387828104997,
|
|
"learning_rate": 9.78261229285706e-06,
|
|
"loss": 2.5567541122436523,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.5560821484992101,
|
|
"grad_norm": 6.833003481377768,
|
|
"learning_rate": 9.781269908951079e-06,
|
|
"loss": 2.9519448280334473,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.556872037914692,
|
|
"grad_norm": 10.767129048941534,
|
|
"learning_rate": 9.779923485812534e-06,
|
|
"loss": 2.9880781173706055,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.5576619273301737,
|
|
"grad_norm": 17.977394830366933,
|
|
"learning_rate": 9.778573024578886e-06,
|
|
"loss": 2.571939706802368,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.5584518167456556,
|
|
"grad_norm": 8.839985291634292,
|
|
"learning_rate": 9.777218526391013e-06,
|
|
"loss": 2.6147358417510986,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.5592417061611374,
|
|
"grad_norm": 16.290139197222373,
|
|
"learning_rate": 9.775859992393198e-06,
|
|
"loss": 2.765791654586792,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.5600315955766193,
|
|
"grad_norm": 9.812939254834504,
|
|
"learning_rate": 9.774497423733134e-06,
|
|
"loss": 2.9214420318603516,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.5608214849921012,
|
|
"grad_norm": 10.32637589864547,
|
|
"learning_rate": 9.773130821561923e-06,
|
|
"loss": 2.793147563934326,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.5616113744075829,
|
|
"grad_norm": 8.09381965198076,
|
|
"learning_rate": 9.771760187034076e-06,
|
|
"loss": 3.287661552429199,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.5624012638230648,
|
|
"grad_norm": 35.57847572787897,
|
|
"learning_rate": 9.770385521307511e-06,
|
|
"loss": 2.639596462249756,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.5631911532385466,
|
|
"grad_norm": 10.298454720332643,
|
|
"learning_rate": 9.769006825543547e-06,
|
|
"loss": 2.0149660110473633,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.5639810426540285,
|
|
"grad_norm": 9.21174002933949,
|
|
"learning_rate": 9.767624100906915e-06,
|
|
"loss": 2.675302743911743,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.5647709320695102,
|
|
"grad_norm": 7.382352205544447,
|
|
"learning_rate": 9.766237348565741e-06,
|
|
"loss": 1.6485764980316162,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.5655608214849921,
|
|
"grad_norm": 10.607210159075297,
|
|
"learning_rate": 9.76484656969156e-06,
|
|
"loss": 2.4519925117492676,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.566350710900474,
|
|
"grad_norm": 10.50163998316087,
|
|
"learning_rate": 9.763451765459307e-06,
|
|
"loss": 2.3074722290039062,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.5671406003159558,
|
|
"grad_norm": 10.624503499088616,
|
|
"learning_rate": 9.762052937047318e-06,
|
|
"loss": 2.695051670074463,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.5679304897314376,
|
|
"grad_norm": 15.616632678838926,
|
|
"learning_rate": 9.760650085637322e-06,
|
|
"loss": 3.360673666000366,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.5687203791469194,
|
|
"grad_norm": 8.375766262844625,
|
|
"learning_rate": 9.75924321241446e-06,
|
|
"loss": 2.7661333084106445,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.5695102685624013,
|
|
"grad_norm": 16.894936615307905,
|
|
"learning_rate": 9.75783231856726e-06,
|
|
"loss": 2.6800551414489746,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.5703001579778831,
|
|
"grad_norm": 9.306984501448646,
|
|
"learning_rate": 9.756417405287649e-06,
|
|
"loss": 3.1795547008514404,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.5710900473933649,
|
|
"grad_norm": 9.638215383453232,
|
|
"learning_rate": 9.754998473770952e-06,
|
|
"loss": 3.0728039741516113,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.5718799368088467,
|
|
"grad_norm": 18.971875030373713,
|
|
"learning_rate": 9.753575525215885e-06,
|
|
"loss": 3.101027250289917,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.5726698262243286,
|
|
"grad_norm": 9.703129422537145,
|
|
"learning_rate": 9.752148560824562e-06,
|
|
"loss": 2.3094897270202637,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.5734597156398105,
|
|
"grad_norm": 15.734256139808767,
|
|
"learning_rate": 9.750717581802486e-06,
|
|
"loss": 2.910053253173828,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.5742496050552922,
|
|
"grad_norm": 16.302208461415216,
|
|
"learning_rate": 9.749282589358553e-06,
|
|
"loss": 3.1184496879577637,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.5750394944707741,
|
|
"grad_norm": 12.9735057677299,
|
|
"learning_rate": 9.747843584705047e-06,
|
|
"loss": 2.873502731323242,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.5758293838862559,
|
|
"grad_norm": 22.638733634704586,
|
|
"learning_rate": 9.746400569057648e-06,
|
|
"loss": 2.4251301288604736,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.5766192733017378,
|
|
"grad_norm": 7.163678590109422,
|
|
"learning_rate": 9.744953543635417e-06,
|
|
"loss": 2.799077033996582,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.5774091627172195,
|
|
"grad_norm": 15.90505006968332,
|
|
"learning_rate": 9.74350250966081e-06,
|
|
"loss": 3.6660590171813965,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.5781990521327014,
|
|
"grad_norm": 7.61368151252301,
|
|
"learning_rate": 9.742047468359661e-06,
|
|
"loss": 3.0964913368225098,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.5789889415481833,
|
|
"grad_norm": 7.331962193745934,
|
|
"learning_rate": 9.740588420961194e-06,
|
|
"loss": 2.7701282501220703,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.5797788309636651,
|
|
"grad_norm": 10.939975135494327,
|
|
"learning_rate": 9.739125368698019e-06,
|
|
"loss": 2.936520576477051,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.580568720379147,
|
|
"grad_norm": 11.923772537267181,
|
|
"learning_rate": 9.737658312806128e-06,
|
|
"loss": 2.714221477508545,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.5813586097946287,
|
|
"grad_norm": 8.343105964786538,
|
|
"learning_rate": 9.73618725452489e-06,
|
|
"loss": 2.6335134506225586,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.5821484992101106,
|
|
"grad_norm": 11.578813272698921,
|
|
"learning_rate": 9.734712195097068e-06,
|
|
"loss": 3.0548324584960938,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.5829383886255924,
|
|
"grad_norm": 8.178172803096258,
|
|
"learning_rate": 9.733233135768789e-06,
|
|
"loss": 2.455691337585449,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.5837282780410743,
|
|
"grad_norm": 8.208384722056353,
|
|
"learning_rate": 9.731750077789572e-06,
|
|
"loss": 2.85522198677063,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.584518167456556,
|
|
"grad_norm": 14.357432181582325,
|
|
"learning_rate": 9.730263022412307e-06,
|
|
"loss": 2.7620186805725098,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.5853080568720379,
|
|
"grad_norm": 19.304824374340818,
|
|
"learning_rate": 9.728771970893262e-06,
|
|
"loss": 2.078908920288086,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.5860979462875198,
|
|
"grad_norm": 8.634363379681329,
|
|
"learning_rate": 9.727276924492088e-06,
|
|
"loss": 2.5789947509765625,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.5868878357030016,
|
|
"grad_norm": 13.987495203502483,
|
|
"learning_rate": 9.725777884471798e-06,
|
|
"loss": 2.6600892543792725,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.5876777251184834,
|
|
"grad_norm": 21.907120600463696,
|
|
"learning_rate": 9.724274852098792e-06,
|
|
"loss": 3.122257709503174,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.5884676145339652,
|
|
"grad_norm": 12.86185834923555,
|
|
"learning_rate": 9.722767828642831e-06,
|
|
"loss": 2.9660885334014893,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.5892575039494471,
|
|
"grad_norm": 15.825928885727478,
|
|
"learning_rate": 9.721256815377059e-06,
|
|
"loss": 2.9355366230010986,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.590047393364929,
|
|
"grad_norm": 17.506752397926537,
|
|
"learning_rate": 9.719741813577982e-06,
|
|
"loss": 2.7380142211914062,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.5908372827804107,
|
|
"grad_norm": 13.905446136498991,
|
|
"learning_rate": 9.718222824525476e-06,
|
|
"loss": 2.624443292617798,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.5916271721958926,
|
|
"grad_norm": 10.950226102749792,
|
|
"learning_rate": 9.716699849502794e-06,
|
|
"loss": 3.353207588195801,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.5924170616113744,
|
|
"grad_norm": 10.01361583202192,
|
|
"learning_rate": 9.715172889796546e-06,
|
|
"loss": 2.4462380409240723,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.5932069510268563,
|
|
"grad_norm": 10.868695455875331,
|
|
"learning_rate": 9.713641946696713e-06,
|
|
"loss": 2.470088005065918,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.593996840442338,
|
|
"grad_norm": 12.24308594192275,
|
|
"learning_rate": 9.712107021496641e-06,
|
|
"loss": 2.746387004852295,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.5947867298578199,
|
|
"grad_norm": 10.293898753873405,
|
|
"learning_rate": 9.710568115493041e-06,
|
|
"loss": 2.893784284591675,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.5955766192733017,
|
|
"grad_norm": 8.791452259605602,
|
|
"learning_rate": 9.709025229985986e-06,
|
|
"loss": 3.845496654510498,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.5963665086887836,
|
|
"grad_norm": 7.952120961759487,
|
|
"learning_rate": 9.707478366278911e-06,
|
|
"loss": 3.286113739013672,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.5971563981042654,
|
|
"grad_norm": 11.052387334163443,
|
|
"learning_rate": 9.705927525678608e-06,
|
|
"loss": 2.54490327835083,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.5979462875197472,
|
|
"grad_norm": 28.59255694909707,
|
|
"learning_rate": 9.704372709495237e-06,
|
|
"loss": 2.303287982940674,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.5987361769352291,
|
|
"grad_norm": 12.717484973265515,
|
|
"learning_rate": 9.702813919042308e-06,
|
|
"loss": 2.5255141258239746,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.5995260663507109,
|
|
"grad_norm": 10.644626496040491,
|
|
"learning_rate": 9.701251155636696e-06,
|
|
"loss": 2.6174449920654297,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.6003159557661928,
|
|
"grad_norm": 10.420680692777719,
|
|
"learning_rate": 9.699684420598622e-06,
|
|
"loss": 3.0751430988311768,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.6011058451816745,
|
|
"grad_norm": 17.7407887464059,
|
|
"learning_rate": 9.698113715251678e-06,
|
|
"loss": 3.1690831184387207,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.6018957345971564,
|
|
"grad_norm": 6.8133884021840165,
|
|
"learning_rate": 9.696539040922794e-06,
|
|
"loss": 2.992917060852051,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.6026856240126383,
|
|
"grad_norm": 14.288096164471353,
|
|
"learning_rate": 9.694960398942264e-06,
|
|
"loss": 2.862287998199463,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.6034755134281201,
|
|
"grad_norm": 8.475183410580591,
|
|
"learning_rate": 9.693377790643728e-06,
|
|
"loss": 2.5695481300354004,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.6042654028436019,
|
|
"grad_norm": 12.006246518434427,
|
|
"learning_rate": 9.69179121736418e-06,
|
|
"loss": 2.456130266189575,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.6050552922590837,
|
|
"grad_norm": 10.187805065838019,
|
|
"learning_rate": 9.69020068044396e-06,
|
|
"loss": 2.798001766204834,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.6058451816745656,
|
|
"grad_norm": 12.030587021942077,
|
|
"learning_rate": 9.68860618122676e-06,
|
|
"loss": 3.1254353523254395,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.6066350710900474,
|
|
"grad_norm": 8.960697365970546,
|
|
"learning_rate": 9.68700772105962e-06,
|
|
"loss": 2.784362316131592,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.6074249605055292,
|
|
"grad_norm": 12.104781783849731,
|
|
"learning_rate": 9.685405301292924e-06,
|
|
"loss": 2.487422466278076,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.608214849921011,
|
|
"grad_norm": 11.735082585197036,
|
|
"learning_rate": 9.683798923280398e-06,
|
|
"loss": 2.749908208847046,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.6090047393364929,
|
|
"grad_norm": 8.39933378207941,
|
|
"learning_rate": 9.68218858837912e-06,
|
|
"loss": 2.5542333126068115,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.6097946287519748,
|
|
"grad_norm": 13.203001273703372,
|
|
"learning_rate": 9.680574297949503e-06,
|
|
"loss": 2.381009578704834,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.6105845181674565,
|
|
"grad_norm": 10.227737175443485,
|
|
"learning_rate": 9.678956053355306e-06,
|
|
"loss": 2.797962188720703,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.6113744075829384,
|
|
"grad_norm": 19.644909570407606,
|
|
"learning_rate": 9.677333855963627e-06,
|
|
"loss": 2.3713326454162598,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.6121642969984202,
|
|
"grad_norm": 10.802978377889708,
|
|
"learning_rate": 9.675707707144906e-06,
|
|
"loss": 3.7946083545684814,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.6129541864139021,
|
|
"grad_norm": 9.800404823560639,
|
|
"learning_rate": 9.674077608272916e-06,
|
|
"loss": 2.2313640117645264,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.6137440758293838,
|
|
"grad_norm": 10.197415931944706,
|
|
"learning_rate": 9.67244356072477e-06,
|
|
"loss": 2.564563035964966,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.6145339652448657,
|
|
"grad_norm": 13.443842890508776,
|
|
"learning_rate": 9.670805565880919e-06,
|
|
"loss": 2.851914405822754,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.6153238546603476,
|
|
"grad_norm": 19.85673680273832,
|
|
"learning_rate": 9.669163625125143e-06,
|
|
"loss": 2.2651009559631348,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.6161137440758294,
|
|
"grad_norm": 12.884481490463953,
|
|
"learning_rate": 9.667517739844563e-06,
|
|
"loss": 2.5570054054260254,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.6169036334913112,
|
|
"grad_norm": 17.453234670736126,
|
|
"learning_rate": 9.665867911429625e-06,
|
|
"loss": 2.5283799171447754,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.617693522906793,
|
|
"grad_norm": 18.936216590713382,
|
|
"learning_rate": 9.664214141274111e-06,
|
|
"loss": 3.2821831703186035,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.6184834123222749,
|
|
"grad_norm": 12.29731371387329,
|
|
"learning_rate": 9.662556430775132e-06,
|
|
"loss": 3.238713264465332,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.6192733017377567,
|
|
"grad_norm": 13.021393099946007,
|
|
"learning_rate": 9.660894781333126e-06,
|
|
"loss": 2.938838005065918,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.6200631911532386,
|
|
"grad_norm": 6.931741915013856,
|
|
"learning_rate": 9.65922919435186e-06,
|
|
"loss": 2.7250375747680664,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.6208530805687204,
|
|
"grad_norm": 12.605958216464781,
|
|
"learning_rate": 9.657559671238428e-06,
|
|
"loss": 2.9273529052734375,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.6216429699842022,
|
|
"grad_norm": 24.241779337105008,
|
|
"learning_rate": 9.65588621340325e-06,
|
|
"loss": 2.6192431449890137,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.6224328593996841,
|
|
"grad_norm": 15.774873829312497,
|
|
"learning_rate": 9.654208822260064e-06,
|
|
"loss": 2.6683297157287598,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.6232227488151659,
|
|
"grad_norm": 13.55562535680502,
|
|
"learning_rate": 9.65252749922594e-06,
|
|
"loss": 3.453798294067383,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.6240126382306477,
|
|
"grad_norm": 10.437339665014067,
|
|
"learning_rate": 9.650842245721265e-06,
|
|
"loss": 2.660048007965088,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.6248025276461295,
|
|
"grad_norm": 7.373849740564372,
|
|
"learning_rate": 9.649153063169747e-06,
|
|
"loss": 3.181802272796631,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.6255924170616114,
|
|
"grad_norm": 10.97034956730053,
|
|
"learning_rate": 9.647459952998409e-06,
|
|
"loss": 2.794236183166504,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.6263823064770933,
|
|
"grad_norm": 13.289064458186381,
|
|
"learning_rate": 9.6457629166376e-06,
|
|
"loss": 2.934234142303467,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.627172195892575,
|
|
"grad_norm": 6.66173575936569,
|
|
"learning_rate": 9.644061955520981e-06,
|
|
"loss": 2.53916072845459,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.6279620853080569,
|
|
"grad_norm": 8.21096858166868,
|
|
"learning_rate": 9.642357071085527e-06,
|
|
"loss": 3.4347705841064453,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.6287519747235387,
|
|
"grad_norm": 10.267840042109265,
|
|
"learning_rate": 9.640648264771532e-06,
|
|
"loss": 2.589984655380249,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.6295418641390206,
|
|
"grad_norm": 23.740776784719007,
|
|
"learning_rate": 9.638935538022605e-06,
|
|
"loss": 2.2766027450561523,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.6303317535545023,
|
|
"grad_norm": 10.175389822962396,
|
|
"learning_rate": 9.637218892285656e-06,
|
|
"loss": 2.140416383743286,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.6311216429699842,
|
|
"grad_norm": 10.006330545127017,
|
|
"learning_rate": 9.635498329010918e-06,
|
|
"loss": 2.6404151916503906,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.631911532385466,
|
|
"grad_norm": 15.860393911321083,
|
|
"learning_rate": 9.633773849651926e-06,
|
|
"loss": 3.4515304565429688,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.6327014218009479,
|
|
"grad_norm": 10.447911887203587,
|
|
"learning_rate": 9.632045455665528e-06,
|
|
"loss": 2.6439762115478516,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.6334913112164297,
|
|
"grad_norm": 9.938315980267259,
|
|
"learning_rate": 9.630313148511876e-06,
|
|
"loss": 2.633496046066284,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.6342812006319115,
|
|
"grad_norm": 14.127497750541355,
|
|
"learning_rate": 9.628576929654427e-06,
|
|
"loss": 3.0772128105163574,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.6350710900473934,
|
|
"grad_norm": 9.475057560148757,
|
|
"learning_rate": 9.626836800559948e-06,
|
|
"loss": 2.8075199127197266,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.6358609794628752,
|
|
"grad_norm": 15.2657530938932,
|
|
"learning_rate": 9.625092762698502e-06,
|
|
"loss": 2.2376973628997803,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.636650868878357,
|
|
"grad_norm": 13.5038856684049,
|
|
"learning_rate": 9.623344817543462e-06,
|
|
"loss": 3.09859037399292,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.6374407582938388,
|
|
"grad_norm": 15.533786385781745,
|
|
"learning_rate": 9.621592966571493e-06,
|
|
"loss": 2.4869344234466553,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.6382306477093207,
|
|
"grad_norm": 13.72530582074668,
|
|
"learning_rate": 9.619837211262569e-06,
|
|
"loss": 3.0674853324890137,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.6390205371248026,
|
|
"grad_norm": 23.557899308984517,
|
|
"learning_rate": 9.618077553099954e-06,
|
|
"loss": 3.3668880462646484,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.6398104265402843,
|
|
"grad_norm": 18.763852919675887,
|
|
"learning_rate": 9.616313993570215e-06,
|
|
"loss": 2.933554172515869,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.6406003159557662,
|
|
"grad_norm": 8.365194309767189,
|
|
"learning_rate": 9.614546534163214e-06,
|
|
"loss": 2.367485523223877,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.641390205371248,
|
|
"grad_norm": 8.781343171527238,
|
|
"learning_rate": 9.612775176372104e-06,
|
|
"loss": 2.1504476070404053,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.6421800947867299,
|
|
"grad_norm": 14.453301784955004,
|
|
"learning_rate": 9.610999921693335e-06,
|
|
"loss": 3.482938766479492,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.6429699842022117,
|
|
"grad_norm": 11.750899386703743,
|
|
"learning_rate": 9.60922077162665e-06,
|
|
"loss": 2.383328437805176,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.6437598736176935,
|
|
"grad_norm": 9.466726112052974,
|
|
"learning_rate": 9.607437727675077e-06,
|
|
"loss": 2.781550884246826,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.6445497630331753,
|
|
"grad_norm": 12.278841985932612,
|
|
"learning_rate": 9.60565079134494e-06,
|
|
"loss": 2.6321635246276855,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.6453396524486572,
|
|
"grad_norm": 16.454023020237027,
|
|
"learning_rate": 9.60385996414585e-06,
|
|
"loss": 3.094892978668213,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.6461295418641391,
|
|
"grad_norm": 7.492293232454839,
|
|
"learning_rate": 9.6020652475907e-06,
|
|
"loss": 2.353990077972412,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.6469194312796208,
|
|
"grad_norm": 7.938504133286718,
|
|
"learning_rate": 9.600266643195675e-06,
|
|
"loss": 2.719548225402832,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.6477093206951027,
|
|
"grad_norm": 34.78257617710777,
|
|
"learning_rate": 9.598464152480241e-06,
|
|
"loss": 2.486771821975708,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.6484992101105845,
|
|
"grad_norm": 7.695149398961293,
|
|
"learning_rate": 9.596657776967149e-06,
|
|
"loss": 2.359746217727661,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.6492890995260664,
|
|
"grad_norm": 10.381251787843585,
|
|
"learning_rate": 9.594847518182428e-06,
|
|
"loss": 2.8774003982543945,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.6500789889415481,
|
|
"grad_norm": 21.800739755225305,
|
|
"learning_rate": 9.593033377655396e-06,
|
|
"loss": 2.1589415073394775,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.65086887835703,
|
|
"grad_norm": 16.66199847491689,
|
|
"learning_rate": 9.59121535691864e-06,
|
|
"loss": 3.30254864692688,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.6516587677725119,
|
|
"grad_norm": 10.566975533918288,
|
|
"learning_rate": 9.589393457508032e-06,
|
|
"loss": 2.679553508758545,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.6524486571879937,
|
|
"grad_norm": 19.121309989507864,
|
|
"learning_rate": 9.587567680962716e-06,
|
|
"loss": 3.172027111053467,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.6532385466034755,
|
|
"grad_norm": 11.16230347524207,
|
|
"learning_rate": 9.58573802882512e-06,
|
|
"loss": 2.9337282180786133,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.6540284360189573,
|
|
"grad_norm": 7.133910740967563,
|
|
"learning_rate": 9.583904502640936e-06,
|
|
"loss": 2.826122283935547,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.6548183254344392,
|
|
"grad_norm": 14.35666338571649,
|
|
"learning_rate": 9.582067103959131e-06,
|
|
"loss": 3.0313868522644043,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.655608214849921,
|
|
"grad_norm": 11.532262508822264,
|
|
"learning_rate": 9.58022583433195e-06,
|
|
"loss": 2.7884521484375,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.6563981042654028,
|
|
"grad_norm": 6.874943848075909,
|
|
"learning_rate": 9.5783806953149e-06,
|
|
"loss": 2.96806001663208,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.6571879936808847,
|
|
"grad_norm": 12.146810880091056,
|
|
"learning_rate": 9.576531688466762e-06,
|
|
"loss": 2.976937770843506,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.6579778830963665,
|
|
"grad_norm": 7.921587511162627,
|
|
"learning_rate": 9.574678815349585e-06,
|
|
"loss": 2.6038804054260254,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.6587677725118484,
|
|
"grad_norm": 17.969038197005215,
|
|
"learning_rate": 9.572822077528678e-06,
|
|
"loss": 3.1494526863098145,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.6595576619273301,
|
|
"grad_norm": 8.31536269495529,
|
|
"learning_rate": 9.570961476572624e-06,
|
|
"loss": 2.9516241550445557,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.660347551342812,
|
|
"grad_norm": 6.4993286688590715,
|
|
"learning_rate": 9.56909701405326e-06,
|
|
"loss": 2.8607451915740967,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.6611374407582938,
|
|
"grad_norm": 7.552272245609234,
|
|
"learning_rate": 9.567228691545696e-06,
|
|
"loss": 2.5649495124816895,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.6619273301737757,
|
|
"grad_norm": 14.48881948712848,
|
|
"learning_rate": 9.565356510628291e-06,
|
|
"loss": 2.513335943222046,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.6627172195892576,
|
|
"grad_norm": 9.081903072300518,
|
|
"learning_rate": 9.563480472882673e-06,
|
|
"loss": 2.9949398040771484,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.6635071090047393,
|
|
"grad_norm": 12.074800830284559,
|
|
"learning_rate": 9.561600579893723e-06,
|
|
"loss": 2.6771364212036133,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.6642969984202212,
|
|
"grad_norm": 11.321384919112033,
|
|
"learning_rate": 9.559716833249583e-06,
|
|
"loss": 2.8205018043518066,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.665086887835703,
|
|
"grad_norm": 9.775240458791433,
|
|
"learning_rate": 9.557829234541647e-06,
|
|
"loss": 2.9774630069732666,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.6658767772511849,
|
|
"grad_norm": 11.521035463074744,
|
|
"learning_rate": 9.555937785364563e-06,
|
|
"loss": 2.579075574874878,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 9.432485493960403,
|
|
"learning_rate": 9.554042487316237e-06,
|
|
"loss": 2.726024627685547,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.6674565560821485,
|
|
"grad_norm": 14.134144139013555,
|
|
"learning_rate": 9.552143341997822e-06,
|
|
"loss": 2.8715529441833496,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.6682464454976303,
|
|
"grad_norm": 8.902377494487911,
|
|
"learning_rate": 9.55024035101372e-06,
|
|
"loss": 2.533745527267456,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.6690363349131122,
|
|
"grad_norm": 12.832395817425043,
|
|
"learning_rate": 9.548333515971587e-06,
|
|
"loss": 2.761075496673584,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.669826224328594,
|
|
"grad_norm": 10.20576917282644,
|
|
"learning_rate": 9.546422838482322e-06,
|
|
"loss": 2.5824503898620605,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.6706161137440758,
|
|
"grad_norm": 6.880173889228289,
|
|
"learning_rate": 9.54450832016007e-06,
|
|
"loss": 2.5947561264038086,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.6714060031595577,
|
|
"grad_norm": 14.836403016663432,
|
|
"learning_rate": 9.542589962622225e-06,
|
|
"loss": 2.1935033798217773,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.6721958925750395,
|
|
"grad_norm": 6.943720267173772,
|
|
"learning_rate": 9.540667767489421e-06,
|
|
"loss": 2.6050100326538086,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.6729857819905213,
|
|
"grad_norm": 15.12169878975551,
|
|
"learning_rate": 9.538741736385534e-06,
|
|
"loss": 3.6529133319854736,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.6737756714060031,
|
|
"grad_norm": 5.867087588241686,
|
|
"learning_rate": 9.536811870937684e-06,
|
|
"loss": 2.063253164291382,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.674565560821485,
|
|
"grad_norm": 7.798417844400532,
|
|
"learning_rate": 9.534878172776224e-06,
|
|
"loss": 2.7908072471618652,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.6753554502369669,
|
|
"grad_norm": 11.818473782559336,
|
|
"learning_rate": 9.532940643534751e-06,
|
|
"loss": 2.4319844245910645,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.6761453396524486,
|
|
"grad_norm": 12.80740078253414,
|
|
"learning_rate": 9.530999284850095e-06,
|
|
"loss": 3.1545660495758057,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.6769352290679305,
|
|
"grad_norm": 9.292936008984638,
|
|
"learning_rate": 9.529054098362322e-06,
|
|
"loss": 2.947558641433716,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.6777251184834123,
|
|
"grad_norm": 16.62193321855355,
|
|
"learning_rate": 9.527105085714734e-06,
|
|
"loss": 2.610852003097534,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.6785150078988942,
|
|
"grad_norm": 14.554767788526535,
|
|
"learning_rate": 9.525152248553862e-06,
|
|
"loss": 2.979235887527466,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.6793048973143759,
|
|
"grad_norm": 11.910814876723402,
|
|
"learning_rate": 9.523195588529468e-06,
|
|
"loss": 2.6078577041625977,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.6800947867298578,
|
|
"grad_norm": 8.286047769780788,
|
|
"learning_rate": 9.521235107294548e-06,
|
|
"loss": 2.068547010421753,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.6808846761453397,
|
|
"grad_norm": 10.506290416192853,
|
|
"learning_rate": 9.51927080650532e-06,
|
|
"loss": 2.794530153274536,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.6816745655608215,
|
|
"grad_norm": 11.391589488737578,
|
|
"learning_rate": 9.517302687821231e-06,
|
|
"loss": 2.5470008850097656,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.6824644549763034,
|
|
"grad_norm": 7.668217055524585,
|
|
"learning_rate": 9.515330752904956e-06,
|
|
"loss": 2.6968884468078613,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.6832543443917851,
|
|
"grad_norm": 8.551258441901858,
|
|
"learning_rate": 9.513355003422396e-06,
|
|
"loss": 2.8228256702423096,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.684044233807267,
|
|
"grad_norm": 11.671477572882841,
|
|
"learning_rate": 9.511375441042663e-06,
|
|
"loss": 3.5630812644958496,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.6848341232227488,
|
|
"grad_norm": 15.900567407044479,
|
|
"learning_rate": 9.5093920674381e-06,
|
|
"loss": 2.9535064697265625,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.6856240126382307,
|
|
"grad_norm": 8.643390525668298,
|
|
"learning_rate": 9.507404884284273e-06,
|
|
"loss": 2.741084337234497,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.6864139020537124,
|
|
"grad_norm": 10.692778433484234,
|
|
"learning_rate": 9.505413893259956e-06,
|
|
"loss": 3.3046531677246094,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.6872037914691943,
|
|
"grad_norm": 16.535562300261393,
|
|
"learning_rate": 9.503419096047144e-06,
|
|
"loss": 3.2300820350646973,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.6879936808846762,
|
|
"grad_norm": 24.170107530956294,
|
|
"learning_rate": 9.501420494331052e-06,
|
|
"loss": 2.438554048538208,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.688783570300158,
|
|
"grad_norm": 8.690027829520277,
|
|
"learning_rate": 9.499418089800102e-06,
|
|
"loss": 2.4033608436584473,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.6895734597156398,
|
|
"grad_norm": 9.282954177762111,
|
|
"learning_rate": 9.497411884145933e-06,
|
|
"loss": 2.7961714267730713,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.6903633491311216,
|
|
"grad_norm": 8.534548336142196,
|
|
"learning_rate": 9.495401879063395e-06,
|
|
"loss": 2.739697217941284,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.6911532385466035,
|
|
"grad_norm": 14.710104471833855,
|
|
"learning_rate": 9.493388076250546e-06,
|
|
"loss": 3.3953442573547363,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.6919431279620853,
|
|
"grad_norm": 10.793976678843643,
|
|
"learning_rate": 9.491370477408655e-06,
|
|
"loss": 3.082679271697998,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.6927330173775671,
|
|
"grad_norm": 11.424816104504234,
|
|
"learning_rate": 9.489349084242192e-06,
|
|
"loss": 2.755612850189209,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.693522906793049,
|
|
"grad_norm": 7.915901709561951,
|
|
"learning_rate": 9.487323898458841e-06,
|
|
"loss": 2.766568660736084,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.6943127962085308,
|
|
"grad_norm": 17.848385230595895,
|
|
"learning_rate": 9.485294921769484e-06,
|
|
"loss": 2.5398964881896973,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.6951026856240127,
|
|
"grad_norm": 29.674496967139927,
|
|
"learning_rate": 9.483262155888207e-06,
|
|
"loss": 3.093564987182617,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.6958925750394944,
|
|
"grad_norm": 14.90079966406828,
|
|
"learning_rate": 9.481225602532296e-06,
|
|
"loss": 2.9575257301330566,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.6966824644549763,
|
|
"grad_norm": 26.431365807353334,
|
|
"learning_rate": 9.47918526342224e-06,
|
|
"loss": 3.3621833324432373,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.6974723538704581,
|
|
"grad_norm": 18.17594227056942,
|
|
"learning_rate": 9.477141140281724e-06,
|
|
"loss": 2.7552647590637207,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.69826224328594,
|
|
"grad_norm": 9.17468592273275,
|
|
"learning_rate": 9.475093234837629e-06,
|
|
"loss": 2.6432392597198486,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.6990521327014217,
|
|
"grad_norm": 11.724571460894934,
|
|
"learning_rate": 9.473041548820034e-06,
|
|
"loss": 2.863342523574829,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.6998420221169036,
|
|
"grad_norm": 13.647582206639745,
|
|
"learning_rate": 9.470986083962208e-06,
|
|
"loss": 3.1229562759399414,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.7006319115323855,
|
|
"grad_norm": 10.904081018729473,
|
|
"learning_rate": 9.468926842000614e-06,
|
|
"loss": 2.8623602390289307,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.7014218009478673,
|
|
"grad_norm": 14.77540518637624,
|
|
"learning_rate": 9.46686382467491e-06,
|
|
"loss": 2.9971213340759277,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.7022116903633492,
|
|
"grad_norm": 17.160481522672626,
|
|
"learning_rate": 9.464797033727937e-06,
|
|
"loss": 2.8732876777648926,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.7030015797788309,
|
|
"grad_norm": 7.662847037910413,
|
|
"learning_rate": 9.462726470905727e-06,
|
|
"loss": 2.4884605407714844,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.7037914691943128,
|
|
"grad_norm": 15.496194298757855,
|
|
"learning_rate": 9.460652137957497e-06,
|
|
"loss": 2.5895493030548096,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.7045813586097947,
|
|
"grad_norm": 9.210243074506902,
|
|
"learning_rate": 9.458574036635656e-06,
|
|
"loss": 3.070889472961426,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.7053712480252765,
|
|
"grad_norm": 13.22477817543685,
|
|
"learning_rate": 9.456492168695783e-06,
|
|
"loss": 2.6131277084350586,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.7061611374407583,
|
|
"grad_norm": 10.736756209485726,
|
|
"learning_rate": 9.454406535896653e-06,
|
|
"loss": 2.7342894077301025,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.7069510268562401,
|
|
"grad_norm": 15.649909827229424,
|
|
"learning_rate": 9.452317140000213e-06,
|
|
"loss": 2.709885835647583,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.707740916271722,
|
|
"grad_norm": 17.03495887885535,
|
|
"learning_rate": 9.45022398277159e-06,
|
|
"loss": 2.229793071746826,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.7085308056872038,
|
|
"grad_norm": 18.286957303019204,
|
|
"learning_rate": 9.448127065979093e-06,
|
|
"loss": 2.3719115257263184,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.7093206951026856,
|
|
"grad_norm": 9.674060014502675,
|
|
"learning_rate": 9.446026391394203e-06,
|
|
"loss": 3.1232872009277344,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.7101105845181674,
|
|
"grad_norm": 8.294415989977118,
|
|
"learning_rate": 9.443921960791578e-06,
|
|
"loss": 2.2887797355651855,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.7109004739336493,
|
|
"grad_norm": 10.7843258557463,
|
|
"learning_rate": 9.441813775949045e-06,
|
|
"loss": 2.947249174118042,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.7116903633491312,
|
|
"grad_norm": 55.81560616750336,
|
|
"learning_rate": 9.439701838647607e-06,
|
|
"loss": 2.6564688682556152,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.7124802527646129,
|
|
"grad_norm": 15.475637484457012,
|
|
"learning_rate": 9.437586150671438e-06,
|
|
"loss": 3.2652010917663574,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.7132701421800948,
|
|
"grad_norm": 12.6346062418189,
|
|
"learning_rate": 9.435466713807875e-06,
|
|
"loss": 3.212409257888794,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.7140600315955766,
|
|
"grad_norm": 12.653119224973862,
|
|
"learning_rate": 9.433343529847426e-06,
|
|
"loss": 2.8347318172454834,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.7148499210110585,
|
|
"grad_norm": 8.174222426024595,
|
|
"learning_rate": 9.431216600583764e-06,
|
|
"loss": 2.7938289642333984,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.7156398104265402,
|
|
"grad_norm": 10.498223049643716,
|
|
"learning_rate": 9.429085927813725e-06,
|
|
"loss": 2.8059895038604736,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.7164296998420221,
|
|
"grad_norm": 11.610537771337127,
|
|
"learning_rate": 9.42695151333731e-06,
|
|
"loss": 2.670276403427124,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.717219589257504,
|
|
"grad_norm": 15.926885320452712,
|
|
"learning_rate": 9.424813358957678e-06,
|
|
"loss": 2.8029661178588867,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.7180094786729858,
|
|
"grad_norm": 12.370001760911942,
|
|
"learning_rate": 9.42267146648115e-06,
|
|
"loss": 2.344736099243164,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.7187993680884676,
|
|
"grad_norm": 15.272394441293677,
|
|
"learning_rate": 9.420525837717205e-06,
|
|
"loss": 1.8855293989181519,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.7195892575039494,
|
|
"grad_norm": 10.81989245618176,
|
|
"learning_rate": 9.418376474478474e-06,
|
|
"loss": 2.810041666030884,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.7203791469194313,
|
|
"grad_norm": 17.67649470148584,
|
|
"learning_rate": 9.416223378580747e-06,
|
|
"loss": 2.526409864425659,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.7211690363349131,
|
|
"grad_norm": 10.055598633901095,
|
|
"learning_rate": 9.414066551842969e-06,
|
|
"loss": 2.868654489517212,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.721958925750395,
|
|
"grad_norm": 9.513800374151746,
|
|
"learning_rate": 9.41190599608723e-06,
|
|
"loss": 2.928063154220581,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.7227488151658767,
|
|
"grad_norm": 13.871125687274514,
|
|
"learning_rate": 9.40974171313878e-06,
|
|
"loss": 2.4988300800323486,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.7235387045813586,
|
|
"grad_norm": 11.896211423240262,
|
|
"learning_rate": 9.407573704826008e-06,
|
|
"loss": 1.8240364789962769,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.7243285939968405,
|
|
"grad_norm": 9.077450079878284,
|
|
"learning_rate": 9.405401972980457e-06,
|
|
"loss": 2.8183727264404297,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.7251184834123223,
|
|
"grad_norm": 13.826245036289404,
|
|
"learning_rate": 9.40322651943681e-06,
|
|
"loss": 2.8091042041778564,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.7259083728278041,
|
|
"grad_norm": 7.372371185613573,
|
|
"learning_rate": 9.4010473460329e-06,
|
|
"loss": 2.093374252319336,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.7266982622432859,
|
|
"grad_norm": 8.058209243668498,
|
|
"learning_rate": 9.398864454609702e-06,
|
|
"loss": 2.278440475463867,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.7274881516587678,
|
|
"grad_norm": 12.147448734615645,
|
|
"learning_rate": 9.396677847011326e-06,
|
|
"loss": 2.403252601623535,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.7282780410742496,
|
|
"grad_norm": 8.711345300212125,
|
|
"learning_rate": 9.394487525085027e-06,
|
|
"loss": 2.735347270965576,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.7290679304897314,
|
|
"grad_norm": 8.80572208933033,
|
|
"learning_rate": 9.392293490681195e-06,
|
|
"loss": 2.715076446533203,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.7298578199052133,
|
|
"grad_norm": 7.247758343473638,
|
|
"learning_rate": 9.390095745653359e-06,
|
|
"loss": 2.7396597862243652,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.7306477093206951,
|
|
"grad_norm": 7.245885716473276,
|
|
"learning_rate": 9.38789429185818e-06,
|
|
"loss": 2.7173843383789062,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.731437598736177,
|
|
"grad_norm": 19.0606393622214,
|
|
"learning_rate": 9.385689131155456e-06,
|
|
"loss": 3.2145304679870605,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.7322274881516587,
|
|
"grad_norm": 14.229401707613647,
|
|
"learning_rate": 9.383480265408109e-06,
|
|
"loss": 2.976992130279541,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.7330173775671406,
|
|
"grad_norm": 11.037803882230573,
|
|
"learning_rate": 9.3812676964822e-06,
|
|
"loss": 3.361060619354248,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.7338072669826224,
|
|
"grad_norm": 23.852840951499623,
|
|
"learning_rate": 9.379051426246914e-06,
|
|
"loss": 3.223222255706787,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.7345971563981043,
|
|
"grad_norm": 14.223529634226185,
|
|
"learning_rate": 9.376831456574561e-06,
|
|
"loss": 2.8687520027160645,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.735387045813586,
|
|
"grad_norm": 7.156756071444025,
|
|
"learning_rate": 9.374607789340584e-06,
|
|
"loss": 2.873199701309204,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.7361769352290679,
|
|
"grad_norm": 7.616209133800571,
|
|
"learning_rate": 9.37238042642354e-06,
|
|
"loss": 3.0081300735473633,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.7369668246445498,
|
|
"grad_norm": 7.149090801992643,
|
|
"learning_rate": 9.370149369705112e-06,
|
|
"loss": 2.981412887573242,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.7377567140600316,
|
|
"grad_norm": 15.58505127158866,
|
|
"learning_rate": 9.367914621070107e-06,
|
|
"loss": 2.8132896423339844,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.7385466034755134,
|
|
"grad_norm": 13.465860481670774,
|
|
"learning_rate": 9.365676182406446e-06,
|
|
"loss": 3.4976980686187744,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.7393364928909952,
|
|
"grad_norm": 11.899120310195212,
|
|
"learning_rate": 9.36343405560517e-06,
|
|
"loss": 2.8405492305755615,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.7401263823064771,
|
|
"grad_norm": 11.739242983649948,
|
|
"learning_rate": 9.361188242560436e-06,
|
|
"loss": 2.7775559425354004,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.740916271721959,
|
|
"grad_norm": 10.611580913268543,
|
|
"learning_rate": 9.358938745169512e-06,
|
|
"loss": 2.7165842056274414,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.7417061611374408,
|
|
"grad_norm": 16.286711299762892,
|
|
"learning_rate": 9.356685565332783e-06,
|
|
"loss": 2.8377950191497803,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.7424960505529226,
|
|
"grad_norm": 10.06467306357769,
|
|
"learning_rate": 9.354428704953743e-06,
|
|
"loss": 2.605860471725464,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.7432859399684044,
|
|
"grad_norm": 9.106574090331993,
|
|
"learning_rate": 9.352168165938992e-06,
|
|
"loss": 3.039595127105713,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.7440758293838863,
|
|
"grad_norm": 13.910557878586525,
|
|
"learning_rate": 9.349903950198243e-06,
|
|
"loss": 3.1908493041992188,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.7448657187993681,
|
|
"grad_norm": 15.785181835421662,
|
|
"learning_rate": 9.347636059644313e-06,
|
|
"loss": 3.6178295612335205,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.7456556082148499,
|
|
"grad_norm": 13.627805304388191,
|
|
"learning_rate": 9.345364496193124e-06,
|
|
"loss": 2.30802059173584,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.7464454976303317,
|
|
"grad_norm": 13.113283530630058,
|
|
"learning_rate": 9.343089261763698e-06,
|
|
"loss": 2.7968385219573975,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.7472353870458136,
|
|
"grad_norm": 54.4044476972888,
|
|
"learning_rate": 9.340810358278163e-06,
|
|
"loss": 2.887650728225708,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.7480252764612955,
|
|
"grad_norm": 12.476106430670102,
|
|
"learning_rate": 9.338527787661743e-06,
|
|
"loss": 2.8030970096588135,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.7488151658767772,
|
|
"grad_norm": 11.33742883881337,
|
|
"learning_rate": 9.336241551842759e-06,
|
|
"loss": 2.91349196434021,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.7496050552922591,
|
|
"grad_norm": 10.310783405801457,
|
|
"learning_rate": 9.333951652752636e-06,
|
|
"loss": 2.592141628265381,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.7503949447077409,
|
|
"grad_norm": 19.674293659067043,
|
|
"learning_rate": 9.331658092325884e-06,
|
|
"loss": 3.334771156311035,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.7511848341232228,
|
|
"grad_norm": 9.403755720421595,
|
|
"learning_rate": 9.32936087250011e-06,
|
|
"loss": 2.669703960418701,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.7519747235387045,
|
|
"grad_norm": 7.772849025991002,
|
|
"learning_rate": 9.327059995216017e-06,
|
|
"loss": 2.772550344467163,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.7527646129541864,
|
|
"grad_norm": 12.063324109253381,
|
|
"learning_rate": 9.32475546241739e-06,
|
|
"loss": 3.1131880283355713,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.7535545023696683,
|
|
"grad_norm": 8.755986042063324,
|
|
"learning_rate": 9.322447276051106e-06,
|
|
"loss": 2.6408510208129883,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.7543443917851501,
|
|
"grad_norm": 16.519428236399992,
|
|
"learning_rate": 9.32013543806713e-06,
|
|
"loss": 2.889667510986328,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.7551342812006319,
|
|
"grad_norm": 10.18189972859802,
|
|
"learning_rate": 9.31781995041851e-06,
|
|
"loss": 1.998913049697876,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.7559241706161137,
|
|
"grad_norm": 12.945059813106072,
|
|
"learning_rate": 9.315500815061378e-06,
|
|
"loss": 2.572543144226074,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.7567140600315956,
|
|
"grad_norm": 7.760044557288449,
|
|
"learning_rate": 9.313178033954946e-06,
|
|
"loss": 2.8043367862701416,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.7575039494470774,
|
|
"grad_norm": 12.169397261499922,
|
|
"learning_rate": 9.310851609061507e-06,
|
|
"loss": 2.6561851501464844,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.7582938388625592,
|
|
"grad_norm": 8.032984175481454,
|
|
"learning_rate": 9.308521542346434e-06,
|
|
"loss": 3.0927743911743164,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.759083728278041,
|
|
"grad_norm": 11.370911942807972,
|
|
"learning_rate": 9.306187835778173e-06,
|
|
"loss": 3.112912178039551,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.7598736176935229,
|
|
"grad_norm": 30.68759001827329,
|
|
"learning_rate": 9.30385049132825e-06,
|
|
"loss": 2.32753324508667,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.7606635071090048,
|
|
"grad_norm": 19.20736554439743,
|
|
"learning_rate": 9.301509510971259e-06,
|
|
"loss": 2.4722962379455566,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.7614533965244866,
|
|
"grad_norm": 7.896910852894032,
|
|
"learning_rate": 9.299164896684867e-06,
|
|
"loss": 2.8172154426574707,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.7622432859399684,
|
|
"grad_norm": 8.285847211519757,
|
|
"learning_rate": 9.296816650449813e-06,
|
|
"loss": 3.217062473297119,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.7630331753554502,
|
|
"grad_norm": 14.348208287781882,
|
|
"learning_rate": 9.294464774249905e-06,
|
|
"loss": 3.099119186401367,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.7638230647709321,
|
|
"grad_norm": 17.781084869379775,
|
|
"learning_rate": 9.292109270072013e-06,
|
|
"loss": 3.2744314670562744,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.764612954186414,
|
|
"grad_norm": 8.411579984707192,
|
|
"learning_rate": 9.289750139906075e-06,
|
|
"loss": 3.0986344814300537,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.7654028436018957,
|
|
"grad_norm": 12.640585889095618,
|
|
"learning_rate": 9.287387385745094e-06,
|
|
"loss": 2.9320476055145264,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.7661927330173776,
|
|
"grad_norm": 18.268696929650034,
|
|
"learning_rate": 9.28502100958513e-06,
|
|
"loss": 1.89057457447052,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.7669826224328594,
|
|
"grad_norm": 8.318894507210741,
|
|
"learning_rate": 9.282651013425309e-06,
|
|
"loss": 2.681485891342163,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.7677725118483413,
|
|
"grad_norm": 12.92970042225882,
|
|
"learning_rate": 9.280277399267808e-06,
|
|
"loss": 3.2145707607269287,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.768562401263823,
|
|
"grad_norm": 8.005504028109495,
|
|
"learning_rate": 9.277900169117864e-06,
|
|
"loss": 2.4123406410217285,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.7693522906793049,
|
|
"grad_norm": 12.062434756711202,
|
|
"learning_rate": 9.27551932498377e-06,
|
|
"loss": 2.878951072692871,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.7701421800947867,
|
|
"grad_norm": 9.094718919698682,
|
|
"learning_rate": 9.273134868876872e-06,
|
|
"loss": 2.949256420135498,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.7709320695102686,
|
|
"grad_norm": 21.141805602331043,
|
|
"learning_rate": 9.270746802811566e-06,
|
|
"loss": 2.42653226852417,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.7717219589257504,
|
|
"grad_norm": 14.843894359981512,
|
|
"learning_rate": 9.268355128805298e-06,
|
|
"loss": 2.678997039794922,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.7725118483412322,
|
|
"grad_norm": 12.746073187269445,
|
|
"learning_rate": 9.265959848878558e-06,
|
|
"loss": 2.422623634338379,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.7733017377567141,
|
|
"grad_norm": 11.40428902421513,
|
|
"learning_rate": 9.263560965054894e-06,
|
|
"loss": 2.154381036758423,
|
|
"step": 979
|
|
},
|
|
        {
            "epoch": 0.7740916271721959,
            "grad_norm": 20.956261055506054,
            "learning_rate": 9.261158479360884e-06,
            "loss": 2.777848720550537,
            "step": 980
        },
        {
            "epoch": 0.7748815165876777,
            "grad_norm": 9.70811631729052,
            "learning_rate": 9.25875239382616e-06,
            "loss": 2.6050872802734375,
            "step": 981
        },
        {
            "epoch": 0.7756714060031595,
            "grad_norm": 10.241699921518476,
            "learning_rate": 9.25634271048339e-06,
            "loss": 2.080990791320801,
            "step": 982
        },
        {
            "epoch": 0.7764612954186414,
            "grad_norm": 11.768131588715727,
            "learning_rate": 9.253929431368282e-06,
            "loss": 3.0087733268737793,
            "step": 983
        },
        {
            "epoch": 0.7772511848341233,
            "grad_norm": 12.464298821154987,
            "learning_rate": 9.251512558519582e-06,
            "loss": 2.852950096130371,
            "step": 984
        },
        {
            "epoch": 0.778041074249605,
            "grad_norm": 11.838439498108045,
            "learning_rate": 9.24909209397907e-06,
            "loss": 3.7374510765075684,
            "step": 985
        },
        {
            "epoch": 0.7788309636650869,
            "grad_norm": 15.277389692233058,
            "learning_rate": 9.246668039791568e-06,
            "loss": 2.7718963623046875,
            "step": 986
        },
        {
            "epoch": 0.7796208530805687,
            "grad_norm": 6.6742475596758535,
            "learning_rate": 9.244240398004922e-06,
            "loss": 2.6798787117004395,
            "step": 987
        },
        {
            "epoch": 0.7804107424960506,
            "grad_norm": 10.32795974684294,
            "learning_rate": 9.241809170670008e-06,
            "loss": 2.7594618797302246,
            "step": 988
        },
        {
            "epoch": 0.7812006319115324,
            "grad_norm": 9.018400283983219,
            "learning_rate": 9.239374359840742e-06,
            "loss": 3.114830493927002,
            "step": 989
        },
        {
            "epoch": 0.7819905213270142,
            "grad_norm": 16.724516344511926,
            "learning_rate": 9.236935967574054e-06,
            "loss": 3.228322982788086,
            "step": 990
        },
        {
            "epoch": 0.782780410742496,
            "grad_norm": 23.82315306020477,
            "learning_rate": 9.234493995929912e-06,
            "loss": 3.004939079284668,
            "step": 991
        },
        {
            "epoch": 0.7835703001579779,
            "grad_norm": 13.99018159956102,
            "learning_rate": 9.232048446971296e-06,
            "loss": 2.6142897605895996,
            "step": 992
        },
        {
            "epoch": 0.7843601895734598,
            "grad_norm": 10.608540414298071,
            "learning_rate": 9.229599322764215e-06,
            "loss": 2.6615846157073975,
            "step": 993
        },
        {
            "epoch": 0.7851500789889415,
            "grad_norm": 23.10494541404644,
            "learning_rate": 9.227146625377699e-06,
            "loss": 2.831402063369751,
            "step": 994
        },
        {
            "epoch": 0.7859399684044234,
            "grad_norm": 9.659599628220574,
            "learning_rate": 9.224690356883793e-06,
            "loss": 3.2252285480499268,
            "step": 995
        },
        {
            "epoch": 0.7867298578199052,
            "grad_norm": 7.285958976275862,
            "learning_rate": 9.222230519357562e-06,
            "loss": 2.635441780090332,
            "step": 996
        },
        {
            "epoch": 0.7875197472353871,
            "grad_norm": 7.160771261905654,
            "learning_rate": 9.219767114877086e-06,
            "loss": 2.6156837940216064,
            "step": 997
        },
        {
            "epoch": 0.7883096366508688,
            "grad_norm": 14.57854463656351,
            "learning_rate": 9.217300145523453e-06,
            "loss": 2.979773998260498,
            "step": 998
        },
        {
            "epoch": 0.7890995260663507,
            "grad_norm": 10.862479398287723,
            "learning_rate": 9.214829613380772e-06,
            "loss": 2.811668872833252,
            "step": 999
        },
        {
            "epoch": 0.7898894154818326,
            "grad_norm": 14.273686758443457,
            "learning_rate": 9.212355520536153e-06,
            "loss": 3.670020818710327,
            "step": 1000
        },
        {
            "epoch": 0.7906793048973144,
            "grad_norm": 7.748159510449536,
            "learning_rate": 9.209877869079719e-06,
            "loss": 2.974087953567505,
            "step": 1001
        },
        {
            "epoch": 0.7914691943127962,
            "grad_norm": 7.596423622962509,
            "learning_rate": 9.207396661104599e-06,
            "loss": 2.0368399620056152,
            "step": 1002
        },
        {
            "epoch": 0.792259083728278,
            "grad_norm": 15.086522167851689,
            "learning_rate": 9.204911898706925e-06,
            "loss": 3.096889019012451,
            "step": 1003
        },
        {
            "epoch": 0.7930489731437599,
            "grad_norm": 11.981971765032,
            "learning_rate": 9.202423583985832e-06,
            "loss": 2.6723742485046387,
            "step": 1004
        },
        {
            "epoch": 0.7938388625592417,
            "grad_norm": 8.646415889552525,
            "learning_rate": 9.199931719043456e-06,
            "loss": 2.6578660011291504,
            "step": 1005
        },
        {
            "epoch": 0.7946287519747235,
            "grad_norm": 14.68042449542436,
            "learning_rate": 9.197436305984933e-06,
            "loss": 2.629140853881836,
            "step": 1006
        },
        {
            "epoch": 0.7954186413902053,
            "grad_norm": 9.552656598953957,
            "learning_rate": 9.194937346918398e-06,
            "loss": 2.732150077819824,
            "step": 1007
        },
        {
            "epoch": 0.7962085308056872,
            "grad_norm": 8.954639954969533,
            "learning_rate": 9.192434843954977e-06,
            "loss": 2.240567445755005,
            "step": 1008
        },
        {
            "epoch": 0.7969984202211691,
            "grad_norm": 7.418598530709051,
            "learning_rate": 9.189928799208794e-06,
            "loss": 2.6821372509002686,
            "step": 1009
        },
        {
            "epoch": 0.7977883096366508,
            "grad_norm": 6.460842787120627,
            "learning_rate": 9.187419214796967e-06,
            "loss": 2.697354793548584,
            "step": 1010
        },
        {
            "epoch": 0.7985781990521327,
            "grad_norm": 6.9324406657677695,
            "learning_rate": 9.184906092839596e-06,
            "loss": 1.7275753021240234,
            "step": 1011
        },
        {
            "epoch": 0.7993680884676145,
            "grad_norm": 15.212694250890982,
            "learning_rate": 9.182389435459776e-06,
            "loss": 2.717050075531006,
            "step": 1012
        },
        {
            "epoch": 0.8001579778830964,
            "grad_norm": 8.870533933713203,
            "learning_rate": 9.17986924478359e-06,
            "loss": 2.5672261714935303,
            "step": 1013
        },
        {
            "epoch": 0.8009478672985783,
            "grad_norm": 12.582683742658494,
            "learning_rate": 9.177345522940102e-06,
            "loss": 2.8706305027008057,
            "step": 1014
        },
        {
            "epoch": 0.80173775671406,
            "grad_norm": 9.481753489579146,
            "learning_rate": 9.174818272061358e-06,
            "loss": 2.880066156387329,
            "step": 1015
        },
        {
            "epoch": 0.8025276461295419,
            "grad_norm": 7.842796420054131,
            "learning_rate": 9.172287494282393e-06,
            "loss": 1.724432349205017,
            "step": 1016
        },
        {
            "epoch": 0.8033175355450237,
            "grad_norm": 13.467226411075792,
            "learning_rate": 9.169753191741211e-06,
            "loss": 3.1031036376953125,
            "step": 1017
        },
        {
            "epoch": 0.8041074249605056,
            "grad_norm": 9.642248663487798,
            "learning_rate": 9.167215366578804e-06,
            "loss": 2.266718626022339,
            "step": 1018
        },
        {
            "epoch": 0.8048973143759873,
            "grad_norm": 9.565467328925202,
            "learning_rate": 9.16467402093913e-06,
            "loss": 2.8157501220703125,
            "step": 1019
        },
        {
            "epoch": 0.8056872037914692,
            "grad_norm": 15.209367854706489,
            "learning_rate": 9.162129156969131e-06,
            "loss": 2.440033197402954,
            "step": 1020
        },
        {
            "epoch": 0.806477093206951,
            "grad_norm": 9.68569819521668,
            "learning_rate": 9.159580776818715e-06,
            "loss": 2.4779491424560547,
            "step": 1021
        },
        {
            "epoch": 0.8072669826224329,
            "grad_norm": 12.736707657945674,
            "learning_rate": 9.15702888264076e-06,
            "loss": 2.2091784477233887,
            "step": 1022
        },
        {
            "epoch": 0.8080568720379147,
            "grad_norm": 13.514115226011919,
            "learning_rate": 9.154473476591114e-06,
            "loss": 3.1801180839538574,
            "step": 1023
        },
        {
            "epoch": 0.8088467614533965,
            "grad_norm": 20.122470837459804,
            "learning_rate": 9.151914560828598e-06,
            "loss": 3.0873842239379883,
            "step": 1024
        },
        {
            "epoch": 0.8096366508688784,
            "grad_norm": 13.480557474542609,
            "learning_rate": 9.149352137514987e-06,
            "loss": 2.603421449661255,
            "step": 1025
        },
        {
            "epoch": 0.8104265402843602,
            "grad_norm": 21.95371373023252,
            "learning_rate": 9.146786208815026e-06,
            "loss": 2.6986594200134277,
            "step": 1026
        },
        {
            "epoch": 0.811216429699842,
            "grad_norm": 38.65128008605009,
            "learning_rate": 9.144216776896422e-06,
            "loss": 2.905870199203491,
            "step": 1027
        },
        {
            "epoch": 0.8120063191153238,
            "grad_norm": 10.540992057323065,
            "learning_rate": 9.141643843929837e-06,
            "loss": 2.4717659950256348,
            "step": 1028
        },
        {
            "epoch": 0.8127962085308057,
            "grad_norm": 15.24208436151514,
            "learning_rate": 9.139067412088895e-06,
            "loss": 2.477531909942627,
            "step": 1029
        },
        {
            "epoch": 0.8135860979462876,
            "grad_norm": 9.30424965654663,
            "learning_rate": 9.136487483550172e-06,
            "loss": 3.016301393508911,
            "step": 1030
        },
        {
            "epoch": 0.8143759873617693,
            "grad_norm": 22.185168693739104,
            "learning_rate": 9.1339040604932e-06,
            "loss": 2.662216901779175,
            "step": 1031
        },
        {
            "epoch": 0.8151658767772512,
            "grad_norm": 29.979197475408252,
            "learning_rate": 9.131317145100469e-06,
            "loss": 2.694211959838867,
            "step": 1032
        },
        {
            "epoch": 0.815955766192733,
            "grad_norm": 11.06100764534907,
            "learning_rate": 9.128726739557408e-06,
            "loss": 2.5702898502349854,
            "step": 1033
        },
        {
            "epoch": 0.8167456556082149,
            "grad_norm": 7.880841439580581,
            "learning_rate": 9.126132846052401e-06,
            "loss": 2.8700671195983887,
            "step": 1034
        },
        {
            "epoch": 0.8175355450236966,
            "grad_norm": 27.058763319079304,
            "learning_rate": 9.123535466776778e-06,
            "loss": 3.244725227355957,
            "step": 1035
        },
        {
            "epoch": 0.8183254344391785,
            "grad_norm": 16.382446856894965,
            "learning_rate": 9.120934603924816e-06,
            "loss": 2.4301857948303223,
            "step": 1036
        },
        {
            "epoch": 0.8191153238546603,
            "grad_norm": 16.26715685664894,
            "learning_rate": 9.118330259693728e-06,
            "loss": 4.2284698486328125,
            "step": 1037
        },
        {
            "epoch": 0.8199052132701422,
            "grad_norm": 21.996363546185417,
            "learning_rate": 9.115722436283676e-06,
            "loss": 3.5227627754211426,
            "step": 1038
        },
        {
            "epoch": 0.8206951026856241,
            "grad_norm": 20.19577186521635,
            "learning_rate": 9.113111135897757e-06,
            "loss": 3.057605743408203,
            "step": 1039
        },
        {
            "epoch": 0.8214849921011058,
            "grad_norm": 13.600963392770467,
            "learning_rate": 9.110496360742006e-06,
            "loss": 3.0911357402801514,
            "step": 1040
        },
        {
            "epoch": 0.8222748815165877,
            "grad_norm": 12.336475393471844,
            "learning_rate": 9.107878113025393e-06,
            "loss": 3.0051560401916504,
            "step": 1041
        },
        {
            "epoch": 0.8230647709320695,
            "grad_norm": 10.984835326798942,
            "learning_rate": 9.105256394959822e-06,
            "loss": 2.4400599002838135,
            "step": 1042
        },
        {
            "epoch": 0.8238546603475514,
            "grad_norm": 20.947414774581414,
            "learning_rate": 9.102631208760131e-06,
            "loss": 3.4805350303649902,
            "step": 1043
        },
        {
            "epoch": 0.8246445497630331,
            "grad_norm": 14.360309838396736,
            "learning_rate": 9.100002556644086e-06,
            "loss": 2.588095188140869,
            "step": 1044
        },
        {
            "epoch": 0.825434439178515,
            "grad_norm": 10.20829475484016,
            "learning_rate": 9.097370440832378e-06,
            "loss": 2.78764271736145,
            "step": 1045
        },
        {
            "epoch": 0.8262243285939969,
            "grad_norm": 10.885822678518133,
            "learning_rate": 9.094734863548629e-06,
            "loss": 2.8764867782592773,
            "step": 1046
        },
        {
            "epoch": 0.8270142180094787,
            "grad_norm": 8.714569458458401,
            "learning_rate": 9.092095827019385e-06,
            "loss": 2.7947893142700195,
            "step": 1047
        },
        {
            "epoch": 0.8278041074249605,
            "grad_norm": 16.531291726752407,
            "learning_rate": 9.08945333347411e-06,
            "loss": 3.4402239322662354,
            "step": 1048
        },
        {
            "epoch": 0.8285939968404423,
            "grad_norm": 11.649383119473496,
            "learning_rate": 9.086807385145193e-06,
            "loss": 2.5275776386260986,
            "step": 1049
        },
        {
            "epoch": 0.8293838862559242,
            "grad_norm": 37.04934005274269,
            "learning_rate": 9.084157984267939e-06,
            "loss": 3.1136865615844727,
            "step": 1050
        },
        {
            "epoch": 0.830173775671406,
            "grad_norm": 27.375499924589366,
            "learning_rate": 9.08150513308057e-06,
            "loss": 2.692000389099121,
            "step": 1051
        },
        {
            "epoch": 0.8309636650868878,
            "grad_norm": 15.165913666522817,
            "learning_rate": 9.078848833824226e-06,
            "loss": 3.2129108905792236,
            "step": 1052
        },
        {
            "epoch": 0.8317535545023697,
            "grad_norm": 16.540229701542952,
            "learning_rate": 9.076189088742955e-06,
            "loss": 3.135190963745117,
            "step": 1053
        },
        {
            "epoch": 0.8325434439178515,
            "grad_norm": 11.775957201988549,
            "learning_rate": 9.073525900083717e-06,
            "loss": 2.9450531005859375,
            "step": 1054
        },
        {
            "epoch": 0.8333333333333334,
            "grad_norm": 28.18681618707373,
            "learning_rate": 9.070859270096385e-06,
            "loss": 3.7651073932647705,
            "step": 1055
        },
        {
            "epoch": 0.8341232227488151,
            "grad_norm": 11.37393230420539,
            "learning_rate": 9.06818920103374e-06,
            "loss": 2.674816846847534,
            "step": 1056
        },
        {
            "epoch": 0.834913112164297,
            "grad_norm": 5.773096989056217,
            "learning_rate": 9.065515695151459e-06,
            "loss": 2.8101024627685547,
            "step": 1057
        },
        {
            "epoch": 0.8357030015797788,
            "grad_norm": 9.404139830590696,
            "learning_rate": 9.06283875470813e-06,
            "loss": 2.883345603942871,
            "step": 1058
        },
        {
            "epoch": 0.8364928909952607,
            "grad_norm": 10.053270788353137,
            "learning_rate": 9.060158381965242e-06,
            "loss": 3.0177576541900635,
            "step": 1059
        },
{
|
|
"epoch": 0.8372827804107424,
|
|
"grad_norm": 9.56854149440088,
|
|
"learning_rate": 9.057474579187184e-06,
|
|
"loss": 2.5740466117858887,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.8380726698262243,
|
|
"grad_norm": 25.48696327806149,
|
|
"learning_rate": 9.05478734864124e-06,
|
|
"loss": 2.5997400283813477,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.8388625592417062,
|
|
"grad_norm": 8.822888785903398,
|
|
"learning_rate": 9.052096692597594e-06,
|
|
"loss": 2.480900287628174,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.839652448657188,
|
|
"grad_norm": 12.233243827649204,
|
|
"learning_rate": 9.049402613329316e-06,
|
|
"loss": 2.372171401977539,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.8404423380726699,
|
|
"grad_norm": 10.92795851598965,
|
|
"learning_rate": 9.046705113112375e-06,
|
|
"loss": 3.3010194301605225,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.8412322274881516,
|
|
"grad_norm": 14.04201976986867,
|
|
"learning_rate": 9.04400419422563e-06,
|
|
"loss": 1.9576343297958374,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.8420221169036335,
|
|
"grad_norm": 11.652105566428002,
|
|
"learning_rate": 9.041299858950824e-06,
|
|
"loss": 2.61598539352417,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.8428120063191153,
|
|
"grad_norm": 8.98594247433151,
|
|
"learning_rate": 9.03859210957259e-06,
|
|
"loss": 2.4816157817840576,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.8436018957345972,
|
|
"grad_norm": 15.298066202780824,
|
|
"learning_rate": 9.035880948378443e-06,
|
|
"loss": 2.8336338996887207,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.844391785150079,
|
|
"grad_norm": 10.357852449251151,
|
|
"learning_rate": 9.03316637765878e-06,
|
|
"loss": 2.5921220779418945,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.8451816745655608,
|
|
"grad_norm": 8.480809498263813,
|
|
"learning_rate": 9.030448399706881e-06,
|
|
"loss": 1.7483251094818115,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.8459715639810427,
|
|
"grad_norm": 27.723578315488755,
|
|
"learning_rate": 9.0277270168189e-06,
|
|
"loss": 2.880528688430786,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.8467614533965245,
|
|
"grad_norm": 6.917974588538861,
|
|
"learning_rate": 9.025002231293874e-06,
|
|
"loss": 2.048827648162842,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.8475513428120063,
|
|
"grad_norm": 12.11627041441912,
|
|
"learning_rate": 9.022274045433706e-06,
|
|
"loss": 2.47039794921875,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.8483412322274881,
|
|
"grad_norm": 44.924170557742606,
|
|
"learning_rate": 9.019542461543181e-06,
|
|
"loss": 2.97735857963562,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.84913112164297,
|
|
"grad_norm": 11.486390013948144,
|
|
"learning_rate": 9.016807481929948e-06,
|
|
"loss": 3.1126694679260254,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.8499210110584519,
|
|
"grad_norm": 6.894815895299225,
|
|
"learning_rate": 9.014069108904526e-06,
|
|
"loss": 2.364047050476074,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.8507109004739336,
|
|
"grad_norm": 23.140984836947318,
|
|
"learning_rate": 9.011327344780306e-06,
|
|
"loss": 2.1283740997314453,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.8515007898894155,
|
|
"grad_norm": 10.70156784462954,
|
|
"learning_rate": 9.008582191873531e-06,
|
|
"loss": 2.7940797805786133,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.8522906793048973,
|
|
"grad_norm": 7.795231096778107,
|
|
"learning_rate": 9.005833652503323e-06,
|
|
"loss": 2.638899803161621,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.8530805687203792,
|
|
"grad_norm": 8.536488429058528,
|
|
"learning_rate": 9.003081728991654e-06,
|
|
"loss": 2.5842251777648926,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.8538704581358609,
|
|
"grad_norm": 12.351330943815283,
|
|
"learning_rate": 9.000326423663356e-06,
|
|
"loss": 2.5075812339782715,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.8546603475513428,
|
|
"grad_norm": 7.148232698465105,
|
|
"learning_rate": 8.997567738846126e-06,
|
|
"loss": 2.6017603874206543,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.8554502369668247,
|
|
"grad_norm": 7.110606099151946,
|
|
"learning_rate": 8.994805676870504e-06,
|
|
"loss": 2.1451048851013184,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.8562401263823065,
|
|
"grad_norm": 7.011285288703447,
|
|
"learning_rate": 8.992040240069892e-06,
|
|
"loss": 2.408576488494873,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.8570300157977883,
|
|
"grad_norm": 6.646872041848021,
|
|
"learning_rate": 8.98927143078054e-06,
|
|
"loss": 2.678819179534912,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.8578199052132701,
|
|
"grad_norm": 14.417901398540348,
|
|
"learning_rate": 8.986499251341545e-06,
|
|
"loss": 2.4109766483306885,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.858609794628752,
|
|
"grad_norm": 18.976709689582428,
|
|
"learning_rate": 8.983723704094856e-06,
|
|
"loss": 2.79660701751709,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.8593996840442338,
|
|
"grad_norm": 13.97392247737316,
|
|
"learning_rate": 8.980944791385262e-06,
|
|
"loss": 2.5753228664398193,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.8601895734597157,
|
|
"grad_norm": 10.646221312923302,
|
|
"learning_rate": 8.9781625155604e-06,
|
|
"loss": 3.145460367202759,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.8609794628751974,
|
|
"grad_norm": 12.125091970169855,
|
|
"learning_rate": 8.975376878970744e-06,
|
|
"loss": 3.016714334487915,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.8617693522906793,
|
|
"grad_norm": 10.247379269546157,
|
|
"learning_rate": 8.972587883969612e-06,
|
|
"loss": 3.0661256313323975,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.8625592417061612,
|
|
"grad_norm": 7.510993195440685,
|
|
"learning_rate": 8.969795532913152e-06,
|
|
"loss": 3.1030752658843994,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.863349131121643,
|
|
"grad_norm": 11.085936006300932,
|
|
"learning_rate": 8.966999828160355e-06,
|
|
"loss": 2.8964810371398926,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.8641390205371248,
|
|
"grad_norm": 10.635255649754786,
|
|
"learning_rate": 8.96420077207304e-06,
|
|
"loss": 2.5778634548187256,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.8649289099526066,
|
|
"grad_norm": 11.699323935123369,
|
|
"learning_rate": 8.961398367015857e-06,
|
|
"loss": 2.6623075008392334,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.8657187993680885,
|
|
"grad_norm": 17.05618719334767,
|
|
"learning_rate": 8.95859261535629e-06,
|
|
"loss": 3.076087474822998,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.8665086887835703,
|
|
"grad_norm": 9.989617355430017,
|
|
"learning_rate": 8.955783519464644e-06,
|
|
"loss": 2.579759120941162,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.8672985781990521,
|
|
"grad_norm": 13.983057400258621,
|
|
"learning_rate": 8.952971081714056e-06,
|
|
"loss": 2.9468941688537598,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.868088467614534,
|
|
"grad_norm": 11.807806126259434,
|
|
"learning_rate": 8.950155304480482e-06,
|
|
"loss": 2.851876974105835,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.8688783570300158,
|
|
"grad_norm": 9.121474991816212,
|
|
"learning_rate": 8.947336190142696e-06,
|
|
"loss": 2.568575382232666,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.8696682464454977,
|
|
"grad_norm": 25.714257060738078,
|
|
"learning_rate": 8.9445137410823e-06,
|
|
"loss": 4.189000129699707,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.8704581358609794,
|
|
"grad_norm": 9.883995956029263,
|
|
"learning_rate": 8.941687959683707e-06,
|
|
"loss": 2.751539707183838,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.8712480252764613,
|
|
"grad_norm": 25.188255479459396,
|
|
"learning_rate": 8.938858848334144e-06,
|
|
"loss": 3.325855255126953,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.8720379146919431,
|
|
"grad_norm": 13.67010382519282,
|
|
"learning_rate": 8.936026409423656e-06,
|
|
"loss": 2.7301278114318848,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.872827804107425,
|
|
"grad_norm": 11.48889119309666,
|
|
"learning_rate": 8.933190645345096e-06,
|
|
"loss": 2.7599031925201416,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.8736176935229067,
|
|
"grad_norm": 14.87256495310459,
|
|
"learning_rate": 8.930351558494128e-06,
|
|
"loss": 3.0885415077209473,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.8744075829383886,
|
|
"grad_norm": 10.449701555194096,
|
|
"learning_rate": 8.92750915126922e-06,
|
|
"loss": 2.876091718673706,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.8751974723538705,
|
|
"grad_norm": 14.84773136100038,
|
|
"learning_rate": 8.924663426071647e-06,
|
|
"loss": 3.0341625213623047,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.8759873617693523,
|
|
"grad_norm": 8.15947775366893,
|
|
"learning_rate": 8.921814385305489e-06,
|
|
"loss": 2.277728796005249,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 0.8767772511848341,
|
|
"grad_norm": 12.99890714367993,
|
|
"learning_rate": 8.918962031377622e-06,
|
|
"loss": 2.8230514526367188,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.8775671406003159,
|
|
"grad_norm": 9.545433136406718,
|
|
"learning_rate": 8.916106366697728e-06,
|
|
"loss": 2.9621667861938477,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 0.8783570300157978,
|
|
"grad_norm": 10.39494016316492,
|
|
"learning_rate": 8.913247393678278e-06,
|
|
"loss": 2.3225202560424805,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.8791469194312796,
|
|
"grad_norm": 8.192269609832815,
|
|
"learning_rate": 8.910385114734544e-06,
|
|
"loss": 2.7106945514678955,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 0.8799368088467614,
|
|
"grad_norm": 10.432502214764677,
|
|
"learning_rate": 8.907519532284589e-06,
|
|
"loss": 2.4334917068481445,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.8807266982622433,
|
|
"grad_norm": 12.431139105375072,
|
|
"learning_rate": 8.904650648749264e-06,
|
|
"loss": 2.7972915172576904,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.8815165876777251,
|
|
"grad_norm": 9.209050505947106,
|
|
"learning_rate": 8.901778466552215e-06,
|
|
"loss": 2.729956865310669,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.882306477093207,
|
|
"grad_norm": 34.636350949112945,
|
|
"learning_rate": 8.898902988119869e-06,
|
|
"loss": 4.157067775726318,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 0.8830963665086888,
|
|
"grad_norm": 10.426826060433754,
|
|
"learning_rate": 8.896024215881439e-06,
|
|
"loss": 2.5612125396728516,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.8838862559241706,
|
|
"grad_norm": 15.903953548700464,
|
|
"learning_rate": 8.89314215226892e-06,
|
|
"loss": 2.477797031402588,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 0.8846761453396524,
|
|
"grad_norm": 14.039357640579942,
|
|
"learning_rate": 8.890256799717092e-06,
|
|
"loss": 2.5441317558288574,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.8854660347551343,
|
|
"grad_norm": 12.158639837027732,
|
|
"learning_rate": 8.88736816066351e-06,
|
|
"loss": 2.358978748321533,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 0.8862559241706162,
|
|
"grad_norm": 23.052840320992853,
|
|
"learning_rate": 8.884476237548503e-06,
|
|
"loss": 2.7650527954101562,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.8870458135860979,
|
|
"grad_norm": 8.158346405347508,
|
|
"learning_rate": 8.88158103281518e-06,
|
|
"loss": 2.303537368774414,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 0.8878357030015798,
|
|
"grad_norm": 20.354107714614678,
|
|
"learning_rate": 8.87868254890942e-06,
|
|
"loss": 2.8699512481689453,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.8886255924170616,
|
|
"grad_norm": 12.005895316989173,
|
|
"learning_rate": 8.875780788279868e-06,
|
|
"loss": 2.5772287845611572,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.8894154818325435,
|
|
"grad_norm": 12.654751625675802,
|
|
"learning_rate": 8.872875753377943e-06,
|
|
"loss": 2.408010244369507,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.8902053712480252,
|
|
"grad_norm": 11.36505652800493,
|
|
"learning_rate": 8.86996744665783e-06,
|
|
"loss": 2.7588155269622803,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 0.8909952606635071,
|
|
"grad_norm": 10.687132877981576,
|
|
"learning_rate": 8.867055870576474e-06,
|
|
"loss": 2.394656181335449,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.891785150078989,
|
|
"grad_norm": 16.83799896924904,
|
|
"learning_rate": 8.864141027593585e-06,
|
|
"loss": 2.3354270458221436,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 0.8925750394944708,
|
|
"grad_norm": 13.157340664207496,
|
|
"learning_rate": 8.86122292017163e-06,
|
|
"loss": 3.017643690109253,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.8933649289099526,
|
|
"grad_norm": 11.447127195057677,
|
|
"learning_rate": 8.858301550775836e-06,
|
|
"loss": 2.929154396057129,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 0.8941548183254344,
|
|
"grad_norm": 9.601561380597447,
|
|
"learning_rate": 8.855376921874188e-06,
|
|
"loss": 2.9352359771728516,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.8949447077409163,
|
|
"grad_norm": 12.643220814871029,
|
|
"learning_rate": 8.85244903593742e-06,
|
|
"loss": 2.5534372329711914,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 0.8957345971563981,
|
|
"grad_norm": 13.712786164898073,
|
|
"learning_rate": 8.849517895439022e-06,
|
|
"loss": 2.969341516494751,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.8965244865718799,
|
|
"grad_norm": 27.475418365680294,
|
|
"learning_rate": 8.846583502855229e-06,
|
|
"loss": 2.770808696746826,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.8973143759873617,
|
|
"grad_norm": 19.579432202817973,
|
|
"learning_rate": 8.843645860665024e-06,
|
|
"loss": 2.384641408920288,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.8981042654028436,
|
|
"grad_norm": 19.78607010018951,
|
|
"learning_rate": 8.84070497135014e-06,
|
|
"loss": 2.5281355381011963,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 0.8988941548183255,
|
|
"grad_norm": 14.094462449831422,
|
|
"learning_rate": 8.83776083739505e-06,
|
|
"loss": 2.215435028076172,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.8996840442338072,
|
|
"grad_norm": 17.933215742543844,
|
|
"learning_rate": 8.834813461286965e-06,
|
|
"loss": 2.291853904724121,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 0.9004739336492891,
|
|
"grad_norm": 8.843018958262709,
|
|
"learning_rate": 8.831862845515842e-06,
|
|
"loss": 2.638589382171631,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.9012638230647709,
|
|
"grad_norm": 9.967486938214911,
|
|
"learning_rate": 8.828908992574366e-06,
|
|
"loss": 2.7979438304901123,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 0.9020537124802528,
|
|
"grad_norm": 13.195252873575834,
|
|
"learning_rate": 8.825951904957967e-06,
|
|
"loss": 3.2847375869750977,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.9028436018957346,
|
|
"grad_norm": 8.059436353539473,
|
|
"learning_rate": 8.822991585164799e-06,
|
|
"loss": 3.350722312927246,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 0.9036334913112164,
|
|
"grad_norm": 8.411890662561238,
|
|
"learning_rate": 8.82002803569575e-06,
|
|
"loss": 2.7170395851135254,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.9044233807266983,
|
|
"grad_norm": 6.173660264218937,
|
|
"learning_rate": 8.81706125905444e-06,
|
|
"loss": 2.3877620697021484,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.9052132701421801,
|
|
"grad_norm": 7.345392187325678,
|
|
"learning_rate": 8.814091257747206e-06,
|
|
"loss": 2.355260133743286,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.906003159557662,
|
|
"grad_norm": 18.230032497512713,
|
|
"learning_rate": 8.811118034283118e-06,
|
|
"loss": 2.5785627365112305,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 0.9067930489731437,
|
|
"grad_norm": 69.44999711709127,
|
|
"learning_rate": 8.808141591173966e-06,
|
|
"loss": 2.761636734008789,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.9075829383886256,
|
|
"grad_norm": 12.183701745113998,
|
|
"learning_rate": 8.805161930934256e-06,
|
|
"loss": 2.5530524253845215,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 0.9083728278041074,
|
|
"grad_norm": 7.936573557522932,
|
|
"learning_rate": 8.802179056081217e-06,
|
|
"loss": 2.907682418823242,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.9091627172195893,
|
|
"grad_norm": 10.173205195310263,
|
|
"learning_rate": 8.799192969134792e-06,
|
|
"loss": 2.7638745307922363,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 0.909952606635071,
|
|
"grad_norm": 11.128588471862935,
|
|
"learning_rate": 8.796203672617634e-06,
|
|
"loss": 2.4181337356567383,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.9107424960505529,
|
|
"grad_norm": 9.655844045925859,
|
|
"learning_rate": 8.793211169055114e-06,
|
|
"loss": 2.773463249206543,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 0.9115323854660348,
|
|
"grad_norm": 11.03401768931024,
|
|
"learning_rate": 8.790215460975307e-06,
|
|
"loss": 2.9100022315979004,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.9123222748815166,
|
|
"grad_norm": 7.123266399146448,
|
|
"learning_rate": 8.787216550908997e-06,
|
|
"loss": 2.4781482219696045,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.9131121642969984,
|
|
"grad_norm": 17.622221819502133,
|
|
"learning_rate": 8.784214441389675e-06,
|
|
"loss": 2.452256202697754,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.9139020537124802,
|
|
"grad_norm": 13.113971999138425,
|
|
"learning_rate": 8.78120913495353e-06,
|
|
"loss": 2.896636486053467,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 0.9146919431279621,
|
|
"grad_norm": 9.000975431480043,
|
|
"learning_rate": 8.778200634139456e-06,
|
|
"loss": 2.7890336513519287,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.915481832543444,
|
|
"grad_norm": 7.481715343714205,
|
|
"learning_rate": 8.775188941489046e-06,
|
|
"loss": 2.496453046798706,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 0.9162717219589257,
|
|
"grad_norm": 10.923526680550703,
|
|
"learning_rate": 8.772174059546587e-06,
|
|
"loss": 2.843217372894287,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.9170616113744076,
|
|
"grad_norm": 11.10478528656359,
|
|
"learning_rate": 8.76915599085906e-06,
|
|
"loss": 2.435269355773926,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 0.9178515007898894,
|
|
"grad_norm": 6.772268942857849,
|
|
"learning_rate": 8.76613473797614e-06,
|
|
"loss": 2.2593870162963867,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.9186413902053713,
|
|
"grad_norm": 9.493749507534623,
|
|
"learning_rate": 8.76311030345019e-06,
|
|
"loss": 2.758202075958252,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 0.919431279620853,
|
|
"grad_norm": 14.911631831896822,
|
|
"learning_rate": 8.760082689836267e-06,
|
|
"loss": 3.124772071838379,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.9202211690363349,
|
|
"grad_norm": 12.853328106271473,
|
|
"learning_rate": 8.757051899692104e-06,
|
|
"loss": 2.7403624057769775,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.9210110584518167,
|
|
"grad_norm": 16.513030408815556,
|
|
"learning_rate": 8.754017935578124e-06,
|
|
"loss": 2.6747732162475586,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.9218009478672986,
|
|
"grad_norm": 19.976818001028782,
|
|
"learning_rate": 8.75098080005743e-06,
|
|
"loss": 3.0514602661132812,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 0.9225908372827805,
|
|
"grad_norm": 15.44765720208878,
|
|
"learning_rate": 8.747940495695804e-06,
|
|
"loss": 3.660196304321289,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.9233807266982622,
|
|
"grad_norm": 46.251367241411415,
|
|
"learning_rate": 8.744897025061704e-06,
|
|
"loss": 3.5469841957092285,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 0.9241706161137441,
|
|
"grad_norm": 10.692755449897618,
|
|
"learning_rate": 8.741850390726268e-06,
|
|
"loss": 3.707672595977783,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.9249605055292259,
|
|
"grad_norm": 17.772477199253103,
|
|
"learning_rate": 8.7388005952633e-06,
|
|
"loss": 2.4242305755615234,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 0.9257503949447078,
|
|
"grad_norm": 18.70690435962618,
|
|
"learning_rate": 8.735747641249276e-06,
|
|
"loss": 2.7367851734161377,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.9265402843601895,
|
|
"grad_norm": 14.123094504926451,
|
|
"learning_rate": 8.732691531263344e-06,
|
|
"loss": 2.673311471939087,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 0.9273301737756714,
|
|
"grad_norm": 6.632016401084768,
|
|
"learning_rate": 8.729632267887313e-06,
|
|
"loss": 2.4245405197143555,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.9281200631911533,
|
|
"grad_norm": 12.470479655533174,
|
|
"learning_rate": 8.726569853705662e-06,
|
|
"loss": 2.9810891151428223,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.9289099526066351,
|
|
"grad_norm": 10.985987841138897,
|
|
"learning_rate": 8.723504291305526e-06,
|
|
"loss": 2.6249923706054688,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.9296998420221169,
|
|
"grad_norm": 10.933662910087982,
|
|
"learning_rate": 8.720435583276706e-06,
|
|
"loss": 2.765192985534668,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 0.9304897314375987,
|
|
"grad_norm": 11.327209932917212,
|
|
"learning_rate": 8.71736373221165e-06,
|
|
"loss": 2.9828245639801025,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.9312796208530806,
|
|
"grad_norm": 13.902491029618451,
|
|
"learning_rate": 8.714288740705475e-06,
|
|
"loss": 2.574052572250366,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 0.9320695102685624,
|
|
"grad_norm": 7.981484058770302,
|
|
"learning_rate": 8.711210611355938e-06,
|
|
"loss": 3.03520131111145,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.9328593996840442,
|
|
"grad_norm": 7.784551362187513,
|
|
"learning_rate": 8.708129346763457e-06,
|
|
"loss": 2.7354891300201416,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 0.933649289099526,
|
|
"grad_norm": 8.702455827026565,
|
|
"learning_rate": 8.705044949531092e-06,
|
|
"loss": 2.977090358734131,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.9344391785150079,
|
|
"grad_norm": 10.8130926994093,
|
|
"learning_rate": 8.701957422264555e-06,
|
|
"loss": 2.7915494441986084,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 0.9352290679304898,
|
|
"grad_norm": 8.086616773290197,
|
|
"learning_rate": 8.698866767572196e-06,
|
|
"loss": 2.89163875579834,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.9360189573459715,
|
|
"grad_norm": 13.946757867057391,
|
|
"learning_rate": 8.695772988065011e-06,
|
|
"loss": 2.9507193565368652,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.9368088467614534,
|
|
"grad_norm": 12.841477106561502,
|
|
"learning_rate": 8.692676086356637e-06,
|
|
"loss": 2.862083673477173,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.9375987361769352,
|
|
"grad_norm": 14.373739928948876,
|
|
"learning_rate": 8.689576065063343e-06,
|
|
"loss": 2.9636409282684326,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 0.9383886255924171,
|
|
"grad_norm": 11.002313560997857,
|
|
"learning_rate": 8.686472926804041e-06,
|
|
"loss": 2.7443132400512695,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.9391785150078988,
|
|
"grad_norm": 18.302833638551288,
|
|
"learning_rate": 8.683366674200271e-06,
|
|
"loss": 2.8994405269622803,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 0.9399684044233807,
|
|
"grad_norm": 20.60950939701956,
|
|
"learning_rate": 8.680257309876205e-06,
|
|
"loss": 2.9267029762268066,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.9407582938388626,
|
|
"grad_norm": 8.17531589588896,
|
|
"learning_rate": 8.677144836458645e-06,
|
|
"loss": 2.567711591720581,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 0.9415481832543444,
|
|
"grad_norm": 18.719866253308787,
|
|
"learning_rate": 8.674029256577016e-06,
|
|
"loss": 3.6237592697143555,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.9423380726698263,
|
|
"grad_norm": 5.9652452107958736,
|
|
"learning_rate": 8.670910572863376e-06,
|
|
"loss": 2.7660140991210938,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 0.943127962085308,
|
|
"grad_norm": 13.08925807761372,
|
|
"learning_rate": 8.667788787952395e-06,
|
|
"loss": 2.949338674545288,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.9439178515007899,
|
|
"grad_norm": 8.927437335126726,
|
|
"learning_rate": 8.664663904481367e-06,
|
|
"loss": 2.4335386753082275,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.9447077409162717,
|
|
"grad_norm": 13.919010389840851,
|
|
"learning_rate": 8.661535925090205e-06,
|
|
"loss": 2.838738441467285,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.9454976303317536,
|
|
"grad_norm": 11.103546933243214,
|
|
"learning_rate": 8.658404852421436e-06,
|
|
"loss": 2.6665523052215576,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 0.9462875197472354,
|
|
"grad_norm": 8.53073315483788,
|
|
"learning_rate": 8.655270689120201e-06,
|
|
"loss": 2.4120519161224365,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.9470774091627172,
|
|
"grad_norm": 8.850565917179576,
|
|
"learning_rate": 8.652133437834251e-06,
|
|
"loss": 2.8354744911193848,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 0.9478672985781991,
|
|
"grad_norm": 11.217378073646433,
|
|
"learning_rate": 8.648993101213944e-06,
|
|
"loss": 2.960960626602173,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.9486571879936809,
|
|
"grad_norm": 10.898057483385264,
|
|
"learning_rate": 8.645849681912253e-06,
|
|
"loss": 2.597686767578125,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 0.9494470774091627,
|
|
"grad_norm": 6.67140736518561,
|
|
"learning_rate": 8.642703182584743e-06,
|
|
"loss": 2.3367583751678467,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.9502369668246445,
|
|
"grad_norm": 9.820716233427401,
|
|
"learning_rate": 8.639553605889588e-06,
|
|
"loss": 2.967700481414795,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 0.9510268562401264,
|
|
"grad_norm": 7.868965032787582,
|
|
"learning_rate": 8.636400954487563e-06,
|
|
"loss": 3.142509937286377,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.9518167456556083,
|
|
"grad_norm": 9.187837043838497,
|
|
"learning_rate": 8.633245231042038e-06,
|
|
"loss": 2.2670068740844727,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.95260663507109,
|
|
"grad_norm": 9.246511337142001,
|
|
"learning_rate": 8.630086438218976e-06,
|
|
"loss": 2.476592540740967,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.9533965244865719,
|
|
"grad_norm": 11.415361373055639,
|
|
"learning_rate": 8.626924578686937e-06,
|
|
"loss": 3.100846767425537,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 0.9541864139020537,
|
|
"grad_norm": 21.60356512140913,
|
|
"learning_rate": 8.623759655117072e-06,
|
|
"loss": 2.767620801925659,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.9549763033175356,
|
|
"grad_norm": 12.131835094302646,
|
|
"learning_rate": 8.620591670183116e-06,
|
|
"loss": 2.683656692504883,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 0.9557661927330173,
|
|
"grad_norm": 14.171005073002354,
|
|
"learning_rate": 8.617420626561394e-06,
|
|
"loss": 1.9756850004196167,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.9565560821484992,
|
|
"grad_norm": 18.6056036289689,
|
|
"learning_rate": 8.614246526930816e-06,
|
|
"loss": 2.502706527709961,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 0.957345971563981,
|
|
"grad_norm": 9.743897509731939,
|
|
"learning_rate": 8.61106937397287e-06,
|
|
"loss": 2.774007797241211,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.9581358609794629,
|
|
"grad_norm": 8.660394055988597,
|
|
"learning_rate": 8.607889170371627e-06,
|
|
"loss": 3.046370506286621,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 0.9589257503949447,
|
|
"grad_norm": 15.639249371215595,
|
|
"learning_rate": 8.604705918813729e-06,
|
|
"loss": 2.9602065086364746,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.9597156398104265,
|
|
"grad_norm": 18.947552868983692,
|
|
"learning_rate": 8.601519621988402e-06,
|
|
"loss": 2.3588128089904785,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.9605055292259084,
|
|
"grad_norm": 8.36234759685658,
|
|
"learning_rate": 8.59833028258744e-06,
|
|
"loss": 2.4646201133728027,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.9612954186413902,
|
|
"grad_norm": 12.494361272602035,
|
|
"learning_rate": 8.595137903305205e-06,
|
|
"loss": 2.6268255710601807,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 0.9620853080568721,
|
|
"grad_norm": 24.950930936425536,
|
|
"learning_rate": 8.591942486838629e-06,
|
|
"loss": 2.661611318588257,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.9628751974723538,
|
|
"grad_norm": 19.123936642513613,
|
|
"learning_rate": 8.588744035887214e-06,
|
|
"loss": 2.785714626312256,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 0.9636650868878357,
|
|
"grad_norm": 11.666101454211423,
|
|
"learning_rate": 8.585542553153017e-06,
|
|
"loss": 3.2729837894439697,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.9644549763033176,
|
|
"grad_norm": 5.6088413152939856,
|
|
"learning_rate": 8.582338041340668e-06,
|
|
"loss": 2.532158613204956,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 0.9652448657187994,
|
|
"grad_norm": 10.219761775257519,
|
|
"learning_rate": 8.579130503157343e-06,
|
|
"loss": 3.0070722103118896,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.9660347551342812,
|
|
"grad_norm": 9.057639040127485,
|
|
"learning_rate": 8.575919941312782e-06,
|
|
"loss": 2.773895263671875,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 0.966824644549763,
|
|
"grad_norm": 7.267537688630795,
|
|
"learning_rate": 8.57270635851928e-06,
|
|
"loss": 3.1114962100982666,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.9676145339652449,
|
|
"grad_norm": 24.757703365939474,
|
|
"learning_rate": 8.569489757491681e-06,
|
|
"loss": 3.395624876022339,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.9684044233807267,
|
|
"grad_norm": 11.88925219508561,
|
|
"learning_rate": 8.566270140947381e-06,
|
|
"loss": 2.6596829891204834,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 0.9691943127962085,
|
|
"grad_norm": 7.189319095204579,
|
|
"learning_rate": 8.563047511606322e-06,
|
|
"loss": 2.3300154209136963,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 0.9699842022116903,
|
|
"grad_norm": 7.840035719762905,
|
|
"learning_rate": 8.559821872190993e-06,
|
|
"loss": 2.5409016609191895,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 0.9707740916271722,
|
|
"grad_norm": 13.281718877860365,
|
|
"learning_rate": 8.556593225426424e-06,
|
|
"loss": 2.4998364448547363,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 0.9715639810426541,
|
|
"grad_norm": 5.968185731325506,
|
|
"learning_rate": 8.553361574040185e-06,
|
|
"loss": 2.6161770820617676,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.9723538704581358,
|
|
"grad_norm": 8.962503347087875,
|
|
"learning_rate": 8.550126920762389e-06,
|
|
"loss": 2.6376500129699707,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 0.9731437598736177,
|
|
"grad_norm": 15.044203306326212,
|
|
"learning_rate": 8.546889268325678e-06,
|
|
"loss": 2.7106094360351562,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.9739336492890995,
|
|
"grad_norm": 9.519163730532522,
|
|
"learning_rate": 8.543648619465232e-06,
|
|
"loss": 2.801136016845703,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 0.9747235387045814,
|
|
"grad_norm": 15.500075730991261,
|
|
"learning_rate": 8.540404976918766e-06,
|
|
"loss": 3.1230263710021973,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 0.9755134281200631,
|
|
"grad_norm": 11.401461873064811,
|
|
"learning_rate": 8.537158343426515e-06,
|
|
"loss": 2.6839194297790527,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.976303317535545,
|
|
"grad_norm": 19.111412494475047,
|
|
"learning_rate": 8.53390872173125e-06,
|
|
"loss": 2.720089912414551,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 0.9770932069510269,
|
|
"grad_norm": 8.343576881810694,
|
|
"learning_rate": 8.530656114578258e-06,
|
|
"loss": 2.9516029357910156,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 0.9778830963665087,
|
|
"grad_norm": 12.186373113060878,
|
|
"learning_rate": 8.527400524715355e-06,
|
|
"loss": 3.0438735485076904,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 0.9786729857819905,
|
|
"grad_norm": 12.434900360729275,
|
|
"learning_rate": 8.524141954892872e-06,
|
|
"loss": 2.8403427600860596,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 0.9794628751974723,
|
|
"grad_norm": 8.116131181660196,
|
|
"learning_rate": 8.52088040786366e-06,
|
|
"loss": 2.6644649505615234,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.9802527646129542,
|
|
"grad_norm": 9.291988122273889,
|
|
"learning_rate": 8.517615886383087e-06,
|
|
"loss": 2.3029625415802,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 0.981042654028436,
|
|
"grad_norm": 14.010297592734409,
|
|
"learning_rate": 8.514348393209029e-06,
|
|
"loss": 3.050496816635132,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 0.9818325434439179,
|
|
"grad_norm": 7.98145981401455,
|
|
"learning_rate": 8.511077931101875e-06,
|
|
"loss": 2.6487746238708496,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 0.9826224328593997,
|
|
"grad_norm": 12.609272032565443,
|
|
"learning_rate": 8.507804502824524e-06,
|
|
"loss": 2.459247589111328,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 0.9834123222748815,
|
|
"grad_norm": 16.863961424074052,
|
|
"learning_rate": 8.504528111142376e-06,
|
|
"loss": 2.5375590324401855,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.9842022116903634,
|
|
"grad_norm": 11.57902809675688,
|
|
"learning_rate": 8.501248758823342e-06,
|
|
"loss": 2.7621288299560547,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 0.9849921011058452,
|
|
"grad_norm": 11.15937321516801,
|
|
"learning_rate": 8.497966448637825e-06,
|
|
"loss": 2.9258103370666504,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 0.985781990521327,
|
|
"grad_norm": 19.29175817663025,
|
|
"learning_rate": 8.494681183358735e-06,
|
|
"loss": 2.59159255027771,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 0.9865718799368088,
|
|
"grad_norm": 8.989580829357356,
|
|
"learning_rate": 8.491392965761472e-06,
|
|
"loss": 2.8336706161499023,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 0.9873617693522907,
|
|
"grad_norm": 24.49208358302412,
|
|
"learning_rate": 8.488101798623934e-06,
|
|
"loss": 3.0984175205230713,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.9881516587677726,
|
|
"grad_norm": 11.127902851400203,
|
|
"learning_rate": 8.484807684726513e-06,
|
|
"loss": 3.016453981399536,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 0.9889415481832543,
|
|
"grad_norm": 11.677023812873243,
|
|
"learning_rate": 8.481510626852082e-06,
|
|
"loss": 2.415400266647339,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 0.9897314375987362,
|
|
"grad_norm": 6.718579596480926,
|
|
"learning_rate": 8.478210627786008e-06,
|
|
"loss": 3.216947078704834,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 0.990521327014218,
|
|
"grad_norm": 16.963884976243023,
|
|
"learning_rate": 8.474907690316143e-06,
|
|
"loss": 2.9188220500946045,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 0.9913112164296999,
|
|
"grad_norm": 18.12845817805615,
|
|
"learning_rate": 8.47160181723282e-06,
|
|
"loss": 2.854201078414917,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.9921011058451816,
|
|
"grad_norm": 10.387630225459594,
|
|
"learning_rate": 8.468293011328848e-06,
|
|
"loss": 2.473886489868164,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 0.9928909952606635,
|
|
"grad_norm": 10.817521575505426,
|
|
"learning_rate": 8.46498127539952e-06,
|
|
"loss": 2.68951153755188,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 0.9936808846761453,
|
|
"grad_norm": 8.916073091749583,
|
|
"learning_rate": 8.4616666122426e-06,
|
|
"loss": 3.3685083389282227,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 0.9944707740916272,
|
|
"grad_norm": 18.44181632696859,
|
|
"learning_rate": 8.458349024658326e-06,
|
|
"loss": 3.140796184539795,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 0.995260663507109,
|
|
"grad_norm": 15.378117707047336,
|
|
"learning_rate": 8.455028515449407e-06,
|
|
"loss": 3.070150852203369,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.9960505529225908,
|
|
"grad_norm": 14.235029232259741,
|
|
"learning_rate": 8.451705087421023e-06,
|
|
"loss": 2.6762990951538086,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 0.9968404423380727,
|
|
"grad_norm": 14.728149790719765,
|
|
"learning_rate": 8.448378743380816e-06,
|
|
"loss": 2.3728647232055664,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 0.9976303317535545,
|
|
"grad_norm": 16.863014645031626,
|
|
"learning_rate": 8.445049486138887e-06,
|
|
"loss": 2.7437081336975098,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 0.9984202211690363,
|
|
"grad_norm": 14.200104485327296,
|
|
"learning_rate": 8.441717318507811e-06,
|
|
"loss": 2.6188509464263916,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 0.9992101105845181,
|
|
"grad_norm": 10.67666931812201,
|
|
"learning_rate": 8.438382243302609e-06,
|
|
"loss": 2.8497185707092285,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 13.692512039732208,
|
|
"learning_rate": 8.435044263340766e-06,
|
|
"loss": 3.2503585815429688,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 1.0007898894154819,
|
|
"grad_norm": 8.63492881542288,
|
|
"learning_rate": 8.431703381442219e-06,
|
|
"loss": 2.442643165588379,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 1.0015797788309637,
|
|
"grad_norm": 12.845854999880341,
|
|
"learning_rate": 8.428359600429352e-06,
|
|
"loss": 1.3568103313446045,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 1.0023696682464456,
|
|
"grad_norm": 9.925730237026748,
|
|
"learning_rate": 8.425012923127007e-06,
|
|
"loss": 1.8518157005310059,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 1.0031595576619274,
|
|
"grad_norm": 11.088071862507528,
|
|
"learning_rate": 8.421663352362465e-06,
|
|
"loss": 1.3870903253555298,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 1.003949447077409,
|
|
"grad_norm": 10.346802750253469,
|
|
"learning_rate": 8.418310890965456e-06,
|
|
"loss": 1.3119181394577026,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 1.004739336492891,
|
|
"grad_norm": 9.99087600807621,
|
|
"learning_rate": 8.414955541768148e-06,
|
|
"loss": 1.453743577003479,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 1.0055292259083728,
|
|
"grad_norm": 15.017657166973448,
|
|
"learning_rate": 8.411597307605156e-06,
|
|
"loss": 1.2929190397262573,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 1.0063191153238547,
|
|
"grad_norm": 7.93537553692124,
|
|
"learning_rate": 8.408236191313523e-06,
|
|
"loss": 1.5082969665527344,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 1.0071090047393365,
|
|
"grad_norm": 15.016597946841564,
|
|
"learning_rate": 8.40487219573273e-06,
|
|
"loss": 0.6322309374809265,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 1.0078988941548184,
|
|
"grad_norm": 20.805758505260766,
|
|
"learning_rate": 8.401505323704697e-06,
|
|
"loss": 1.5762568712234497,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 1.0086887835703002,
|
|
"grad_norm": 32.14895799539077,
|
|
"learning_rate": 8.398135578073763e-06,
|
|
"loss": 1.2138099670410156,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 1.009478672985782,
|
|
"grad_norm": 19.797132562379517,
|
|
"learning_rate": 8.394762961686706e-06,
|
|
"loss": 2.1060357093811035,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 1.0102685624012637,
|
|
"grad_norm": 9.137353495256644,
|
|
"learning_rate": 8.391387477392718e-06,
|
|
"loss": 1.1503310203552246,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 1.0110584518167456,
|
|
"grad_norm": 10.486340395654485,
|
|
"learning_rate": 8.38800912804342e-06,
|
|
"loss": 1.0871237516403198,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 1.0118483412322274,
|
|
"grad_norm": 13.381876680770137,
|
|
"learning_rate": 8.384627916492856e-06,
|
|
"loss": 2.127761125564575,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 1.0126382306477093,
|
|
"grad_norm": 7.526496737374128,
|
|
"learning_rate": 8.381243845597482e-06,
|
|
"loss": 1.72287917137146,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 1.0134281200631912,
|
|
"grad_norm": 11.304822146622485,
|
|
"learning_rate": 8.377856918216171e-06,
|
|
"loss": 1.1881051063537598,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 1.014218009478673,
|
|
"grad_norm": 9.044097061649525,
|
|
"learning_rate": 8.374467137210216e-06,
|
|
"loss": 1.359776496887207,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 1.0150078988941549,
|
|
"grad_norm": 17.550879838319734,
|
|
"learning_rate": 8.371074505443309e-06,
|
|
"loss": 1.7276735305786133,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 1.0157977883096367,
|
|
"grad_norm": 15.204121265895848,
|
|
"learning_rate": 8.367679025781559e-06,
|
|
"loss": 1.7062349319458008,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 1.0165876777251184,
|
|
"grad_norm": 11.379360194095462,
|
|
"learning_rate": 8.36428070109348e-06,
|
|
"loss": 1.3219001293182373,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 1.0173775671406002,
|
|
"grad_norm": 16.20401037301683,
|
|
"learning_rate": 8.360879534249984e-06,
|
|
"loss": 1.7655143737792969,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 1.018167456556082,
|
|
"grad_norm": 17.858221806964767,
|
|
"learning_rate": 8.35747552812439e-06,
|
|
"loss": 0.791549563407898,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 1.018957345971564,
|
|
"grad_norm": 12.734062958157567,
|
|
"learning_rate": 8.354068685592414e-06,
|
|
"loss": 2.4911937713623047,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 1.0197472353870458,
|
|
"grad_norm": 12.18144169507221,
|
|
"learning_rate": 8.350659009532169e-06,
|
|
"loss": 1.9642527103424072,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 1.0205371248025277,
|
|
"grad_norm": 8.090065379000105,
|
|
"learning_rate": 8.347246502824158e-06,
|
|
"loss": 1.8792918920516968,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 1.0213270142180095,
|
|
"grad_norm": 10.304570516520158,
|
|
"learning_rate": 8.343831168351279e-06,
|
|
"loss": 1.270374059677124,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 1.0221169036334914,
|
|
"grad_norm": 11.563660724782284,
|
|
"learning_rate": 8.340413008998818e-06,
|
|
"loss": 1.5317769050598145,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 1.0229067930489733,
|
|
"grad_norm": 15.047299484001131,
|
|
"learning_rate": 8.336992027654446e-06,
|
|
"loss": 1.1459553241729736,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 1.0236966824644549,
|
|
"grad_norm": 8.413193349303429,
|
|
"learning_rate": 8.333568227208221e-06,
|
|
"loss": 1.7619214057922363,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 1.0244865718799367,
|
|
"grad_norm": 17.455533064998168,
|
|
"learning_rate": 8.330141610552582e-06,
|
|
"loss": 1.3511468172073364,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 1.0252764612954186,
|
|
"grad_norm": 14.604139592871947,
|
|
"learning_rate": 8.326712180582343e-06,
|
|
"loss": 1.5933293104171753,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 1.0260663507109005,
|
|
"grad_norm": 27.973131188038252,
|
|
"learning_rate": 8.323279940194697e-06,
|
|
"loss": 1.5758417844772339,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 1.0268562401263823,
|
|
"grad_norm": 8.089809601055391,
|
|
"learning_rate": 8.319844892289218e-06,
|
|
"loss": 1.7407735586166382,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 1.0276461295418642,
|
|
"grad_norm": 10.840887730701892,
|
|
"learning_rate": 8.316407039767839e-06,
|
|
"loss": 1.3549420833587646,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 1.028436018957346,
|
|
"grad_norm": 12.182733951436646,
|
|
"learning_rate": 8.312966385534873e-06,
|
|
"loss": 0.9703428745269775,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 1.029225908372828,
|
|
"grad_norm": 14.482542096417097,
|
|
"learning_rate": 8.309522932496994e-06,
|
|
"loss": 1.5941872596740723,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 1.0300157977883095,
|
|
"grad_norm": 11.163985480995702,
|
|
"learning_rate": 8.306076683563245e-06,
|
|
"loss": 1.6860015392303467,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 1.0308056872037914,
|
|
"grad_norm": 18.11399534891752,
|
|
"learning_rate": 8.302627641645025e-06,
|
|
"loss": 2.1102190017700195,
|
|
"step": 1305
|
|
},
|
|
{
"epoch": 1.0315955766192733,
"grad_norm": 9.377953260206045,
"learning_rate": 8.299175809656099e-06,
"loss": 1.077817440032959,
"step": 1306
},
{
"epoch": 1.0323854660347551,
"grad_norm": 14.295491793789918,
"learning_rate": 8.295721190512584e-06,
"loss": 1.9387364387512207,
"step": 1307
},
{
"epoch": 1.033175355450237,
"grad_norm": 11.182947577292902,
"learning_rate": 8.292263787132955e-06,
"loss": 1.3131287097930908,
"step": 1308
},
{
"epoch": 1.0339652448657188,
"grad_norm": 9.345350781275434,
"learning_rate": 8.288803602438037e-06,
"loss": 1.1507320404052734,
"step": 1309
},
{
"epoch": 1.0347551342812007,
"grad_norm": 9.05362446143137,
"learning_rate": 8.285340639351005e-06,
"loss": 1.400825023651123,
"step": 1310
},
{
"epoch": 1.0355450236966826,
"grad_norm": 12.34226551872451,
"learning_rate": 8.281874900797384e-06,
"loss": 1.3061918020248413,
"step": 1311
},
{
"epoch": 1.0363349131121642,
"grad_norm": 12.863696082340267,
"learning_rate": 8.278406389705038e-06,
"loss": 1.2435519695281982,
"step": 1312
},
{
"epoch": 1.037124802527646,
"grad_norm": 9.912824621386932,
"learning_rate": 8.274935109004179e-06,
"loss": 1.968496561050415,
"step": 1313
},
{
"epoch": 1.037914691943128,
"grad_norm": 6.595291570397836,
"learning_rate": 8.271461061627356e-06,
"loss": 1.566641092300415,
"step": 1314
},
{
"epoch": 1.0387045813586098,
"grad_norm": 13.422406591433509,
"learning_rate": 8.267984250509456e-06,
"loss": 1.5585157871246338,
"step": 1315
},
{
"epoch": 1.0394944707740916,
"grad_norm": 15.367990128433833,
"learning_rate": 8.264504678587697e-06,
"loss": 1.4819953441619873,
"step": 1316
},
{
"epoch": 1.0402843601895735,
"grad_norm": 14.804150275595717,
"learning_rate": 8.261022348801638e-06,
"loss": 1.8838303089141846,
"step": 1317
},
{
"epoch": 1.0410742496050553,
"grad_norm": 10.730588920504516,
"learning_rate": 8.257537264093158e-06,
"loss": 1.101423978805542,
"step": 1318
},
{
"epoch": 1.0418641390205372,
"grad_norm": 20.04793554392532,
"learning_rate": 8.254049427406472e-06,
"loss": 1.2617835998535156,
"step": 1319
},
{
"epoch": 1.042654028436019,
"grad_norm": 8.148456102914626,
"learning_rate": 8.250558841688114e-06,
"loss": 1.9862099885940552,
"step": 1320
},
{
"epoch": 1.0434439178515007,
"grad_norm": 10.559290761333362,
"learning_rate": 8.247065509886943e-06,
"loss": 1.2836475372314453,
"step": 1321
},
{
"epoch": 1.0442338072669826,
"grad_norm": 10.381133061098705,
"learning_rate": 8.243569434954136e-06,
"loss": 1.5940964221954346,
"step": 1322
},
{
"epoch": 1.0450236966824644,
"grad_norm": 12.297214798561669,
"learning_rate": 8.24007061984319e-06,
"loss": 1.687330722808838,
"step": 1323
},
{
"epoch": 1.0458135860979463,
"grad_norm": 13.218655430214906,
"learning_rate": 8.236569067509914e-06,
"loss": 1.3215546607971191,
"step": 1324
},
{
"epoch": 1.0466034755134281,
"grad_norm": 11.341162960064988,
"learning_rate": 8.233064780912431e-06,
"loss": 1.0560698509216309,
"step": 1325
},
{
"epoch": 1.04739336492891,
"grad_norm": 22.19384496177019,
"learning_rate": 8.229557763011176e-06,
"loss": 1.9986159801483154,
"step": 1326
},
{
"epoch": 1.0481832543443919,
"grad_norm": 11.222176676313888,
"learning_rate": 8.226048016768888e-06,
"loss": 1.0500805377960205,
"step": 1327
},
{
"epoch": 1.0489731437598737,
"grad_norm": 8.465112348761927,
"learning_rate": 8.222535545150612e-06,
"loss": 1.7865219116210938,
"step": 1328
},
{
"epoch": 1.0497630331753554,
"grad_norm": 10.856718757186945,
"learning_rate": 8.219020351123695e-06,
"loss": 0.9995932579040527,
"step": 1329
},
{
"epoch": 1.0505529225908372,
"grad_norm": 12.139594241353372,
"learning_rate": 8.215502437657785e-06,
"loss": 1.1767065525054932,
"step": 1330
},
{
|
|
"epoch": 1.051342812006319,
|
|
"grad_norm": 21.039747631184284,
|
|
"learning_rate": 8.211981807724828e-06,
|
|
"loss": 1.5868887901306152,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 1.052132701421801,
|
|
"grad_norm": 10.89948650178149,
|
|
"learning_rate": 8.208458464299061e-06,
|
|
"loss": 1.2937088012695312,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 1.0529225908372828,
|
|
"grad_norm": 11.666317075229031,
|
|
"learning_rate": 8.20493241035702e-06,
|
|
"loss": 1.497636318206787,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 1.0537124802527646,
|
|
"grad_norm": 11.771023636832185,
|
|
"learning_rate": 8.201403648877523e-06,
|
|
"loss": 1.822361707687378,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 1.0545023696682465,
|
|
"grad_norm": 9.652940358468332,
|
|
"learning_rate": 8.197872182841684e-06,
|
|
"loss": 1.5103296041488647,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 1.0552922590837284,
|
|
"grad_norm": 11.614519754388345,
|
|
"learning_rate": 8.194338015232893e-06,
|
|
"loss": 1.5001356601715088,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 1.05608214849921,
|
|
"grad_norm": 9.35354809288492,
|
|
"learning_rate": 8.190801149036828e-06,
|
|
"loss": 1.7350692749023438,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 1.0568720379146919,
|
|
"grad_norm": 30.14331426380188,
|
|
"learning_rate": 8.187261587241444e-06,
|
|
"loss": 1.2237708568572998,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 1.0576619273301737,
|
|
"grad_norm": 16.364193953877205,
|
|
"learning_rate": 8.183719332836976e-06,
|
|
"loss": 1.5238933563232422,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 1.0584518167456556,
|
|
"grad_norm": 11.363557290265472,
|
|
"learning_rate": 8.180174388815933e-06,
|
|
"loss": 1.208627700805664,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 1.0592417061611374,
|
|
"grad_norm": 14.373459768882784,
|
|
"learning_rate": 8.176626758173093e-06,
|
|
"loss": 2.0444703102111816,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 1.0600315955766193,
|
|
"grad_norm": 18.032219552306884,
|
|
"learning_rate": 8.173076443905506e-06,
|
|
"loss": 1.2289549112319946,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 1.0608214849921012,
|
|
"grad_norm": 10.187495171136964,
|
|
"learning_rate": 8.16952344901249e-06,
|
|
"loss": 1.079569935798645,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 1.061611374407583,
|
|
"grad_norm": 26.979192881381238,
|
|
"learning_rate": 8.16596777649563e-06,
|
|
"loss": 3.324129343032837,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 1.0624012638230647,
|
|
"grad_norm": 10.606069599550024,
|
|
"learning_rate": 8.162409429358764e-06,
|
|
"loss": 1.3011245727539062,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 1.0631911532385465,
|
|
"grad_norm": 9.756876263614647,
|
|
"learning_rate": 8.158848410608e-06,
|
|
"loss": 3.0105724334716797,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 1.0639810426540284,
|
|
"grad_norm": 11.406355450796742,
|
|
"learning_rate": 8.155284723251697e-06,
|
|
"loss": 1.46226167678833,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 1.0647709320695102,
|
|
"grad_norm": 13.374416212880506,
|
|
"learning_rate": 8.15171837030047e-06,
|
|
"loss": 1.8406553268432617,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 1.065560821484992,
|
|
"grad_norm": 10.14760836082322,
|
|
"learning_rate": 8.148149354767183e-06,
|
|
"loss": 2.1282176971435547,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 1.066350710900474,
|
|
"grad_norm": 9.351026637130168,
|
|
"learning_rate": 8.144577679666955e-06,
|
|
"loss": 1.2611757516860962,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 1.0671406003159558,
|
|
"grad_norm": 12.303709820191473,
|
|
"learning_rate": 8.14100334801715e-06,
|
|
"loss": 1.4453020095825195,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 1.0679304897314377,
|
|
"grad_norm": 8.355205018000369,
|
|
"learning_rate": 8.13742636283737e-06,
|
|
"loss": 0.9628135561943054,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 1.0687203791469195,
|
|
"grad_norm": 9.764330703940649,
|
|
"learning_rate": 8.13384672714947e-06,
|
|
"loss": 1.3544663190841675,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 1.0695102685624012,
|
|
"grad_norm": 12.402831239510688,
|
|
"learning_rate": 8.130264443977534e-06,
|
|
"loss": 2.4541101455688477,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 1.070300157977883,
|
|
"grad_norm": 14.656238462966826,
|
|
"learning_rate": 8.126679516347887e-06,
|
|
"loss": 1.269155502319336,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 1.0710900473933649,
|
|
"grad_norm": 15.277650232839177,
|
|
"learning_rate": 8.123091947289089e-06,
|
|
"loss": 1.9917043447494507,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 1.0718799368088467,
|
|
"grad_norm": 8.257937346753295,
|
|
"learning_rate": 8.119501739831929e-06,
|
|
"loss": 1.520797610282898,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 1.0726698262243286,
|
|
"grad_norm": 12.639910986990902,
|
|
"learning_rate": 8.11590889700943e-06,
|
|
"loss": 1.22843599319458,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 1.0734597156398105,
|
|
"grad_norm": 10.525028379122327,
|
|
"learning_rate": 8.11231342185683e-06,
|
|
"loss": 1.971350908279419,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 1.0742496050552923,
|
|
"grad_norm": 12.312343896152003,
|
|
"learning_rate": 8.108715317411606e-06,
|
|
"loss": 1.4550845623016357,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 1.0750394944707742,
|
|
"grad_norm": 15.40492755500279,
|
|
"learning_rate": 8.105114586713442e-06,
|
|
"loss": 1.4124548435211182,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 1.0758293838862558,
|
|
"grad_norm": 11.11985081751917,
|
|
"learning_rate": 8.101511232804251e-06,
|
|
"loss": 1.9308778047561646,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 1.0766192733017377,
|
|
"grad_norm": 7.884346798092688,
|
|
"learning_rate": 8.097905258728158e-06,
|
|
"loss": 1.8403844833374023,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 1.0774091627172195,
|
|
"grad_norm": 10.235048213024509,
|
|
"learning_rate": 8.094296667531501e-06,
|
|
"loss": 2.4593963623046875,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 1.0781990521327014,
|
|
"grad_norm": 15.402993599882011,
|
|
"learning_rate": 8.090685462262828e-06,
|
|
"loss": 1.2213027477264404,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 1.0789889415481833,
|
|
"grad_norm": 13.057398754067725,
|
|
"learning_rate": 8.0870716459729e-06,
|
|
"loss": 1.3943471908569336,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 1.0797788309636651,
|
|
"grad_norm": 11.831339156526893,
|
|
"learning_rate": 8.08345522171468e-06,
|
|
"loss": 1.2609305381774902,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 1.080568720379147,
|
|
"grad_norm": 10.352369944456171,
|
|
"learning_rate": 8.079836192543335e-06,
|
|
"loss": 1.6797740459442139,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 1.0813586097946288,
|
|
"grad_norm": 8.167152293191428,
|
|
"learning_rate": 8.076214561516234e-06,
|
|
"loss": 1.5496795177459717,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 1.0821484992101107,
|
|
"grad_norm": 10.632786869980947,
|
|
"learning_rate": 8.07259033169294e-06,
|
|
"loss": 1.1447343826293945,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 1.0829383886255923,
|
|
"grad_norm": 9.772931829234892,
|
|
"learning_rate": 8.068963506135214e-06,
|
|
"loss": 1.840114712715149,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 1.0837282780410742,
|
|
"grad_norm": 12.315109646538895,
|
|
"learning_rate": 8.065334087907016e-06,
|
|
"loss": 1.73477041721344,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 1.084518167456556,
|
|
"grad_norm": 18.168778117659386,
|
|
"learning_rate": 8.061702080074483e-06,
|
|
"loss": 2.225961923599243,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 1.085308056872038,
|
|
"grad_norm": 32.98324986784259,
|
|
"learning_rate": 8.05806748570595e-06,
|
|
"loss": 1.1636614799499512,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 1.0860979462875198,
|
|
"grad_norm": 9.324662001634294,
|
|
"learning_rate": 8.054430307871939e-06,
|
|
"loss": 1.8258980512619019,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 1.0868878357030016,
|
|
"grad_norm": 9.190806390754526,
|
|
"learning_rate": 8.050790549645141e-06,
|
|
"loss": 1.2510772943496704,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 1.0876777251184835,
|
|
"grad_norm": 16.506042815565262,
|
|
"learning_rate": 8.047148214100445e-06,
|
|
"loss": 1.6223645210266113,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 1.0884676145339653,
|
|
"grad_norm": 13.858600832931351,
|
|
"learning_rate": 8.043503304314901e-06,
|
|
"loss": 1.3856348991394043,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 1.089257503949447,
|
|
"grad_norm": 10.071290706752858,
|
|
"learning_rate": 8.039855823367744e-06,
|
|
"loss": 0.7631658911705017,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 1.0900473933649288,
|
|
"grad_norm": 12.6598160611525,
|
|
"learning_rate": 8.036205774340378e-06,
|
|
"loss": 1.197335958480835,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.0908372827804107,
|
|
"grad_norm": 11.751428435172853,
|
|
"learning_rate": 8.032553160316374e-06,
|
|
"loss": 1.2686131000518799,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 1.0916271721958926,
|
|
"grad_norm": 10.67438658042041,
|
|
"learning_rate": 8.028897984381475e-06,
|
|
"loss": 1.5238006114959717,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 1.0924170616113744,
|
|
"grad_norm": 11.743621256195373,
|
|
"learning_rate": 8.025240249623583e-06,
|
|
"loss": 1.500381588935852,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 1.0932069510268563,
|
|
"grad_norm": 7.559878550374157,
|
|
"learning_rate": 8.021579959132768e-06,
|
|
"loss": 1.3321250677108765,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 1.0939968404423381,
|
|
"grad_norm": 23.292812697121704,
|
|
"learning_rate": 8.017917116001253e-06,
|
|
"loss": 2.0412609577178955,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 1.09478672985782,
|
|
"grad_norm": 13.85465573871678,
|
|
"learning_rate": 8.01425172332342e-06,
|
|
"loss": 2.5268537998199463,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 1.0955766192733019,
|
|
"grad_norm": 9.779613579870743,
|
|
"learning_rate": 8.010583784195804e-06,
|
|
"loss": 1.6717355251312256,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 1.0963665086887835,
|
|
"grad_norm": 8.068370833710562,
|
|
"learning_rate": 8.006913301717097e-06,
|
|
"loss": 1.9225399494171143,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 1.0971563981042654,
|
|
"grad_norm": 27.77102947325972,
|
|
"learning_rate": 8.003240278988127e-06,
|
|
"loss": 1.9709285497665405,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 1.0979462875197472,
|
|
"grad_norm": 14.301520105323926,
|
|
"learning_rate": 7.999564719111884e-06,
|
|
"loss": 1.1914315223693848,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 1.098736176935229,
|
|
"grad_norm": 8.296671084591999,
|
|
"learning_rate": 7.995886625193484e-06,
|
|
"loss": 1.6483365297317505,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 1.099526066350711,
|
|
"grad_norm": 13.348402769682588,
|
|
"learning_rate": 7.9922060003402e-06,
|
|
"loss": 1.258619785308838,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 1.1003159557661928,
|
|
"grad_norm": 11.776544553295185,
|
|
"learning_rate": 7.988522847661432e-06,
|
|
"loss": 1.841566801071167,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 1.1011058451816746,
|
|
"grad_norm": 10.875063836555396,
|
|
"learning_rate": 7.984837170268725e-06,
|
|
"loss": 1.3339906930923462,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 1.1018957345971563,
|
|
"grad_norm": 13.070070571580835,
|
|
"learning_rate": 7.981148971275744e-06,
|
|
"loss": 1.6276012659072876,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 1.1026856240126381,
|
|
"grad_norm": 8.908917736629908,
|
|
"learning_rate": 7.977458253798296e-06,
|
|
"loss": 1.7726579904556274,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 1.10347551342812,
|
|
"grad_norm": 18.167256312241264,
|
|
"learning_rate": 7.973765020954312e-06,
|
|
"loss": 1.8422517776489258,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 1.1042654028436019,
|
|
"grad_norm": 7.381573039738737,
|
|
"learning_rate": 7.970069275863844e-06,
|
|
"loss": 1.8285367488861084,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 1.1050552922590837,
|
|
"grad_norm": 13.52831771786112,
|
|
"learning_rate": 7.96637102164907e-06,
|
|
"loss": 1.3294421434402466,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 1.1058451816745656,
|
|
"grad_norm": 10.887865537597344,
|
|
"learning_rate": 7.96267026143429e-06,
|
|
"loss": 1.9184372425079346,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 1.1066350710900474,
|
|
"grad_norm": 14.487647084460335,
|
|
"learning_rate": 7.95896699834592e-06,
|
|
"loss": 1.6796505451202393,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 1.1074249605055293,
|
|
"grad_norm": 11.249265183893405,
|
|
"learning_rate": 7.955261235512483e-06,
|
|
"loss": 1.4529224634170532,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 1.1082148499210112,
|
|
"grad_norm": 9.645198748151126,
|
|
"learning_rate": 7.951552976064623e-06,
|
|
"loss": 1.2036597728729248,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 1.1090047393364928,
|
|
"grad_norm": 6.984305487322073,
|
|
"learning_rate": 7.947842223135089e-06,
|
|
"loss": 1.6848506927490234,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 1.1097946287519747,
|
|
"grad_norm": 10.21639035313384,
|
|
"learning_rate": 7.944128979858739e-06,
|
|
"loss": 2.247422218322754,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 1.1105845181674565,
|
|
"grad_norm": 11.108091790322238,
|
|
"learning_rate": 7.940413249372529e-06,
|
|
"loss": 1.2086236476898193,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 1.1113744075829384,
|
|
"grad_norm": 16.112732227769964,
|
|
"learning_rate": 7.936695034815527e-06,
|
|
"loss": 1.3832933902740479,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 1.1121642969984202,
|
|
"grad_norm": 10.731967743134,
|
|
"learning_rate": 7.932974339328887e-06,
|
|
"loss": 1.68961763381958,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 1.112954186413902,
|
|
"grad_norm": 9.149310423822971,
|
|
"learning_rate": 7.929251166055867e-06,
|
|
"loss": 0.9486649632453918,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 1.113744075829384,
|
|
"grad_norm": 9.704188919512742,
|
|
"learning_rate": 7.925525518141816e-06,
|
|
"loss": 1.669637680053711,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 1.1145339652448658,
|
|
"grad_norm": 12.16446463953121,
|
|
"learning_rate": 7.921797398734174e-06,
|
|
"loss": 1.8166450262069702,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 1.1153238546603474,
|
|
"grad_norm": 13.23762039499484,
|
|
"learning_rate": 7.918066810982468e-06,
|
|
"loss": 2.1429643630981445,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 1.1161137440758293,
|
|
"grad_norm": 11.441036394254365,
|
|
"learning_rate": 7.914333758038311e-06,
|
|
"loss": 1.7969956398010254,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 1.1169036334913112,
|
|
"grad_norm": 11.37805305383338,
|
|
"learning_rate": 7.910598243055396e-06,
|
|
"loss": 1.394661784172058,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 1.117693522906793,
|
|
"grad_norm": 10.617508342494792,
|
|
"learning_rate": 7.906860269189504e-06,
|
|
"loss": 2.4616918563842773,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 1.1184834123222749,
|
|
"grad_norm": 15.908117706983552,
|
|
"learning_rate": 7.903119839598482e-06,
|
|
"loss": 1.3925102949142456,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 1.1192733017377567,
|
|
"grad_norm": 6.98257561740011,
|
|
"learning_rate": 7.899376957442262e-06,
|
|
"loss": 1.6068451404571533,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 1.1200631911532386,
|
|
"grad_norm": 8.912600940978086,
|
|
"learning_rate": 7.89563162588284e-06,
|
|
"loss": 1.1992769241333008,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 1.1208530805687205,
|
|
"grad_norm": 8.211754082626802,
|
|
"learning_rate": 7.891883848084283e-06,
|
|
"loss": 1.857825517654419,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 1.1216429699842023,
|
|
"grad_norm": 9.581409453425586,
|
|
"learning_rate": 7.88813362721273e-06,
|
|
"loss": 1.6415996551513672,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 1.122432859399684,
|
|
"grad_norm": 18.329912927070012,
|
|
"learning_rate": 7.884380966436379e-06,
|
|
"loss": 1.7880502939224243,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 1.1232227488151658,
|
|
"grad_norm": 9.810032143627515,
|
|
"learning_rate": 7.880625868925487e-06,
|
|
"loss": 1.8203599452972412,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 1.1240126382306477,
|
|
"grad_norm": 9.06670473315355,
|
|
"learning_rate": 7.876868337852377e-06,
|
|
"loss": 1.596606731414795,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 1.1248025276461295,
|
|
"grad_norm": 9.942294890261461,
|
|
"learning_rate": 7.87310837639142e-06,
|
|
"loss": 1.4776908159255981,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 1.1255924170616114,
|
|
"grad_norm": 10.54945063589167,
|
|
"learning_rate": 7.869345987719047e-06,
|
|
"loss": 1.6829514503479004,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 1.1263823064770933,
|
|
"grad_norm": 11.580449343278849,
|
|
"learning_rate": 7.865581175013735e-06,
|
|
"loss": 1.1238837242126465,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 1.1271721958925751,
|
|
"grad_norm": 18.360177466539106,
|
|
"learning_rate": 7.86181394145601e-06,
|
|
"loss": 1.6330945491790771,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 1.1279620853080567,
|
|
"grad_norm": 13.657848488790902,
|
|
"learning_rate": 7.858044290228443e-06,
|
|
"loss": 1.6493346691131592,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 1.1287519747235386,
|
|
"grad_norm": 8.8436841483474,
|
|
"learning_rate": 7.854272224515646e-06,
|
|
"loss": 1.7612037658691406,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 1.1295418641390205,
|
|
"grad_norm": 15.349132583927682,
|
|
"learning_rate": 7.850497747504278e-06,
|
|
"loss": 1.4374094009399414,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 1.1303317535545023,
|
|
"grad_norm": 7.20960688510771,
|
|
"learning_rate": 7.846720862383024e-06,
|
|
"loss": 1.5491845607757568,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 1.1311216429699842,
|
|
"grad_norm": 8.9481729357505,
|
|
"learning_rate": 7.842941572342613e-06,
|
|
"loss": 1.082542896270752,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 1.131911532385466,
|
|
"grad_norm": 11.741346122561193,
|
|
"learning_rate": 7.839159880575798e-06,
|
|
"loss": 1.0406155586242676,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 1.132701421800948,
|
|
"grad_norm": 12.852699840775466,
|
|
"learning_rate": 7.835375790277367e-06,
|
|
"loss": 1.5804665088653564,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 1.1334913112164298,
|
|
"grad_norm": 11.234205111215255,
|
|
"learning_rate": 7.831589304644132e-06,
|
|
"loss": 1.6930797100067139,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 1.1342812006319116,
|
|
"grad_norm": 12.465436878993371,
|
|
"learning_rate": 7.827800426874927e-06,
|
|
"loss": 2.350858211517334,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 1.1350710900473935,
|
|
"grad_norm": 10.417673706008966,
|
|
"learning_rate": 7.824009160170611e-06,
|
|
"loss": 2.1081159114837646,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 1.1358609794628751,
|
|
"grad_norm": 6.972975321299533,
|
|
"learning_rate": 7.820215507734057e-06,
|
|
"loss": 1.550991415977478,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 1.136650868878357,
|
|
"grad_norm": 9.653383800691586,
|
|
"learning_rate": 7.816419472770156e-06,
|
|
"loss": 2.1447458267211914,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 1.1374407582938388,
|
|
"grad_norm": 10.486086790783105,
|
|
"learning_rate": 7.812621058485807e-06,
|
|
"loss": 1.8012946844100952,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 1.1382306477093207,
|
|
"grad_norm": 18.588695506303544,
|
|
"learning_rate": 7.808820268089928e-06,
|
|
"loss": 0.923102617263794,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 1.1390205371248026,
|
|
"grad_norm": 11.289408037095642,
|
|
"learning_rate": 7.805017104793438e-06,
|
|
"loss": 1.2512264251708984,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 1.1398104265402844,
|
|
"grad_norm": 17.187322631022983,
|
|
"learning_rate": 7.801211571809258e-06,
|
|
"loss": 2.007840633392334,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 1.1406003159557663,
|
|
"grad_norm": 13.141510083418034,
|
|
"learning_rate": 7.79740367235232e-06,
|
|
"loss": 1.7152661085128784,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 1.141390205371248,
|
|
"grad_norm": 15.112494999687053,
|
|
"learning_rate": 7.793593409639547e-06,
|
|
"loss": 2.0199451446533203,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 1.1421800947867298,
|
|
"grad_norm": 85.2167253034368,
|
|
"learning_rate": 7.789780786889862e-06,
|
|
"loss": 2.34489369392395,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 1.1429699842022116,
|
|
"grad_norm": 9.312272447955333,
|
|
"learning_rate": 7.785965807324182e-06,
|
|
"loss": 1.8030688762664795,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 1.1437598736176935,
|
|
"grad_norm": 6.989138103509471,
|
|
"learning_rate": 7.782148474165414e-06,
|
|
"loss": 1.0236456394195557,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 1.1445497630331753,
|
|
"grad_norm": 13.66043355155644,
|
|
"learning_rate": 7.778328790638454e-06,
|
|
"loss": 1.432175636291504,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 1.1453396524486572,
|
|
"grad_norm": 11.738420362584556,
|
|
"learning_rate": 7.774506759970183e-06,
|
|
"loss": 1.8172495365142822,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 1.146129541864139,
|
|
"grad_norm": 12.2141920832457,
|
|
"learning_rate": 7.770682385389461e-06,
|
|
"loss": 1.540172815322876,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 1.146919431279621,
|
|
"grad_norm": 11.67045045892725,
|
|
"learning_rate": 7.76685567012714e-06,
|
|
"loss": 1.0208656787872314,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 1.1477093206951028,
|
|
"grad_norm": 19.223683414914778,
|
|
"learning_rate": 7.763026617416037e-06,
|
|
"loss": 2.1578786373138428,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 1.1484992101105844,
|
|
"grad_norm": 11.975180223168232,
|
|
"learning_rate": 7.759195230490948e-06,
|
|
"loss": 1.0847947597503662,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 1.1492890995260663,
|
|
"grad_norm": 14.479350754911671,
|
|
"learning_rate": 7.755361512588641e-06,
|
|
"loss": 0.8910523653030396,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 1.1500789889415481,
|
|
"grad_norm": 12.322055451503076,
|
|
"learning_rate": 7.751525466947858e-06,
|
|
"loss": 1.9108871221542358,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 1.15086887835703,
|
|
"grad_norm": 8.486429198821625,
|
|
"learning_rate": 7.747687096809298e-06,
|
|
"loss": 1.3047826290130615,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 1.1516587677725119,
|
|
"grad_norm": 22.216033260635363,
|
|
"learning_rate": 7.74384640541563e-06,
|
|
"loss": 1.3196589946746826,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 1.1524486571879937,
|
|
"grad_norm": 13.4404210054278,
|
|
"learning_rate": 7.740003396011485e-06,
|
|
"loss": 1.7318623065948486,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 1.1532385466034756,
|
|
"grad_norm": 10.120534079823207,
|
|
"learning_rate": 7.736158071843446e-06,
|
|
"loss": 1.0442365407943726,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 1.1540284360189574,
|
|
"grad_norm": 11.889433421917458,
|
|
"learning_rate": 7.73231043616006e-06,
|
|
"loss": 1.421532392501831,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 1.154818325434439,
|
|
"grad_norm": 8.828012658172716,
|
|
"learning_rate": 7.728460492211818e-06,
|
|
"loss": 1.5032670497894287,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 1.155608214849921,
|
|
"grad_norm": 16.251861602372543,
|
|
"learning_rate": 7.724608243251168e-06,
|
|
"loss": 1.5522558689117432,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 1.1563981042654028,
|
|
"grad_norm": 14.565593393435774,
|
|
"learning_rate": 7.720753692532503e-06,
|
|
"loss": 1.4599350690841675,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 1.1571879936808847,
|
|
"grad_norm": 8.480351738134685,
|
|
"learning_rate": 7.716896843312158e-06,
|
|
"loss": 1.7275468111038208,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 1.1579778830963665,
|
|
"grad_norm": 13.805554305973507,
|
|
"learning_rate": 7.713037698848414e-06,
|
|
"loss": 1.4946775436401367,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 1.1587677725118484,
|
|
"grad_norm": 14.030328008782464,
|
|
"learning_rate": 7.709176262401488e-06,
|
|
"loss": 1.6568756103515625,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 1.1595576619273302,
|
|
"grad_norm": 18.97968668941531,
|
|
"learning_rate": 7.705312537233534e-06,
|
|
"loss": 2.4846177101135254,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 1.160347551342812,
|
|
"grad_norm": 21.074569384938723,
|
|
"learning_rate": 7.70144652660864e-06,
|
|
"loss": 1.6268469095230103,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 1.161137440758294,
|
|
"grad_norm": 11.221094852791536,
|
|
"learning_rate": 7.697578233792824e-06,
|
|
"loss": 1.4419642686843872,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 1.1619273301737756,
|
|
"grad_norm": 12.283969197549489,
|
|
"learning_rate": 7.693707662054033e-06,
|
|
"loss": 2.8970489501953125,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 1.1627172195892574,
|
|
"grad_norm": 10.191761634757446,
|
|
"learning_rate": 7.68983481466214e-06,
|
|
"loss": 1.694975733757019,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 1.1635071090047393,
|
|
"grad_norm": 9.871959664749303,
|
|
"learning_rate": 7.685959694888935e-06,
|
|
"loss": 1.5678069591522217,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 1.1642969984202212,
|
|
"grad_norm": 13.739000495877802,
|
|
"learning_rate": 7.682082306008138e-06,
|
|
"loss": 2.2661375999450684,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 1.165086887835703,
|
|
"grad_norm": 13.029810676854682,
|
|
"learning_rate": 7.678202651295377e-06,
|
|
"loss": 1.779642105102539,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 1.1658767772511849,
|
|
"grad_norm": 11.679296000479976,
|
|
"learning_rate": 7.674320734028193e-06,
|
|
"loss": 0.8130778074264526,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 1.1666666666666667,
|
|
"grad_norm": 14.423216823781184,
|
|
"learning_rate": 7.67043655748605e-06,
|
|
"loss": 1.6942588090896606,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 1.1674565560821484,
|
|
"grad_norm": 11.441884700380314,
|
|
"learning_rate": 7.66655012495031e-06,
|
|
"loss": 1.179840087890625,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 1.1682464454976302,
|
|
"grad_norm": 16.02771109914067,
|
|
"learning_rate": 7.662661439704241e-06,
|
|
"loss": 2.1548268795013428,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 1.169036334913112,
|
|
"grad_norm": 14.394873535724342,
|
|
"learning_rate": 7.658770505033022e-06,
|
|
"loss": 1.1767363548278809,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 1.169826224328594,
|
|
"grad_norm": 16.396153231459706,
|
|
"learning_rate": 7.654877324223723e-06,
|
|
"loss": 1.0473990440368652,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 1.1706161137440758,
|
|
"grad_norm": 36.207662154699634,
|
|
"learning_rate": 7.65098190056532e-06,
|
|
"loss": 1.4607884883880615,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 1.1714060031595577,
|
|
"grad_norm": 17.99136981853802,
|
|
"learning_rate": 7.647084237348676e-06,
|
|
"loss": 1.0910999774932861,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 1.1721958925750395,
|
|
"grad_norm": 17.53587070270741,
|
|
"learning_rate": 7.643184337866551e-06,
|
|
"loss": 1.9439430236816406,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 1.1729857819905214,
|
|
"grad_norm": 21.036905024705902,
|
|
"learning_rate": 7.639282205413595e-06,
|
|
"loss": 1.2512264251708984,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 1.1737756714060033,
|
|
"grad_norm": 14.69734973276274,
|
|
"learning_rate": 7.63537784328634e-06,
|
|
"loss": 1.736374855041504,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 1.174565560821485,
|
|
"grad_norm": 10.589187353935765,
|
|
"learning_rate": 7.631471254783205e-06,
|
|
"loss": 2.0629525184631348,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 1.1753554502369667,
|
|
"grad_norm": 12.298150667488217,
|
|
"learning_rate": 7.627562443204488e-06,
|
|
"loss": 1.75897216796875,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 1.1761453396524486,
|
|
"grad_norm": 11.144800996692458,
|
|
"learning_rate": 7.623651411852369e-06,
|
|
"loss": 1.8783044815063477,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 1.1769352290679305,
|
|
"grad_norm": 18.274971673090146,
|
|
"learning_rate": 7.619738164030894e-06,
|
|
"loss": 1.2765934467315674,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 1.1777251184834123,
|
|
"grad_norm": 13.609333078610245,
|
|
"learning_rate": 7.615822703045993e-06,
|
|
"loss": 0.9716304540634155,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 1.1785150078988942,
|
|
"grad_norm": 9.06953052854543,
|
|
"learning_rate": 7.611905032205459e-06,
|
|
"loss": 1.2346336841583252,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 1.179304897314376,
|
|
"grad_norm": 7.894635099442645,
|
|
"learning_rate": 7.607985154818951e-06,
|
|
"loss": 1.3905161619186401,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 1.180094786729858,
|
|
"grad_norm": 11.930990729479253,
|
|
"learning_rate": 7.6040630741979985e-06,
|
|
"loss": 1.5520501136779785,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 1.1808846761453395,
|
|
"grad_norm": 13.518366545034592,
|
|
"learning_rate": 7.6001387936559845e-06,
|
|
"loss": 1.478689193725586,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 1.1816745655608214,
|
|
"grad_norm": 13.55754756002125,
|
|
"learning_rate": 7.596212316508155e-06,
|
|
"loss": 1.195493221282959,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 1.1824644549763033,
|
|
"grad_norm": 9.14918248953396,
|
|
"learning_rate": 7.592283646071612e-06,
|
|
"loss": 1.777099609375,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 1.1832543443917851,
|
|
"grad_norm": 12.578589306666348,
|
|
"learning_rate": 7.588352785665307e-06,
|
|
"loss": 1.780318260192871,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 1.184044233807267,
|
|
"grad_norm": 16.331287548330316,
|
|
"learning_rate": 7.5844197386100475e-06,
|
|
"loss": 0.6511062383651733,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 1.1848341232227488,
|
|
"grad_norm": 16.33379318813503,
|
|
"learning_rate": 7.58048450822848e-06,
|
|
"loss": 1.3710471391677856,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 1.1856240126382307,
|
|
"grad_norm": 7.772998081101554,
|
|
"learning_rate": 7.576547097845105e-06,
|
|
"loss": 1.412032127380371,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 1.1864139020537126,
|
|
"grad_norm": 8.398419142031567,
|
|
"learning_rate": 7.572607510786256e-06,
|
|
"loss": 1.2059210538864136,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 1.1872037914691944,
|
|
"grad_norm": 9.901903204130111,
|
|
"learning_rate": 7.568665750380111e-06,
|
|
"loss": 1.7022660970687866,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 1.187993680884676,
|
|
"grad_norm": 19.304850784714255,
|
|
"learning_rate": 7.56472181995668e-06,
|
|
"loss": 0.9760904908180237,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 1.188783570300158,
|
|
"grad_norm": 7.87702786044823,
|
|
"learning_rate": 7.560775722847811e-06,
|
|
"loss": 1.2958579063415527,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 1.1895734597156398,
|
|
"grad_norm": 8.790058222382367,
|
|
"learning_rate": 7.556827462387179e-06,
|
|
"loss": 1.5401803255081177,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 1.1903633491311216,
|
|
"grad_norm": 10.161647102455948,
|
|
"learning_rate": 7.5528770419102845e-06,
|
|
"loss": 1.2612812519073486,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 1.1911532385466035,
|
|
"grad_norm": 8.100962154198587,
|
|
"learning_rate": 7.548924464754462e-06,
|
|
"loss": 1.4908281564712524,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 1.1919431279620853,
|
|
"grad_norm": 7.739278263317995,
|
|
"learning_rate": 7.544969734258856e-06,
|
|
"loss": 1.3893706798553467,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 1.1927330173775672,
|
|
"grad_norm": 9.236436622854567,
|
|
"learning_rate": 7.54101285376444e-06,
|
|
"loss": 1.428430438041687,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 1.193522906793049,
|
|
"grad_norm": 9.21444761020881,
|
|
"learning_rate": 7.537053826613995e-06,
|
|
"loss": 1.2131574153900146,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 1.1943127962085307,
|
|
"grad_norm": 12.77391408135766,
|
|
"learning_rate": 7.533092656152123e-06,
|
|
"loss": 1.4747178554534912,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 1.1951026856240126,
|
|
"grad_norm": 10.144190882358012,
|
|
"learning_rate": 7.529129345725234e-06,
|
|
"loss": 1.2598638534545898,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 1.1958925750394944,
|
|
"grad_norm": 11.632673780290576,
|
|
"learning_rate": 7.525163898681547e-06,
|
|
"loss": 2.219787120819092,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 1.1966824644549763,
|
|
"grad_norm": 11.100505677689917,
|
|
"learning_rate": 7.521196318371082e-06,
|
|
"loss": 1.492173433303833,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 1.1974723538704581,
|
|
"grad_norm": 13.348061220126512,
|
|
"learning_rate": 7.517226608145665e-06,
|
|
"loss": 1.3286212682724,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 1.19826224328594,
|
|
"grad_norm": 10.85425623441688,
|
|
"learning_rate": 7.51325477135892e-06,
|
|
"loss": 1.711487054824829,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 1.1990521327014219,
|
|
"grad_norm": 10.554910275401799,
|
|
"learning_rate": 7.509280811366268e-06,
|
|
"loss": 1.6877734661102295,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 1.1998420221169037,
|
|
"grad_norm": 9.907008317230424,
|
|
"learning_rate": 7.505304731524925e-06,
|
|
"loss": 1.5272674560546875,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 1.2006319115323856,
|
|
"grad_norm": 6.706994018715486,
|
|
"learning_rate": 7.501326535193893e-06,
|
|
"loss": 1.4311678409576416,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 1.2014218009478672,
|
|
"grad_norm": 10.997085955938047,
|
|
"learning_rate": 7.497346225733968e-06,
|
|
"loss": 2.118659734725952,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 1.202211690363349,
|
|
"grad_norm": 11.689892035649317,
|
|
"learning_rate": 7.493363806507727e-06,
|
|
"loss": 1.3729009628295898,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 1.203001579778831,
|
|
"grad_norm": 9.397482204646643,
|
|
"learning_rate": 7.489379280879532e-06,
|
|
"loss": 1.257511854171753,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 1.2037914691943128,
|
|
"grad_norm": 11.597065678347706,
|
|
"learning_rate": 7.485392652215524e-06,
|
|
"loss": 2.0744144916534424,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 1.2045813586097947,
|
|
"grad_norm": 9.18002114901742,
|
|
"learning_rate": 7.481403923883618e-06,
|
|
"loss": 2.4062094688415527,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 1.2053712480252765,
|
|
"grad_norm": 11.51200612045866,
|
|
"learning_rate": 7.477413099253507e-06,
|
|
"loss": 1.3634181022644043,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 1.2061611374407584,
|
|
"grad_norm": 9.886739815987795,
|
|
"learning_rate": 7.4734201816966504e-06,
|
|
"loss": 1.8432646989822388,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 1.20695102685624,
|
|
"grad_norm": 7.821912561074209,
|
|
"learning_rate": 7.46942517458628e-06,
|
|
"loss": 1.6155750751495361,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 1.2077409162717219,
|
|
"grad_norm": 208.73695380738994,
|
|
"learning_rate": 7.46542808129739e-06,
|
|
"loss": 2.2105181217193604,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 1.2085308056872037,
|
|
"grad_norm": 9.442612830841552,
|
|
"learning_rate": 7.461428905206738e-06,
|
|
"loss": 1.035463571548462,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 1.2093206951026856,
|
|
"grad_norm": 14.92511763923537,
|
|
"learning_rate": 7.457427649692842e-06,
|
|
"loss": 1.388323187828064,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 1.2101105845181674,
|
|
"grad_norm": 10.04534923313997,
|
|
"learning_rate": 7.453424318135976e-06,
|
|
"loss": 1.5381313562393188,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 1.2109004739336493,
|
|
"grad_norm": 10.045984329726831,
|
|
"learning_rate": 7.4494189139181664e-06,
|
|
"loss": 2.077467441558838,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 1.2116903633491312,
|
|
"grad_norm": 11.346686651643333,
|
|
"learning_rate": 7.445411440423191e-06,
|
|
"loss": 1.4641904830932617,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 1.212480252764613,
|
|
"grad_norm": 10.573921142627707,
|
|
"learning_rate": 7.4414019010365804e-06,
|
|
"loss": 1.4743311405181885,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 1.2132701421800949,
|
|
"grad_norm": 13.824451819029829,
|
|
"learning_rate": 7.437390299145602e-06,
|
|
"loss": 1.4657893180847168,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 1.2140600315955767,
|
|
"grad_norm": 7.208254750492127,
|
|
"learning_rate": 7.433376638139273e-06,
|
|
"loss": 1.7224773168563843,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 1.2148499210110584,
|
|
"grad_norm": 14.6258689437837,
|
|
"learning_rate": 7.429360921408348e-06,
|
|
"loss": 1.240898847579956,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 1.2156398104265402,
|
|
"grad_norm": 9.289403617691221,
|
|
"learning_rate": 7.4253431523453156e-06,
|
|
"loss": 1.5302990674972534,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 1.216429699842022,
|
|
"grad_norm": 15.992588174167665,
|
|
"learning_rate": 7.421323334344403e-06,
|
|
"loss": 1.2926700115203857,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 1.217219589257504,
|
|
"grad_norm": 13.29806452130727,
|
|
"learning_rate": 7.417301470801563e-06,
|
|
"loss": 1.858204960823059,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 1.2180094786729858,
|
|
"grad_norm": 10.981694927001664,
|
|
"learning_rate": 7.41327756511448e-06,
|
|
"loss": 1.6751494407653809,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 1.2187993680884677,
|
|
"grad_norm": 12.919094806936021,
|
|
"learning_rate": 7.409251620682565e-06,
|
|
"loss": 1.1227457523345947,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 1.2195892575039495,
|
|
"grad_norm": 12.822642145949752,
|
|
"learning_rate": 7.405223640906945e-06,
|
|
"loss": 1.8671314716339111,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 1.2203791469194312,
|
|
"grad_norm": 13.207524616649813,
|
|
"learning_rate": 7.401193629190475e-06,
|
|
"loss": 2.4761242866516113,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 1.221169036334913,
|
|
"grad_norm": 10.043425267983269,
|
|
"learning_rate": 7.397161588937718e-06,
|
|
"loss": 1.4061156511306763,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 1.2219589257503949,
|
|
"grad_norm": 10.369465986167071,
|
|
"learning_rate": 7.393127523554959e-06,
|
|
"loss": 1.6333423852920532,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 1.2227488151658767,
|
|
"grad_norm": 28.920248006785016,
|
|
"learning_rate": 7.389091436450185e-06,
|
|
"loss": 2.198690414428711,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 1.2235387045813586,
|
|
"grad_norm": 10.220829768400877,
|
|
"learning_rate": 7.3850533310330995e-06,
|
|
"loss": 1.6852078437805176,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 1.2243285939968405,
|
|
"grad_norm": 10.219543177913568,
|
|
"learning_rate": 7.381013210715106e-06,
|
|
"loss": 1.4084906578063965,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 1.2251184834123223,
|
|
"grad_norm": 11.198352523535156,
|
|
"learning_rate": 7.376971078909312e-06,
|
|
"loss": 1.3699990510940552,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 1.2259083728278042,
|
|
"grad_norm": 11.335053711356307,
|
|
"learning_rate": 7.3729269390305226e-06,
|
|
"loss": 1.7044817209243774,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 1.226698262243286,
|
|
"grad_norm": 12.010639946062005,
|
|
"learning_rate": 7.36888079449524e-06,
|
|
"loss": 1.949375033378601,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 1.2274881516587677,
|
|
"grad_norm": 8.245851384242593,
|
|
"learning_rate": 7.364832648721664e-06,
|
|
"loss": 1.9039802551269531,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 1.2282780410742495,
|
|
"grad_norm": 7.08878950015621,
|
|
"learning_rate": 7.360782505129675e-06,
|
|
"loss": 1.9304604530334473,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 1.2290679304897314,
|
|
"grad_norm": 15.151405491025901,
|
|
"learning_rate": 7.356730367140856e-06,
|
|
"loss": 1.5305366516113281,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 1.2298578199052133,
|
|
"grad_norm": 25.91795312714592,
|
|
"learning_rate": 7.35267623817846e-06,
|
|
"loss": 1.2220271825790405,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 1.2306477093206951,
|
|
"grad_norm": 15.027153220859233,
|
|
"learning_rate": 7.348620121667431e-06,
|
|
"loss": 1.7458312511444092,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 1.231437598736177,
|
|
"grad_norm": 11.869844226224354,
|
|
"learning_rate": 7.34456202103439e-06,
|
|
"loss": 1.1599485874176025,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 1.2322274881516588,
|
|
"grad_norm": 9.271449236873991,
|
|
"learning_rate": 7.3405019397076314e-06,
|
|
"loss": 1.1800360679626465,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 1.2330173775671407,
|
|
"grad_norm": 11.38399689838541,
|
|
"learning_rate": 7.336439881117128e-06,
|
|
"loss": 1.3739066123962402,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 1.2338072669826223,
|
|
"grad_norm": 10.444548453126936,
|
|
"learning_rate": 7.332375848694519e-06,
|
|
"loss": 1.8069045543670654,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 1.2345971563981042,
|
|
"grad_norm": 17.266827050977902,
|
|
"learning_rate": 7.328309845873112e-06,
|
|
"loss": 2.19236159324646,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 1.235387045813586,
|
|
"grad_norm": 13.591954894799105,
|
|
"learning_rate": 7.3242418760878805e-06,
|
|
"loss": 2.5699267387390137,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 1.236176935229068,
|
|
"grad_norm": 11.237452612969626,
|
|
"learning_rate": 7.320171942775458e-06,
|
|
"loss": 1.278207778930664,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 1.2369668246445498,
|
|
"grad_norm": 17.089988693890064,
|
|
"learning_rate": 7.316100049374139e-06,
|
|
"loss": 1.3304542303085327,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 1.2377567140600316,
|
|
"grad_norm": 11.92363646482957,
|
|
"learning_rate": 7.312026199323869e-06,
|
|
"loss": 1.8582803010940552,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 1.2385466034755135,
|
|
"grad_norm": 12.460514274935408,
|
|
"learning_rate": 7.307950396066257e-06,
|
|
"loss": 1.5322096347808838,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 1.2393364928909953,
|
|
"grad_norm": 12.477145420644185,
|
|
"learning_rate": 7.303872643044545e-06,
|
|
"loss": 1.9782072305679321,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 1.2401263823064772,
|
|
"grad_norm": 12.74235377003251,
|
|
"learning_rate": 7.299792943703642e-06,
|
|
"loss": 1.2845888137817383,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 1.2409162717219588,
|
|
"grad_norm": 10.673508111722988,
|
|
"learning_rate": 7.295711301490084e-06,
|
|
"loss": 1.6466004848480225,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 1.2417061611374407,
|
|
"grad_norm": 9.652022285685392,
|
|
"learning_rate": 7.291627719852059e-06,
|
|
"loss": 1.3372915983200073,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 1.2424960505529226,
|
|
"grad_norm": 9.596732477385675,
|
|
"learning_rate": 7.287542202239392e-06,
|
|
"loss": 1.8011666536331177,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 1.2432859399684044,
|
|
"grad_norm": 7.083810704363881,
|
|
"learning_rate": 7.283454752103538e-06,
|
|
"loss": 1.1540056467056274,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 1.2440758293838863,
|
|
"grad_norm": 9.393392719670336,
|
|
"learning_rate": 7.27936537289759e-06,
|
|
"loss": 1.677268385887146,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 1.2448657187993681,
|
|
"grad_norm": 17.733645365732354,
|
|
"learning_rate": 7.27527406807627e-06,
|
|
"loss": 0.8276841640472412,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 1.24565560821485,
|
|
"grad_norm": 13.067729880728594,
|
|
"learning_rate": 7.271180841095924e-06,
|
|
"loss": 1.3127690553665161,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 1.2464454976303316,
|
|
"grad_norm": 13.042413994956872,
|
|
"learning_rate": 7.267085695414525e-06,
|
|
"loss": 1.4197359085083008,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 1.2472353870458135,
|
|
"grad_norm": 10.928197034241155,
|
|
"learning_rate": 7.262988634491664e-06,
|
|
"loss": 1.261674165725708,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 1.2480252764612954,
|
|
"grad_norm": 10.758652573808499,
|
|
"learning_rate": 7.258889661788554e-06,
|
|
"loss": 1.5431902408599854,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 1.2488151658767772,
|
|
"grad_norm": 10.6083190072106,
|
|
"learning_rate": 7.254788780768018e-06,
|
|
"loss": 1.829581379890442,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 1.249605055292259,
|
|
"grad_norm": 13.989251634457949,
|
|
"learning_rate": 7.250685994894496e-06,
|
|
"loss": 1.342024564743042,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 1.250394944707741,
|
|
"grad_norm": 10.951019623163544,
|
|
"learning_rate": 7.246581307634033e-06,
|
|
"loss": 1.6949963569641113,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 1.2511848341232228,
|
|
"grad_norm": 14.242065090086145,
|
|
"learning_rate": 7.242474722454286e-06,
|
|
"loss": 1.4241949319839478,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 1.2519747235387046,
|
|
"grad_norm": 12.522283827649254,
|
|
"learning_rate": 7.238366242824506e-06,
|
|
"loss": 2.3611106872558594,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 1.2527646129541865,
|
|
"grad_norm": 10.607084561048083,
|
|
"learning_rate": 7.234255872215554e-06,
|
|
"loss": 1.5256626605987549,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 1.2535545023696684,
|
|
"grad_norm": 7.96307207456326,
|
|
"learning_rate": 7.230143614099885e-06,
|
|
"loss": 1.2228140830993652,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 1.25434439178515,
|
|
"grad_norm": 9.008816824433687,
|
|
"learning_rate": 7.226029471951545e-06,
|
|
"loss": 1.4289093017578125,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 1.2551342812006319,
|
|
"grad_norm": 20.920359812194434,
|
|
"learning_rate": 7.221913449246178e-06,
|
|
"loss": 1.3880183696746826,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 1.2559241706161137,
|
|
"grad_norm": 15.238445561586087,
|
|
"learning_rate": 7.217795549461012e-06,
|
|
"loss": 1.5309922695159912,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 1.2567140600315956,
|
|
"grad_norm": 16.4951650860319,
|
|
"learning_rate": 7.213675776074862e-06,
|
|
"loss": 1.835852026939392,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 1.2575039494470774,
|
|
"grad_norm": 8.574885331151954,
|
|
"learning_rate": 7.209554132568128e-06,
|
|
"loss": 0.49211519956588745,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 1.2582938388625593,
|
|
"grad_norm": 19.73342294585578,
|
|
"learning_rate": 7.205430622422786e-06,
|
|
"loss": 1.538137435913086,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 1.259083728278041,
|
|
"grad_norm": 18.023327663051525,
|
|
"learning_rate": 7.201305249122392e-06,
|
|
"loss": 1.457167148590088,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 1.2598736176935228,
|
|
"grad_norm": 12.076397086371236,
|
|
"learning_rate": 7.197178016152076e-06,
|
|
"loss": 1.1796395778656006,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 1.2606635071090047,
|
|
"grad_norm": 10.685605988239777,
|
|
"learning_rate": 7.193048926998535e-06,
|
|
"loss": 1.7177990674972534,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 1.2614533965244865,
|
|
"grad_norm": 11.711401184144716,
|
|
"learning_rate": 7.188917985150038e-06,
|
|
"loss": 1.2435717582702637,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 1.2622432859399684,
|
|
"grad_norm": 11.385740810589741,
|
|
"learning_rate": 7.184785194096421e-06,
|
|
"loss": 1.6954989433288574,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 1.2630331753554502,
|
|
"grad_norm": 11.48602327882758,
|
|
"learning_rate": 7.180650557329076e-06,
|
|
"loss": 1.3193989992141724,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 1.263823064770932,
|
|
"grad_norm": 12.906600512802692,
|
|
"learning_rate": 7.1765140783409615e-06,
|
|
"loss": 1.6648807525634766,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 1.264612954186414,
|
|
"grad_norm": 10.918513023394707,
|
|
"learning_rate": 7.1723757606265845e-06,
|
|
"loss": 1.1041438579559326,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 1.2654028436018958,
|
|
"grad_norm": 10.843029403978079,
|
|
"learning_rate": 7.16823560768201e-06,
|
|
"loss": 1.7871309518814087,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 1.2661927330173777,
|
|
"grad_norm": 19.286859146429332,
|
|
"learning_rate": 7.164093623004858e-06,
|
|
"loss": 2.269460439682007,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 1.2669826224328595,
|
|
"grad_norm": 15.634640920458306,
|
|
"learning_rate": 7.159949810094283e-06,
|
|
"loss": 1.2490503787994385,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 1.2677725118483412,
|
|
"grad_norm": 9.491676824429073,
|
|
"learning_rate": 7.155804172450999e-06,
|
|
"loss": 1.3973164558410645,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 1.268562401263823,
|
|
"grad_norm": 8.966724738021117,
|
|
"learning_rate": 7.151656713577248e-06,
|
|
"loss": 1.924464225769043,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 1.2693522906793049,
|
|
"grad_norm": 10.340849066157938,
|
|
"learning_rate": 7.147507436976823e-06,
|
|
"loss": 0.9530832171440125,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 1.2701421800947867,
|
|
"grad_norm": 8.707473817662407,
|
|
"learning_rate": 7.143356346155042e-06,
|
|
"loss": 1.555940866470337,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 1.2709320695102686,
|
|
"grad_norm": 10.219855234886426,
|
|
"learning_rate": 7.139203444618762e-06,
|
|
"loss": 1.6086714267730713,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 1.2717219589257505,
|
|
"grad_norm": 8.944536325339534,
|
|
"learning_rate": 7.135048735876368e-06,
|
|
"loss": 1.2452785968780518,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 1.272511848341232,
|
|
"grad_norm": 12.328677473799477,
|
|
"learning_rate": 7.130892223437771e-06,
|
|
"loss": 2.04856014251709,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 1.273301737756714,
|
|
"grad_norm": 12.463645792972384,
|
|
"learning_rate": 7.126733910814406e-06,
|
|
"loss": 1.6133408546447754,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 1.2740916271721958,
|
|
"grad_norm": 19.208477820593387,
|
|
"learning_rate": 7.122573801519229e-06,
|
|
"loss": 1.7257981300354004,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 1.2748815165876777,
|
|
"grad_norm": 10.648285597019955,
|
|
"learning_rate": 7.118411899066717e-06,
|
|
"loss": 1.6325335502624512,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 1.2756714060031595,
|
|
"grad_norm": 8.899110923088042,
|
|
"learning_rate": 7.114248206972856e-06,
|
|
"loss": 2.037642478942871,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 1.2764612954186414,
|
|
"grad_norm": 11.20674724929018,
|
|
"learning_rate": 7.110082728755147e-06,
|
|
"loss": 1.7042773962020874,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 1.2772511848341233,
|
|
"grad_norm": 18.487835882110616,
|
|
"learning_rate": 7.105915467932601e-06,
|
|
"loss": 1.3538520336151123,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 1.2780410742496051,
|
|
"grad_norm": 7.198026203991742,
|
|
"learning_rate": 7.101746428025732e-06,
|
|
"loss": 1.396047592163086,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 1.278830963665087,
|
|
"grad_norm": 13.95821454839482,
|
|
"learning_rate": 7.097575612556561e-06,
|
|
"loss": 1.2027523517608643,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 1.2796208530805688,
|
|
"grad_norm": 11.242118092533596,
|
|
"learning_rate": 7.0934030250486044e-06,
|
|
"loss": 2.4239554405212402,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 1.2804107424960507,
|
|
"grad_norm": 11.487912451476449,
|
|
"learning_rate": 7.089228669026878e-06,
|
|
"loss": 1.6502265930175781,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 1.2812006319115323,
|
|
"grad_norm": 13.921903791621322,
|
|
"learning_rate": 7.085052548017893e-06,
|
|
"loss": 2.114908456802368,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 1.2819905213270142,
|
|
"grad_norm": 7.371765331620391,
|
|
"learning_rate": 7.08087466554965e-06,
|
|
"loss": 1.7077139616012573,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 1.282780410742496,
|
|
"grad_norm": 16.302760837877532,
|
|
"learning_rate": 7.076695025151636e-06,
|
|
"loss": 1.2382946014404297,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 1.283570300157978,
|
|
"grad_norm": 13.124566165615319,
|
|
"learning_rate": 7.072513630354827e-06,
|
|
"loss": 2.201374053955078,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 1.2843601895734598,
|
|
"grad_norm": 7.528109529798553,
|
|
"learning_rate": 7.068330484691679e-06,
|
|
"loss": 1.3300297260284424,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 1.2851500789889416,
|
|
"grad_norm": 12.041483320882802,
|
|
"learning_rate": 7.0641455916961244e-06,
|
|
"loss": 0.9577473402023315,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 1.2859399684044233,
|
|
"grad_norm": 8.602253519605528,
|
|
"learning_rate": 7.0599589549035785e-06,
|
|
"loss": 1.0266809463500977,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 1.2867298578199051,
|
|
"grad_norm": 11.151691436114625,
|
|
"learning_rate": 7.055770577850925e-06,
|
|
"loss": 2.103332757949829,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 1.287519747235387,
|
|
"grad_norm": 12.866943913296561,
|
|
"learning_rate": 7.051580464076515e-06,
|
|
"loss": 1.1621947288513184,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 1.2883096366508688,
|
|
"grad_norm": 13.134566302958063,
|
|
"learning_rate": 7.047388617120175e-06,
|
|
"loss": 2.0336053371429443,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 1.2890995260663507,
|
|
"grad_norm": 18.298472533368262,
|
|
"learning_rate": 7.043195040523187e-06,
|
|
"loss": 1.6731476783752441,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 1.2898894154818326,
|
|
"grad_norm": 11.064624121392312,
|
|
"learning_rate": 7.0389997378283034e-06,
|
|
"loss": 1.4558773040771484,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 1.2906793048973144,
|
|
"grad_norm": 12.035862755722269,
|
|
"learning_rate": 7.034802712579725e-06,
|
|
"loss": 1.4987773895263672,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 1.2914691943127963,
|
|
"grad_norm": 11.000347165868366,
|
|
"learning_rate": 7.030603968323115e-06,
|
|
"loss": 1.652524471282959,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 1.2922590837282781,
|
|
"grad_norm": 11.920424614729273,
|
|
"learning_rate": 7.026403508605586e-06,
|
|
"loss": 1.199878215789795,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 1.29304897314376,
|
|
"grad_norm": 12.250643358349953,
|
|
"learning_rate": 7.022201336975701e-06,
|
|
"loss": 1.563902735710144,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 1.2938388625592416,
|
|
"grad_norm": 10.264821730924035,
|
|
"learning_rate": 7.017997456983465e-06,
|
|
"loss": 1.4691765308380127,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 1.2946287519747235,
|
|
"grad_norm": 13.243547862779414,
|
|
"learning_rate": 7.013791872180333e-06,
|
|
"loss": 2.1046786308288574,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 1.2954186413902053,
|
|
"grad_norm": 7.356657896392653,
|
|
"learning_rate": 7.009584586119198e-06,
|
|
"loss": 1.6833205223083496,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 1.2962085308056872,
|
|
"grad_norm": 7.547893143985833,
|
|
"learning_rate": 7.005375602354385e-06,
|
|
"loss": 1.406240463256836,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 1.296998420221169,
|
|
"grad_norm": 10.735447099759265,
|
|
"learning_rate": 7.00116492444166e-06,
|
|
"loss": 1.8073036670684814,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 1.297788309636651,
|
|
"grad_norm": 10.170543223857521,
|
|
"learning_rate": 6.996952555938217e-06,
|
|
"loss": 1.3641012907028198,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 1.2985781990521326,
|
|
"grad_norm": 6.261340458786887,
|
|
"learning_rate": 6.992738500402679e-06,
|
|
"loss": 1.5656462907791138,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 1.2993680884676144,
|
|
"grad_norm": 12.61218163311875,
|
|
"learning_rate": 6.988522761395093e-06,
|
|
"loss": 1.4655276536941528,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 1.3001579778830963,
|
|
"grad_norm": 9.829462165844339,
|
|
"learning_rate": 6.984305342476931e-06,
|
|
"loss": 1.841139554977417,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 1.3009478672985781,
|
|
"grad_norm": 10.428718423453438,
|
|
"learning_rate": 6.980086247211082e-06,
|
|
"loss": 1.9564460515975952,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 1.30173775671406,
|
|
"grad_norm": 7.749615781587074,
|
|
"learning_rate": 6.975865479161849e-06,
|
|
"loss": 1.387686848640442,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 1.3025276461295419,
|
|
"grad_norm": 9.844486088428916,
|
|
"learning_rate": 6.971643041894953e-06,
|
|
"loss": 0.7119489312171936,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 1.3033175355450237,
|
|
"grad_norm": 8.565082994108224,
|
|
"learning_rate": 6.967418938977524e-06,
|
|
"loss": 1.407379150390625,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 1.3041074249605056,
|
|
"grad_norm": 9.682368107994689,
|
|
"learning_rate": 6.963193173978095e-06,
|
|
"loss": 1.277189016342163,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 1.3048973143759874,
|
|
"grad_norm": 13.513336697960028,
|
|
"learning_rate": 6.9589657504666095e-06,
|
|
"loss": 1.2872400283813477,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 1.3056872037914693,
|
|
"grad_norm": 20.735938043035457,
|
|
"learning_rate": 6.954736672014406e-06,
|
|
"loss": 2.1411285400390625,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 1.3064770932069512,
|
|
"grad_norm": 13.154183276627684,
|
|
"learning_rate": 6.950505942194226e-06,
|
|
"loss": 1.1476898193359375,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 1.3072669826224328,
|
|
"grad_norm": 22.670571761450503,
|
|
"learning_rate": 6.946273564580202e-06,
|
|
"loss": 1.3200886249542236,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 1.3080568720379147,
|
|
"grad_norm": 13.376955794870272,
|
|
"learning_rate": 6.942039542747863e-06,
|
|
"loss": 1.7086883783340454,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 1.3088467614533965,
|
|
"grad_norm": 6.135115256463472,
|
|
"learning_rate": 6.937803880274122e-06,
|
|
"loss": 1.4040107727050781,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 1.3096366508688784,
|
|
"grad_norm": 10.847847813759419,
|
|
"learning_rate": 6.933566580737282e-06,
|
|
"loss": 1.342395544052124,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 1.3104265402843602,
|
|
"grad_norm": 9.846488420083572,
|
|
"learning_rate": 6.929327647717028e-06,
|
|
"loss": 1.7640855312347412,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 1.311216429699842,
|
|
"grad_norm": 17.579714964873293,
|
|
"learning_rate": 6.925087084794422e-06,
|
|
"loss": 1.837497353553772,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 1.3120063191153237,
|
|
"grad_norm": 13.308781650389735,
|
|
"learning_rate": 6.92084489555191e-06,
|
|
"loss": 1.5832990407943726,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 1.3127962085308056,
|
|
"grad_norm": 15.704664883204273,
|
|
"learning_rate": 6.916601083573302e-06,
|
|
"loss": 2.004098653793335,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 1.3135860979462874,
|
|
"grad_norm": 8.685136767072184,
|
|
"learning_rate": 6.912355652443791e-06,
|
|
"loss": 1.037060260772705,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 1.3143759873617693,
|
|
"grad_norm": 10.120809843785937,
|
|
"learning_rate": 6.908108605749925e-06,
|
|
"loss": 1.7781083583831787,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 1.3151658767772512,
|
|
"grad_norm": 8.220963621405001,
|
|
"learning_rate": 6.903859947079625e-06,
|
|
"loss": 1.7696709632873535,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 1.315955766192733,
|
|
"grad_norm": 10.606659026468149,
|
|
"learning_rate": 6.899609680022175e-06,
|
|
"loss": 1.7073678970336914,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 1.3167456556082149,
|
|
"grad_norm": 10.40542278789583,
|
|
"learning_rate": 6.895357808168209e-06,
|
|
"loss": 1.32874596118927,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 1.3175355450236967,
|
|
"grad_norm": 14.900627896368178,
|
|
"learning_rate": 6.8911043351097265e-06,
|
|
"loss": 1.4731537103652954,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 1.3183254344391786,
|
|
"grad_norm": 14.289865839749087,
|
|
"learning_rate": 6.886849264440074e-06,
|
|
"loss": 2.3647401332855225,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 1.3191153238546605,
|
|
"grad_norm": 10.096520767718063,
|
|
"learning_rate": 6.88259259975395e-06,
|
|
"loss": 1.7499630451202393,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 1.319905213270142,
|
|
"grad_norm": 12.58275455787123,
|
|
"learning_rate": 6.878334344647399e-06,
|
|
"loss": 1.4793438911437988,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 1.320695102685624,
|
|
"grad_norm": 9.213580765946672,
|
|
"learning_rate": 6.874074502717807e-06,
|
|
"loss": 1.533569574356079,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 1.3214849921011058,
|
|
"grad_norm": 11.233518853535507,
|
|
"learning_rate": 6.869813077563905e-06,
|
|
"loss": 1.5673787593841553,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 1.3222748815165877,
|
|
"grad_norm": 11.5105493592369,
|
|
"learning_rate": 6.865550072785757e-06,
|
|
"loss": 1.8369773626327515,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 1.3230647709320695,
|
|
"grad_norm": 14.469541616554176,
|
|
"learning_rate": 6.861285491984764e-06,
|
|
"loss": 1.5419373512268066,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 1.3238546603475514,
|
|
"grad_norm": 10.063880181802128,
|
|
"learning_rate": 6.857019338763657e-06,
|
|
"loss": 1.5308325290679932,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 1.3246445497630333,
|
|
"grad_norm": 10.589111964405951,
|
|
"learning_rate": 6.852751616726501e-06,
|
|
"loss": 1.3474477529525757,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 1.325434439178515,
|
|
"grad_norm": 16.81222045065148,
|
|
"learning_rate": 6.848482329478675e-06,
|
|
"loss": 2.032487630844116,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 1.3262243285939967,
|
|
"grad_norm": 11.866053591597979,
|
|
"learning_rate": 6.844211480626892e-06,
|
|
"loss": 1.1282556056976318,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 1.3270142180094786,
|
|
"grad_norm": 9.019099813424168,
|
|
"learning_rate": 6.839939073779177e-06,
|
|
"loss": 1.6074413061141968,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 1.3278041074249605,
|
|
"grad_norm": 8.783590478924218,
|
|
"learning_rate": 6.835665112544871e-06,
|
|
"loss": 1.224888801574707,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 1.3285939968404423,
|
|
"grad_norm": 9.151154533135232,
|
|
"learning_rate": 6.831389600534637e-06,
|
|
"loss": 1.2797878980636597,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 1.3293838862559242,
|
|
"grad_norm": 15.249803779803319,
|
|
"learning_rate": 6.8271125413604344e-06,
|
|
"loss": 1.8004652261734009,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 1.330173775671406,
|
|
"grad_norm": 22.765578093916663,
|
|
"learning_rate": 6.822833938635543e-06,
|
|
"loss": 1.466485619544983,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 1.330963665086888,
|
|
"grad_norm": 9.461043264779885,
|
|
"learning_rate": 6.818553795974536e-06,
|
|
"loss": 2.0474472045898438,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 1.3317535545023698,
|
|
"grad_norm": 12.50340732157009,
|
|
"learning_rate": 6.814272116993294e-06,
|
|
"loss": 2.1818690299987793,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 1.3325434439178516,
|
|
"grad_norm": 19.829927714642757,
|
|
"learning_rate": 6.809988905308993e-06,
|
|
"loss": 2.1175107955932617,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 1.3333333333333333,
|
|
"grad_norm": 10.675607140843853,
|
|
"learning_rate": 6.805704164540105e-06,
|
|
"loss": 1.130906343460083,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 1.3341232227488151,
|
|
"grad_norm": 10.39793547659673,
|
|
"learning_rate": 6.8014178983063914e-06,
|
|
"loss": 1.098733901977539,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 1.334913112164297,
|
|
"grad_norm": 10.503557720518131,
|
|
"learning_rate": 6.7971301102289054e-06,
|
|
"loss": 1.2792344093322754,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 1.3357030015797788,
|
|
"grad_norm": 10.340351227260813,
|
|
"learning_rate": 6.792840803929987e-06,
|
|
"loss": 1.5470017194747925,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 1.3364928909952607,
|
|
"grad_norm": 9.882170112299617,
|
|
"learning_rate": 6.78854998303325e-06,
|
|
"loss": 1.1156797409057617,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 1.3372827804107426,
|
|
"grad_norm": 10.827400927376479,
|
|
"learning_rate": 6.7842576511636e-06,
|
|
"loss": 1.2364952564239502,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 1.3380726698262242,
|
|
"grad_norm": 12.60283058617565,
|
|
"learning_rate": 6.779963811947211e-06,
|
|
"loss": 1.4180421829223633,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 1.338862559241706,
|
|
"grad_norm": 12.037561798280398,
|
|
"learning_rate": 6.775668469011531e-06,
|
|
"loss": 1.811230182647705,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 1.339652448657188,
|
|
"grad_norm": 14.317321439652712,
|
|
"learning_rate": 6.771371625985282e-06,
|
|
"loss": 1.5977898836135864,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 1.3404423380726698,
|
|
"grad_norm": 12.550990602775505,
|
|
"learning_rate": 6.767073286498449e-06,
|
|
"loss": 1.4557725191116333,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 1.3412322274881516,
|
|
"grad_norm": 9.595105877395659,
|
|
"learning_rate": 6.762773454182285e-06,
|
|
"loss": 1.620849609375,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 1.3420221169036335,
|
|
"grad_norm": 26.931211154992397,
|
|
"learning_rate": 6.7584721326693024e-06,
|
|
"loss": 1.621090292930603,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 1.3428120063191153,
|
|
"grad_norm": 18.2341302921318,
|
|
"learning_rate": 6.754169325593273e-06,
|
|
"loss": 1.1882051229476929,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 1.3436018957345972,
|
|
"grad_norm": 13.079885370924936,
|
|
"learning_rate": 6.749865036589219e-06,
|
|
"loss": 1.9173457622528076,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 1.344391785150079,
|
|
"grad_norm": 10.73628142485319,
|
|
"learning_rate": 6.7455592692934205e-06,
|
|
"loss": 1.4644713401794434,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 1.345181674565561,
|
|
"grad_norm": 13.507997760455746,
|
|
"learning_rate": 6.741252027343403e-06,
|
|
"loss": 1.3514494895935059,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 1.3459715639810428,
|
|
"grad_norm": 9.434097289624326,
|
|
"learning_rate": 6.736943314377939e-06,
|
|
"loss": 1.4577107429504395,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 1.3467614533965244,
|
|
"grad_norm": 12.60917651464132,
|
|
"learning_rate": 6.732633134037043e-06,
|
|
"loss": 1.3438491821289062,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 1.3475513428120063,
|
|
"grad_norm": 34.45628221491453,
|
|
"learning_rate": 6.7283214899619685e-06,
|
|
"loss": 1.6202247142791748,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 1.3483412322274881,
|
|
"grad_norm": 11.815572511405222,
|
|
"learning_rate": 6.724008385795211e-06,
|
|
"loss": 1.8743906021118164,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 1.34913112164297,
|
|
"grad_norm": 14.955379827242334,
|
|
"learning_rate": 6.719693825180491e-06,
|
|
"loss": 1.5417712926864624,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 1.3499210110584519,
|
|
"grad_norm": 13.982695999884012,
|
|
"learning_rate": 6.7153778117627635e-06,
|
|
"loss": 1.5238138437271118,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 1.3507109004739337,
|
|
"grad_norm": 13.558773627084195,
|
|
"learning_rate": 6.711060349188213e-06,
|
|
"loss": 1.0729316473007202,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 1.3515007898894154,
|
|
"grad_norm": 15.460928981412394,
|
|
"learning_rate": 6.706741441104243e-06,
|
|
"loss": 1.6075103282928467,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 1.3522906793048972,
|
|
"grad_norm": 27.070598507663696,
|
|
"learning_rate": 6.702421091159487e-06,
|
|
"loss": 1.9741368293762207,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 1.353080568720379,
|
|
"grad_norm": 9.716774266414612,
|
|
"learning_rate": 6.698099303003784e-06,
|
|
"loss": 1.7203664779663086,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 1.353870458135861,
|
|
"grad_norm": 10.642254002327258,
|
|
"learning_rate": 6.693776080288198e-06,
|
|
"loss": 1.163581132888794,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 1.3546603475513428,
|
|
"grad_norm": 8.496905620664727,
|
|
"learning_rate": 6.689451426665004e-06,
|
|
"loss": 2.0292768478393555,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 1.3554502369668247,
|
|
"grad_norm": 10.126564260567976,
|
|
"learning_rate": 6.685125345787679e-06,
|
|
"loss": 2.220201015472412,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 1.3562401263823065,
|
|
"grad_norm": 13.061476880554993,
|
|
"learning_rate": 6.680797841310914e-06,
|
|
"loss": 2.056443214416504,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 1.3570300157977884,
|
|
"grad_norm": 13.47212904208046,
|
|
"learning_rate": 6.676468916890597e-06,
|
|
"loss": 1.468867540359497,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 1.3578199052132702,
|
|
"grad_norm": 13.538453828063716,
|
|
"learning_rate": 6.67213857618382e-06,
|
|
"loss": 0.8965187072753906,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 1.358609794628752,
|
|
"grad_norm": 7.238990017595394,
|
|
"learning_rate": 6.667806822848865e-06,
|
|
"loss": 1.4493942260742188,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 1.3593996840442337,
|
|
"grad_norm": 13.06712117871877,
|
|
"learning_rate": 6.663473660545217e-06,
|
|
"loss": 1.303008794784546,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 1.3601895734597156,
|
|
"grad_norm": 18.893117290210554,
|
|
"learning_rate": 6.659139092933542e-06,
|
|
"loss": 2.37463641166687,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 1.3609794628751974,
|
|
"grad_norm": 10.071203220715486,
|
|
"learning_rate": 6.6548031236756975e-06,
|
|
"loss": 2.426710605621338,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 1.3617693522906793,
|
|
"grad_norm": 7.544794430629949,
|
|
"learning_rate": 6.650465756434724e-06,
|
|
"loss": 0.8276800513267517,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 1.3625592417061612,
|
|
"grad_norm": 9.200917908287058,
|
|
"learning_rate": 6.6461269948748445e-06,
|
|
"loss": 2.1654391288757324,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 1.363349131121643,
|
|
"grad_norm": 25.451210894852924,
|
|
"learning_rate": 6.64178684266146e-06,
|
|
"loss": 1.5984547138214111,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 1.3641390205371249,
|
|
"grad_norm": 12.38315288359827,
|
|
"learning_rate": 6.637445303461143e-06,
|
|
"loss": 0.9575186967849731,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 1.3649289099526065,
|
|
"grad_norm": 13.455605811383814,
|
|
"learning_rate": 6.633102380941643e-06,
|
|
"loss": 1.4046566486358643,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 1.3657187993680884,
|
|
"grad_norm": 11.193113951721786,
|
|
"learning_rate": 6.628758078771873e-06,
|
|
"loss": 2.0002331733703613,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 1.3665086887835702,
|
|
"grad_norm": 10.231357721297691,
|
|
"learning_rate": 6.624412400621916e-06,
|
|
"loss": 1.3115997314453125,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 1.367298578199052,
|
|
"grad_norm": 15.410181553876129,
|
|
"learning_rate": 6.620065350163012e-06,
|
|
"loss": 1.2699280977249146,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 1.368088467614534,
|
|
"grad_norm": 10.808531808691974,
|
|
"learning_rate": 6.615716931067566e-06,
|
|
"loss": 2.0870327949523926,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 1.3688783570300158,
|
|
"grad_norm": 9.638709809302068,
|
|
"learning_rate": 6.6113671470091355e-06,
|
|
"loss": 2.032163619995117,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 1.3696682464454977,
|
|
"grad_norm": 9.573439798053808,
|
|
"learning_rate": 6.607016001662434e-06,
|
|
"loss": 1.5165016651153564,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 1.3704581358609795,
|
|
"grad_norm": 16.637270126365415,
|
|
"learning_rate": 6.602663498703323e-06,
|
|
"loss": 1.579211711883545,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 1.3712480252764614,
|
|
"grad_norm": 12.245545065101416,
|
|
"learning_rate": 6.598309641808809e-06,
|
|
"loss": 1.451501727104187,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 1.3720379146919433,
|
|
"grad_norm": 10.871654496984037,
|
|
"learning_rate": 6.593954434657047e-06,
|
|
"loss": 1.3775560855865479,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 1.3728278041074249,
|
|
"grad_norm": 14.826876502837509,
|
|
"learning_rate": 6.5895978809273295e-06,
|
|
"loss": 1.1092920303344727,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 1.3736176935229067,
|
|
"grad_norm": 9.937827702763318,
|
|
"learning_rate": 6.585239984300088e-06,
|
|
"loss": 1.1688158512115479,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 1.3744075829383886,
|
|
"grad_norm": 10.154954545278624,
|
|
"learning_rate": 6.580880748456888e-06,
|
|
"loss": 1.9065393209457397,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 1.3751974723538705,
|
|
"grad_norm": 20.375541326941555,
|
|
"learning_rate": 6.576520177080425e-06,
|
|
"loss": 3.503018379211426,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 1.3759873617693523,
|
|
"grad_norm": 15.460278049176381,
|
|
"learning_rate": 6.572158273854527e-06,
|
|
"loss": 1.5270636081695557,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 1.3767772511848342,
|
|
"grad_norm": 21.983057924875485,
|
|
"learning_rate": 6.567795042464139e-06,
|
|
"loss": 1.8938589096069336,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 1.3775671406003158,
|
|
"grad_norm": 16.471995959910757,
|
|
"learning_rate": 6.563430486595339e-06,
|
|
"loss": 1.7760772705078125,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 1.3783570300157977,
|
|
"grad_norm": 15.032482903239565,
|
|
"learning_rate": 6.5590646099353135e-06,
|
|
"loss": 1.6687235832214355,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 1.3791469194312795,
|
|
"grad_norm": 12.844038712723703,
|
|
"learning_rate": 6.554697416172373e-06,
|
|
"loss": 1.5714036226272583,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 1.3799368088467614,
|
|
"grad_norm": 17.21043619059917,
|
|
"learning_rate": 6.5503289089959354e-06,
|
|
"loss": 3.404308795928955,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 1.3807266982622433,
|
|
"grad_norm": 11.175765500846673,
|
|
"learning_rate": 6.5459590920965295e-06,
|
|
"loss": 1.9153468608856201,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 1.3815165876777251,
|
|
"grad_norm": 12.6196975598501,
|
|
"learning_rate": 6.541587969165793e-06,
|
|
"loss": 2.095952033996582,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 1.382306477093207,
|
|
"grad_norm": 10.31639931220762,
|
|
"learning_rate": 6.537215543896463e-06,
|
|
"loss": 1.1625829935073853,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 1.3830963665086888,
|
|
"grad_norm": 29.7649936622185,
|
|
"learning_rate": 6.532841819982381e-06,
|
|
"loss": 3.0494065284729004,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 1.3838862559241707,
|
|
"grad_norm": 10.666741440827831,
|
|
"learning_rate": 6.52846680111848e-06,
|
|
"loss": 1.2341630458831787,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 1.3846761453396526,
|
|
"grad_norm": 12.22687954037762,
|
|
"learning_rate": 6.524090491000793e-06,
|
|
"loss": 1.308075189590454,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 1.3854660347551344,
|
|
"grad_norm": 32.49773976246368,
|
|
"learning_rate": 6.519712893326439e-06,
|
|
"loss": 1.6739758253097534,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 1.386255924170616,
|
|
"grad_norm": 10.896992529162494,
|
|
"learning_rate": 6.515334011793629e-06,
|
|
"loss": 2.6935648918151855,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 1.387045813586098,
|
|
"grad_norm": 14.152067149422212,
|
|
"learning_rate": 6.510953850101658e-06,
|
|
"loss": 1.4659454822540283,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 1.3878357030015798,
|
|
"grad_norm": 9.490627971683756,
|
|
"learning_rate": 6.506572411950896e-06,
|
|
"loss": 1.4744107723236084,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 1.3886255924170616,
|
|
"grad_norm": 9.178009642515413,
|
|
"learning_rate": 6.5021897010428006e-06,
|
|
"loss": 1.4771769046783447,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 1.3894154818325435,
|
|
"grad_norm": 7.901450497913,
|
|
"learning_rate": 6.4978057210798995e-06,
|
|
"loss": 1.5277764797210693,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 1.3902053712480253,
|
|
"grad_norm": 16.24677122520427,
|
|
"learning_rate": 6.4934204757657925e-06,
|
|
"loss": 1.8329733610153198,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 1.390995260663507,
|
|
"grad_norm": 17.706742868384215,
|
|
"learning_rate": 6.489033968805149e-06,
|
|
"loss": 1.9825087785720825,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 1.3917851500789888,
|
|
"grad_norm": 15.685673090253694,
|
|
"learning_rate": 6.484646203903706e-06,
|
|
"loss": 1.2657302618026733,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 1.3925750394944707,
|
|
"grad_norm": 10.790748538927113,
|
|
"learning_rate": 6.4802571847682616e-06,
|
|
"loss": 3.029433250427246,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 1.3933649289099526,
|
|
"grad_norm": 9.163423388663407,
|
|
"learning_rate": 6.4758669151066724e-06,
|
|
"loss": 1.4058465957641602,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 1.3941548183254344,
|
|
"grad_norm": 10.067549014086303,
|
|
"learning_rate": 6.4714753986278535e-06,
|
|
"loss": 1.397748589515686,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 1.3949447077409163,
|
|
"grad_norm": 13.05855432795011,
|
|
"learning_rate": 6.467082639041772e-06,
|
|
"loss": 1.4465043544769287,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 1.3957345971563981,
|
|
"grad_norm": 14.849808480124686,
|
|
"learning_rate": 6.462688640059446e-06,
|
|
"loss": 1.5454909801483154,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 1.39652448657188,
|
|
"grad_norm": 11.702650248241305,
|
|
"learning_rate": 6.4582934053929405e-06,
|
|
"loss": 1.1027021408081055,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 1.3973143759873619,
|
|
"grad_norm": 7.374612887912086,
|
|
"learning_rate": 6.453896938755362e-06,
|
|
"loss": 1.7206413745880127,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 1.3981042654028437,
|
|
"grad_norm": 13.105128531972968,
|
|
"learning_rate": 6.449499243860865e-06,
|
|
"loss": 2.0842576026916504,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 1.3988941548183254,
|
|
"grad_norm": 16.342442684958176,
|
|
"learning_rate": 6.445100324424631e-06,
|
|
"loss": 1.9844783544540405,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 1.3996840442338072,
|
|
"grad_norm": 7.941412914747672,
|
|
"learning_rate": 6.440700184162887e-06,
|
|
"loss": 1.3604265451431274,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 1.400473933649289,
|
|
"grad_norm": 7.665007430651305,
|
|
"learning_rate": 6.436298826792882e-06,
|
|
"loss": 0.7555409669876099,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 1.401263823064771,
|
|
"grad_norm": 9.961027255210812,
|
|
"learning_rate": 6.431896256032896e-06,
|
|
"loss": 1.5438669919967651,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 1.4020537124802528,
|
|
"grad_norm": 12.80133317623476,
|
|
"learning_rate": 6.427492475602242e-06,
|
|
"loss": 1.6424999237060547,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 1.4028436018957346,
|
|
"grad_norm": 11.021716530624692,
|
|
"learning_rate": 6.423087489221241e-06,
|
|
"loss": 1.6980810165405273,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 1.4036334913112165,
|
|
"grad_norm": 20.704996350287434,
|
|
"learning_rate": 6.418681300611244e-06,
|
|
"loss": 1.92954421043396,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 1.4044233807266981,
|
|
"grad_norm": 12.682678877921038,
|
|
"learning_rate": 6.414273913494612e-06,
|
|
"loss": 0.9070745706558228,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 1.40521327014218,
|
|
"grad_norm": 12.513930307508547,
|
|
"learning_rate": 6.409865331594721e-06,
|
|
"loss": 1.1177334785461426,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 1.4060031595576619,
|
|
"grad_norm": 19.948578372309566,
|
|
"learning_rate": 6.4054555586359556e-06,
|
|
"loss": 1.5609198808670044,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 1.4067930489731437,
|
|
"grad_norm": 10.304880695036104,
|
|
"learning_rate": 6.401044598343705e-06,
|
|
"loss": 1.9408183097839355,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 1.4075829383886256,
|
|
"grad_norm": 11.485897348203851,
|
|
"learning_rate": 6.3966324544443646e-06,
|
|
"loss": 2.1067936420440674,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 1.4083728278041074,
|
|
"grad_norm": 12.36441904247693,
|
|
"learning_rate": 6.392219130665328e-06,
|
|
"loss": 1.3297131061553955,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 1.4091627172195893,
|
|
"grad_norm": 11.454598254103093,
|
|
"learning_rate": 6.387804630734985e-06,
|
|
"loss": 1.3817702531814575,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 1.4099526066350712,
|
|
"grad_norm": 11.226000917241246,
|
|
"learning_rate": 6.383388958382719e-06,
|
|
"loss": 2.0304312705993652,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 1.410742496050553,
|
|
"grad_norm": 15.283704119726693,
|
|
"learning_rate": 6.378972117338908e-06,
|
|
"loss": 1.2152456045150757,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 1.4115323854660349,
|
|
"grad_norm": 14.282288242101943,
|
|
"learning_rate": 6.374554111334908e-06,
|
|
"loss": 2.027944564819336,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 1.4123222748815165,
|
|
"grad_norm": 19.074788092743933,
|
|
"learning_rate": 6.37013494410307e-06,
|
|
"loss": 1.8768101930618286,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 1.4131121642969984,
|
|
"grad_norm": 27.405083978695842,
|
|
"learning_rate": 6.365714619376722e-06,
|
|
"loss": 1.921675205230713,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 1.4139020537124802,
|
|
"grad_norm": 12.762740990846117,
|
|
"learning_rate": 6.361293140890161e-06,
|
|
"loss": 1.2969154119491577,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 1.414691943127962,
|
|
"grad_norm": 13.968926857294704,
|
|
"learning_rate": 6.356870512378675e-06,
|
|
"loss": 1.6537883281707764,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 1.415481832543444,
|
|
"grad_norm": 10.371222981346492,
|
|
"learning_rate": 6.3524467375785125e-06,
|
|
"loss": 2.2494006156921387,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 1.4162717219589258,
|
|
"grad_norm": 7.840468212923819,
|
|
"learning_rate": 6.348021820226891e-06,
|
|
"loss": 1.4510530233383179,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 1.4170616113744074,
|
|
"grad_norm": 16.008361511520068,
|
|
"learning_rate": 6.343595764061999e-06,
|
|
"loss": 2.526327133178711,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 1.4178515007898893,
|
|
"grad_norm": 7.9497180354538415,
|
|
"learning_rate": 6.339168572822978e-06,
|
|
"loss": 1.4724477529525757,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 1.4186413902053712,
|
|
"grad_norm": 6.971397200044914,
|
|
"learning_rate": 6.334740250249938e-06,
|
|
"loss": 1.9278626441955566,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 1.419431279620853,
|
|
"grad_norm": 14.711227502395163,
|
|
"learning_rate": 6.33031080008394e-06,
|
|
"loss": 1.0732061862945557,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 1.4202211690363349,
|
|
"grad_norm": 8.10829033173858,
|
|
"learning_rate": 6.325880226066997e-06,
|
|
"loss": 1.8444054126739502,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 1.4210110584518167,
|
|
"grad_norm": 8.220785031600306,
|
|
"learning_rate": 6.321448531942072e-06,
|
|
"loss": 1.180464267730713,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 1.4218009478672986,
|
|
"grad_norm": 7.13121507626065,
|
|
"learning_rate": 6.317015721453077e-06,
|
|
"loss": 1.8239995241165161,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 1.4225908372827805,
|
|
"grad_norm": 10.075780879909827,
|
|
"learning_rate": 6.3125817983448624e-06,
|
|
"loss": 1.9934985637664795,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 1.4233807266982623,
|
|
"grad_norm": 11.805782298666543,
|
|
"learning_rate": 6.308146766363225e-06,
|
|
"loss": 1.1996102333068848,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 1.4241706161137442,
|
|
"grad_norm": 14.758482012061515,
|
|
"learning_rate": 6.303710629254889e-06,
|
|
"loss": 1.6951093673706055,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 1.424960505529226,
|
|
"grad_norm": 6.415227648148693,
|
|
"learning_rate": 6.29927339076752e-06,
|
|
"loss": 1.2805756330490112,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 1.4257503949447077,
|
|
"grad_norm": 16.190811963050376,
|
|
"learning_rate": 6.294835054649714e-06,
|
|
"loss": 1.6054199934005737,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 1.4265402843601895,
|
|
"grad_norm": 19.35675670483708,
|
|
"learning_rate": 6.290395624650988e-06,
|
|
"loss": 2.000746726989746,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 1.4273301737756714,
|
|
"grad_norm": 10.953221955867015,
|
|
"learning_rate": 6.28595510452179e-06,
|
|
"loss": 1.7414836883544922,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 1.4281200631911533,
|
|
"grad_norm": 14.62887358716854,
|
|
"learning_rate": 6.281513498013485e-06,
|
|
"loss": 1.2090952396392822,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 1.4289099526066351,
|
|
"grad_norm": 8.841041489194227,
|
|
"learning_rate": 6.277070808878356e-06,
|
|
"loss": 1.1835849285125732,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 1.429699842022117,
|
|
"grad_norm": 9.037929728341096,
|
|
"learning_rate": 6.2726270408696035e-06,
|
|
"loss": 1.8852903842926025,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 1.4304897314375986,
|
|
"grad_norm": 14.839212546666879,
|
|
"learning_rate": 6.268182197741336e-06,
|
|
"loss": 2.071831703186035,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 1.4312796208530805,
|
|
"grad_norm": 12.37353569458125,
|
|
"learning_rate": 6.263736283248571e-06,
|
|
"loss": 1.7889072895050049,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 1.4320695102685623,
|
|
"grad_norm": 14.308437007860489,
|
|
"learning_rate": 6.259289301147233e-06,
|
|
"loss": 2.109577178955078,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 1.4328593996840442,
|
|
"grad_norm": 10.588829861541612,
|
|
"learning_rate": 6.254841255194148e-06,
|
|
"loss": 0.9634921550750732,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 1.433649289099526,
|
|
"grad_norm": 8.269928453685496,
|
|
"learning_rate": 6.250392149147035e-06,
|
|
"loss": 1.1673877239227295,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 1.434439178515008,
|
|
"grad_norm": 12.167224980723917,
|
|
"learning_rate": 6.2459419867645195e-06,
|
|
"loss": 1.7496429681777954,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 1.4352290679304898,
|
|
"grad_norm": 11.74353184566792,
|
|
"learning_rate": 6.241490771806111e-06,
|
|
"loss": 1.6411113739013672,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 1.4360189573459716,
|
|
"grad_norm": 10.038663586636575,
|
|
"learning_rate": 6.2370385080322085e-06,
|
|
"loss": 1.6785610914230347,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 1.4368088467614535,
|
|
"grad_norm": 12.705482849240397,
|
|
"learning_rate": 6.232585199204102e-06,
|
|
"loss": 1.4933853149414062,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 1.4375987361769353,
|
|
"grad_norm": 8.797279147148013,
|
|
"learning_rate": 6.22813084908396e-06,
|
|
"loss": 1.2085703611373901,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 1.438388625592417,
|
|
"grad_norm": 9.13523458275769,
|
|
"learning_rate": 6.223675461434831e-06,
|
|
"loss": 1.3428914546966553,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 1.4391785150078988,
|
|
"grad_norm": 7.848994958997547,
|
|
"learning_rate": 6.219219040020646e-06,
|
|
"loss": 1.0880684852600098,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 1.4399684044233807,
|
|
"grad_norm": 13.565272501706762,
|
|
"learning_rate": 6.214761588606199e-06,
|
|
"loss": 2.37025785446167,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 1.4407582938388626,
|
|
"grad_norm": 11.496245700277813,
|
|
"learning_rate": 6.210303110957161e-06,
|
|
"loss": 1.0500859022140503,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 1.4415481832543444,
|
|
"grad_norm": 11.699195594543191,
|
|
"learning_rate": 6.205843610840071e-06,
|
|
"loss": 1.364439606666565,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 1.4423380726698263,
|
|
"grad_norm": 12.92363516840581,
|
|
"learning_rate": 6.201383092022326e-06,
|
|
"loss": 1.575169324874878,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 1.4431279620853081,
|
|
"grad_norm": 9.636126549348427,
|
|
"learning_rate": 6.196921558272191e-06,
|
|
"loss": 1.2875540256500244,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 1.4439178515007898,
|
|
"grad_norm": 25.404058624847497,
|
|
"learning_rate": 6.19245901335878e-06,
|
|
"loss": 1.948940634727478,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 1.4447077409162716,
|
|
"grad_norm": 16.28182362055586,
|
|
"learning_rate": 6.187995461052067e-06,
|
|
"loss": 1.686116337776184,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 1.4454976303317535,
|
|
"grad_norm": 12.142680163856589,
|
|
"learning_rate": 6.183530905122881e-06,
|
|
"loss": 1.513108253479004,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 1.4462875197472354,
|
|
"grad_norm": 9.796255845103145,
|
|
"learning_rate": 6.179065349342885e-06,
|
|
"loss": 1.6807351112365723,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 1.4470774091627172,
|
|
"grad_norm": 14.771373088976882,
|
|
"learning_rate": 6.1745987974846e-06,
|
|
"loss": 1.763586163520813,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 1.447867298578199,
|
|
"grad_norm": 11.746125463949197,
|
|
"learning_rate": 6.170131253321384e-06,
|
|
"loss": 1.5122478008270264,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 1.448657187993681,
|
|
"grad_norm": 10.693788951485445,
|
|
"learning_rate": 6.16566272062743e-06,
|
|
"loss": 2.1962921619415283,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 1.4494470774091628,
|
|
"grad_norm": 12.44749625950401,
|
|
"learning_rate": 6.161193203177773e-06,
|
|
"loss": 1.9236458539962769,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 1.4502369668246446,
|
|
"grad_norm": 17.61123152678093,
|
|
"learning_rate": 6.156722704748273e-06,
|
|
"loss": 1.6482089757919312,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 1.4510268562401265,
|
|
"grad_norm": 7.824657050413297,
|
|
"learning_rate": 6.152251229115625e-06,
|
|
"loss": 1.2396411895751953,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 1.4518167456556081,
|
|
"grad_norm": 12.100352057209935,
|
|
"learning_rate": 6.147778780057342e-06,
|
|
"loss": 1.445483922958374,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 1.45260663507109,
|
|
"grad_norm": 10.176922246717954,
|
|
"learning_rate": 6.143305361351766e-06,
|
|
"loss": 2.227597713470459,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 1.4533965244865719,
|
|
"grad_norm": 23.08434863955186,
|
|
"learning_rate": 6.1388309767780575e-06,
|
|
"loss": 2.184255361557007,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 1.4541864139020537,
|
|
"grad_norm": 15.247266132776515,
|
|
"learning_rate": 6.134355630116189e-06,
|
|
"loss": 1.357899785041809,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 1.4549763033175356,
|
|
"grad_norm": 7.690736401984131,
|
|
"learning_rate": 6.129879325146948e-06,
|
|
"loss": 1.3187198638916016,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 1.4557661927330174,
|
|
"grad_norm": 17.084504147813014,
|
|
"learning_rate": 6.125402065651933e-06,
|
|
"loss": 1.1403226852416992,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 1.456556082148499,
|
|
"grad_norm": 10.647716875554703,
|
|
"learning_rate": 6.120923855413546e-06,
|
|
"loss": 1.5669901371002197,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 1.457345971563981,
|
|
"grad_norm": 17.169042345137168,
|
|
"learning_rate": 6.116444698214996e-06,
|
|
"loss": 1.8641374111175537,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 1.4581358609794628,
|
|
"grad_norm": 10.230215295961285,
|
|
"learning_rate": 6.111964597840288e-06,
|
|
"loss": 1.8520389795303345,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 1.4589257503949447,
|
|
"grad_norm": 14.086738280821706,
|
|
"learning_rate": 6.1074835580742274e-06,
|
|
"loss": 1.135934829711914,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 1.4597156398104265,
|
|
"grad_norm": 20.40220132997962,
|
|
"learning_rate": 6.103001582702408e-06,
|
|
"loss": 1.4812136888504028,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 1.4605055292259084,
|
|
"grad_norm": 31.44316031849772,
|
|
"learning_rate": 6.098518675511221e-06,
|
|
"loss": 2.650513172149658,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 1.4612954186413902,
|
|
"grad_norm": 8.456469054005813,
|
|
"learning_rate": 6.094034840287838e-06,
|
|
"loss": 1.8254547119140625,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 1.462085308056872,
|
|
"grad_norm": 8.984175379105094,
|
|
"learning_rate": 6.08955008082022e-06,
|
|
"loss": 1.96278715133667,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 1.462875197472354,
|
|
"grad_norm": 17.024546561160296,
|
|
"learning_rate": 6.085064400897106e-06,
|
|
"loss": 1.2698783874511719,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 1.4636650868878358,
|
|
"grad_norm": 9.736224700781001,
|
|
"learning_rate": 6.080577804308012e-06,
|
|
"loss": 1.6380083560943604,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 1.4644549763033177,
|
|
"grad_norm": 20.83200309530118,
|
|
"learning_rate": 6.076090294843233e-06,
|
|
"loss": 1.6041996479034424,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 1.4652448657187993,
|
|
"grad_norm": 10.731218316042554,
|
|
"learning_rate": 6.0716018762938265e-06,
|
|
"loss": 1.3004403114318848,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 1.4660347551342812,
|
|
"grad_norm": 12.047943285394913,
|
|
"learning_rate": 6.067112552451628e-06,
|
|
"loss": 1.7745938301086426,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 1.466824644549763,
|
|
"grad_norm": 11.575538627172165,
|
|
"learning_rate": 6.062622327109231e-06,
|
|
"loss": 1.8333407640457153,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 1.4676145339652449,
|
|
"grad_norm": 8.54961227395821,
|
|
"learning_rate": 6.0581312040599926e-06,
|
|
"loss": 1.6345336437225342,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 1.4684044233807267,
|
|
"grad_norm": 15.409163973089639,
|
|
"learning_rate": 6.053639187098028e-06,
|
|
"loss": 0.9887954592704773,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 1.4691943127962086,
|
|
"grad_norm": 12.434754832094152,
|
|
"learning_rate": 6.04914628001821e-06,
|
|
"loss": 1.7964859008789062,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 1.4699842022116902,
|
|
"grad_norm": 30.51669618244978,
|
|
"learning_rate": 6.044652486616159e-06,
|
|
"loss": 2.461520195007324,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 1.470774091627172,
|
|
"grad_norm": 10.994064510333782,
|
|
"learning_rate": 6.040157810688245e-06,
|
|
"loss": 1.86288583278656,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 1.471563981042654,
|
|
"grad_norm": 9.30779783354174,
|
|
"learning_rate": 6.035662256031592e-06,
|
|
"loss": 1.46977698802948,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 1.4723538704581358,
|
|
"grad_norm": 7.934486944796402,
|
|
"learning_rate": 6.03116582644405e-06,
|
|
"loss": 1.5796260833740234,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 1.4731437598736177,
|
|
"grad_norm": 14.423496800164589,
|
|
"learning_rate": 6.026668525724226e-06,
|
|
"loss": 1.8589414358139038,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 1.4739336492890995,
|
|
"grad_norm": 10.939581596140304,
|
|
"learning_rate": 6.022170357671448e-06,
|
|
"loss": 2.25348162651062,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 1.4747235387045814,
|
|
"grad_norm": 18.444767247036655,
|
|
"learning_rate": 6.017671326085787e-06,
|
|
"loss": 2.1810455322265625,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 1.4755134281200633,
|
|
"grad_norm": 6.294704216554538,
|
|
"learning_rate": 6.013171434768039e-06,
|
|
"loss": 1.2740647792816162,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 1.4763033175355451,
|
|
"grad_norm": 11.258105727454375,
|
|
"learning_rate": 6.008670687519726e-06,
|
|
"loss": 1.537172555923462,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 1.477093206951027,
|
|
"grad_norm": 8.495483927169012,
|
|
"learning_rate": 6.004169088143093e-06,
|
|
"loss": 1.5573079586029053,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 1.4778830963665086,
|
|
"grad_norm": 15.784635016949512,
|
|
"learning_rate": 5.99966664044111e-06,
|
|
"loss": 1.1430606842041016,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 1.4786729857819905,
|
|
"grad_norm": 10.067351907649972,
|
|
"learning_rate": 5.9951633482174565e-06,
|
|
"loss": 1.8512628078460693,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 1.4794628751974723,
|
|
"grad_norm": 12.439277630875722,
|
|
"learning_rate": 5.99065921527653e-06,
|
|
"loss": 1.0742204189300537,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 1.4802527646129542,
|
|
"grad_norm": 11.790578305048301,
|
|
"learning_rate": 5.986154245423435e-06,
|
|
"loss": 2.0604054927825928,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 1.481042654028436,
|
|
"grad_norm": 9.886128974159105,
|
|
"learning_rate": 5.981648442463987e-06,
|
|
"loss": 1.2165788412094116,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 1.481832543443918,
|
|
"grad_norm": 11.693448495370616,
|
|
"learning_rate": 5.977141810204702e-06,
|
|
"loss": 1.2570345401763916,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 1.4826224328593998,
|
|
"grad_norm": 11.392265499751487,
|
|
"learning_rate": 5.972634352452797e-06,
|
|
"loss": 1.3734052181243896,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 1.4834123222748814,
|
|
"grad_norm": 9.935897727227923,
|
|
"learning_rate": 5.968126073016188e-06,
|
|
"loss": 1.536318302154541,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 1.4842022116903633,
|
|
"grad_norm": 13.37009015419332,
|
|
"learning_rate": 5.963616975703488e-06,
|
|
"loss": 1.6027817726135254,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 1.4849921011058451,
|
|
"grad_norm": 13.54732672385344,
|
|
"learning_rate": 5.95910706432399e-06,
|
|
"loss": 1.4695227146148682,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 1.485781990521327,
|
|
"grad_norm": 10.426563375284957,
|
|
"learning_rate": 5.954596342687686e-06,
|
|
"loss": 1.2945826053619385,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 1.4865718799368088,
|
|
"grad_norm": 12.085174713834514,
|
|
"learning_rate": 5.950084814605252e-06,
|
|
"loss": 1.690997838973999,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 1.4873617693522907,
|
|
"grad_norm": 14.941420232595867,
|
|
"learning_rate": 5.945572483888033e-06,
|
|
"loss": 1.3554736375808716,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 1.4881516587677726,
|
|
"grad_norm": 12.228142172469568,
|
|
"learning_rate": 5.94105935434807e-06,
|
|
"loss": 1.512892723083496,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 1.4889415481832544,
|
|
"grad_norm": 11.913640083855395,
|
|
"learning_rate": 5.936545429798062e-06,
|
|
"loss": 0.7298011779785156,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 1.4897314375987363,
|
|
"grad_norm": 15.962737514375329,
|
|
"learning_rate": 5.932030714051392e-06,
|
|
"loss": 1.493302583694458,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 1.4905213270142181,
|
|
"grad_norm": 9.775040361342926,
|
|
"learning_rate": 5.927515210922107e-06,
|
|
"loss": 2.2897167205810547,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 1.4913112164296998,
|
|
"grad_norm": 16.24056817956605,
|
|
"learning_rate": 5.922998924224917e-06,
|
|
"loss": 1.8842390775680542,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 1.4921011058451816,
|
|
"grad_norm": 10.431504207313186,
|
|
"learning_rate": 5.918481857775196e-06,
|
|
"loss": 1.5306816101074219,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 1.4928909952606635,
|
|
"grad_norm": 10.932057272276792,
|
|
"learning_rate": 5.913964015388976e-06,
|
|
"loss": 1.5974483489990234,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 1.4936808846761453,
|
|
"grad_norm": 16.338433560086507,
|
|
"learning_rate": 5.909445400882948e-06,
|
|
"loss": 0.9032529592514038,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 1.4944707740916272,
|
|
"grad_norm": 16.083461588553835,
|
|
"learning_rate": 5.904926018074448e-06,
|
|
"loss": 2.5330991744995117,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 1.495260663507109,
|
|
"grad_norm": 7.719604767773579,
|
|
"learning_rate": 5.9004058707814715e-06,
|
|
"loss": 1.676531434059143,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 1.4960505529225907,
|
|
"grad_norm": 7.901640170457522,
|
|
"learning_rate": 5.895884962822648e-06,
|
|
"loss": 1.7386832237243652,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 1.4968404423380726,
|
|
"grad_norm": 9.168608646448185,
|
|
"learning_rate": 5.891363298017259e-06,
|
|
"loss": 1.2910975217819214,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 1.4976303317535544,
|
|
"grad_norm": 23.252379383869613,
|
|
"learning_rate": 5.886840880185221e-06,
|
|
"loss": 1.953572154045105,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 1.4984202211690363,
|
|
"grad_norm": 14.861072270306032,
|
|
"learning_rate": 5.8823177131470845e-06,
|
|
"loss": 1.128541350364685,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 1.4992101105845181,
|
|
"grad_norm": 11.269382836461835,
|
|
"learning_rate": 5.877793800724041e-06,
|
|
"loss": 1.7088985443115234,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 10.533224054579982,
|
|
"learning_rate": 5.873269146737901e-06,
|
|
"loss": 2.03849458694458,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 1.5007898894154819,
|
|
"grad_norm": 10.821862530515162,
|
|
"learning_rate": 5.868743755011113e-06,
|
|
"loss": 0.8391838669776917,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 1.5015797788309637,
|
|
"grad_norm": 12.815338614755259,
|
|
"learning_rate": 5.86421762936674e-06,
|
|
"loss": 2.290050745010376,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 1.5023696682464456,
|
|
"grad_norm": 6.75820438876223,
|
|
"learning_rate": 5.859690773628466e-06,
|
|
"loss": 1.656872034072876,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 1.5031595576619274,
|
|
"grad_norm": 13.057735640244541,
|
|
"learning_rate": 5.855163191620597e-06,
|
|
"loss": 1.4379336833953857,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 1.5039494470774093,
|
|
"grad_norm": 8.816578457255313,
|
|
"learning_rate": 5.8506348871680475e-06,
|
|
"loss": 1.9750895500183105,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 1.5047393364928912,
|
|
"grad_norm": 10.27544331263895,
|
|
"learning_rate": 5.846105864096343e-06,
|
|
"loss": 1.3693504333496094,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 1.5055292259083728,
|
|
"grad_norm": 19.407143316882802,
|
|
"learning_rate": 5.84157612623162e-06,
|
|
"loss": 1.635138750076294,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 1.5063191153238547,
|
|
"grad_norm": 12.029439868679129,
|
|
"learning_rate": 5.837045677400613e-06,
|
|
"loss": 0.5531861782073975,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 1.5071090047393365,
|
|
"grad_norm": 14.005078207323043,
|
|
"learning_rate": 5.832514521430661e-06,
|
|
"loss": 1.1493902206420898,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 1.5078988941548184,
|
|
"grad_norm": 6.882826860067384,
|
|
"learning_rate": 5.827982662149703e-06,
|
|
"loss": 1.7186492681503296,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 1.5086887835703,
|
|
"grad_norm": 9.057187685271941,
|
|
"learning_rate": 5.8234501033862624e-06,
|
|
"loss": 2.1788861751556396,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 1.5094786729857819,
|
|
"grad_norm": 11.664865065310822,
|
|
"learning_rate": 5.818916848969463e-06,
|
|
"loss": 2.0605411529541016,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 1.5102685624012637,
|
|
"grad_norm": 8.847275772967832,
|
|
"learning_rate": 5.814382902729015e-06,
|
|
"loss": 1.4167741537094116,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 1.5110584518167456,
|
|
"grad_norm": 9.964601394953007,
|
|
"learning_rate": 5.809848268495206e-06,
|
|
"loss": 0.9648761749267578,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 1.5118483412322274,
|
|
"grad_norm": 18.140123517520358,
|
|
"learning_rate": 5.8053129500989156e-06,
|
|
"loss": 1.2075505256652832,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 1.5126382306477093,
|
|
"grad_norm": 14.956061178193,
|
|
"learning_rate": 5.80077695137159e-06,
|
|
"loss": 1.8038408756256104,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 1.5134281200631912,
|
|
"grad_norm": 8.549470794553304,
|
|
"learning_rate": 5.7962402761452616e-06,
|
|
"loss": 1.2158410549163818,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 1.514218009478673,
|
|
"grad_norm": 10.833339245641687,
|
|
"learning_rate": 5.791702928252525e-06,
|
|
"loss": 0.7378091812133789,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 1.5150078988941549,
|
|
"grad_norm": 9.732584946999157,
|
|
"learning_rate": 5.7871649115265484e-06,
|
|
"loss": 1.1355817317962646,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 1.5157977883096367,
|
|
"grad_norm": 15.80570500150481,
|
|
"learning_rate": 5.782626229801062e-06,
|
|
"loss": 1.5603950023651123,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 1.5165876777251186,
|
|
"grad_norm": 12.04096957012893,
|
|
"learning_rate": 5.778086886910359e-06,
|
|
"loss": 1.6280852556228638,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 1.5173775671406005,
|
|
"grad_norm": 17.757331266774536,
|
|
"learning_rate": 5.773546886689292e-06,
|
|
"loss": 1.6459561586380005,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 1.518167456556082,
|
|
"grad_norm": 10.980216863583156,
|
|
"learning_rate": 5.769006232973266e-06,
|
|
"loss": 1.7304844856262207,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 1.518957345971564,
|
|
"grad_norm": 8.64298769734182,
|
|
"learning_rate": 5.764464929598246e-06,
|
|
"loss": 1.9379894733428955,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 1.5197472353870458,
|
|
"grad_norm": 8.969841480124796,
|
|
"learning_rate": 5.759922980400734e-06,
|
|
"loss": 1.1220753192901611,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 1.5205371248025277,
|
|
"grad_norm": 14.709009423534145,
|
|
"learning_rate": 5.755380389217785e-06,
|
|
"loss": 1.1699135303497314,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 1.5213270142180095,
|
|
"grad_norm": 9.204182201289338,
|
|
"learning_rate": 5.750837159886996e-06,
|
|
"loss": 1.9453136920928955,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 1.5221169036334912,
|
|
"grad_norm": 20.29037732104967,
|
|
"learning_rate": 5.746293296246502e-06,
|
|
"loss": 1.1104214191436768,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 1.522906793048973,
|
|
"grad_norm": 9.963584395362918,
|
|
"learning_rate": 5.741748802134976e-06,
|
|
"loss": 1.0753260850906372,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 1.5236966824644549,
|
|
"grad_norm": 10.194943039916806,
|
|
"learning_rate": 5.7372036813916155e-06,
|
|
"loss": 1.6703574657440186,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 1.5244865718799367,
|
|
"grad_norm": 12.899448039370167,
|
|
"learning_rate": 5.732657937856158e-06,
|
|
"loss": 1.3306403160095215,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 1.5252764612954186,
|
|
"grad_norm": 9.761711644822508,
|
|
"learning_rate": 5.728111575368865e-06,
|
|
"loss": 1.5857300758361816,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 1.5260663507109005,
|
|
"grad_norm": 10.629942354032634,
|
|
"learning_rate": 5.723564597770514e-06,
|
|
"loss": 1.4648703336715698,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 1.5268562401263823,
|
|
"grad_norm": 13.432843847452732,
|
|
"learning_rate": 5.719017008902407e-06,
|
|
"loss": 1.4944384098052979,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 1.5276461295418642,
|
|
"grad_norm": 11.813071490114952,
|
|
"learning_rate": 5.714468812606364e-06,
|
|
"loss": 1.323237657546997,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 1.528436018957346,
|
|
"grad_norm": 8.562112811115774,
|
|
"learning_rate": 5.709920012724716e-06,
|
|
"loss": 2.0364575386047363,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 1.529225908372828,
|
|
"grad_norm": 13.067657788966802,
|
|
"learning_rate": 5.705370613100303e-06,
|
|
"loss": 1.4062690734863281,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 1.5300157977883098,
|
|
"grad_norm": 13.707958453722402,
|
|
"learning_rate": 5.700820617576472e-06,
|
|
"loss": 2.091384172439575,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 1.5308056872037916,
|
|
"grad_norm": 10.413369263252646,
|
|
"learning_rate": 5.696270029997078e-06,
|
|
"loss": 1.5702612400054932,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 1.5315955766192733,
|
|
"grad_norm": 7.882076727278544,
|
|
"learning_rate": 5.691718854206469e-06,
|
|
"loss": 1.8636072874069214,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 1.5323854660347551,
|
|
"grad_norm": 9.17078361317583,
|
|
"learning_rate": 5.687167094049493e-06,
|
|
"loss": 2.772977828979492,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 1.533175355450237,
|
|
"grad_norm": 15.116676322563023,
|
|
"learning_rate": 5.682614753371493e-06,
|
|
"loss": 1.5175914764404297,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 1.5339652448657188,
|
|
"grad_norm": 7.844046810826043,
|
|
"learning_rate": 5.678061836018303e-06,
|
|
"loss": 2.061984062194824,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 1.5347551342812005,
|
|
"grad_norm": 10.701078465755538,
|
|
"learning_rate": 5.673508345836239e-06,
|
|
"loss": 1.6619548797607422,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 1.5355450236966823,
|
|
"grad_norm": 7.891225505862987,
|
|
"learning_rate": 5.6689542866721095e-06,
|
|
"loss": 1.1752052307128906,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 1.5363349131121642,
|
|
"grad_norm": 6.304249509659231,
|
|
"learning_rate": 5.664399662373192e-06,
|
|
"loss": 0.8437387943267822,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 1.537124802527646,
|
|
"grad_norm": 17.603728317953017,
|
|
"learning_rate": 5.659844476787255e-06,
|
|
"loss": 1.1868000030517578,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 1.537914691943128,
|
|
"grad_norm": 66.47257484493905,
|
|
"learning_rate": 5.655288733762531e-06,
|
|
"loss": 2.8787412643432617,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 1.5387045813586098,
|
|
"grad_norm": 8.360497076634983,
|
|
"learning_rate": 5.650732437147725e-06,
|
|
"loss": 1.4284359216690063,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 1.5394944707740916,
|
|
"grad_norm": 16.849166837845633,
|
|
"learning_rate": 5.646175590792015e-06,
|
|
"loss": 1.8208255767822266,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 1.5402843601895735,
|
|
"grad_norm": 15.853584711578053,
|
|
"learning_rate": 5.6416181985450365e-06,
|
|
"loss": 1.4639555215835571,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 1.5410742496050553,
|
|
"grad_norm": 11.418310736533611,
|
|
"learning_rate": 5.637060264256893e-06,
|
|
"loss": 1.445953369140625,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 1.5418641390205372,
|
|
"grad_norm": 10.03242971159572,
|
|
"learning_rate": 5.632501791778139e-06,
|
|
"loss": 1.7128900289535522,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 1.542654028436019,
|
|
"grad_norm": 11.985633656861989,
|
|
"learning_rate": 5.6279427849597876e-06,
|
|
"loss": 1.3158780336380005,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 1.543443917851501,
|
|
"grad_norm": 13.823142105370444,
|
|
"learning_rate": 5.623383247653306e-06,
|
|
"loss": 1.426164150238037,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 1.5442338072669828,
|
|
"grad_norm": 12.41483138077164,
|
|
"learning_rate": 5.6188231837106024e-06,
|
|
"loss": 1.4300283193588257,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 1.5450236966824644,
|
|
"grad_norm": 12.100433098038376,
|
|
"learning_rate": 5.6142625969840355e-06,
|
|
"loss": 1.334028720855713,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 1.5458135860979463,
|
|
"grad_norm": 7.826204795219399,
|
|
"learning_rate": 5.6097014913264036e-06,
|
|
"loss": 1.2844315767288208,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 1.5466034755134281,
|
|
"grad_norm": 10.029697199051965,
|
|
"learning_rate": 5.605139870590945e-06,
|
|
"loss": 1.7111456394195557,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 1.54739336492891,
|
|
"grad_norm": 23.257849538913792,
|
|
"learning_rate": 5.600577738631331e-06,
|
|
"loss": 1.3320598602294922,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 1.5481832543443916,
|
|
"grad_norm": 28.133569384076022,
|
|
"learning_rate": 5.596015099301665e-06,
|
|
"loss": 1.7531509399414062,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 1.5489731437598735,
|
|
"grad_norm": 7.595217526983019,
|
|
"learning_rate": 5.591451956456482e-06,
|
|
"loss": 1.5534119606018066,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 1.5497630331753554,
|
|
"grad_norm": 14.700635546212805,
|
|
"learning_rate": 5.586888313950737e-06,
|
|
"loss": 0.8609148263931274,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 1.5505529225908372,
|
|
"grad_norm": 11.364286626899796,
|
|
"learning_rate": 5.5823241756398115e-06,
|
|
"loss": 1.637607216835022,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 1.551342812006319,
|
|
"grad_norm": 9.425435127940757,
|
|
"learning_rate": 5.577759545379507e-06,
|
|
"loss": 1.3392387628555298,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 1.552132701421801,
|
|
"grad_norm": 13.850940959168309,
|
|
"learning_rate": 5.573194427026034e-06,
|
|
"loss": 1.3945591449737549,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 1.5529225908372828,
|
|
"grad_norm": 11.948388622145545,
|
|
"learning_rate": 5.568628824436022e-06,
|
|
"loss": 1.3258531093597412,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 1.5537124802527646,
|
|
"grad_norm": 12.964726484077811,
|
|
"learning_rate": 5.564062741466506e-06,
|
|
"loss": 1.6788570880889893,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 1.5545023696682465,
|
|
"grad_norm": 14.308086462658784,
|
|
"learning_rate": 5.559496181974929e-06,
|
|
"loss": 1.5159149169921875,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 1.5552922590837284,
|
|
"grad_norm": 12.57208338798326,
|
|
"learning_rate": 5.554929149819136e-06,
|
|
"loss": 1.6231142282485962,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 1.5560821484992102,
|
|
"grad_norm": 24.3591417534666,
|
|
"learning_rate": 5.550361648857369e-06,
|
|
"loss": 2.435429573059082,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 1.556872037914692,
|
|
"grad_norm": 9.389696915132824,
|
|
"learning_rate": 5.545793682948269e-06,
|
|
"loss": 1.346461296081543,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 1.5576619273301737,
|
|
"grad_norm": 15.375687685358574,
|
|
"learning_rate": 5.541225255950868e-06,
|
|
"loss": 1.1790099143981934,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 1.5584518167456556,
|
|
"grad_norm": 11.802119366322316,
|
|
"learning_rate": 5.536656371724588e-06,
|
|
"loss": 1.6181936264038086,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 1.5592417061611374,
|
|
"grad_norm": 9.110006585026039,
|
|
"learning_rate": 5.5320870341292396e-06,
|
|
"loss": 1.6034982204437256,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 1.5600315955766193,
|
|
"grad_norm": 11.636051169463032,
|
|
"learning_rate": 5.527517247025012e-06,
|
|
"loss": 1.4720101356506348,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 1.5608214849921012,
|
|
"grad_norm": 11.163295280212573,
|
|
"learning_rate": 5.522947014272476e-06,
|
|
"loss": 1.140345573425293,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 1.5616113744075828,
|
|
"grad_norm": 13.38890976856887,
|
|
"learning_rate": 5.518376339732582e-06,
|
|
"loss": 1.2083477973937988,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 1.5624012638230647,
|
|
"grad_norm": 11.809538383883128,
|
|
"learning_rate": 5.513805227266648e-06,
|
|
"loss": 2.1081316471099854,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 1.5631911532385465,
|
|
"grad_norm": 7.495208922125521,
|
|
"learning_rate": 5.5092336807363655e-06,
|
|
"loss": 2.039696455001831,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 1.5639810426540284,
|
|
"grad_norm": 13.493304714233394,
|
|
"learning_rate": 5.504661704003793e-06,
|
|
"loss": 1.486254096031189,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 1.5647709320695102,
|
|
"grad_norm": 7.784789213569366,
|
|
"learning_rate": 5.5000893009313515e-06,
|
|
"loss": 1.3860276937484741,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 1.565560821484992,
|
|
"grad_norm": 18.85650733652106,
|
|
"learning_rate": 5.495516475381822e-06,
|
|
"loss": 1.6732574701309204,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 1.566350710900474,
|
|
"grad_norm": 8.123420469888398,
|
|
"learning_rate": 5.490943231218343e-06,
|
|
"loss": 1.847348928451538,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 1.5671406003159558,
|
|
"grad_norm": 16.49563314393432,
|
|
"learning_rate": 5.486369572304404e-06,
|
|
"loss": 1.5314483642578125,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 1.5679304897314377,
|
|
"grad_norm": 8.487512867858714,
|
|
"learning_rate": 5.48179550250385e-06,
|
|
"loss": 1.5116339921951294,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 1.5687203791469195,
|
|
"grad_norm": 6.915693492968808,
|
|
"learning_rate": 5.477221025680868e-06,
|
|
"loss": 1.482391119003296,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 1.5695102685624014,
|
|
"grad_norm": 19.644776640804693,
|
|
"learning_rate": 5.472646145699991e-06,
|
|
"loss": 1.6486904621124268,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 1.5703001579778832,
|
|
"grad_norm": 7.603103639854315,
|
|
"learning_rate": 5.468070866426098e-06,
|
|
"loss": 1.633828043937683,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 1.5710900473933649,
|
|
"grad_norm": 13.602654260017356,
|
|
"learning_rate": 5.4634951917243905e-06,
|
|
"loss": 1.9716848134994507,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 1.5718799368088467,
|
|
"grad_norm": 9.757410424603487,
|
|
"learning_rate": 5.458919125460421e-06,
|
|
"loss": 2.7240705490112305,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 1.5726698262243286,
|
|
"grad_norm": 19.360862166490207,
|
|
"learning_rate": 5.4543426715000644e-06,
|
|
"loss": 1.697304368019104,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 1.5734597156398105,
|
|
"grad_norm": 10.343188536941312,
|
|
"learning_rate": 5.4497658337095205e-06,
|
|
"loss": 1.9906163215637207,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 1.574249605055292,
|
|
"grad_norm": 11.847212271229933,
|
|
"learning_rate": 5.44518861595532e-06,
|
|
"loss": 2.298971176147461,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 1.575039494470774,
|
|
"grad_norm": 9.679704863397104,
|
|
"learning_rate": 5.440611022104312e-06,
|
|
"loss": 1.621870994567871,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 1.5758293838862558,
|
|
"grad_norm": 8.36320218724247,
|
|
"learning_rate": 5.43603305602366e-06,
|
|
"loss": 0.9557559490203857,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 1.5766192733017377,
|
|
"grad_norm": 9.018756709124666,
|
|
"learning_rate": 5.431454721580847e-06,
|
|
"loss": 1.3342235088348389,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 1.5774091627172195,
|
|
"grad_norm": 10.023772272993021,
|
|
"learning_rate": 5.426876022643665e-06,
|
|
"loss": 2.0808849334716797,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 1.5781990521327014,
|
|
"grad_norm": 11.455856990097669,
|
|
"learning_rate": 5.422296963080212e-06,
|
|
"loss": 1.4423177242279053,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 1.5789889415481833,
|
|
"grad_norm": 23.049740134401613,
|
|
"learning_rate": 5.417717546758895e-06,
|
|
"loss": 1.558653712272644,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 1.5797788309636651,
|
|
"grad_norm": 19.165624885196646,
|
|
"learning_rate": 5.413137777548418e-06,
|
|
"loss": 2.2184576988220215,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 1.580568720379147,
|
|
"grad_norm": 11.595793426567166,
|
|
"learning_rate": 5.4085576593177865e-06,
|
|
"loss": 1.0131672620773315,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 1.5813586097946288,
|
|
"grad_norm": 8.213720732018274,
|
|
"learning_rate": 5.403977195936301e-06,
|
|
"loss": 1.3775444030761719,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 1.5821484992101107,
|
|
"grad_norm": 12.113284402572612,
|
|
"learning_rate": 5.399396391273547e-06,
|
|
"loss": 1.8444898128509521,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 1.5829383886255926,
|
|
"grad_norm": 14.168275573138237,
|
|
"learning_rate": 5.394815249199408e-06,
|
|
"loss": 1.2480335235595703,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 1.5837282780410744,
|
|
"grad_norm": 5.247577025302916,
|
|
"learning_rate": 5.390233773584047e-06,
|
|
"loss": 0.7935315370559692,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 1.584518167456556,
|
|
"grad_norm": 10.201870401815132,
|
|
"learning_rate": 5.385651968297907e-06,
|
|
"loss": 1.4372203350067139,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 1.585308056872038,
|
|
"grad_norm": 9.572141631234302,
|
|
"learning_rate": 5.3810698372117165e-06,
|
|
"loss": 1.6270627975463867,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 1.5860979462875198,
|
|
"grad_norm": 11.97927351834067,
|
|
"learning_rate": 5.37648738419647e-06,
|
|
"loss": 1.5614657402038574,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 1.5868878357030016,
|
|
"grad_norm": 8.772950923899149,
|
|
"learning_rate": 5.371904613123444e-06,
|
|
"loss": 1.8875480890274048,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 1.5876777251184833,
|
|
"grad_norm": 8.245381714413037,
|
|
"learning_rate": 5.367321527864175e-06,
|
|
"loss": 1.6283080577850342,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 1.5884676145339651,
|
|
"grad_norm": 7.982415790960697,
|
|
"learning_rate": 5.362738132290471e-06,
|
|
"loss": 1.4374988079071045,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 1.589257503949447,
|
|
"grad_norm": 11.43633322051133,
|
|
"learning_rate": 5.358154430274397e-06,
|
|
"loss": 1.4911184310913086,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 1.5900473933649288,
|
|
"grad_norm": 10.388360353070853,
|
|
"learning_rate": 5.353570425688282e-06,
|
|
"loss": 0.8646364212036133,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 1.5908372827804107,
|
|
"grad_norm": 8.234236363122712,
|
|
"learning_rate": 5.348986122404706e-06,
|
|
"loss": 1.3800685405731201,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 1.5916271721958926,
|
|
"grad_norm": 9.432342143567002,
|
|
"learning_rate": 5.344401524296506e-06,
|
|
"loss": 1.8379184007644653,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 1.5924170616113744,
|
|
"grad_norm": 10.131723893874554,
|
|
"learning_rate": 5.339816635236762e-06,
|
|
"loss": 1.7298725843429565,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 1.5932069510268563,
|
|
"grad_norm": 9.394398464778371,
|
|
"learning_rate": 5.335231459098806e-06,
|
|
"loss": 1.9646117687225342,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 1.5939968404423381,
|
|
"grad_norm": 15.074944044842479,
|
|
"learning_rate": 5.330645999756211e-06,
|
|
"loss": 1.759244680404663,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 1.59478672985782,
|
|
"grad_norm": 30.106091146105907,
|
|
"learning_rate": 5.326060261082786e-06,
|
|
"loss": 1.3692538738250732,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 1.5955766192733019,
|
|
"grad_norm": 10.236894381216507,
|
|
"learning_rate": 5.321474246952577e-06,
|
|
"loss": 1.1828837394714355,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 1.5963665086887837,
|
|
"grad_norm": 14.4756810516669,
|
|
"learning_rate": 5.3168879612398684e-06,
|
|
"loss": 2.2654309272766113,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 1.5971563981042654,
|
|
"grad_norm": 12.643909473952599,
|
|
"learning_rate": 5.3123014078191635e-06,
|
|
"loss": 1.6730940341949463,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 1.5979462875197472,
|
|
"grad_norm": 11.041702917394197,
|
|
"learning_rate": 5.307714590565203e-06,
|
|
"loss": 1.547790288925171,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 1.598736176935229,
|
|
"grad_norm": 11.4813876902655,
|
|
"learning_rate": 5.303127513352943e-06,
|
|
"loss": 1.3282029628753662,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 1.599526066350711,
|
|
"grad_norm": 17.03475421856299,
|
|
"learning_rate": 5.298540180057561e-06,
|
|
"loss": 1.527526617050171,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 1.6003159557661928,
|
|
"grad_norm": 12.426584174439972,
|
|
"learning_rate": 5.293952594554452e-06,
|
|
"loss": 1.0609666109085083,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 1.6011058451816744,
|
|
"grad_norm": 14.614321755378219,
|
|
"learning_rate": 5.289364760719223e-06,
|
|
"loss": 1.6314609050750732,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 1.6018957345971563,
|
|
"grad_norm": 17.90827490801936,
|
|
"learning_rate": 5.284776682427691e-06,
|
|
"loss": 2.031951904296875,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 1.6026856240126381,
|
|
"grad_norm": 9.299749173159547,
|
|
"learning_rate": 5.280188363555881e-06,
|
|
"loss": 1.3200483322143555,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 1.60347551342812,
|
|
"grad_norm": 13.120649438444463,
|
|
"learning_rate": 5.275599807980019e-06,
|
|
"loss": 1.2993329763412476,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 1.6042654028436019,
|
|
"grad_norm": 22.075534962390574,
|
|
"learning_rate": 5.271011019576528e-06,
|
|
"loss": 1.9799494743347168,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 1.6050552922590837,
|
|
"grad_norm": 7.468671537450528,
|
|
"learning_rate": 5.2664220022220404e-06,
|
|
"loss": 1.303866982460022,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 1.6058451816745656,
|
|
"grad_norm": 9.57368198328469,
|
|
"learning_rate": 5.261832759793365e-06,
|
|
"loss": 1.6507763862609863,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 1.6066350710900474,
|
|
"grad_norm": 9.44370501166652,
|
|
"learning_rate": 5.2572432961675115e-06,
|
|
"loss": 1.9541301727294922,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 1.6074249605055293,
|
|
"grad_norm": 14.242349861981912,
|
|
"learning_rate": 5.252653615221677e-06,
|
|
"loss": 1.2269582748413086,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 1.6082148499210112,
|
|
"grad_norm": 8.26115422114046,
|
|
"learning_rate": 5.248063720833233e-06,
|
|
"loss": 1.1905943155288696,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 1.609004739336493,
|
|
"grad_norm": 10.698374761244876,
|
|
"learning_rate": 5.243473616879744e-06,
|
|
"loss": 1.0884801149368286,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 1.6097946287519749,
|
|
"grad_norm": 11.821184713135086,
|
|
"learning_rate": 5.238883307238939e-06,
|
|
"loss": 1.7929291725158691,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 1.6105845181674565,
|
|
"grad_norm": 16.8095121665838,
|
|
"learning_rate": 5.234292795788731e-06,
|
|
"loss": 1.8585515022277832,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 1.6113744075829384,
|
|
"grad_norm": 14.477383097248046,
|
|
"learning_rate": 5.229702086407197e-06,
|
|
"loss": 2.150439977645874,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 1.6121642969984202,
|
|
"grad_norm": 11.989923601556997,
|
|
"learning_rate": 5.225111182972584e-06,
|
|
"loss": 1.4646919965744019,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 1.612954186413902,
|
|
"grad_norm": 21.745170716359002,
|
|
"learning_rate": 5.220520089363302e-06,
|
|
"loss": 1.7785918712615967,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 1.6137440758293837,
|
|
"grad_norm": 11.270529683690256,
|
|
"learning_rate": 5.215928809457924e-06,
|
|
"loss": 2.263561964035034,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 1.6145339652448656,
|
|
"grad_norm": 15.910293271142669,
|
|
"learning_rate": 5.211337347135176e-06,
|
|
"loss": 1.4089993238449097,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 1.6153238546603474,
|
|
"grad_norm": 16.722563692314175,
|
|
"learning_rate": 5.20674570627394e-06,
|
|
"loss": 1.6263060569763184,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 1.6161137440758293,
|
|
"grad_norm": 11.666163831636595,
|
|
"learning_rate": 5.202153890753252e-06,
|
|
"loss": 1.2603790760040283,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 1.6169036334913112,
|
|
"grad_norm": 12.621495227684445,
|
|
"learning_rate": 5.197561904452291e-06,
|
|
"loss": 2.883836507797241,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 1.617693522906793,
|
|
"grad_norm": 10.161552116575363,
|
|
"learning_rate": 5.192969751250382e-06,
|
|
"loss": 1.666745662689209,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 1.6184834123222749,
|
|
"grad_norm": 8.91334171222036,
|
|
"learning_rate": 5.188377435026991e-06,
|
|
"loss": 1.3152096271514893,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 1.6192733017377567,
|
|
"grad_norm": 12.703171570917682,
|
|
"learning_rate": 5.183784959661723e-06,
|
|
"loss": 1.2735559940338135,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 1.6200631911532386,
|
|
"grad_norm": 13.160459144969359,
|
|
"learning_rate": 5.1791923290343175e-06,
|
|
"loss": 0.9299610257148743,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 1.6208530805687205,
|
|
"grad_norm": 8.443761976415868,
|
|
"learning_rate": 5.17459954702464e-06,
|
|
"loss": 1.560915231704712,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 1.6216429699842023,
|
|
"grad_norm": 14.155078887312051,
|
|
"learning_rate": 5.1700066175126915e-06,
|
|
"loss": 2.029481887817383,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 1.6224328593996842,
|
|
"grad_norm": 7.4669992465133825,
|
|
"learning_rate": 5.165413544378594e-06,
|
|
"loss": 1.332027792930603,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 1.623222748815166,
|
|
"grad_norm": 11.339506770988185,
|
|
"learning_rate": 5.160820331502587e-06,
|
|
"loss": 1.6719763278961182,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 1.6240126382306477,
|
|
"grad_norm": 9.957423283849112,
|
|
"learning_rate": 5.1562269827650365e-06,
|
|
"loss": 1.8021857738494873,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 1.6248025276461295,
|
|
"grad_norm": 13.931093811264075,
|
|
"learning_rate": 5.1516335020464146e-06,
|
|
"loss": 1.2689666748046875,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 1.6255924170616114,
|
|
"grad_norm": 10.873902590482956,
|
|
"learning_rate": 5.147039893227312e-06,
|
|
"loss": 1.6544064283370972,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 1.6263823064770933,
|
|
"grad_norm": 14.998070753763457,
|
|
"learning_rate": 5.142446160188423e-06,
|
|
"loss": 0.7487756013870239,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 1.627172195892575,
|
|
"grad_norm": 11.06717608145743,
|
|
"learning_rate": 5.137852306810549e-06,
|
|
"loss": 1.2134767770767212,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 1.6279620853080567,
|
|
"grad_norm": 9.422607327758378,
|
|
"learning_rate": 5.133258336974593e-06,
|
|
"loss": 1.7278623580932617,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 1.6287519747235386,
|
|
"grad_norm": 11.032813566120764,
|
|
"learning_rate": 5.128664254561554e-06,
|
|
"loss": 1.4675060510635376,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 1.6295418641390205,
|
|
"grad_norm": 19.42554207831122,
|
|
"learning_rate": 5.12407006345253e-06,
|
|
"loss": 1.6174191236495972,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 1.6303317535545023,
|
|
"grad_norm": 8.820495621535443,
|
|
"learning_rate": 5.119475767528706e-06,
|
|
"loss": 1.6649625301361084,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 1.6311216429699842,
|
|
"grad_norm": 8.576125799967375,
|
|
"learning_rate": 5.114881370671363e-06,
|
|
"loss": 1.447519063949585,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 1.631911532385466,
|
|
"grad_norm": 12.22755594166127,
|
|
"learning_rate": 5.1102868767618564e-06,
|
|
"loss": 2.094078540802002,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 1.632701421800948,
|
|
"grad_norm": 17.90068380574316,
|
|
"learning_rate": 5.105692289681637e-06,
|
|
"loss": 2.2962210178375244,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 1.6334913112164298,
|
|
"grad_norm": 10.844973400668414,
|
|
"learning_rate": 5.10109761331222e-06,
|
|
"loss": 1.753501534461975,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 1.6342812006319116,
|
|
"grad_norm": 9.036279470960077,
|
|
"learning_rate": 5.096502851535207e-06,
|
|
"loss": 1.4237632751464844,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 1.6350710900473935,
|
|
"grad_norm": 19.783228854851586,
|
|
"learning_rate": 5.091908008232269e-06,
|
|
"loss": 1.0084950923919678,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 1.6358609794628753,
|
|
"grad_norm": 8.265709299215786,
|
|
"learning_rate": 5.08731308728514e-06,
|
|
"loss": 2.1787667274475098,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 1.636650868878357,
|
|
"grad_norm": 14.174582481095438,
|
|
"learning_rate": 5.082718092575629e-06,
|
|
"loss": 2.0619583129882812,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 1.6374407582938388,
|
|
"grad_norm": 11.108397416157564,
|
|
"learning_rate": 5.078123027985602e-06,
|
|
"loss": 1.3192667961120605,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 1.6382306477093207,
|
|
"grad_norm": 8.363886228263077,
|
|
"learning_rate": 5.073527897396983e-06,
|
|
"loss": 1.50796639919281,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 1.6390205371248026,
|
|
"grad_norm": 15.251141269075907,
|
|
"learning_rate": 5.068932704691754e-06,
|
|
"loss": 2.005817174911499,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 1.6398104265402842,
|
|
"grad_norm": 13.455564265575436,
|
|
"learning_rate": 5.064337453751949e-06,
|
|
"loss": 0.9753101468086243,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 1.640600315955766,
|
|
"grad_norm": 10.151593209532505,
|
|
"learning_rate": 5.059742148459651e-06,
|
|
"loss": 1.3239325284957886,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 1.641390205371248,
|
|
"grad_norm": 9.012341783855266,
|
|
"learning_rate": 5.055146792696989e-06,
|
|
"loss": 1.3081142902374268,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 1.6421800947867298,
|
|
"grad_norm": 14.881682947088617,
|
|
"learning_rate": 5.050551390346135e-06,
|
|
"loss": 1.9254162311553955,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 1.6429699842022116,
|
|
"grad_norm": 9.10456564272968,
|
|
"learning_rate": 5.0459559452893e-06,
|
|
"loss": 1.5548919439315796,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 1.6437598736176935,
|
|
"grad_norm": 19.6788851757542,
|
|
"learning_rate": 5.041360461408733e-06,
|
|
"loss": 1.1454696655273438,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 1.6445497630331753,
|
|
"grad_norm": 14.710569283676744,
|
|
"learning_rate": 5.036764942586709e-06,
|
|
"loss": 1.5640335083007812,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 1.6453396524486572,
|
|
"grad_norm": 12.439497693452665,
|
|
"learning_rate": 5.032169392705542e-06,
|
|
"loss": 1.5709795951843262,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 1.646129541864139,
|
|
"grad_norm": 16.197420446815777,
|
|
"learning_rate": 5.027573815647567e-06,
|
|
"loss": 2.125795602798462,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 1.646919431279621,
|
|
"grad_norm": 12.282692453985112,
|
|
"learning_rate": 5.0229782152951405e-06,
|
|
"loss": 1.4845194816589355,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 1.6477093206951028,
|
|
"grad_norm": 9.520283321188863,
|
|
"learning_rate": 5.018382595530643e-06,
|
|
"loss": 1.538682460784912,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 1.6484992101105846,
|
|
"grad_norm": 9.675670573110745,
|
|
"learning_rate": 5.0137869602364665e-06,
|
|
"loss": 1.280341625213623,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 1.6492890995260665,
|
|
"grad_norm": 18.974744400190374,
|
|
"learning_rate": 5.009191313295021e-06,
|
|
"loss": 1.772722601890564,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 1.6500789889415481,
|
|
"grad_norm": 9.452933859527397,
|
|
"learning_rate": 5.004595658588725e-06,
|
|
"loss": 1.2798036336898804,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 1.65086887835703,
|
|
"grad_norm": 11.716983488622807,
|
|
"learning_rate": 5e-06,
|
|
"loss": 2.5696773529052734,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 1.6516587677725119,
|
|
"grad_norm": 17.30778123295629,
|
|
"learning_rate": 4.995404341411277e-06,
|
|
"loss": 2.143465518951416,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 1.6524486571879937,
|
|
"grad_norm": 6.676595853028537,
|
|
"learning_rate": 4.990808686704979e-06,
|
|
"loss": 1.9209420680999756,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 1.6532385466034754,
|
|
"grad_norm": 10.700026843162101,
|
|
"learning_rate": 4.986213039763537e-06,
|
|
"loss": 1.3106441497802734,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 1.6540284360189572,
|
|
"grad_norm": 11.845311555733167,
|
|
"learning_rate": 4.98161740446936e-06,
|
|
"loss": 1.331827998161316,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 1.654818325434439,
|
|
"grad_norm": 14.482146662888397,
|
|
"learning_rate": 4.977021784704862e-06,
|
|
"loss": 1.2673121690750122,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 1.655608214849921,
|
|
"grad_norm": 9.96068930193819,
|
|
"learning_rate": 4.9724261843524345e-06,
|
|
"loss": 1.4080572128295898,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 1.6563981042654028,
|
|
"grad_norm": 6.81552208619803,
|
|
"learning_rate": 4.967830607294459e-06,
|
|
"loss": 1.8892409801483154,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 1.6571879936808847,
|
|
"grad_norm": 9.139351921483907,
|
|
"learning_rate": 4.963235057413292e-06,
|
|
"loss": 1.865785002708435,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 1.6579778830963665,
|
|
"grad_norm": 18.25394807894221,
|
|
"learning_rate": 4.95863953859127e-06,
|
|
"loss": 3.839024782180786,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 1.6587677725118484,
|
|
"grad_norm": 12.684731410566474,
|
|
"learning_rate": 4.9540440547107016e-06,
|
|
"loss": 1.6854069232940674,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 1.6595576619273302,
|
|
"grad_norm": 11.466195743942457,
|
|
"learning_rate": 4.9494486096538654e-06,
|
|
"loss": 0.6167169809341431,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 1.660347551342812,
|
|
"grad_norm": 11.989810253242599,
|
|
"learning_rate": 4.9448532073030125e-06,
|
|
"loss": 1.5397396087646484,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 1.661137440758294,
|
|
"grad_norm": 13.091229369852948,
|
|
"learning_rate": 4.940257851540351e-06,
|
|
"loss": 1.1000051498413086,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 1.6619273301737758,
|
|
"grad_norm": 15.278599967251001,
|
|
"learning_rate": 4.935662546248054e-06,
|
|
"loss": 1.370941162109375,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 1.6627172195892577,
|
|
"grad_norm": 8.92736303739886,
|
|
"learning_rate": 4.9310672953082486e-06,
|
|
"loss": 1.4179476499557495,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 1.6635071090047393,
|
|
"grad_norm": 7.439214281898773,
|
|
"learning_rate": 4.92647210260302e-06,
|
|
"loss": 1.1264572143554688,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 1.6642969984202212,
|
|
"grad_norm": 8.391749061486982,
|
|
"learning_rate": 4.9218769720144e-06,
|
|
"loss": 1.7352138757705688,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 1.665086887835703,
|
|
"grad_norm": 8.41116001387029,
|
|
"learning_rate": 4.917281907424371e-06,
|
|
"loss": 1.3535940647125244,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 1.6658767772511849,
|
|
"grad_norm": 9.156612347669475,
|
|
"learning_rate": 4.912686912714861e-06,
|
|
"loss": 1.4920666217803955,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 16.712403107809568,
|
|
"learning_rate": 4.908091991767734e-06,
|
|
"loss": 1.6276068687438965,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 1.6674565560821484,
|
|
"grad_norm": 18.944886780238278,
|
|
"learning_rate": 4.903497148464795e-06,
|
|
"loss": 1.2643494606018066,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 1.6682464454976302,
|
|
"grad_norm": 8.854420836492178,
|
|
"learning_rate": 4.898902386687782e-06,
|
|
"loss": 1.728925108909607,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 1.669036334913112,
|
|
"grad_norm": 7.998931904416969,
|
|
"learning_rate": 4.894307710318365e-06,
|
|
"loss": 1.2208718061447144,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 1.669826224328594,
|
|
"grad_norm": 11.667293171545051,
|
|
"learning_rate": 4.8897131232381435e-06,
|
|
"loss": 1.281367540359497,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 1.6706161137440758,
|
|
"grad_norm": 8.360501164751504,
|
|
"learning_rate": 4.88511862932864e-06,
|
|
"loss": 1.7427953481674194,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 1.6714060031595577,
|
|
"grad_norm": 12.84273569482612,
|
|
"learning_rate": 4.880524232471295e-06,
|
|
"loss": 1.1219735145568848,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 1.6721958925750395,
|
|
"grad_norm": 8.81213908324747,
|
|
"learning_rate": 4.875929936547472e-06,
|
|
"loss": 1.642223596572876,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 1.6729857819905214,
|
|
"grad_norm": 17.800338724408167,
|
|
"learning_rate": 4.871335745438448e-06,
|
|
"loss": 2.3190040588378906,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 1.6737756714060033,
|
|
"grad_norm": 17.22836983227111,
|
|
"learning_rate": 4.866741663025409e-06,
|
|
"loss": 1.689987063407898,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 1.674565560821485,
|
|
"grad_norm": 10.031772239297299,
|
|
"learning_rate": 4.8621476931894505e-06,
|
|
"loss": 1.2501479387283325,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 1.675355450236967,
|
|
"grad_norm": 9.091404836352739,
|
|
"learning_rate": 4.857553839811579e-06,
|
|
"loss": 1.4689991474151611,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 1.6761453396524486,
|
|
"grad_norm": 11.720644404349697,
|
|
"learning_rate": 4.85296010677269e-06,
|
|
"loss": 1.3092423677444458,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 1.6769352290679305,
|
|
"grad_norm": 9.826261602656874,
|
|
"learning_rate": 4.848366497953586e-06,
|
|
"loss": 1.6839494705200195,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 1.6777251184834123,
|
|
"grad_norm": 10.367714457357824,
|
|
"learning_rate": 4.843773017234964e-06,
|
|
"loss": 1.245840072631836,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 1.6785150078988942,
|
|
"grad_norm": 15.019641392439464,
|
|
"learning_rate": 4.839179668497413e-06,
|
|
"loss": 1.2178664207458496,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 1.6793048973143758,
|
|
"grad_norm": 13.354170916911665,
|
|
"learning_rate": 4.834586455621409e-06,
|
|
"loss": 1.5003374814987183,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 1.6800947867298577,
|
|
"grad_norm": 9.162723435056346,
|
|
"learning_rate": 4.829993382487309e-06,
|
|
"loss": 1.3886042833328247,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 1.6808846761453395,
|
|
"grad_norm": 9.551211296648892,
|
|
"learning_rate": 4.825400452975361e-06,
|
|
"loss": 1.7338354587554932,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 1.6816745655608214,
|
|
"grad_norm": 15.926343890819645,
|
|
"learning_rate": 4.820807670965683e-06,
|
|
"loss": 1.217260718345642,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 1.6824644549763033,
|
|
"grad_norm": 9.271922963812838,
|
|
"learning_rate": 4.816215040338277e-06,
|
|
"loss": 1.4944868087768555,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 1.6832543443917851,
|
|
"grad_norm": 10.041906359891817,
|
|
"learning_rate": 4.811622564973011e-06,
|
|
"loss": 1.8609442710876465,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 1.684044233807267,
|
|
"grad_norm": 10.228103619124214,
|
|
"learning_rate": 4.807030248749621e-06,
|
|
"loss": 1.6223942041397095,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 1.6848341232227488,
|
|
"grad_norm": 9.327061347477445,
|
|
"learning_rate": 4.802438095547712e-06,
|
|
"loss": 1.100557804107666,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 1.6856240126382307,
|
|
"grad_norm": 30.522951746249742,
|
|
"learning_rate": 4.7978461092467495e-06,
|
|
"loss": 1.0848801136016846,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 1.6864139020537126,
|
|
"grad_norm": 13.164129620057786,
|
|
"learning_rate": 4.793254293726061e-06,
|
|
"loss": 1.1626616716384888,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 1.6872037914691944,
|
|
"grad_norm": 21.254188334183024,
|
|
"learning_rate": 4.788662652864825e-06,
|
|
"loss": 1.7419378757476807,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 1.6879936808846763,
|
|
"grad_norm": 15.489897028057435,
|
|
"learning_rate": 4.784071190542079e-06,
|
|
"loss": 1.6035929918289185,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 1.6887835703001581,
|
|
"grad_norm": 9.477674481683636,
|
|
"learning_rate": 4.7794799106366985e-06,
|
|
"loss": 0.9086638689041138,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 1.6895734597156398,
|
|
"grad_norm": 14.744530447825799,
|
|
"learning_rate": 4.774888817027417e-06,
|
|
"loss": 2.459580183029175,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 1.6903633491311216,
|
|
"grad_norm": 10.143359754031751,
|
|
"learning_rate": 4.770297913592805e-06,
|
|
"loss": 1.5249871015548706,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 1.6911532385466035,
|
|
"grad_norm": 8.92591707625083,
|
|
"learning_rate": 4.76570720421127e-06,
|
|
"loss": 1.3436775207519531,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 1.6919431279620853,
|
|
"grad_norm": 30.798541971194087,
|
|
"learning_rate": 4.7611166927610625e-06,
|
|
"loss": 1.0415196418762207,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 1.692733017377567,
|
|
"grad_norm": 10.766454601215523,
|
|
"learning_rate": 4.756526383120258e-06,
|
|
"loss": 1.6581356525421143,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 1.6935229067930488,
|
|
"grad_norm": 10.521084763687215,
|
|
"learning_rate": 4.751936279166767e-06,
|
|
"loss": 1.5447998046875,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 1.6943127962085307,
|
|
"grad_norm": 9.436881293717475,
|
|
"learning_rate": 4.747346384778325e-06,
|
|
"loss": 2.1874170303344727,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 1.6951026856240126,
|
|
"grad_norm": 9.945491755562474,
|
|
"learning_rate": 4.7427567038324884e-06,
|
|
"loss": 1.6179015636444092,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 1.6958925750394944,
|
|
"grad_norm": 20.026946174142626,
|
|
"learning_rate": 4.738167240206637e-06,
|
|
"loss": 1.4960978031158447,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 1.6966824644549763,
|
|
"grad_norm": 9.92767129752773,
|
|
"learning_rate": 4.733577997777963e-06,
|
|
"loss": 1.0159810781478882,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 1.6974723538704581,
|
|
"grad_norm": 8.419076351215592,
|
|
"learning_rate": 4.728988980423473e-06,
|
|
"loss": 1.4090895652770996,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 1.69826224328594,
|
|
"grad_norm": 8.367910062718474,
|
|
"learning_rate": 4.724400192019983e-06,
|
|
"loss": 1.2694896459579468,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 1.6990521327014219,
|
|
"grad_norm": 15.475743745207783,
|
|
"learning_rate": 4.71981163644412e-06,
|
|
"loss": 1.5766160488128662,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 1.6998420221169037,
|
|
"grad_norm": 8.610091370022774,
|
|
"learning_rate": 4.715223317572309e-06,
|
|
"loss": 1.2474552392959595,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 1.7006319115323856,
|
|
"grad_norm": 8.616644887482238,
|
|
"learning_rate": 4.7106352392807794e-06,
|
|
"loss": 1.4365007877349854,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 1.7014218009478674,
|
|
"grad_norm": 12.289373525187166,
|
|
"learning_rate": 4.70604740544555e-06,
|
|
"loss": 1.2263505458831787,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 1.7022116903633493,
|
|
"grad_norm": 14.493463927125523,
|
|
"learning_rate": 4.701459819942441e-06,
|
|
"loss": 1.1191456317901611,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 1.703001579778831,
|
|
"grad_norm": 11.091605026430587,
|
|
"learning_rate": 4.696872486647059e-06,
|
|
"loss": 1.8349318504333496,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 1.7037914691943128,
|
|
"grad_norm": 13.242011895533519,
|
|
"learning_rate": 4.692285409434797e-06,
|
|
"loss": 1.2665749788284302,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 1.7045813586097947,
|
|
"grad_norm": 12.97363928423666,
|
|
"learning_rate": 4.6876985921808365e-06,
|
|
"loss": 1.3644407987594604,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 1.7053712480252765,
|
|
"grad_norm": 9.838108998315606,
|
|
"learning_rate": 4.683112038760135e-06,
|
|
"loss": 1.3256113529205322,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 1.7061611374407581,
|
|
"grad_norm": 11.584707197962354,
|
|
"learning_rate": 4.6785257530474244e-06,
|
|
"loss": 1.4737862348556519,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 1.70695102685624,
|
|
"grad_norm": 13.735300921932108,
|
|
"learning_rate": 4.673939738917216e-06,
|
|
"loss": 1.6939847469329834,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 1.7077409162717219,
|
|
"grad_norm": 9.701613005896544,
|
|
"learning_rate": 4.669354000243791e-06,
|
|
"loss": 1.218980312347412,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 1.7085308056872037,
|
|
"grad_norm": 16.55205261891123,
|
|
"learning_rate": 4.664768540901194e-06,
|
|
"loss": 1.3491718769073486,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 1.7093206951026856,
|
|
"grad_norm": 10.899568751624019,
|
|
"learning_rate": 4.66018336476324e-06,
|
|
"loss": 1.0617191791534424,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 1.7101105845181674,
|
|
"grad_norm": 31.659285393659797,
|
|
"learning_rate": 4.655598475703498e-06,
|
|
"loss": 1.6565725803375244,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 1.7109004739336493,
|
|
"grad_norm": 14.401725841266487,
|
|
"learning_rate": 4.651013877595296e-06,
|
|
"loss": 1.4930999279022217,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 1.7116903633491312,
|
|
"grad_norm": 16.299266500455296,
|
|
"learning_rate": 4.64642957431172e-06,
|
|
"loss": 0.774669885635376,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 1.712480252764613,
|
|
"grad_norm": 19.156771040259898,
|
|
"learning_rate": 4.641845569725605e-06,
|
|
"loss": 2.0019631385803223,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 1.7132701421800949,
|
|
"grad_norm": 17.22778044854949,
|
|
"learning_rate": 4.63726186770953e-06,
|
|
"loss": 1.3190504312515259,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 1.7140600315955767,
|
|
"grad_norm": 9.853391429883748,
|
|
"learning_rate": 4.6326784721358255e-06,
|
|
"loss": 1.7607496976852417,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 1.7148499210110586,
|
|
"grad_norm": 11.600650539403235,
|
|
"learning_rate": 4.628095386876557e-06,
|
|
"loss": 1.8006988763809204,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 1.7156398104265402,
|
|
"grad_norm": 9.816615664918947,
|
|
"learning_rate": 4.623512615803531e-06,
|
|
"loss": 1.423611044883728,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 1.716429699842022,
|
|
"grad_norm": 11.105593098350882,
|
|
"learning_rate": 4.618930162788284e-06,
|
|
"loss": 1.4016926288604736,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 1.717219589257504,
|
|
"grad_norm": 9.31423831663281,
|
|
"learning_rate": 4.614348031702093e-06,
|
|
"loss": 1.490910291671753,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 1.7180094786729858,
|
|
"grad_norm": 9.65485402130286,
|
|
"learning_rate": 4.609766226415955e-06,
|
|
"loss": 1.4671694040298462,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 1.7187993680884674,
|
|
"grad_norm": 8.831389046526823,
|
|
"learning_rate": 4.605184750800594e-06,
|
|
"loss": 1.1502845287322998,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 1.7195892575039493,
|
|
"grad_norm": 11.97525173977403,
|
|
"learning_rate": 4.6006036087264544e-06,
|
|
"loss": 1.1983712911605835,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 1.7203791469194312,
|
|
"grad_norm": 7.344974809829897,
|
|
"learning_rate": 4.596022804063701e-06,
|
|
"loss": 1.7621641159057617,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 1.721169036334913,
|
|
"grad_norm": 8.826471148662039,
|
|
"learning_rate": 4.591442340682214e-06,
|
|
"loss": 1.350406289100647,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 1.7219589257503949,
|
|
"grad_norm": 14.464898797136621,
|
|
"learning_rate": 4.586862222451582e-06,
|
|
"loss": 1.644295334815979,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 1.7227488151658767,
|
|
"grad_norm": 14.126992669178419,
|
|
"learning_rate": 4.582282453241108e-06,
|
|
"loss": 1.1674833297729492,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 1.7235387045813586,
|
|
"grad_norm": 15.220420323290032,
|
|
"learning_rate": 4.5777030369197895e-06,
|
|
"loss": 1.2685773372650146,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 1.7243285939968405,
|
|
"grad_norm": 9.673306531164522,
|
|
"learning_rate": 4.573123977356337e-06,
|
|
"loss": 1.3816874027252197,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 1.7251184834123223,
|
|
"grad_norm": 10.109225928810197,
|
|
"learning_rate": 4.568545278419154e-06,
|
|
"loss": 0.9319192171096802,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 1.7259083728278042,
|
|
"grad_norm": 9.14802501607995,
|
|
"learning_rate": 4.56396694397634e-06,
|
|
"loss": 1.8452692031860352,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 1.726698262243286,
|
|
"grad_norm": 12.908130900260579,
|
|
"learning_rate": 4.55938897789569e-06,
|
|
"loss": 1.1169474124908447,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 1.727488151658768,
|
|
"grad_norm": 13.515379103637612,
|
|
"learning_rate": 4.554811384044681e-06,
|
|
"loss": 2.0045344829559326,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 1.7282780410742498,
|
|
"grad_norm": 14.12974423042282,
|
|
"learning_rate": 4.550234166290481e-06,
|
|
"loss": 1.453951358795166,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 1.7290679304897314,
|
|
"grad_norm": 11.23753254826144,
|
|
"learning_rate": 4.545657328499937e-06,
|
|
"loss": 1.3621933460235596,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 1.7298578199052133,
|
|
"grad_norm": 13.330762458315919,
|
|
"learning_rate": 4.541080874539579e-06,
|
|
"loss": 2.1850600242614746,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 1.7306477093206951,
|
|
"grad_norm": 15.273321036263173,
|
|
"learning_rate": 4.5365048082756095e-06,
|
|
"loss": 2.321899890899658,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 1.731437598736177,
|
|
"grad_norm": 10.112662361275033,
|
|
"learning_rate": 4.531929133573906e-06,
|
|
"loss": 1.4877285957336426,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 1.7322274881516586,
|
|
"grad_norm": 12.097152921819895,
|
|
"learning_rate": 4.5273538543000095e-06,
|
|
"loss": 1.2780163288116455,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 1.7330173775671405,
|
|
"grad_norm": 10.432317615119704,
|
|
"learning_rate": 4.522778974319133e-06,
|
|
"loss": 1.5664427280426025,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 1.7338072669826223,
|
|
"grad_norm": 7.912507954338899,
|
|
"learning_rate": 4.518204497496151e-06,
|
|
"loss": 1.6642968654632568,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 1.7345971563981042,
|
|
"grad_norm": 11.522163423849092,
|
|
"learning_rate": 4.513630427695597e-06,
|
|
"loss": 2.0236799716949463,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 1.735387045813586,
|
|
"grad_norm": 9.953485109546435,
|
|
"learning_rate": 4.50905676878166e-06,
|
|
"loss": 2.1658871173858643,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 1.736176935229068,
|
|
"grad_norm": 13.055500452513753,
|
|
"learning_rate": 4.504483524618179e-06,
|
|
"loss": 0.8655682802200317,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 1.7369668246445498,
|
|
"grad_norm": 10.348280690590556,
|
|
"learning_rate": 4.499910699068649e-06,
|
|
"loss": 1.5977658033370972,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 1.7377567140600316,
|
|
"grad_norm": 9.252649833434882,
|
|
"learning_rate": 4.495338295996208e-06,
|
|
"loss": 1.5957226753234863,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 1.7385466034755135,
|
|
"grad_norm": 11.994815495201612,
|
|
"learning_rate": 4.4907663192636345e-06,
|
|
"loss": 0.9464290738105774,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 1.7393364928909953,
|
|
"grad_norm": 8.819889689255204,
|
|
"learning_rate": 4.486194772733356e-06,
|
|
"loss": 1.736267328262329,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 1.7401263823064772,
|
|
"grad_norm": 10.89375250297425,
|
|
"learning_rate": 4.4816236602674204e-06,
|
|
"loss": 1.5301947593688965,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 1.740916271721959,
|
|
"grad_norm": 28.361460541379106,
|
|
"learning_rate": 4.477052985727525e-06,
|
|
"loss": 2.966822385787964,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 1.741706161137441,
|
|
"grad_norm": 16.527934269673235,
|
|
"learning_rate": 4.47248275297499e-06,
|
|
"loss": 1.2912685871124268,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 1.7424960505529226,
|
|
"grad_norm": 7.535472217731121,
|
|
"learning_rate": 4.467912965870761e-06,
|
|
"loss": 1.1335291862487793,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 1.7432859399684044,
|
|
"grad_norm": 8.2905986876192,
|
|
"learning_rate": 4.463343628275412e-06,
|
|
"loss": 1.4929556846618652,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 1.7440758293838863,
|
|
"grad_norm": 12.59336480489856,
|
|
"learning_rate": 4.458774744049134e-06,
|
|
"loss": 1.0215003490447998,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 1.7448657187993681,
|
|
"grad_norm": 10.41128966672044,
|
|
"learning_rate": 4.454206317051734e-06,
|
|
"loss": 1.46480393409729,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 1.7456556082148498,
|
|
"grad_norm": 10.615522798499182,
|
|
"learning_rate": 4.449638351142632e-06,
|
|
"loss": 1.465099811553955,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 1.7464454976303316,
|
|
"grad_norm": 9.897944735916337,
|
|
"learning_rate": 4.445070850180865e-06,
|
|
"loss": 1.993574857711792,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 1.7472353870458135,
|
|
"grad_norm": 11.844266382618313,
|
|
"learning_rate": 4.4405038180250715e-06,
|
|
"loss": 1.3619449138641357,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 1.7480252764612954,
|
|
"grad_norm": 8.226740624728182,
|
|
"learning_rate": 4.435937258533496e-06,
|
|
"loss": 1.619173288345337,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 1.7488151658767772,
|
|
"grad_norm": 11.343296287634017,
|
|
"learning_rate": 4.43137117556398e-06,
|
|
"loss": 1.3998527526855469,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 1.749605055292259,
|
|
"grad_norm": 13.493219686349496,
|
|
"learning_rate": 4.426805572973968e-06,
|
|
"loss": 1.625508427619934,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 1.750394944707741,
|
|
"grad_norm": 12.38010034938405,
|
|
"learning_rate": 4.422240454620496e-06,
|
|
"loss": 2.122622013092041,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 1.7511848341232228,
|
|
"grad_norm": 14.980457953037133,
|
|
"learning_rate": 4.4176758243601885e-06,
|
|
"loss": 1.5803240537643433,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 1.7519747235387046,
|
|
"grad_norm": 9.389237583408196,
|
|
"learning_rate": 4.413111686049264e-06,
|
|
"loss": 1.1918928623199463,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 1.7527646129541865,
|
|
"grad_norm": 10.030002557757358,
|
|
"learning_rate": 4.40854804354352e-06,
|
|
"loss": 0.9838066101074219,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 1.7535545023696684,
|
|
"grad_norm": 9.03857007330401,
|
|
"learning_rate": 4.403984900698336e-06,
|
|
"loss": 1.4722139835357666,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 1.7543443917851502,
|
|
"grad_norm": 13.96770687161923,
|
|
"learning_rate": 4.399422261368671e-06,
|
|
"loss": 1.4021885395050049,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 1.7551342812006319,
|
|
"grad_norm": 8.562842515589852,
|
|
"learning_rate": 4.394860129409056e-06,
|
|
"loss": 1.6055982112884521,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 1.7559241706161137,
|
|
"grad_norm": 16.556021076354195,
|
|
"learning_rate": 4.390298508673596e-06,
|
|
"loss": 1.7816779613494873,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 1.7567140600315956,
|
|
"grad_norm": 12.43470769219723,
|
|
"learning_rate": 4.385737403015967e-06,
|
|
"loss": 1.5340075492858887,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 1.7575039494470774,
|
|
"grad_norm": 24.066858353519663,
|
|
"learning_rate": 4.3811768162894e-06,
|
|
"loss": 1.3754091262817383,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 1.758293838862559,
|
|
"grad_norm": 11.278622345644811,
|
|
"learning_rate": 4.376616752346696e-06,
|
|
"loss": 1.6019980907440186,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 1.759083728278041,
|
|
"grad_norm": 9.564683161114333,
|
|
"learning_rate": 4.372057215040213e-06,
|
|
"loss": 1.9771608114242554,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 1.7598736176935228,
|
|
"grad_norm": 12.195360323247021,
|
|
"learning_rate": 4.367498208221863e-06,
|
|
"loss": 1.3690104484558105,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 1.7606635071090047,
|
|
"grad_norm": 13.65143464575961,
|
|
"learning_rate": 4.362939735743108e-06,
|
|
"loss": 1.7435321807861328,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 1.7614533965244865,
|
|
"grad_norm": 14.339262082940179,
|
|
"learning_rate": 4.358381801454966e-06,
|
|
"loss": 0.8703070878982544,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 1.7622432859399684,
|
|
"grad_norm": 9.781836635217395,
|
|
"learning_rate": 4.353824409207988e-06,
|
|
"loss": 2.1449623107910156,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 1.7630331753554502,
|
|
"grad_norm": 9.901685682067114,
|
|
"learning_rate": 4.349267562852276e-06,
|
|
"loss": 1.5492500066757202,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 1.763823064770932,
|
|
"grad_norm": 11.408200068592867,
|
|
"learning_rate": 4.34471126623747e-06,
|
|
"loss": 1.4743053913116455,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 1.764612954186414,
|
|
"grad_norm": 11.620081956175499,
|
|
"learning_rate": 4.340155523212746e-06,
|
|
"loss": 1.495714545249939,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 1.7654028436018958,
|
|
"grad_norm": 12.144719137720987,
|
|
"learning_rate": 4.335600337626809e-06,
|
|
"loss": 1.1870977878570557,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 1.7661927330173777,
|
|
"grad_norm": 10.073805141373832,
|
|
"learning_rate": 4.331045713327894e-06,
|
|
"loss": 1.2681677341461182,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 1.7669826224328595,
|
|
"grad_norm": 13.71543165458782,
|
|
"learning_rate": 4.326491654163762e-06,
|
|
"loss": 1.1395235061645508,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 1.7677725118483414,
|
|
"grad_norm": 13.755051603926155,
|
|
"learning_rate": 4.321938163981699e-06,
|
|
"loss": 1.4418907165527344,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 1.768562401263823,
|
|
"grad_norm": 12.213862776654832,
|
|
"learning_rate": 4.317385246628508e-06,
|
|
"loss": 1.602990746498108,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 1.7693522906793049,
|
|
"grad_norm": 13.831956811214068,
|
|
"learning_rate": 4.312832905950509e-06,
|
|
"loss": 1.280178427696228,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 1.7701421800947867,
|
|
"grad_norm": 12.777246523961239,
|
|
"learning_rate": 4.308281145793535e-06,
|
|
"loss": 1.7366316318511963,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 1.7709320695102686,
|
|
"grad_norm": 7.280612489287066,
|
|
"learning_rate": 4.303729970002924e-06,
|
|
"loss": 1.6962597370147705,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 1.7717219589257502,
|
|
"grad_norm": 13.907639824752652,
|
|
"learning_rate": 4.2991793824235286e-06,
|
|
"loss": 1.6384978294372559,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 1.772511848341232,
|
|
"grad_norm": 11.962864332754167,
|
|
"learning_rate": 4.294629386899699e-06,
|
|
"loss": 1.5645751953125,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 1.773301737756714,
|
|
"grad_norm": 10.40002219488319,
|
|
"learning_rate": 4.290079987275285e-06,
|
|
"loss": 1.8200668096542358,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 1.7740916271721958,
|
|
"grad_norm": 7.796350477300916,
|
|
"learning_rate": 4.285531187393639e-06,
|
|
"loss": 1.8340072631835938,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 1.7748815165876777,
|
|
"grad_norm": 14.305874479858872,
|
|
"learning_rate": 4.280982991097594e-06,
|
|
"loss": 1.3229985237121582,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 1.7756714060031595,
|
|
"grad_norm": 17.06836253359979,
|
|
"learning_rate": 4.276435402229488e-06,
|
|
"loss": 1.6926765441894531,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 1.7764612954186414,
|
|
"grad_norm": 14.834597218782198,
|
|
"learning_rate": 4.271888424631137e-06,
|
|
"loss": 1.4247950315475464,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 1.7772511848341233,
|
|
"grad_norm": 12.619750807648803,
|
|
"learning_rate": 4.267342062143841e-06,
|
|
"loss": 1.1141537427902222,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 1.7780410742496051,
|
|
"grad_norm": 12.049893792135833,
|
|
"learning_rate": 4.2627963186083844e-06,
|
|
"loss": 1.3787682056427002,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 1.778830963665087,
|
|
"grad_norm": 17.958477722896514,
|
|
"learning_rate": 4.258251197865028e-06,
|
|
"loss": 1.4096425771713257,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 1.7796208530805688,
|
|
"grad_norm": 10.777731590166498,
|
|
"learning_rate": 4.253706703753499e-06,
|
|
"loss": 1.2030799388885498,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 1.7804107424960507,
|
|
"grad_norm": 17.628412607375658,
|
|
"learning_rate": 4.249162840113005e-06,
|
|
"loss": 2.639885425567627,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 1.7812006319115326,
|
|
"grad_norm": 14.704071301126223,
|
|
"learning_rate": 4.244619610782216e-06,
|
|
"loss": 1.2214397192001343,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 1.7819905213270142,
|
|
"grad_norm": 12.269537431737808,
|
|
"learning_rate": 4.240077019599268e-06,
|
|
"loss": 0.9519909620285034,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 1.782780410742496,
|
|
"grad_norm": 7.752341256018781,
|
|
"learning_rate": 4.235535070401757e-06,
|
|
"loss": 1.37101149559021,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 1.783570300157978,
|
|
"grad_norm": 16.293151882011077,
|
|
"learning_rate": 4.2309937670267355e-06,
|
|
"loss": 1.9549283981323242,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 1.7843601895734598,
|
|
"grad_norm": 12.578113105487619,
|
|
"learning_rate": 4.22645311331071e-06,
|
|
"loss": 1.6407248973846436,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 1.7851500789889414,
|
|
"grad_norm": 11.529806537339917,
|
|
"learning_rate": 4.221913113089643e-06,
|
|
"loss": 1.6672661304473877,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 1.7859399684044233,
|
|
"grad_norm": 10.540110804631544,
|
|
"learning_rate": 4.217373770198939e-06,
|
|
"loss": 1.268946647644043,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 1.7867298578199051,
|
|
"grad_norm": 12.276750271201358,
|
|
"learning_rate": 4.212835088473455e-06,
|
|
"loss": 1.6153327226638794,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 1.787519747235387,
|
|
"grad_norm": 8.08704746323851,
|
|
"learning_rate": 4.208297071747476e-06,
|
|
"loss": 2.285081386566162,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 1.7883096366508688,
|
|
"grad_norm": 18.248267446477442,
|
|
"learning_rate": 4.20375972385474e-06,
|
|
"loss": 1.9024407863616943,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 1.7890995260663507,
|
|
"grad_norm": 9.775614166254378,
|
|
"learning_rate": 4.1992230486284105e-06,
|
|
"loss": 1.5444855690002441,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 1.7898894154818326,
|
|
"grad_norm": 13.515175274258306,
|
|
"learning_rate": 4.194687049901086e-06,
|
|
"loss": 1.2774243354797363,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 1.7906793048973144,
|
|
"grad_norm": 12.940831969941822,
|
|
"learning_rate": 4.190151731504795e-06,
|
|
"loss": 1.5125453472137451,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 1.7914691943127963,
|
|
"grad_norm": 9.973015494639506,
|
|
"learning_rate": 4.185617097270989e-06,
|
|
"loss": 1.5182291269302368,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 1.7922590837282781,
|
|
"grad_norm": 10.794888121500216,
|
|
"learning_rate": 4.181083151030539e-06,
|
|
"loss": 0.991271436214447,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 1.79304897314376,
|
|
"grad_norm": 23.58963526924935,
|
|
"learning_rate": 4.176549896613739e-06,
|
|
"loss": 1.3529629707336426,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 1.7938388625592419,
|
|
"grad_norm": 15.70406604388244,
|
|
"learning_rate": 4.1720173378503e-06,
|
|
"loss": 1.1336208581924438,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 1.7946287519747235,
|
|
"grad_norm": 8.10592167127099,
|
|
"learning_rate": 4.1674854785693395e-06,
|
|
"loss": 1.4463564157485962,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 1.7954186413902053,
|
|
"grad_norm": 16.247088268768564,
|
|
"learning_rate": 4.162954322599389e-06,
|
|
"loss": 1.5160022974014282,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 1.7962085308056872,
|
|
"grad_norm": 14.65045540543928,
|
|
"learning_rate": 4.158423873768382e-06,
|
|
"loss": 1.3699758052825928,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 1.796998420221169,
|
|
"grad_norm": 8.615488903760822,
|
|
"learning_rate": 4.153894135903658e-06,
|
|
"loss": 1.9635517597198486,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 1.7977883096366507,
|
|
"grad_norm": 7.489719358192559,
|
|
"learning_rate": 4.149365112831954e-06,
|
|
"loss": 1.8137181997299194,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 1.7985781990521326,
|
|
"grad_norm": 9.694581073663853,
|
|
"learning_rate": 4.144836808379404e-06,
|
|
"loss": 1.3416333198547363,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 1.7993680884676144,
|
|
"grad_norm": 12.289624728131038,
|
|
"learning_rate": 4.140309226371534e-06,
|
|
"loss": 1.53472900390625,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 1.8001579778830963,
|
|
"grad_norm": 11.175578971485294,
|
|
"learning_rate": 4.135782370633263e-06,
|
|
"loss": 1.6156749725341797,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 1.8009478672985781,
|
|
"grad_norm": 9.238967112564058,
|
|
"learning_rate": 4.131256244988888e-06,
|
|
"loss": 0.7965636253356934,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 1.80173775671406,
|
|
"grad_norm": 12.814521660266232,
|
|
"learning_rate": 4.1267308532621e-06,
|
|
"loss": 1.129547357559204,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 1.8025276461295419,
|
|
"grad_norm": 15.163564996840357,
|
|
"learning_rate": 4.122206199275961e-06,
|
|
"loss": 1.5986425876617432,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 1.8033175355450237,
|
|
"grad_norm": 12.370089121691372,
|
|
"learning_rate": 4.117682286852916e-06,
|
|
"loss": 1.1130129098892212,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 1.8041074249605056,
|
|
"grad_norm": 7.3337703985283325,
|
|
"learning_rate": 4.1131591198147825e-06,
|
|
"loss": 1.4250520467758179,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 1.8048973143759874,
|
|
"grad_norm": 15.555243696941096,
|
|
"learning_rate": 4.108636701982744e-06,
|
|
"loss": 1.0984294414520264,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 1.8056872037914693,
|
|
"grad_norm": 11.624250828595164,
|
|
"learning_rate": 4.104115037177354e-06,
|
|
"loss": 1.8641960620880127,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 1.8064770932069512,
|
|
"grad_norm": 8.641764907398887,
|
|
"learning_rate": 4.09959412921853e-06,
|
|
"loss": 2.202810287475586,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 1.807266982622433,
|
|
"grad_norm": 10.272414502220078,
|
|
"learning_rate": 4.0950739819255516e-06,
|
|
"loss": 1.1767101287841797,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 1.8080568720379147,
|
|
"grad_norm": 10.73523808838913,
|
|
"learning_rate": 4.090554599117053e-06,
|
|
"loss": 1.3976107835769653,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 1.8088467614533965,
|
|
"grad_norm": 9.029022789020436,
|
|
"learning_rate": 4.086035984611026e-06,
|
|
"loss": 1.4328737258911133,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 1.8096366508688784,
|
|
"grad_norm": 12.75831158267409,
|
|
"learning_rate": 4.081518142224806e-06,
|
|
"loss": 1.5775648355484009,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 1.8104265402843602,
|
|
"grad_norm": 8.797489371278768,
|
|
"learning_rate": 4.077001075775085e-06,
|
|
"loss": 1.8569629192352295,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 1.8112164296998419,
|
|
"grad_norm": 19.2818864408269,
|
|
"learning_rate": 4.072484789077894e-06,
|
|
"loss": 1.569921851158142,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 1.8120063191153237,
|
|
"grad_norm": 9.606639381333586,
|
|
"learning_rate": 4.0679692859486076e-06,
|
|
"loss": 1.5646262168884277,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 1.8127962085308056,
|
|
"grad_norm": 15.982700710777124,
|
|
"learning_rate": 4.06345457020194e-06,
|
|
"loss": 1.6539026498794556,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 1.8135860979462874,
|
|
"grad_norm": 11.758946943159149,
|
|
"learning_rate": 4.0589406456519335e-06,
|
|
"loss": 2.3593804836273193,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 1.8143759873617693,
|
|
"grad_norm": 10.648338644534252,
|
|
"learning_rate": 4.054427516111968e-06,
|
|
"loss": 1.4271035194396973,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 1.8151658767772512,
|
|
"grad_norm": 13.889260348670108,
|
|
"learning_rate": 4.049915185394751e-06,
|
|
"loss": 1.5787549018859863,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 1.815955766192733,
|
|
"grad_norm": 11.265998620973773,
|
|
"learning_rate": 4.045403657312313e-06,
|
|
"loss": 1.2817461490631104,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 1.8167456556082149,
|
|
"grad_norm": 15.757768299774062,
|
|
"learning_rate": 4.040892935676011e-06,
|
|
"loss": 1.6207523345947266,
|
|
"step": 2300
|
|
},
|
|
{
"epoch": 1.8175355450236967,
"grad_norm": 10.598445710266809,
"learning_rate": 4.036383024296515e-06,
"loss": 1.390448808670044,
"step": 2301
},
{
"epoch": 1.8183254344391786,
"grad_norm": 10.708487190144936,
"learning_rate": 4.031873926983813e-06,
"loss": 1.3507099151611328,
"step": 2302
},
{
"epoch": 1.8191153238546605,
"grad_norm": 13.04295029302765,
"learning_rate": 4.027365647547204e-06,
"loss": 1.3747905492782593,
"step": 2303
},
{
"epoch": 1.8199052132701423,
"grad_norm": 15.06243045733676,
"learning_rate": 4.0228581897953e-06,
"loss": 1.7040674686431885,
"step": 2304
},
{
"epoch": 1.8206951026856242,
"grad_norm": 8.112658972064233,
"learning_rate": 4.018351557536015e-06,
"loss": 1.1987684965133667,
"step": 2305
},
{
"epoch": 1.8214849921011058,
"grad_norm": 6.926031154500966,
"learning_rate": 4.013845754576567e-06,
"loss": 1.538601040840149,
"step": 2306
},
{
"epoch": 1.8222748815165877,
"grad_norm": 7.86014809309979,
"learning_rate": 4.0093407847234725e-06,
"loss": 1.5583560466766357,
"step": 2307
},
{
"epoch": 1.8230647709320695,
"grad_norm": 14.5747224324332,
"learning_rate": 4.004836651782545e-06,
"loss": 1.8234143257141113,
"step": 2308
},
{
"epoch": 1.8238546603475514,
"grad_norm": 13.904991074728608,
"learning_rate": 4.000333359558891e-06,
"loss": 1.4908664226531982,
"step": 2309
},
{
"epoch": 1.824644549763033,
"grad_norm": 10.205474451588882,
"learning_rate": 3.995830911856907e-06,
"loss": 1.1028133630752563,
"step": 2310
},
{
"epoch": 1.825434439178515,
"grad_norm": 10.206650398885225,
"learning_rate": 3.991329312480275e-06,
"loss": 0.9024048447608948,
"step": 2311
},
{
"epoch": 1.8262243285939967,
"grad_norm": 20.427132609768222,
"learning_rate": 3.986828565231963e-06,
"loss": 1.241244912147522,
"step": 2312
},
{
"epoch": 1.8270142180094786,
"grad_norm": 5.947375976678703,
"learning_rate": 3.982328673914215e-06,
"loss": 1.3205927610397339,
"step": 2313
},
{
"epoch": 1.8278041074249605,
"grad_norm": 11.631133599224034,
"learning_rate": 3.977829642328553e-06,
"loss": 1.45220947265625,
"step": 2314
},
{
"epoch": 1.8285939968404423,
"grad_norm": 30.658181570348717,
"learning_rate": 3.9733314742757755e-06,
"loss": 2.151265859603882,
"step": 2315
},
{
"epoch": 1.8293838862559242,
"grad_norm": 14.081842026874503,
"learning_rate": 3.96883417355595e-06,
"loss": 1.9974944591522217,
"step": 2316
},
{
"epoch": 1.830173775671406,
"grad_norm": 14.732530661028573,
"learning_rate": 3.964337743968411e-06,
"loss": 1.3346126079559326,
"step": 2317
},
{
"epoch": 1.830963665086888,
"grad_norm": 11.657064075884962,
"learning_rate": 3.959842189311756e-06,
"loss": 1.1636943817138672,
"step": 2318
},
{
"epoch": 1.8317535545023698,
"grad_norm": 21.842024915662858,
"learning_rate": 3.955347513383842e-06,
"loss": 2.715574264526367,
"step": 2319
},
{
"epoch": 1.8325434439178516,
"grad_norm": 8.945848850752133,
"learning_rate": 3.950853719981792e-06,
"loss": 1.8645424842834473,
"step": 2320
},
{
"epoch": 1.8333333333333335,
"grad_norm": 7.363041428428467,
"learning_rate": 3.946360812901973e-06,
"loss": 2.0045013427734375,
"step": 2321
},
{
"epoch": 1.8341232227488151,
"grad_norm": 14.121661847109614,
"learning_rate": 3.941868795940008e-06,
"loss": 1.6329420804977417,
"step": 2322
},
{
"epoch": 1.834913112164297,
"grad_norm": 12.076873368108538,
"learning_rate": 3.937377672890771e-06,
"loss": 1.9567821025848389,
"step": 2323
},
{
"epoch": 1.8357030015797788,
"grad_norm": 9.278188737515212,
"learning_rate": 3.932887447548373e-06,
"loss": 1.7651054859161377,
"step": 2324
},
{
"epoch": 1.8364928909952607,
"grad_norm": 7.783361269922115,
"learning_rate": 3.928398123706174e-06,
"loss": 1.2713581323623657,
"step": 2325
},
{
"epoch": 1.8372827804107423,
"grad_norm": 10.329079837290895,
"learning_rate": 3.923909705156768e-06,
"loss": 1.2148998975753784,
"step": 2326
},
{
"epoch": 1.8380726698262242,
"grad_norm": 8.376184643701944,
"learning_rate": 3.919422195691987e-06,
"loss": 1.1715630292892456,
"step": 2327
},
{
"epoch": 1.838862559241706,
"grad_norm": 9.805258629191533,
"learning_rate": 3.9149355991028955e-06,
"loss": 1.2779135704040527,
"step": 2328
},
{
"epoch": 1.839652448657188,
"grad_norm": 14.601491970830052,
"learning_rate": 3.910449919179782e-06,
"loss": 1.2939941883087158,
"step": 2329
},
{
"epoch": 1.8404423380726698,
"grad_norm": 9.430037846938918,
"learning_rate": 3.905965159712164e-06,
"loss": 1.3495118618011475,
"step": 2330
},
{
"epoch": 1.8412322274881516,
"grad_norm": 10.818117159129248,
"learning_rate": 3.901481324488781e-06,
"loss": 1.3297629356384277,
"step": 2331
},
{
"epoch": 1.8420221169036335,
"grad_norm": 11.805518524216797,
"learning_rate": 3.896998417297593e-06,
"loss": 1.4001942873001099,
"step": 2332
},
{
"epoch": 1.8428120063191153,
"grad_norm": 17.217093882822702,
"learning_rate": 3.892516441925776e-06,
"loss": 1.0463072061538696,
"step": 2333
},
{
"epoch": 1.8436018957345972,
"grad_norm": 10.365237443982673,
"learning_rate": 3.888035402159713e-06,
"loss": 0.9372630715370178,
"step": 2334
},
{
"epoch": 1.844391785150079,
"grad_norm": 14.0678966475704,
"learning_rate": 3.883555301785005e-06,
"loss": 1.43800950050354,
"step": 2335
},
{
"epoch": 1.845181674565561,
"grad_norm": 11.354835493412002,
"learning_rate": 3.879076144586455e-06,
"loss": 1.584133505821228,
"step": 2336
},
{
"epoch": 1.8459715639810428,
"grad_norm": 9.595921862598702,
"learning_rate": 3.874597934348069e-06,
"loss": 1.214270830154419,
"step": 2337
},
{
"epoch": 1.8467614533965246,
"grad_norm": 16.11156740518648,
"learning_rate": 3.870120674853053e-06,
"loss": 1.300584316253662,
"step": 2338
},
{
"epoch": 1.8475513428120063,
"grad_norm": 10.555883298615067,
"learning_rate": 3.865644369883814e-06,
"loss": 1.2705886363983154,
"step": 2339
},
{
"epoch": 1.8483412322274881,
"grad_norm": 13.361585040619424,
"learning_rate": 3.861169023221944e-06,
"loss": 1.4234365224838257,
"step": 2340
},
{
|
|
"epoch": 1.84913112164297,
|
|
"grad_norm": 8.97574341942341,
|
|
"learning_rate": 3.856694638648235e-06,
|
|
"loss": 2.0097668170928955,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 1.8499210110584519,
|
|
"grad_norm": 19.363394658374865,
|
|
"learning_rate": 3.85222121994266e-06,
|
|
"loss": 2.359208106994629,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 1.8507109004739335,
|
|
"grad_norm": 14.425059595509914,
|
|
"learning_rate": 3.847748770884376e-06,
|
|
"loss": 1.24526047706604,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 1.8515007898894154,
|
|
"grad_norm": 16.95844235432838,
|
|
"learning_rate": 3.8432772952517285e-06,
|
|
"loss": 1.2295589447021484,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 1.8522906793048972,
|
|
"grad_norm": 17.76856930002662,
|
|
"learning_rate": 3.8388067968222285e-06,
|
|
"loss": 1.4502266645431519,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 1.853080568720379,
|
|
"grad_norm": 9.344678461709814,
|
|
"learning_rate": 3.834337279372571e-06,
|
|
"loss": 1.0235224962234497,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 1.853870458135861,
|
|
"grad_norm": 7.771543247668636,
|
|
"learning_rate": 3.829868746678617e-06,
|
|
"loss": 0.9621250629425049,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 1.8546603475513428,
|
|
"grad_norm": 8.997193063535544,
|
|
"learning_rate": 3.825401202515401e-06,
|
|
"loss": 1.0980961322784424,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 1.8554502369668247,
|
|
"grad_norm": 11.396355571537933,
|
|
"learning_rate": 3.820934650657116e-06,
|
|
"loss": 1.376495599746704,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 1.8562401263823065,
|
|
"grad_norm": 12.015491646099614,
|
|
"learning_rate": 3.816469094877122e-06,
|
|
"loss": 1.1994984149932861,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 1.8570300157977884,
|
|
"grad_norm": 13.018195454364,
|
|
"learning_rate": 3.812004538947933e-06,
|
|
"loss": 1.5184617042541504,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 1.8578199052132702,
|
|
"grad_norm": 16.613834476479624,
|
|
"learning_rate": 3.807540986641221e-06,
|
|
"loss": 1.3802169561386108,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 1.858609794628752,
|
|
"grad_norm": 8.55272399415255,
|
|
"learning_rate": 3.803078441727811e-06,
|
|
"loss": 2.0620877742767334,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 1.859399684044234,
|
|
"grad_norm": 16.517593501003834,
|
|
"learning_rate": 3.7986169079776738e-06,
|
|
"loss": 1.021907925605774,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 1.8601895734597158,
|
|
"grad_norm": 12.612088978458775,
|
|
"learning_rate": 3.794156389159932e-06,
|
|
"loss": 1.9003463983535767,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 1.8609794628751974,
|
|
"grad_norm": 9.508374505836391,
|
|
"learning_rate": 3.78969688904284e-06,
|
|
"loss": 1.6055456399917603,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 1.8617693522906793,
|
|
"grad_norm": 13.25180074627195,
|
|
"learning_rate": 3.7852384113938033e-06,
|
|
"loss": 1.5654323101043701,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 1.8625592417061612,
|
|
"grad_norm": 7.960467731299903,
|
|
"learning_rate": 3.7807809599793564e-06,
|
|
"loss": 1.5566787719726562,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 1.863349131121643,
|
|
"grad_norm": 10.271738225651552,
|
|
"learning_rate": 3.7763245385651688e-06,
|
|
"loss": 0.9510334134101868,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 1.8641390205371247,
|
|
"grad_norm": 10.69224989126236,
|
|
"learning_rate": 3.771869150916041e-06,
|
|
"loss": 1.842404842376709,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 1.8649289099526065,
|
|
"grad_norm": 10.785184946501737,
|
|
"learning_rate": 3.7674148007959e-06,
|
|
"loss": 1.2416088581085205,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 1.8657187993680884,
|
|
"grad_norm": 8.752263843413806,
|
|
"learning_rate": 3.762961491967793e-06,
|
|
"loss": 2.3513264656066895,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 1.8665086887835702,
|
|
"grad_norm": 11.250525502071884,
|
|
"learning_rate": 3.7585092281938906e-06,
|
|
"loss": 1.8920762538909912,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 1.867298578199052,
|
|
"grad_norm": 8.990065479551909,
|
|
"learning_rate": 3.754058013235481e-06,
|
|
"loss": 1.415196180343628,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 1.868088467614534,
|
|
"grad_norm": 8.516130774368182,
|
|
"learning_rate": 3.7496078508529655e-06,
|
|
"loss": 1.542191743850708,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 1.8688783570300158,
|
|
"grad_norm": 9.072473337050232,
|
|
"learning_rate": 3.7451587448058553e-06,
|
|
"loss": 1.6856296062469482,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 1.8696682464454977,
|
|
"grad_norm": 12.708061491523079,
|
|
"learning_rate": 3.7407106988527687e-06,
|
|
"loss": 1.783271074295044,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 1.8704581358609795,
|
|
"grad_norm": 10.769206669854055,
|
|
"learning_rate": 3.7362637167514294e-06,
|
|
"loss": 1.5678787231445312,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 1.8712480252764614,
|
|
"grad_norm": 20.106127018365843,
|
|
"learning_rate": 3.731817802258665e-06,
|
|
"loss": 1.4182826280593872,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 1.8720379146919433,
|
|
"grad_norm": 14.613741216644682,
|
|
"learning_rate": 3.7273729591303977e-06,
|
|
"loss": 0.9464998245239258,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 1.872827804107425,
|
|
"grad_norm": 14.57353180320642,
|
|
"learning_rate": 3.7229291911216438e-06,
|
|
"loss": 1.2734510898590088,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 1.8736176935229067,
|
|
"grad_norm": 18.57604525014746,
|
|
"learning_rate": 3.718486501986517e-06,
|
|
"loss": 1.3280824422836304,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 1.8744075829383886,
|
|
"grad_norm": 10.655701944239475,
|
|
"learning_rate": 3.7140448954782118e-06,
|
|
"loss": 2.1533942222595215,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 1.8751974723538705,
|
|
"grad_norm": 10.436689103007193,
|
|
"learning_rate": 3.7096043753490135e-06,
|
|
"loss": 3.3665125370025635,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 1.8759873617693523,
|
|
"grad_norm": 11.030169703908209,
|
|
"learning_rate": 3.7051649453502874e-06,
|
|
"loss": 2.9506380558013916,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 1.876777251184834,
|
|
"grad_norm": 14.013016012791953,
|
|
"learning_rate": 3.7007266092324807e-06,
|
|
"loss": 1.3356175422668457,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 1.8775671406003158,
|
|
"grad_norm": 17.981966299258165,
|
|
"learning_rate": 3.6962893707451136e-06,
|
|
"loss": 0.6594985723495483,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 1.8783570300157977,
|
|
"grad_norm": 13.564577074220647,
|
|
"learning_rate": 3.6918532336367784e-06,
|
|
"loss": 2.0847041606903076,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 1.8791469194312795,
|
|
"grad_norm": 15.468644474324055,
|
|
"learning_rate": 3.6874182016551384e-06,
|
|
"loss": 1.3281214237213135,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 1.8799368088467614,
|
|
"grad_norm": 18.198119013305984,
|
|
"learning_rate": 3.6829842785469237e-06,
|
|
"loss": 2.035587787628174,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 1.8807266982622433,
|
|
"grad_norm": 7.022992331825169,
|
|
"learning_rate": 3.678551468057928e-06,
|
|
"loss": 1.31044602394104,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 1.8815165876777251,
|
|
"grad_norm": 11.785392947710033,
|
|
"learning_rate": 3.6741197739330036e-06,
|
|
"loss": 1.1300781965255737,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 1.882306477093207,
|
|
"grad_norm": 13.706145484472717,
|
|
"learning_rate": 3.6696891999160624e-06,
|
|
"loss": 2.2334394454956055,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 1.8830963665086888,
|
|
"grad_norm": 9.781655657096698,
|
|
"learning_rate": 3.6652597497500632e-06,
|
|
"loss": 1.2571077346801758,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 1.8838862559241707,
|
|
"grad_norm": 16.34961943071829,
|
|
"learning_rate": 3.660831427177023e-06,
|
|
"loss": 1.1918299198150635,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 1.8846761453396526,
|
|
"grad_norm": 9.661383436716706,
|
|
"learning_rate": 3.6564042359380035e-06,
|
|
"loss": 1.571300983428955,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 1.8854660347551344,
|
|
"grad_norm": 9.08149537153619,
|
|
"learning_rate": 3.651978179773109e-06,
|
|
"loss": 1.381611943244934,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 1.8862559241706163,
|
|
"grad_norm": 8.947483706023426,
|
|
"learning_rate": 3.647553262421489e-06,
|
|
"loss": 1.569331169128418,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 1.887045813586098,
|
|
"grad_norm": 15.643579814373975,
|
|
"learning_rate": 3.6431294876213256e-06,
|
|
"loss": 1.7174663543701172,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 1.8878357030015798,
|
|
"grad_norm": 11.74224852213168,
|
|
"learning_rate": 3.6387068591098394e-06,
|
|
"loss": 1.6020495891571045,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 1.8886255924170616,
|
|
"grad_norm": 17.71812535453601,
|
|
"learning_rate": 3.6342853806232803e-06,
|
|
"loss": 2.5341310501098633,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 1.8894154818325435,
|
|
"grad_norm": 19.738562508307506,
|
|
"learning_rate": 3.6298650558969297e-06,
|
|
"loss": 1.986846685409546,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 1.8902053712480251,
|
|
"grad_norm": 11.658929421377039,
|
|
"learning_rate": 3.6254458886650916e-06,
|
|
"loss": 1.847143530845642,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 1.890995260663507,
|
|
"grad_norm": 12.222377083494536,
|
|
"learning_rate": 3.6210278826610947e-06,
|
|
"loss": 1.7250714302062988,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 1.8917851500789888,
|
|
"grad_norm": 23.353812919786044,
|
|
"learning_rate": 3.6166110416172824e-06,
|
|
"loss": 1.1605632305145264,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 1.8925750394944707,
|
|
"grad_norm": 13.967775786929394,
|
|
"learning_rate": 3.612195369265016e-06,
|
|
"loss": 1.477918028831482,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 1.8933649289099526,
|
|
"grad_norm": 7.824641265572897,
|
|
"learning_rate": 3.607780869334673e-06,
|
|
"loss": 0.8736203908920288,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 1.8941548183254344,
|
|
"grad_norm": 15.896684803906586,
|
|
"learning_rate": 3.6033675455556362e-06,
|
|
"loss": 1.7004368305206299,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 1.8949447077409163,
|
|
"grad_norm": 12.602951334291856,
|
|
"learning_rate": 3.5989554016562973e-06,
|
|
"loss": 1.6466461420059204,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 1.8957345971563981,
|
|
"grad_norm": 23.183480711381492,
|
|
"learning_rate": 3.594544441364046e-06,
|
|
"loss": 2.251540184020996,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 1.89652448657188,
|
|
"grad_norm": 10.791019706405871,
|
|
"learning_rate": 3.590134668405281e-06,
|
|
"loss": 2.160658121109009,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 1.8973143759873619,
|
|
"grad_norm": 20.769430115923996,
|
|
"learning_rate": 3.5857260865053888e-06,
|
|
"loss": 2.002129077911377,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 1.8981042654028437,
|
|
"grad_norm": 11.654913849622542,
|
|
"learning_rate": 3.5813186993887564e-06,
|
|
"loss": 1.1885507106781006,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 1.8988941548183256,
|
|
"grad_norm": 11.018801992801427,
|
|
"learning_rate": 3.5769125107787615e-06,
|
|
"loss": 1.4551243782043457,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 1.8996840442338072,
|
|
"grad_norm": 13.81180119203942,
|
|
"learning_rate": 3.57250752439776e-06,
|
|
"loss": 1.8174326419830322,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 1.900473933649289,
|
|
"grad_norm": 8.538596914130633,
|
|
"learning_rate": 3.568103743967104e-06,
|
|
"loss": 2.0413808822631836,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 1.901263823064771,
|
|
"grad_norm": 13.208971367362476,
|
|
"learning_rate": 3.56370117320712e-06,
|
|
"loss": 1.130143642425537,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 1.9020537124802528,
|
|
"grad_norm": 20.133975799192914,
|
|
"learning_rate": 3.559299815837114e-06,
|
|
"loss": 2.2564547061920166,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 1.9028436018957346,
|
|
"grad_norm": 8.040080834857976,
|
|
"learning_rate": 3.5548996755753686e-06,
|
|
"loss": 1.9948700666427612,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 1.9036334913112163,
|
|
"grad_norm": 10.107125326201913,
|
|
"learning_rate": 3.550500756139137e-06,
|
|
"loss": 1.2073781490325928,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 1.9044233807266981,
|
|
"grad_norm": 12.074035310627842,
|
|
"learning_rate": 3.546103061244639e-06,
|
|
"loss": 1.4633095264434814,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 1.90521327014218,
|
|
"grad_norm": 8.748368688532468,
|
|
"learning_rate": 3.5417065946070616e-06,
|
|
"loss": 1.4983105659484863,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 1.9060031595576619,
|
|
"grad_norm": 8.119449198425269,
|
|
"learning_rate": 3.5373113599405558e-06,
|
|
"loss": 1.4577915668487549,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 1.9067930489731437,
|
|
"grad_norm": 8.602605817692487,
|
|
"learning_rate": 3.53291736095823e-06,
|
|
"loss": 1.7091364860534668,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 1.9075829383886256,
|
|
"grad_norm": 9.57318217061906,
|
|
"learning_rate": 3.52852460137215e-06,
|
|
"loss": 0.9629848003387451,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 1.9083728278041074,
|
|
"grad_norm": 12.401875854076282,
|
|
"learning_rate": 3.5241330848933297e-06,
|
|
"loss": 1.1933588981628418,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 1.9091627172195893,
|
|
"grad_norm": 19.330318737313394,
|
|
"learning_rate": 3.5197428152317405e-06,
|
|
"loss": 1.4718396663665771,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 1.9099526066350712,
|
|
"grad_norm": 10.724633948475557,
|
|
"learning_rate": 3.5153537960962953e-06,
|
|
"loss": 1.8858482837677002,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 1.910742496050553,
|
|
"grad_norm": 9.259587522429628,
|
|
"learning_rate": 3.510966031194851e-06,
|
|
"loss": 1.550106167793274,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 1.9115323854660349,
|
|
"grad_norm": 10.146358708655796,
|
|
"learning_rate": 3.5065795242342083e-06,
|
|
"loss": 1.5809369087219238,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 1.9123222748815167,
|
|
"grad_norm": 13.004346105846725,
|
|
"learning_rate": 3.5021942789201026e-06,
|
|
"loss": 1.3224411010742188,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 1.9131121642969984,
|
|
"grad_norm": 8.080997608907936,
|
|
"learning_rate": 3.4978102989572007e-06,
|
|
"loss": 1.6125473976135254,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 1.9139020537124802,
|
|
"grad_norm": 12.741289791550335,
|
|
"learning_rate": 3.4934275880491055e-06,
|
|
"loss": 1.4377524852752686,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 1.914691943127962,
|
|
"grad_norm": 7.712596175637102,
|
|
"learning_rate": 3.4890461498983436e-06,
|
|
"loss": 1.7991526126861572,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 1.915481832543444,
|
|
"grad_norm": 9.444274006754885,
|
|
"learning_rate": 3.4846659882063704e-06,
|
|
"loss": 1.237257957458496,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 1.9162717219589256,
|
|
"grad_norm": 12.543456884340403,
|
|
"learning_rate": 3.4802871066735623e-06,
|
|
"loss": 1.9164607524871826,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 1.9170616113744074,
|
|
"grad_norm": 13.74775394370082,
|
|
"learning_rate": 3.4759095089992094e-06,
|
|
"loss": 1.111218810081482,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 1.9178515007898893,
|
|
"grad_norm": 13.934197139537574,
|
|
"learning_rate": 3.4715331988815217e-06,
|
|
"loss": 1.1446185111999512,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 1.9186413902053712,
|
|
"grad_norm": 8.58653764163054,
|
|
"learning_rate": 3.4671581800176208e-06,
|
|
"loss": 1.4410523176193237,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 1.919431279620853,
|
|
"grad_norm": 13.847657929892328,
|
|
"learning_rate": 3.462784456103537e-06,
|
|
"loss": 1.9751472473144531,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 1.9202211690363349,
|
|
"grad_norm": 11.542004822189035,
|
|
"learning_rate": 3.4584120308342068e-06,
|
|
"loss": 0.9431929588317871,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 1.9210110584518167,
|
|
"grad_norm": 12.290366977682353,
|
|
"learning_rate": 3.454040907903472e-06,
|
|
"loss": 1.09321129322052,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 1.9218009478672986,
|
|
"grad_norm": 10.957843707967365,
|
|
"learning_rate": 3.4496710910040654e-06,
|
|
"loss": 2.014118194580078,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 1.9225908372827805,
|
|
"grad_norm": 30.636247581974626,
|
|
"learning_rate": 3.4453025838276283e-06,
|
|
"loss": 1.576662302017212,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 1.9233807266982623,
|
|
"grad_norm": 15.05397997539076,
|
|
"learning_rate": 3.4409353900646873e-06,
|
|
"loss": 1.2868478298187256,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 1.9241706161137442,
|
|
"grad_norm": 12.897559358848328,
|
|
"learning_rate": 3.4365695134046616e-06,
|
|
"loss": 1.1865384578704834,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 1.924960505529226,
|
|
"grad_norm": 15.253959007398343,
|
|
"learning_rate": 3.432204957535862e-06,
|
|
"loss": 1.626413106918335,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 1.925750394944708,
|
|
"grad_norm": 8.00956704186196,
|
|
"learning_rate": 3.4278417261454754e-06,
|
|
"loss": 1.630741834640503,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 1.9265402843601895,
|
|
"grad_norm": 17.812247659435748,
|
|
"learning_rate": 3.4234798229195764e-06,
|
|
"loss": 1.313347578048706,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 1.9273301737756714,
|
|
"grad_norm": 9.723288589416475,
|
|
"learning_rate": 3.4191192515431125e-06,
|
|
"loss": 1.5095974206924438,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 1.9281200631911533,
|
|
"grad_norm": 19.0000198211733,
|
|
"learning_rate": 3.414760015699913e-06,
|
|
"loss": 1.7037584781646729,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 1.9289099526066351,
|
|
"grad_norm": 7.4858175557613436,
|
|
"learning_rate": 3.410402119072671e-06,
|
|
"loss": 2.0781524181365967,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 1.9296998420221168,
|
|
"grad_norm": 9.98301676651302,
|
|
"learning_rate": 3.406045565342955e-06,
|
|
"loss": 1.491240382194519,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 1.9304897314375986,
|
|
"grad_norm": 10.15893764761394,
|
|
"learning_rate": 3.401690358191192e-06,
|
|
"loss": 1.3460967540740967,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 1.9312796208530805,
|
|
"grad_norm": 12.890492449150006,
|
|
"learning_rate": 3.397336501296679e-06,
|
|
"loss": 0.8602248430252075,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 1.9320695102685623,
|
|
"grad_norm": 12.533401638864804,
|
|
"learning_rate": 3.392983998337567e-06,
|
|
"loss": 0.8721238374710083,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 1.9328593996840442,
|
|
"grad_norm": 20.30153738152959,
|
|
"learning_rate": 3.388632852990864e-06,
|
|
"loss": 1.3083229064941406,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 1.933649289099526,
|
|
"grad_norm": 9.12776969619587,
|
|
"learning_rate": 3.3842830689324367e-06,
|
|
"loss": 1.5735386610031128,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 1.934439178515008,
|
|
"grad_norm": 20.700954625914637,
|
|
"learning_rate": 3.3799346498369895e-06,
|
|
"loss": 1.5870678424835205,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 1.9352290679304898,
|
|
"grad_norm": 9.831445546192326,
|
|
"learning_rate": 3.3755875993780862e-06,
|
|
"loss": 1.48130464553833,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 1.9360189573459716,
|
|
"grad_norm": 24.574710960838644,
|
|
"learning_rate": 3.3712419212281284e-06,
|
|
"loss": 1.7219302654266357,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 1.9368088467614535,
|
|
"grad_norm": 14.89660871560069,
|
|
"learning_rate": 3.366897619058358e-06,
|
|
"loss": 0.9834906458854675,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 1.9375987361769353,
|
|
"grad_norm": 10.712068685842123,
|
|
"learning_rate": 3.362554696538857e-06,
|
|
"loss": 2.1397347450256348,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 1.9383886255924172,
|
|
"grad_norm": 7.7667540070752485,
|
|
"learning_rate": 3.358213157338542e-06,
|
|
"loss": 1.1784471273422241,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 1.9391785150078988,
|
|
"grad_norm": 9.841175283941777,
|
|
"learning_rate": 3.3538730051251576e-06,
|
|
"loss": 1.503877878189087,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 1.9399684044233807,
|
|
"grad_norm": 12.542202389896184,
|
|
"learning_rate": 3.3495342435652777e-06,
|
|
"loss": 1.7189602851867676,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 1.9407582938388626,
|
|
"grad_norm": 11.391348598231419,
|
|
"learning_rate": 3.3451968763243046e-06,
|
|
"loss": 1.686078667640686,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 1.9415481832543444,
|
|
"grad_norm": 11.575710499478722,
|
|
"learning_rate": 3.34086090706646e-06,
|
|
"loss": 1.031868577003479,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 1.9423380726698263,
|
|
"grad_norm": 8.78322861407791,
|
|
"learning_rate": 3.3365263394547852e-06,
|
|
"loss": 1.1780157089233398,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 1.943127962085308,
|
|
"grad_norm": 12.608494943318862,
|
|
"learning_rate": 3.3321931771511363e-06,
|
|
"loss": 1.1709704399108887,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 1.9439178515007898,
|
|
"grad_norm": 12.683639129888297,
|
|
"learning_rate": 3.3278614238161818e-06,
|
|
"loss": 0.8888792991638184,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 1.9447077409162716,
|
|
"grad_norm": 11.443846674864774,
|
|
"learning_rate": 3.323531083109404e-06,
|
|
"loss": 1.3447215557098389,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 1.9454976303317535,
|
|
"grad_norm": 11.117047622365464,
|
|
"learning_rate": 3.3192021586890866e-06,
|
|
"loss": 1.329740047454834,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 1.9462875197472354,
|
|
"grad_norm": 14.02605723542626,
|
|
"learning_rate": 3.314874654212321e-06,
|
|
"loss": 1.4177271127700806,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 1.9470774091627172,
|
|
"grad_norm": 10.663308506333905,
|
|
"learning_rate": 3.3105485733349984e-06,
|
|
"loss": 1.0513715744018555,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 1.947867298578199,
|
|
"grad_norm": 12.042995971816161,
|
|
"learning_rate": 3.3062239197118027e-06,
|
|
"loss": 2.0589680671691895,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 1.948657187993681,
|
|
"grad_norm": 13.784435713074545,
|
|
"learning_rate": 3.301900696996218e-06,
|
|
"loss": 2.251110553741455,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 1.9494470774091628,
|
|
"grad_norm": 13.710270558589283,
|
|
"learning_rate": 3.297578908840515e-06,
|
|
"loss": 1.2197270393371582,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 1.9502369668246446,
|
|
"grad_norm": 10.246163702089847,
|
|
"learning_rate": 3.2932585588957565e-06,
|
|
"loss": 1.1383863687515259,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 1.9510268562401265,
|
|
"grad_norm": 12.115693006024177,
|
|
"learning_rate": 3.288939650811789e-06,
|
|
"loss": 1.6648939847946167,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 1.9518167456556084,
|
|
"grad_norm": 10.775832953436032,
|
|
"learning_rate": 3.2846221882372386e-06,
|
|
"loss": 1.417509913444519,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 1.95260663507109,
|
|
"grad_norm": 13.57822764736623,
|
|
"learning_rate": 3.280306174819511e-06,
|
|
"loss": 1.1139479875564575,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 1.9533965244865719,
|
|
"grad_norm": 14.390531948777902,
|
|
"learning_rate": 3.27599161420479e-06,
|
|
"loss": 1.3739941120147705,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 1.9541864139020537,
|
|
"grad_norm": 11.875606554603966,
|
|
"learning_rate": 3.271678510038031e-06,
|
|
"loss": 1.250982642173767,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 1.9549763033175356,
|
|
"grad_norm": 10.755099863929798,
|
|
"learning_rate": 3.2673668659629594e-06,
|
|
"loss": 1.8845748901367188,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 1.9557661927330172,
|
|
"grad_norm": 12.930480165235073,
|
|
"learning_rate": 3.2630566856220636e-06,
|
|
"loss": 1.489890456199646,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 1.956556082148499,
|
|
"grad_norm": 14.065755846735259,
|
|
"learning_rate": 3.2587479726565985e-06,
|
|
"loss": 1.648732304573059,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 1.957345971563981,
|
|
"grad_norm": 12.648739189286825,
|
|
"learning_rate": 3.2544407307065808e-06,
|
|
"loss": 1.7760826349258423,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 1.9581358609794628,
|
|
"grad_norm": 11.427887541540612,
|
|
"learning_rate": 3.2501349634107825e-06,
|
|
"loss": 0.8107354640960693,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 1.9589257503949447,
|
|
"grad_norm": 16.28308318755435,
|
|
"learning_rate": 3.245830674406728e-06,
|
|
"loss": 1.8128418922424316,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 1.9597156398104265,
|
|
"grad_norm": 13.404249494910019,
|
|
"learning_rate": 3.2415278673306984e-06,
|
|
"loss": 1.3533351421356201,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 1.9605055292259084,
|
|
"grad_norm": 26.577557541363184,
|
|
"learning_rate": 3.237226545817716e-06,
|
|
"loss": 1.6566579341888428,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 1.9612954186413902,
|
|
"grad_norm": 10.919680561043819,
|
|
"learning_rate": 3.2329267135015526e-06,
|
|
"loss": 1.7733817100524902,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 1.962085308056872,
|
|
"grad_norm": 22.667838737436107,
|
|
"learning_rate": 3.2286283740147194e-06,
|
|
"loss": 1.8271636962890625,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 1.962875197472354,
|
|
"grad_norm": 10.40163155926731,
|
|
"learning_rate": 3.2243315309884697e-06,
|
|
"loss": 1.3933049440383911,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 1.9636650868878358,
|
|
"grad_norm": 9.862448912563854,
|
|
"learning_rate": 3.2200361880527914e-06,
|
|
"loss": 2.3680734634399414,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 1.9644549763033177,
|
|
"grad_norm": 11.811513863223011,
|
|
"learning_rate": 3.2157423488364013e-06,
|
|
"loss": 1.6310195922851562,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 1.9652448657187995,
|
|
"grad_norm": 8.26150488888275,
|
|
"learning_rate": 3.2114500169667513e-06,
|
|
"loss": 1.2277730703353882,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 1.9660347551342812,
|
|
"grad_norm": 17.701718441376492,
|
|
"learning_rate": 3.207159196070015e-06,
|
|
"loss": 1.0331852436065674,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 1.966824644549763,
|
|
"grad_norm": 12.041291133088178,
|
|
"learning_rate": 3.2028698897710945e-06,
|
|
"loss": 1.8590238094329834,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 1.9676145339652449,
|
|
"grad_norm": 10.507710338817834,
|
|
"learning_rate": 3.198582101693608e-06,
|
|
"loss": 1.9933216571807861,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 1.9684044233807267,
|
|
"grad_norm": 8.15881705828358,
|
|
"learning_rate": 3.194295835459898e-06,
|
|
"loss": 1.6253411769866943,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 1.9691943127962084,
|
|
"grad_norm": 10.38474355259108,
|
|
"learning_rate": 3.1900110946910084e-06,
|
|
"loss": 1.3114337921142578,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 1.9699842022116902,
|
|
"grad_norm": 9.949738129316227,
|
|
"learning_rate": 3.1857278830067075e-06,
|
|
"loss": 1.256972074508667,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 1.970774091627172,
|
|
"grad_norm": 16.162021019463328,
|
|
"learning_rate": 3.1814462040254657e-06,
|
|
"loss": 1.8827450275421143,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 1.971563981042654,
|
|
"grad_norm": 22.149860191874843,
|
|
"learning_rate": 3.1771660613644582e-06,
|
|
"loss": 0.9394640922546387,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 1.9723538704581358,
|
|
"grad_norm": 8.539480886296017,
|
|
"learning_rate": 3.1728874586395677e-06,
|
|
"loss": 1.0433759689331055,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 1.9731437598736177,
|
|
"grad_norm": 11.67531560801483,
|
|
"learning_rate": 3.168610399465365e-06,
|
|
"loss": 0.7940645813941956,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 1.9739336492890995,
|
|
"grad_norm": 12.4425838328368,
|
|
"learning_rate": 3.1643348874551294e-06,
|
|
"loss": 1.4504199028015137,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 1.9747235387045814,
|
|
"grad_norm": 10.347989319044142,
|
|
"learning_rate": 3.1600609262208244e-06,
|
|
"loss": 1.3296544551849365,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 1.9755134281200633,
|
|
"grad_norm": 8.989622478769817,
|
|
"learning_rate": 3.1557885193731086e-06,
|
|
"loss": 1.6991509199142456,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 1.9763033175355451,
|
|
"grad_norm": 9.631551183343522,
|
|
"learning_rate": 3.151517670521325e-06,
|
|
"loss": 1.6035475730895996,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 1.977093206951027,
|
|
"grad_norm": 13.638367050748972,
|
|
"learning_rate": 3.1472483832735014e-06,
|
|
"loss": 1.8189468383789062,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 1.9778830963665088,
|
|
"grad_norm": 13.50296256744864,
|
|
"learning_rate": 3.1429806612363432e-06,
|
|
"loss": 1.6658248901367188,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 1.9786729857819905,
|
|
"grad_norm": 7.393153154801466,
|
|
"learning_rate": 3.138714508015237e-06,
|
|
"loss": 1.200107455253601,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 1.9794628751974723,
|
|
"grad_norm": 11.920956663917268,
|
|
"learning_rate": 3.1344499272142447e-06,
|
|
"loss": 0.791041910648346,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 1.9802527646129542,
|
|
"grad_norm": 17.66290191613317,
|
|
"learning_rate": 3.130186922436097e-06,
|
|
"loss": 1.5144439935684204,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 1.981042654028436,
|
|
"grad_norm": 9.017263658547026,
|
|
"learning_rate": 3.125925497282195e-06,
|
|
"loss": 1.9003779888153076,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 1.981832543443918,
|
|
"grad_norm": 16.75636258557487,
|
|
"learning_rate": 3.1216656553526026e-06,
|
|
"loss": 2.5085067749023438,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 1.9826224328593995,
|
|
"grad_norm": 14.890632169656481,
|
|
"learning_rate": 3.117407400246051e-06,
|
|
"loss": 1.7289268970489502,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 1.9834123222748814,
|
|
"grad_norm": 9.15166116321032,
|
|
"learning_rate": 3.1131507355599267e-06,
|
|
"loss": 1.61943781375885,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 1.9842022116903633,
|
|
"grad_norm": 6.859620699544047,
|
|
"learning_rate": 3.1088956648902735e-06,
|
|
"loss": 2.0489935874938965,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 1.9849921011058451,
|
|
"grad_norm": 14.799263181017864,
|
|
"learning_rate": 3.1046421918317916e-06,
|
|
"loss": 1.6825857162475586,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 1.985781990521327,
|
|
"grad_norm": 8.947142781131408,
|
|
"learning_rate": 3.1003903199778273e-06,
|
|
"loss": 1.6780674457550049,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 1.9865718799368088,
|
|
"grad_norm": 14.278954614357216,
|
|
"learning_rate": 3.096140052920376e-06,
|
|
"loss": 1.194378137588501,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 1.9873617693522907,
|
|
"grad_norm": 12.21887451937195,
|
|
"learning_rate": 3.091891394250077e-06,
|
|
"loss": 2.4973843097686768,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 1.9881516587677726,
|
|
"grad_norm": 13.216871045904774,
|
|
"learning_rate": 3.087644347556211e-06,
|
|
"loss": 1.668590784072876,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 1.9889415481832544,
|
|
"grad_norm": 14.625469207562114,
|
|
"learning_rate": 3.0833989164266974e-06,
|
|
"loss": 1.5314085483551025,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 1.9897314375987363,
|
|
"grad_norm": 11.732142027757016,
|
|
"learning_rate": 3.0791551044480926e-06,
|
|
"loss": 1.6780108213424683,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 1.9905213270142181,
|
|
"grad_norm": 9.710757091096028,
|
|
"learning_rate": 3.0749129152055795e-06,
|
|
"loss": 1.6987766027450562,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 1.9913112164297,
|
|
"grad_norm": 18.032480852526703,
|
|
"learning_rate": 3.070672352282974e-06,
|
|
"loss": 1.1783912181854248,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 1.9921011058451816,
|
|
"grad_norm": 11.162322784263365,
|
|
"learning_rate": 3.0664334192627197e-06,
|
|
"loss": 1.6210596561431885,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 1.9928909952606635,
|
|
"grad_norm": 9.526530451148624,
|
|
"learning_rate": 3.06219611972588e-06,
|
|
"loss": 0.9034061431884766,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 1.9936808846761453,
|
|
"grad_norm": 12.853469107604116,
|
|
"learning_rate": 3.0579604572521382e-06,
|
|
"loss": 1.8308205604553223,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 1.9944707740916272,
|
|
"grad_norm": 12.043619204725749,
|
|
"learning_rate": 3.0537264354198005e-06,
|
|
"loss": 1.336472749710083,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 1.9952606635071088,
|
|
"grad_norm": 24.68078723401722,
|
|
"learning_rate": 3.049494057805776e-06,
|
|
"loss": 1.3730854988098145,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 1.9960505529225907,
|
|
"grad_norm": 8.875683203972285,
|
|
"learning_rate": 3.045263327985595e-06,
|
|
"loss": 1.297802209854126,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 1.9968404423380726,
|
|
"grad_norm": 10.388495319632469,
|
|
"learning_rate": 3.0410342495333913e-06,
|
|
"loss": 1.585411548614502,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 1.9976303317535544,
|
|
"grad_norm": 9.15038105991676,
|
|
"learning_rate": 3.0368068260219054e-06,
|
|
"loss": 1.1837197542190552,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 1.9984202211690363,
|
|
"grad_norm": 10.39999784581774,
|
|
"learning_rate": 3.0325810610224783e-06,
|
|
"loss": 1.3715815544128418,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 1.9992101105845181,
|
|
"grad_norm": 20.945648961140414,
|
|
"learning_rate": 3.0283569581050486e-06,
|
|
"loss": 1.6476037502288818,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 12.576094322325524,
|
|
"learning_rate": 3.0241345208381533e-06,
|
|
"loss": 1.4700212478637695,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 2.000789889415482,
|
|
"grad_norm": 10.359676749996417,
|
|
"learning_rate": 3.01991375278892e-06,
|
|
"loss": 0.46016550064086914,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 2.0015797788309637,
|
|
"grad_norm": 10.397265684651309,
|
|
"learning_rate": 3.01569465752307e-06,
|
|
"loss": 0.43968772888183594,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 2.0023696682464456,
|
|
"grad_norm": 10.588510736324265,
|
|
"learning_rate": 3.0114772386049087e-06,
|
|
"loss": 0.40929579734802246,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 2.0031595576619274,
|
|
"grad_norm": 12.058979157630162,
|
|
"learning_rate": 3.0072614995973236e-06,
|
|
"loss": 0.6055519580841064,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 2.0039494470774093,
|
|
"grad_norm": 9.905968562829903,
|
|
"learning_rate": 3.003047444061784e-06,
|
|
"loss": 0.33923691511154175,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 2.004739336492891,
|
|
"grad_norm": 11.848016978276616,
|
|
"learning_rate": 2.998835075558342e-06,
|
|
"loss": 0.5501883029937744,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 2.005529225908373,
|
|
"grad_norm": 7.660245787743295,
|
|
"learning_rate": 2.994624397645616e-06,
|
|
"loss": 0.3189247250556946,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 2.006319115323855,
|
|
"grad_norm": 7.718904002575967,
|
|
"learning_rate": 2.990415413880803e-06,
|
|
"loss": 0.8301833868026733,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 2.0071090047393363,
|
|
"grad_norm": 7.798220693220483,
|
|
"learning_rate": 2.9862081278196685e-06,
|
|
"loss": 0.34704911708831787,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 2.007898894154818,
|
|
"grad_norm": 8.488060658104867,
|
|
"learning_rate": 2.9820025430165358e-06,
|
|
"loss": 0.8342065215110779,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 2.0086887835703,
|
|
"grad_norm": 17.260191665775476,
|
|
"learning_rate": 2.977798663024302e-06,
|
|
"loss": 0.341113805770874,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 2.009478672985782,
|
|
"grad_norm": 9.539527306419366,
|
|
"learning_rate": 2.9735964913944153e-06,
|
|
"loss": 0.7216507792472839,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 2.0102685624012637,
|
|
"grad_norm": 6.040467200683253,
|
|
"learning_rate": 2.9693960316768856e-06,
|
|
"loss": 0.5428364276885986,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 2.0110584518167456,
|
|
"grad_norm": 10.079491641898473,
|
|
"learning_rate": 2.965197287420276e-06,
|
|
"loss": 0.49245187640190125,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 2.0118483412322274,
|
|
"grad_norm": 8.736640891813208,
|
|
"learning_rate": 2.9610002621716987e-06,
|
|
"loss": 0.37076878547668457,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 2.0126382306477093,
|
|
"grad_norm": 14.117126577405678,
|
|
"learning_rate": 2.956804959476814e-06,
|
|
"loss": 0.4397502541542053,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 2.013428120063191,
|
|
"grad_norm": 7.821072536076488,
|
|
"learning_rate": 2.9526113828798266e-06,
|
|
"loss": 0.3473249673843384,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 2.014218009478673,
|
|
"grad_norm": 12.898146258975322,
|
|
"learning_rate": 2.9484195359234855e-06,
|
|
"loss": 0.507472813129425,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 2.015007898894155,
|
|
"grad_norm": 16.615044423238285,
|
|
"learning_rate": 2.9442294221490773e-06,
|
|
"loss": 0.32390978932380676,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 2.0157977883096367,
|
|
"grad_norm": 10.491110312547283,
|
|
"learning_rate": 2.940041045096423e-06,
|
|
"loss": 0.5616019368171692,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 2.0165876777251186,
|
|
"grad_norm": 17.4266369509056,
|
|
"learning_rate": 2.9358544083038772e-06,
|
|
"loss": 0.5512704849243164,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 2.0173775671406005,
|
|
"grad_norm": 9.189915300922015,
|
|
"learning_rate": 2.931669515308323e-06,
|
|
"loss": 0.5548714399337769,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 2.0181674565560823,
|
|
"grad_norm": 12.471542857964309,
|
|
"learning_rate": 2.927486369645174e-06,
|
|
"loss": 0.5579742789268494,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 2.018957345971564,
|
|
"grad_norm": 10.57651269059222,
|
|
"learning_rate": 2.9233049748483637e-06,
|
|
"loss": 0.4655217230319977,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 2.0197472353870456,
|
|
"grad_norm": 16.404149783138585,
|
|
"learning_rate": 2.9191253344503512e-06,
|
|
"loss": 0.5133844017982483,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 2.0205371248025275,
|
|
"grad_norm": 8.739917453875979,
|
|
"learning_rate": 2.9149474519821073e-06,
|
|
"loss": 0.425764799118042,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 2.0213270142180093,
|
|
"grad_norm": 12.340461413890134,
|
|
"learning_rate": 2.910771330973123e-06,
|
|
"loss": 0.31363582611083984,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 2.022116903633491,
|
|
"grad_norm": 17.15887001035681,
|
|
"learning_rate": 2.9065969749513972e-06,
|
|
"loss": 0.3946301341056824,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 2.022906793048973,
|
|
"grad_norm": 12.235209650094014,
|
|
"learning_rate": 2.9024243874434412e-06,
|
|
"loss": 0.3221214711666107,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 2.023696682464455,
|
|
"grad_norm": 12.122441612475534,
|
|
"learning_rate": 2.898253571974267e-06,
|
|
"loss": 0.7128668427467346,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 2.0244865718799367,
|
|
"grad_norm": 11.253478945540717,
|
|
"learning_rate": 2.8940845320674003e-06,
|
|
"loss": 0.7156331539154053,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 2.0252764612954186,
|
|
"grad_norm": 11.411246914531567,
|
|
"learning_rate": 2.889917271244854e-06,
|
|
"loss": 0.38757872581481934,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 2.0260663507109005,
|
|
"grad_norm": 13.55858423904863,
|
|
"learning_rate": 2.885751793027146e-06,
|
|
"loss": 1.2920098304748535,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 2.0268562401263823,
|
|
"grad_norm": 7.218573790221784,
|
|
"learning_rate": 2.8815881009332847e-06,
|
|
"loss": 0.305652379989624,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 2.027646129541864,
|
|
"grad_norm": 21.733202425060888,
|
|
"learning_rate": 2.8774261984807705e-06,
|
|
"loss": 0.5940957069396973,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 2.028436018957346,
|
|
"grad_norm": 10.902544131148884,
|
|
"learning_rate": 2.873266089185597e-06,
|
|
"loss": 0.4019826352596283,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 2.029225908372828,
|
|
"grad_norm": 12.95751723968972,
|
|
"learning_rate": 2.869107776562232e-06,
|
|
"loss": 0.9044560790061951,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 2.0300157977883098,
|
|
"grad_norm": 12.066912492836503,
|
|
"learning_rate": 2.864951264123635e-06,
|
|
"loss": 0.9884500503540039,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 2.0308056872037916,
|
|
"grad_norm": 9.279060734945645,
|
|
"learning_rate": 2.860796555381238e-06,
|
|
"loss": 0.21672052145004272,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 2.0315955766192735,
|
|
"grad_norm": 12.515738686148927,
|
|
"learning_rate": 2.8566436538449583e-06,
|
|
"loss": 0.5464432239532471,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 2.0323854660347553,
|
|
"grad_norm": 5.072237401183682,
|
|
"learning_rate": 2.8524925630231774e-06,
|
|
"loss": 0.12266440689563751,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 2.0331753554502368,
|
|
"grad_norm": 13.049508774102216,
|
|
"learning_rate": 2.8483432864227533e-06,
|
|
"loss": 0.5199903249740601,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 2.0339652448657186,
|
|
"grad_norm": 26.009573976811115,
|
|
"learning_rate": 2.8441958275490044e-06,
|
|
"loss": 1.3708068132400513,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 2.0347551342812005,
|
|
"grad_norm": 8.306114337004733,
|
|
"learning_rate": 2.8400501899057164e-06,
|
|
"loss": 0.41701173782348633,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 2.0355450236966823,
|
|
"grad_norm": 19.310700207769223,
|
|
"learning_rate": 2.8359063769951435e-06,
|
|
"loss": 0.48340296745300293,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 2.036334913112164,
|
|
"grad_norm": 9.685038803587986,
|
|
"learning_rate": 2.8317643923179894e-06,
|
|
"loss": 0.2897825539112091,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 2.037124802527646,
|
|
"grad_norm": 7.494546906971376,
|
|
"learning_rate": 2.827624239373419e-06,
|
|
"loss": 0.3327743411064148,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 2.037914691943128,
|
|
"grad_norm": 14.776433298746518,
|
|
"learning_rate": 2.8234859216590406e-06,
|
|
"loss": 0.30218467116355896,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 2.0387045813586098,
|
|
"grad_norm": 9.947771139586356,
|
|
"learning_rate": 2.8193494426709245e-06,
|
|
"loss": 0.2601761519908905,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 2.0394944707740916,
|
|
"grad_norm": 14.37459477886007,
|
|
"learning_rate": 2.8152148059035804e-06,
|
|
"loss": 2.110447645187378,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 2.0402843601895735,
|
|
"grad_norm": 10.93265288740834,
|
|
"learning_rate": 2.811082014849963e-06,
|
|
"loss": 0.6686077117919922,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 2.0410742496050553,
|
|
"grad_norm": 10.24553214669215,
|
|
"learning_rate": 2.806951073001467e-06,
|
|
"loss": 0.33964627981185913,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 2.041864139020537,
|
|
"grad_norm": 10.359426539370531,
|
|
"learning_rate": 2.8028219838479265e-06,
|
|
"loss": 0.2946935296058655,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 2.042654028436019,
|
|
"grad_norm": 7.743470398158407,
|
|
"learning_rate": 2.798694750877609e-06,
|
|
"loss": 0.5350501537322998,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 2.043443917851501,
|
|
"grad_norm": 11.20913313781443,
|
|
"learning_rate": 2.7945693775772153e-06,
|
|
"loss": 0.3125535547733307,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 2.044233807266983,
|
|
"grad_norm": 8.18102147636904,
|
|
"learning_rate": 2.7904458674318733e-06,
|
|
"loss": 0.5411368012428284,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 2.0450236966824646,
|
|
"grad_norm": 9.35774408306311,
|
|
"learning_rate": 2.7863242239251385e-06,
|
|
"loss": 0.33201736211776733,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 2.0458135860979465,
|
|
"grad_norm": 8.673142738839285,
|
|
"learning_rate": 2.78220445053899e-06,
|
|
"loss": 0.4620051085948944,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 2.046603475513428,
|
|
"grad_norm": 9.55716346640722,
|
|
"learning_rate": 2.7780865507538236e-06,
|
|
"loss": 0.44175124168395996,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 2.0473933649289098,
|
|
"grad_norm": 9.80538220230876,
|
|
"learning_rate": 2.7739705280484565e-06,
|
|
"loss": 0.23179033398628235,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 2.0481832543443916,
|
|
"grad_norm": 7.778339221842325,
|
|
"learning_rate": 2.769856385900118e-06,
|
|
"loss": 0.2992667555809021,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 2.0489731437598735,
|
|
"grad_norm": 10.15304744757696,
|
|
"learning_rate": 2.7657441277844475e-06,
|
|
"loss": 0.8387447595596313,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 2.0497630331753554,
|
|
"grad_norm": 22.187673641215007,
|
|
"learning_rate": 2.7616337571754937e-06,
|
|
"loss": 0.6397115588188171,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 2.050552922590837,
|
|
"grad_norm": 11.077698692839348,
|
|
"learning_rate": 2.7575252775457175e-06,
|
|
"loss": 0.4148407280445099,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 2.051342812006319,
|
|
"grad_norm": 11.214710142878744,
|
|
"learning_rate": 2.753418692365968e-06,
|
|
"loss": 0.3338342607021332,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 2.052132701421801,
|
|
"grad_norm": 24.053873668111414,
|
|
"learning_rate": 2.7493140051055055e-06,
|
|
"loss": 0.19720637798309326,
|
|
"step": 2598
|
|
},
|
|
{
"epoch": 2.052922590837283,
"grad_norm": 7.360243899896904,
"learning_rate": 2.7452112192319813e-06,
"loss": 0.17611664533615112,
"step": 2599
},
{
"epoch": 2.0537124802527646,
"grad_norm": 7.902969436001227,
"learning_rate": 2.741110338211446e-06,
"loss": 0.18759757280349731,
"step": 2600
},
{
"epoch": 2.0545023696682465,
"grad_norm": 11.102771052686371,
"learning_rate": 2.7370113655083373e-06,
"loss": 0.5418599247932434,
"step": 2601
},
{
"epoch": 2.0552922590837284,
"grad_norm": 10.890805356373017,
"learning_rate": 2.732914304585478e-06,
"loss": 0.4259986877441406,
"step": 2602
},
{
"epoch": 2.0560821484992102,
"grad_norm": 11.586458157042319,
"learning_rate": 2.728819158904078e-06,
"loss": 0.36362117528915405,
"step": 2603
},
{
"epoch": 2.056872037914692,
"grad_norm": 18.320864664521476,
"learning_rate": 2.7247259319237306e-06,
"loss": 0.939771294593811,
"step": 2604
},
{
"epoch": 2.057661927330174,
"grad_norm": 11.501402001297102,
"learning_rate": 2.7206346271024103e-06,
"loss": 0.2737082839012146,
"step": 2605
},
{
"epoch": 2.058451816745656,
"grad_norm": 12.125160137594307,
"learning_rate": 2.716545247896465e-06,
"loss": 0.7849869728088379,
"step": 2606
},
{
"epoch": 2.0592417061611377,
"grad_norm": 11.093511362641284,
"learning_rate": 2.7124577977606114e-06,
"loss": 0.47068697214126587,
"step": 2607
},
{
"epoch": 2.060031595576619,
"grad_norm": 6.735093477058937,
"learning_rate": 2.7083722801479407e-06,
"loss": 0.30919548869132996,
"step": 2608
},
{
"epoch": 2.060821484992101,
"grad_norm": 11.012490104989881,
"learning_rate": 2.704288698509917e-06,
"loss": 0.5182478427886963,
"step": 2609
},
{
"epoch": 2.061611374407583,
"grad_norm": 19.951181172455126,
"learning_rate": 2.70020705629636e-06,
"loss": 0.30237090587615967,
"step": 2610
},
{
"epoch": 2.0624012638230647,
"grad_norm": 7.174756319853717,
"learning_rate": 2.696127356955455e-06,
"loss": 0.2660676836967468,
"step": 2611
},
{
"epoch": 2.0631911532385465,
"grad_norm": 12.385922369030384,
"learning_rate": 2.6920496039337474e-06,
"loss": 0.3617851734161377,
"step": 2612
},
{
"epoch": 2.0639810426540284,
"grad_norm": 11.015014488490415,
"learning_rate": 2.6879738006761303e-06,
"loss": 0.46704721450805664,
"step": 2613
},
{
"epoch": 2.0647709320695102,
"grad_norm": 16.919341385806266,
"learning_rate": 2.6838999506258623e-06,
"loss": 0.6928585767745972,
"step": 2614
},
{
"epoch": 2.065560821484992,
"grad_norm": 11.230526922983666,
"learning_rate": 2.6798280572245427e-06,
"loss": 0.47172248363494873,
"step": 2615
},
{
"epoch": 2.066350710900474,
"grad_norm": 6.544914274412647,
"learning_rate": 2.6757581239121203e-06,
"loss": 0.27590444684028625,
"step": 2616
},
{
"epoch": 2.067140600315956,
"grad_norm": 20.085203153128194,
"learning_rate": 2.671690154126889e-06,
"loss": 1.725602388381958,
"step": 2617
},
{
"epoch": 2.0679304897314377,
"grad_norm": 9.954561309572316,
"learning_rate": 2.667624151305482e-06,
"loss": 0.34551358222961426,
"step": 2618
},
{
"epoch": 2.0687203791469195,
"grad_norm": 9.55670352614984,
"learning_rate": 2.6635601188828736e-06,
"loss": 0.2692929804325104,
"step": 2619
},
{
"epoch": 2.0695102685624014,
"grad_norm": 19.36634128513723,
"learning_rate": 2.6594980602923702e-06,
"loss": 0.4668102264404297,
"step": 2620
},
{
"epoch": 2.0703001579778832,
"grad_norm": 10.139253556415792,
"learning_rate": 2.6554379789656124e-06,
"loss": 0.4466455578804016,
"step": 2621
},
{
"epoch": 2.071090047393365,
"grad_norm": 10.412158390774838,
"learning_rate": 2.6513798783325708e-06,
"loss": 0.3185625970363617,
"step": 2622
},
{
"epoch": 2.071879936808847,
"grad_norm": 16.193194717671236,
"learning_rate": 2.647323761821542e-06,
"loss": 1.1391959190368652,
"step": 2623
},
{
"epoch": 2.0726698262243284,
"grad_norm": 10.665895924665014,
"learning_rate": 2.643269632859146e-06,
"loss": 0.5236232876777649,
"step": 2624
},
{
"epoch": 2.0734597156398102,
"grad_norm": 9.152693760808571,
"learning_rate": 2.6392174948703253e-06,
"loss": 0.38468989729881287,
"step": 2625
},
{
"epoch": 2.074249605055292,
"grad_norm": 10.476888206104105,
"learning_rate": 2.635167351278339e-06,
"loss": 0.21812653541564941,
"step": 2626
},
{
"epoch": 2.075039494470774,
"grad_norm": 8.26594520989118,
"learning_rate": 2.63111920550476e-06,
"loss": 0.5767203569412231,
"step": 2627
},
{
"epoch": 2.075829383886256,
"grad_norm": 10.011641691561126,
"learning_rate": 2.62707306096948e-06,
"loss": 0.5115009546279907,
"step": 2628
},
{
"epoch": 2.0766192733017377,
"grad_norm": 9.073068879094404,
"learning_rate": 2.6230289210906904e-06,
"loss": 0.4996326267719269,
"step": 2629
},
{
"epoch": 2.0774091627172195,
"grad_norm": 11.435973854776599,
"learning_rate": 2.6189867892848962e-06,
"loss": 1.0042023658752441,
"step": 2630
},
{
"epoch": 2.0781990521327014,
"grad_norm": 16.103230479782546,
"learning_rate": 2.614946668966902e-06,
"loss": 0.5340977907180786,
"step": 2631
},
{
"epoch": 2.0789889415481833,
"grad_norm": 10.298294605139441,
"learning_rate": 2.610908563549815e-06,
"loss": 0.23495090007781982,
"step": 2632
},
{
"epoch": 2.079778830963665,
"grad_norm": 12.126262761621527,
"learning_rate": 2.606872476445042e-06,
"loss": 0.4852793216705322,
"step": 2633
},
{
"epoch": 2.080568720379147,
"grad_norm": 14.89375330624192,
"learning_rate": 2.602838411062284e-06,
"loss": 0.42270925641059875,
"step": 2634
},
{
"epoch": 2.081358609794629,
"grad_norm": 8.811379648381822,
"learning_rate": 2.598806370809528e-06,
"loss": 0.3555320203304291,
"step": 2635
},
{
"epoch": 2.0821484992101107,
"grad_norm": 9.627230128604296,
"learning_rate": 2.5947763590930542e-06,
"loss": 0.7813281416893005,
"step": 2636
},
{
"epoch": 2.0829383886255926,
"grad_norm": 12.811410790915238,
"learning_rate": 2.590748379317436e-06,
"loss": 0.5171550512313843,
"step": 2637
},
{
"epoch": 2.0837282780410744,
"grad_norm": 6.703660941737119,
"learning_rate": 2.586722434885519e-06,
"loss": 0.4919162392616272,
"step": 2638
},
{
"epoch": 2.0845181674565563,
"grad_norm": 8.360573561210712,
"learning_rate": 2.582698529198439e-06,
"loss": 0.28129732608795166,
"step": 2639
},
{
"epoch": 2.085308056872038,
"grad_norm": 10.528148772142709,
"learning_rate": 2.5786766656555996e-06,
"loss": 0.28879594802856445,
"step": 2640
},
{
"epoch": 2.0860979462875195,
"grad_norm": 9.079673356688698,
"learning_rate": 2.574656847654684e-06,
"loss": 0.5426896810531616,
"step": 2641
},
{
"epoch": 2.0868878357030014,
"grad_norm": 9.340991792914485,
"learning_rate": 2.5706390785916526e-06,
"loss": 0.7707650661468506,
"step": 2642
},
{
"epoch": 2.0876777251184833,
"grad_norm": 9.244986743162158,
"learning_rate": 2.5666233618607274e-06,
"loss": 0.34384623169898987,
"step": 2643
},
{
"epoch": 2.088467614533965,
"grad_norm": 16.744652694879548,
"learning_rate": 2.5626097008543995e-06,
"loss": 0.5586552619934082,
"step": 2644
},
{
"epoch": 2.089257503949447,
"grad_norm": 9.45648755694814,
"learning_rate": 2.5585980989634217e-06,
"loss": 0.20669318735599518,
"step": 2645
},
{
"epoch": 2.090047393364929,
"grad_norm": 10.927788414106475,
"learning_rate": 2.55458855957681e-06,
"loss": 0.3616315424442291,
"step": 2646
},
{
"epoch": 2.0908372827804107,
"grad_norm": 10.887569307564323,
"learning_rate": 2.5505810860818356e-06,
"loss": 0.4344330132007599,
"step": 2647
},
{
"epoch": 2.0916271721958926,
"grad_norm": 7.716338584985328,
"learning_rate": 2.5465756818640258e-06,
"loss": 0.7404396533966064,
"step": 2648
},
{
"epoch": 2.0924170616113744,
"grad_norm": 8.682244564927794,
"learning_rate": 2.5425723503071586e-06,
"loss": 0.7206960320472717,
"step": 2649
},
{
"epoch": 2.0932069510268563,
"grad_norm": 10.426071122202922,
"learning_rate": 2.538571094793263e-06,
"loss": 0.4914172887802124,
"step": 2650
},
{
"epoch": 2.093996840442338,
"grad_norm": 10.818446184348494,
"learning_rate": 2.534571918702611e-06,
"loss": 0.22247469425201416,
"step": 2651
},
{
"epoch": 2.09478672985782,
"grad_norm": 8.183773295547795,
"learning_rate": 2.530574825413722e-06,
"loss": 0.5524343252182007,
"step": 2652
},
{
"epoch": 2.095576619273302,
"grad_norm": 10.561001834128774,
"learning_rate": 2.526579818303351e-06,
"loss": 0.4442840814590454,
"step": 2653
},
{
"epoch": 2.0963665086887837,
"grad_norm": 12.36354051019539,
"learning_rate": 2.5225869007464953e-06,
"loss": 0.43416035175323486,
"step": 2654
},
{
"epoch": 2.0971563981042656,
"grad_norm": 10.720335557119547,
"learning_rate": 2.5185960761163816e-06,
"loss": 0.3992769718170166,
"step": 2655
},
{
"epoch": 2.0979462875197474,
"grad_norm": 12.529038317941579,
"learning_rate": 2.514607347784478e-06,
"loss": 0.5805951356887817,
"step": 2656
},
{
"epoch": 2.098736176935229,
"grad_norm": 8.702799415503215,
"learning_rate": 2.510620719120469e-06,
"loss": 0.4916655123233795,
"step": 2657
},
{
"epoch": 2.0995260663507107,
"grad_norm": 14.453659414269685,
"learning_rate": 2.5066361934922745e-06,
"loss": 0.3664616346359253,
"step": 2658
},
{
"epoch": 2.1003159557661926,
"grad_norm": 9.593268812077245,
"learning_rate": 2.502653774266034e-06,
"loss": 1.0257253646850586,
"step": 2659
},
{
"epoch": 2.1011058451816744,
"grad_norm": 10.7133353823578,
"learning_rate": 2.4986734648061066e-06,
"loss": 0.3108058273792267,
"step": 2660
},
{
"epoch": 2.1018957345971563,
"grad_norm": 9.400571761002238,
"learning_rate": 2.4946952684750773e-06,
"loss": 0.6172835826873779,
"step": 2661
},
{
"epoch": 2.102685624012638,
"grad_norm": 13.17142044952775,
"learning_rate": 2.4907191886337334e-06,
"loss": 0.7078754305839539,
"step": 2662
},
{
"epoch": 2.10347551342812,
"grad_norm": 9.62188121387961,
"learning_rate": 2.4867452286410815e-06,
"loss": 0.8794481754302979,
"step": 2663
},
{
"epoch": 2.104265402843602,
"grad_norm": 8.644663424593888,
"learning_rate": 2.482773391854335e-06,
"loss": 0.3982178866863251,
"step": 2664
},
{
|
|
"epoch": 2.1050552922590837,
|
|
"grad_norm": 8.326165450957228,
|
|
"learning_rate": 2.4788036816289177e-06,
|
|
"loss": 0.2630547881126404,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 2.1058451816745656,
|
|
"grad_norm": 8.952882981363153,
|
|
"learning_rate": 2.474836101318453e-06,
|
|
"loss": 0.5207113027572632,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 2.1066350710900474,
|
|
"grad_norm": 7.954230630151849,
|
|
"learning_rate": 2.470870654274767e-06,
|
|
"loss": 0.6573061943054199,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 2.1074249605055293,
|
|
"grad_norm": 9.631811512663736,
|
|
"learning_rate": 2.4669073438478784e-06,
|
|
"loss": 0.29487311840057373,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 2.108214849921011,
|
|
"grad_norm": 10.558058540717864,
|
|
"learning_rate": 2.4629461733860055e-06,
|
|
"loss": 0.24810953438282013,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 2.109004739336493,
|
|
"grad_norm": 7.799037056656612,
|
|
"learning_rate": 2.458987146235562e-06,
|
|
"loss": 0.20641303062438965,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 2.109794628751975,
|
|
"grad_norm": 9.478790048879866,
|
|
"learning_rate": 2.4550302657411447e-06,
|
|
"loss": 0.3360731303691864,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 2.1105845181674567,
|
|
"grad_norm": 6.220059265575532,
|
|
"learning_rate": 2.4510755352455413e-06,
|
|
"loss": 0.5347034931182861,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 2.1113744075829386,
|
|
"grad_norm": 12.272978606568286,
|
|
"learning_rate": 2.4471229580897155e-06,
|
|
"loss": 0.33499157428741455,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 2.11216429699842,
|
|
"grad_norm": 11.71613295930798,
|
|
"learning_rate": 2.443172537612823e-06,
|
|
"loss": 0.810235321521759,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 2.112954186413902,
|
|
"grad_norm": 10.155734244037252,
|
|
"learning_rate": 2.43922427715219e-06,
|
|
"loss": 0.7765663862228394,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 2.1137440758293837,
|
|
"grad_norm": 8.684157709345978,
|
|
"learning_rate": 2.4352781800433213e-06,
|
|
"loss": 0.2663138508796692,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 2.1145339652448656,
|
|
"grad_norm": 7.269900003866313,
|
|
"learning_rate": 2.431334249619891e-06,
|
|
"loss": 0.8140415549278259,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 2.1153238546603474,
|
|
"grad_norm": 11.872895668793475,
|
|
"learning_rate": 2.427392489213745e-06,
|
|
"loss": 0.7136765718460083,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 2.1161137440758293,
|
|
"grad_norm": 15.205862969567574,
|
|
"learning_rate": 2.4234529021548963e-06,
|
|
"loss": 0.581038236618042,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 2.116903633491311,
|
|
"grad_norm": 10.329194968162666,
|
|
"learning_rate": 2.4195154917715202e-06,
|
|
"loss": 0.313004732131958,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 2.117693522906793,
|
|
"grad_norm": 7.375552046117448,
|
|
"learning_rate": 2.4155802613899537e-06,
|
|
"loss": 0.41316336393356323,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 2.118483412322275,
|
|
"grad_norm": 12.51452481692,
|
|
"learning_rate": 2.4116472143346935e-06,
|
|
"loss": 1.1459236145019531,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 2.1192733017377567,
|
|
"grad_norm": 8.014729129098054,
|
|
"learning_rate": 2.40771635392839e-06,
|
|
"loss": 0.10683616995811462,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 2.1200631911532386,
|
|
"grad_norm": 16.715819051395663,
|
|
"learning_rate": 2.4037876834918467e-06,
|
|
"loss": 0.8776874542236328,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 2.1208530805687205,
|
|
"grad_norm": 15.222031604067872,
|
|
"learning_rate": 2.3998612063440175e-06,
|
|
"loss": 0.549107551574707,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 2.1216429699842023,
|
|
"grad_norm": 11.067349309197576,
|
|
"learning_rate": 2.3959369258020036e-06,
|
|
"loss": 0.3277229368686676,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 2.122432859399684,
|
|
"grad_norm": 10.422204153009583,
|
|
"learning_rate": 2.3920148451810504e-06,
|
|
"loss": 0.3309401869773865,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 2.123222748815166,
|
|
"grad_norm": 11.728679388557909,
|
|
"learning_rate": 2.3880949677945437e-06,
|
|
"loss": 0.6130886673927307,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 2.124012638230648,
|
|
"grad_norm": 7.010036521611688,
|
|
"learning_rate": 2.3841772969540088e-06,
|
|
"loss": 0.18529269099235535,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 2.1248025276461293,
|
|
"grad_norm": 21.13853477472658,
|
|
"learning_rate": 2.380261835969108e-06,
|
|
"loss": 1.7113615274429321,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 2.125592417061611,
|
|
"grad_norm": 11.881636165836428,
|
|
"learning_rate": 2.3763485881476345e-06,
|
|
"loss": 0.254605233669281,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 2.126382306477093,
|
|
"grad_norm": 8.99366986984141,
|
|
"learning_rate": 2.3724375567955115e-06,
|
|
"loss": 0.17952272295951843,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 2.127172195892575,
|
|
"grad_norm": 12.677526063207466,
|
|
"learning_rate": 2.368528745216795e-06,
|
|
"loss": 0.47902315855026245,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 2.1279620853080567,
|
|
"grad_norm": 17.055248079539123,
|
|
"learning_rate": 2.3646221567136618e-06,
|
|
"loss": 0.36781108379364014,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 2.1287519747235386,
|
|
"grad_norm": 12.63653018866412,
|
|
"learning_rate": 2.3607177945864073e-06,
|
|
"loss": 0.3109586834907532,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 2.1295418641390205,
|
|
"grad_norm": 7.61269588995347,
|
|
"learning_rate": 2.3568156621334508e-06,
|
|
"loss": 0.5835099220275879,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 2.1303317535545023,
|
|
"grad_norm": 9.309063388608303,
|
|
"learning_rate": 2.352915762651325e-06,
|
|
"loss": 0.6755543947219849,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 2.131121642969984,
|
|
"grad_norm": 12.180295054957952,
|
|
"learning_rate": 2.3490180994346816e-06,
|
|
"loss": 0.3916603624820709,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 2.131911532385466,
|
|
"grad_norm": 11.375922907331335,
|
|
"learning_rate": 2.3451226757762794e-06,
|
|
"loss": 0.3473902940750122,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 2.132701421800948,
|
|
"grad_norm": 11.141196729822012,
|
|
"learning_rate": 2.341229494966981e-06,
|
|
"loss": 0.48169010877609253,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 2.1334913112164298,
|
|
"grad_norm": 11.044250444422778,
|
|
"learning_rate": 2.3373385602957595e-06,
|
|
"loss": 0.5068016052246094,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 2.1342812006319116,
|
|
"grad_norm": 14.599024263747504,
|
|
"learning_rate": 2.3334498750496913e-06,
|
|
"loss": 0.5701960325241089,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 2.1350710900473935,
|
|
"grad_norm": 8.48416563722893,
|
|
"learning_rate": 2.3295634425139502e-06,
|
|
"loss": 0.7108640074729919,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 2.1358609794628753,
|
|
"grad_norm": 8.64074974867785,
|
|
"learning_rate": 2.3256792659718065e-06,
|
|
"loss": 0.5362042188644409,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 2.136650868878357,
|
|
"grad_norm": 13.030143439241625,
|
|
"learning_rate": 2.321797348704625e-06,
|
|
"loss": 0.6571926474571228,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 2.137440758293839,
|
|
"grad_norm": 11.69730983416275,
|
|
"learning_rate": 2.317917693991863e-06,
|
|
"loss": 0.4899098575115204,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 2.138230647709321,
|
|
"grad_norm": 14.65236719972843,
|
|
"learning_rate": 2.314040305111065e-06,
|
|
"loss": 1.4374269247055054,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 2.1390205371248023,
|
|
"grad_norm": 11.277026389499401,
|
|
"learning_rate": 2.310165185337862e-06,
|
|
"loss": 0.40772897005081177,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 2.139810426540284,
|
|
"grad_norm": 9.022193712896843,
|
|
"learning_rate": 2.3062923379459684e-06,
|
|
"loss": 0.2784253656864166,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 2.140600315955766,
|
|
"grad_norm": 9.676192450422299,
|
|
"learning_rate": 2.302421766207177e-06,
|
|
"loss": 0.34623268246650696,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 2.141390205371248,
|
|
"grad_norm": 8.652801319265459,
|
|
"learning_rate": 2.2985534733913623e-06,
|
|
"loss": 0.33302298188209534,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 2.1421800947867298,
|
|
"grad_norm": 8.990062562090557,
|
|
"learning_rate": 2.2946874627664677e-06,
|
|
"loss": 0.44455865025520325,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 2.1429699842022116,
|
|
"grad_norm": 21.04889344511468,
|
|
"learning_rate": 2.2908237375985137e-06,
|
|
"loss": 0.5782222747802734,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 2.1437598736176935,
|
|
"grad_norm": 9.050405036026557,
|
|
"learning_rate": 2.2869623011515874e-06,
|
|
"loss": 0.3401952385902405,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 2.1445497630331753,
|
|
"grad_norm": 15.891303126899643,
|
|
"learning_rate": 2.283103156687843e-06,
|
|
"loss": 0.5884231925010681,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 2.145339652448657,
|
|
"grad_norm": 10.482443238163478,
|
|
"learning_rate": 2.2792463074674987e-06,
|
|
"loss": 0.6347147226333618,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 2.146129541864139,
|
|
"grad_norm": 8.647383536000094,
|
|
"learning_rate": 2.275391756748833e-06,
|
|
"loss": 0.5545493960380554,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 2.146919431279621,
|
|
"grad_norm": 12.912399269663396,
|
|
"learning_rate": 2.2715395077881837e-06,
|
|
"loss": 0.639219343662262,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 2.147709320695103,
|
|
"grad_norm": 14.471210640975691,
|
|
"learning_rate": 2.2676895638399427e-06,
|
|
"loss": 0.5747156143188477,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 2.1484992101105846,
|
|
"grad_norm": 10.784235307725265,
|
|
"learning_rate": 2.2638419281565536e-06,
|
|
"loss": 1.0131480693817139,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 2.1492890995260665,
|
|
"grad_norm": 8.991887236619839,
|
|
"learning_rate": 2.259996603988518e-06,
|
|
"loss": 0.1766696721315384,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 2.1500789889415484,
|
|
"grad_norm": 8.434251974603205,
|
|
"learning_rate": 2.256153594584372e-06,
|
|
"loss": 0.4502110481262207,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 2.1508688783570302,
|
|
"grad_norm": 9.15347582657087,
|
|
"learning_rate": 2.2523129031907047e-06,
|
|
"loss": 0.17326998710632324,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 2.1516587677725116,
|
|
"grad_norm": 10.132042927830476,
|
|
"learning_rate": 2.248474533052145e-06,
|
|
"loss": 0.32104817032814026,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 2.1524486571879935,
|
|
"grad_norm": 22.126808261308526,
|
|
"learning_rate": 2.2446384874113586e-06,
|
|
"loss": 0.28873202204704285,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 2.1532385466034754,
|
|
"grad_norm": 9.874113810387273,
|
|
"learning_rate": 2.2408047695090533e-06,
|
|
"loss": 0.9483802318572998,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 2.154028436018957,
|
|
"grad_norm": 8.399149585433827,
|
|
"learning_rate": 2.2369733825839663e-06,
|
|
"loss": 0.4678645133972168,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 2.154818325434439,
|
|
"grad_norm": 8.149774853170221,
|
|
"learning_rate": 2.233144329872863e-06,
|
|
"loss": 0.4194965660572052,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 2.155608214849921,
|
|
"grad_norm": 10.007897566804706,
|
|
"learning_rate": 2.229317614610539e-06,
|
|
"loss": 0.18562570214271545,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 2.156398104265403,
|
|
"grad_norm": 11.593570889438942,
|
|
"learning_rate": 2.2254932400298195e-06,
|
|
"loss": 0.2921253442764282,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 2.1571879936808847,
|
|
"grad_norm": 9.592986733727226,
|
|
"learning_rate": 2.2216712093615474e-06,
|
|
"loss": 0.5794805288314819,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 2.1579778830963665,
|
|
"grad_norm": 18.709687315043503,
|
|
"learning_rate": 2.2178515258345885e-06,
|
|
"loss": 0.7258075475692749,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 2.1587677725118484,
|
|
"grad_norm": 15.272801270549794,
|
|
"learning_rate": 2.2140341926758186e-06,
|
|
"loss": 0.622929036617279,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 2.1595576619273302,
|
|
"grad_norm": 12.017018764788721,
|
|
"learning_rate": 2.2102192131101386e-06,
|
|
"loss": 0.656425952911377,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 2.160347551342812,
|
|
"grad_norm": 13.837307556949694,
|
|
"learning_rate": 2.2064065903604537e-06,
|
|
"loss": 0.3320290744304657,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 2.161137440758294,
|
|
"grad_norm": 9.400730803116534,
|
|
"learning_rate": 2.2025963276476814e-06,
|
|
"loss": 0.3217647075653076,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 2.161927330173776,
|
|
"grad_norm": 13.942726496241551,
|
|
"learning_rate": 2.1987884281907425e-06,
|
|
"loss": 0.5152993202209473,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 2.1627172195892577,
|
|
"grad_norm": 9.822554670458867,
|
|
"learning_rate": 2.1949828952065643e-06,
|
|
"loss": 0.23020845651626587,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 2.1635071090047395,
|
|
"grad_norm": 12.584615682614533,
|
|
"learning_rate": 2.191179731910073e-06,
|
|
"loss": 0.4029275178909302,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 2.1642969984202214,
|
|
"grad_norm": 11.17103259387213,
|
|
"learning_rate": 2.1873789415141932e-06,
|
|
"loss": 0.23068757355213165,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 2.165086887835703,
|
|
"grad_norm": 12.389468057157949,
|
|
"learning_rate": 2.183580527229846e-06,
|
|
"loss": 0.39349129796028137,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 2.1658767772511847,
|
|
"grad_norm": 8.11758263289566,
|
|
"learning_rate": 2.1797844922659437e-06,
|
|
"loss": 0.2747986912727356,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 2.1666666666666665,
|
|
"grad_norm": 12.530738023381977,
|
|
"learning_rate": 2.1759908398293896e-06,
|
|
"loss": 1.14316987991333,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 2.1674565560821484,
|
|
"grad_norm": 14.915166154269375,
|
|
"learning_rate": 2.1721995731250726e-06,
|
|
"loss": 0.517180323600769,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 2.1682464454976302,
|
|
"grad_norm": 13.701952179268636,
|
|
"learning_rate": 2.1684106953558693e-06,
|
|
"loss": 0.25364458560943604,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 2.169036334913112,
|
|
"grad_norm": 9.011375942635011,
|
|
"learning_rate": 2.1646242097226343e-06,
|
|
"loss": 0.34542322158813477,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 2.169826224328594,
|
|
"grad_norm": 9.781858990537309,
|
|
"learning_rate": 2.1608401194242035e-06,
|
|
"loss": 0.24171094596385956,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 2.170616113744076,
|
|
"grad_norm": 8.982304890802853,
|
|
"learning_rate": 2.1570584276573896e-06,
|
|
"loss": 0.609094500541687,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 2.1714060031595577,
|
|
"grad_norm": 9.10557044009763,
|
|
"learning_rate": 2.1532791376169778e-06,
|
|
"loss": 0.2869632840156555,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 2.1721958925750395,
|
|
"grad_norm": 9.939923288953379,
|
|
"learning_rate": 2.1495022524957244e-06,
|
|
"loss": 0.39264094829559326,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 2.1729857819905214,
|
|
"grad_norm": 9.585598670064135,
|
|
"learning_rate": 2.1457277754843547e-06,
|
|
"loss": 0.18833236396312714,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 2.1737756714060033,
|
|
"grad_norm": 11.521955780403779,
|
|
"learning_rate": 2.14195570977156e-06,
|
|
"loss": 0.9987523555755615,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 2.174565560821485,
|
|
"grad_norm": 19.37585048155451,
|
|
"learning_rate": 2.1381860585439906e-06,
|
|
"loss": 0.46828562021255493,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 2.175355450236967,
|
|
"grad_norm": 11.086157538641421,
|
|
"learning_rate": 2.134418824986267e-06,
|
|
"loss": 0.4600408673286438,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 2.176145339652449,
|
|
"grad_norm": 7.742526132754247,
|
|
"learning_rate": 2.1306540122809543e-06,
|
|
"loss": 0.27102628350257874,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 2.1769352290679307,
|
|
"grad_norm": 19.82779184404181,
|
|
"learning_rate": 2.1268916236085814e-06,
|
|
"loss": 0.5771945714950562,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 2.177725118483412,
|
|
"grad_norm": 14.081906680861074,
|
|
"learning_rate": 2.1231316621476234e-06,
|
|
"loss": 0.3828513026237488,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 2.178515007898894,
|
|
"grad_norm": 12.677665354894604,
|
|
"learning_rate": 2.1193741310745125e-06,
|
|
"loss": 1.0411120653152466,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 2.179304897314376,
|
|
"grad_norm": 8.072309116801922,
|
|
"learning_rate": 2.115619033563624e-06,
|
|
"loss": 0.1945279985666275,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 2.1800947867298577,
|
|
"grad_norm": 13.107958264184125,
|
|
"learning_rate": 2.111866372787272e-06,
|
|
"loss": 0.32704418897628784,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 2.1808846761453395,
|
|
"grad_norm": 10.8975500647945,
|
|
"learning_rate": 2.1081161519157168e-06,
|
|
"loss": 0.4165365695953369,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 2.1816745655608214,
|
|
"grad_norm": 15.891308461676624,
|
|
"learning_rate": 2.104368374117161e-06,
|
|
"loss": 0.7583081722259521,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 2.1824644549763033,
|
|
"grad_norm": 12.34192529563348,
|
|
"learning_rate": 2.100623042557739e-06,
|
|
"loss": 0.39596104621887207,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 2.183254344391785,
|
|
"grad_norm": 12.790970766784817,
|
|
"learning_rate": 2.0968801604015176e-06,
|
|
"loss": 0.33872219920158386,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 2.184044233807267,
|
|
"grad_norm": 8.883238538058585,
|
|
"learning_rate": 2.0931397308104986e-06,
|
|
"loss": 0.48357391357421875,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 2.184834123222749,
|
|
"grad_norm": 10.033364618014366,
|
|
"learning_rate": 2.0894017569446034e-06,
|
|
"loss": 0.2329304814338684,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 2.1856240126382307,
|
|
"grad_norm": 10.963546856917345,
|
|
"learning_rate": 2.0856662419616908e-06,
|
|
"loss": 0.38120099902153015,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 2.1864139020537126,
|
|
"grad_norm": 8.659173313868209,
|
|
"learning_rate": 2.081933189017533e-06,
|
|
"loss": 0.18225795030593872,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 2.1872037914691944,
|
|
"grad_norm": 12.579078453532555,
|
|
"learning_rate": 2.0782026012658268e-06,
|
|
"loss": 0.6677108407020569,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 2.1879936808846763,
|
|
"grad_norm": 10.496831262975576,
|
|
"learning_rate": 2.074474481858185e-06,
|
|
"loss": 0.7098184823989868,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 2.188783570300158,
|
|
"grad_norm": 15.314620074905251,
|
|
"learning_rate": 2.0707488339441338e-06,
|
|
"loss": 0.8517345786094666,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 2.18957345971564,
|
|
"grad_norm": 11.322801114701715,
|
|
"learning_rate": 2.067025660671114e-06,
|
|
"loss": 0.15628886222839355,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 2.190363349131122,
|
|
"grad_norm": 10.857872805242897,
|
|
"learning_rate": 2.0633049651844744e-06,
|
|
"loss": 0.23775914311408997,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 2.1911532385466037,
|
|
"grad_norm": 17.367672664117908,
|
|
"learning_rate": 2.0595867506274707e-06,
|
|
"loss": 1.0561261177062988,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 2.191943127962085,
|
|
"grad_norm": 10.613741381941095,
|
|
"learning_rate": 2.055871020141263e-06,
|
|
"loss": 0.7364283204078674,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 2.192733017377567,
|
|
"grad_norm": 11.141296214016752,
|
|
"learning_rate": 2.052157776864912e-06,
|
|
"loss": 0.5597008466720581,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 2.193522906793049,
|
|
"grad_norm": 16.153450357978947,
|
|
"learning_rate": 2.0484470239353786e-06,
|
|
"loss": 0.48830780386924744,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 2.1943127962085307,
|
|
"grad_norm": 13.351249645259788,
|
|
"learning_rate": 2.044738764487519e-06,
|
|
"loss": 1.164239764213562,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 2.1951026856240126,
|
|
"grad_norm": 9.298702632023925,
|
|
"learning_rate": 2.0410330016540824e-06,
|
|
"loss": 0.6914669275283813,
|
|
"step": 2779
|
|
},
|
|
{
|
|
"epoch": 2.1958925750394944,
|
|
"grad_norm": 19.26030750116404,
|
|
"learning_rate": 2.0373297385657105e-06,
|
|
"loss": 0.9622442722320557,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 2.1966824644549763,
|
|
"grad_norm": 10.373277943662284,
|
|
"learning_rate": 2.0336289783509306e-06,
|
|
"loss": 0.2636718153953552,
|
|
"step": 2781
|
|
},
|
|
{
|
|
"epoch": 2.197472353870458,
|
|
"grad_norm": 13.914270193422176,
|
|
"learning_rate": 2.0299307241361587e-06,
|
|
"loss": 0.5753885507583618,
|
|
"step": 2782
|
|
},
|
|
{
|
|
"epoch": 2.19826224328594,
|
|
"grad_norm": 11.117981102297946,
|
|
"learning_rate": 2.0262349790456908e-06,
|
|
"loss": 0.3335786461830139,
|
|
"step": 2783
|
|
},
|
|
{
|
|
"epoch": 2.199052132701422,
|
|
"grad_norm": 12.558104305781397,
|
|
"learning_rate": 2.0225417462017054e-06,
|
|
"loss": 0.348050594329834,
|
|
"step": 2784
|
|
},
|
|
{
|
|
"epoch": 2.1998420221169037,
|
|
"grad_norm": 11.355102467965308,
|
|
"learning_rate": 2.0188510287242564e-06,
|
|
"loss": 1.1008378267288208,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 2.2006319115323856,
|
|
"grad_norm": 7.680728739198105,
|
|
"learning_rate": 2.0151628297312765e-06,
|
|
"loss": 0.573356568813324,
|
|
"step": 2786
|
|
},
|
|
{
|
|
"epoch": 2.2014218009478674,
|
|
"grad_norm": 7.8089582056318525,
|
|
"learning_rate": 2.0114771523385682e-06,
|
|
"loss": 0.12702372670173645,
|
|
"step": 2787
|
|
},
|
|
{
|
|
"epoch": 2.2022116903633493,
|
|
"grad_norm": 9.904120419329733,
|
|
"learning_rate": 2.0077939996598023e-06,
|
|
"loss": 0.33679264783859253,
|
|
"step": 2788
|
|
},
|
|
{
|
|
"epoch": 2.203001579778831,
|
|
"grad_norm": 9.182229422297102,
|
|
"learning_rate": 2.004113374806516e-06,
|
|
"loss": 0.6996742486953735,
|
|
"step": 2789
|
|
},
|
|
{
|
|
"epoch": 2.2037914691943126,
|
|
"grad_norm": 19.940382025619883,
|
|
"learning_rate": 2.0004352808881183e-06,
|
|
"loss": 0.4126805067062378,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 2.2045813586097944,
|
|
"grad_norm": 8.37724044555483,
|
|
"learning_rate": 1.996759721011873e-06,
|
|
"loss": 0.43272721767425537,
|
|
"step": 2791
|
|
},
|
|
{
|
|
"epoch": 2.2053712480252763,
|
|
"grad_norm": 10.772306642802901,
|
|
"learning_rate": 1.9930866982829067e-06,
|
|
"loss": 0.31847819685935974,
|
|
"step": 2792
|
|
},
|
|
{
|
|
"epoch": 2.206161137440758,
|
|
"grad_norm": 8.767557779912506,
|
|
"learning_rate": 1.9894162158041972e-06,
|
|
"loss": 0.3612692654132843,
|
|
"step": 2793
|
|
},
|
|
{
|
|
"epoch": 2.20695102685624,
|
|
"grad_norm": 7.606502171835215,
|
|
"learning_rate": 1.9857482766765812e-06,
|
|
"loss": 0.21649795770645142,
|
|
"step": 2794
|
|
},
|
|
{
|
|
"epoch": 2.207740916271722,
|
|
"grad_norm": 13.255090811561212,
|
|
"learning_rate": 1.9820828839987483e-06,
|
|
"loss": 0.3744838535785675,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 2.2085308056872037,
|
|
"grad_norm": 8.919411850780316,
|
|
"learning_rate": 1.9784200408672332e-06,
|
|
"loss": 0.26096653938293457,
|
|
"step": 2796
|
|
},
|
|
{
|
|
"epoch": 2.2093206951026856,
|
|
"grad_norm": 9.147600079448718,
|
|
"learning_rate": 1.9747597503764177e-06,
|
|
"loss": 0.47567954659461975,
|
|
"step": 2797
|
|
},
|
|
{
|
|
"epoch": 2.2101105845181674,
|
|
"grad_norm": 9.819631053417764,
|
|
"learning_rate": 1.9711020156185266e-06,
|
|
"loss": 0.4913148880004883,
|
|
"step": 2798
|
|
},
|
|
{
|
|
"epoch": 2.2109004739336493,
|
|
"grad_norm": 11.328664528684907,
|
|
"learning_rate": 1.9674468396836273e-06,
|
|
"loss": 0.6570562720298767,
|
|
"step": 2799
|
|
},
|
|
{
|
|
"epoch": 2.211690363349131,
|
|
"grad_norm": 8.021273430434308,
|
|
"learning_rate": 1.963794225659624e-06,
|
|
"loss": 0.5904330015182495,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 2.212480252764613,
|
|
"grad_norm": 8.57411699896354,
|
|
"learning_rate": 1.960144176632257e-06,
|
|
"loss": 0.6471877098083496,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 2.213270142180095,
|
|
"grad_norm": 11.950981978395122,
|
|
"learning_rate": 1.9564966956850995e-06,
|
|
"loss": 0.5829299688339233,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 2.2140600315955767,
|
|
"grad_norm": 8.451163149947735,
|
|
"learning_rate": 1.952851785899556e-06,
|
|
"loss": 0.35751497745513916,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 2.2148499210110586,
|
|
"grad_norm": 10.138877718616586,
|
|
"learning_rate": 1.949209450354858e-06,
|
|
"loss": 0.4311722218990326,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 2.2156398104265405,
|
|
"grad_norm": 12.683217627573496,
|
|
"learning_rate": 1.9455696921280627e-06,
|
|
"loss": 0.7441365718841553,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 2.2164296998420223,
|
|
"grad_norm": 11.694728091873205,
|
|
"learning_rate": 1.941932514294049e-06,
|
|
"loss": 0.49206316471099854,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 2.217219589257504,
|
|
"grad_norm": 11.855236520924457,
|
|
"learning_rate": 1.938297919925518e-06,
|
|
"loss": 0.5097864866256714,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 2.2180094786729856,
|
|
"grad_norm": 12.318836475278742,
|
|
"learning_rate": 1.9346659120929867e-06,
|
|
"loss": 0.6787229776382446,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 2.2187993680884674,
|
|
"grad_norm": 11.391591651407479,
|
|
"learning_rate": 1.9310364938647863e-06,
|
|
"loss": 0.3987847864627838,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 2.2195892575039493,
|
|
"grad_norm": 10.502147605835487,
|
|
"learning_rate": 1.9274096683070625e-06,
|
|
"loss": 0.5808389782905579,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 2.220379146919431,
|
|
"grad_norm": 15.889506259074903,
|
|
"learning_rate": 1.9237854384837685e-06,
|
|
"loss": 0.6458317041397095,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 2.221169036334913,
|
|
"grad_norm": 10.72441475075481,
|
|
"learning_rate": 1.920163807456666e-06,
|
|
"loss": 0.22764013707637787,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 2.221958925750395,
|
|
"grad_norm": 9.406804023326472,
|
|
"learning_rate": 1.916544778285319e-06,
|
|
"loss": 0.3760148286819458,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 2.2227488151658767,
|
|
"grad_norm": 12.999087321799392,
|
|
"learning_rate": 1.912928354027101e-06,
|
|
"loss": 0.9230769872665405,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 2.2235387045813586,
|
|
"grad_norm": 5.261146266935435,
|
|
"learning_rate": 1.9093145377371734e-06,
|
|
"loss": 0.22567349672317505,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 2.2243285939968405,
|
|
"grad_norm": 9.418952716249674,
|
|
"learning_rate": 1.9057033324685014e-06,
|
|
"loss": 0.3293178677558899,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 2.2251184834123223,
|
|
"grad_norm": 7.627242194894803,
|
|
"learning_rate": 1.9020947412718428e-06,
|
|
"loss": 0.3232775926589966,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 2.225908372827804,
|
|
"grad_norm": 6.888057264096037,
|
|
"learning_rate": 1.8984887671957492e-06,
|
|
"loss": 0.3350941240787506,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 2.226698262243286,
|
|
"grad_norm": 9.593533038856789,
|
|
"learning_rate": 1.8948854132865601e-06,
|
|
"loss": 0.4311079978942871,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 2.227488151658768,
|
|
"grad_norm": 13.454745411900346,
|
|
"learning_rate": 1.8912846825883973e-06,
|
|
"loss": 1.0597552061080933,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 2.2282780410742498,
|
|
"grad_norm": 11.058976812584511,
|
|
"learning_rate": 1.8876865781431714e-06,
|
|
"loss": 0.6466134786605835,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 2.2290679304897316,
|
|
"grad_norm": 9.06033016129208,
|
|
"learning_rate": 1.8840911029905718e-06,
|
|
"loss": 0.2965662479400635,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 2.229857819905213,
|
|
"grad_norm": 10.351708737498239,
|
|
"learning_rate": 1.8804982601680705e-06,
|
|
"loss": 0.367464542388916,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 2.230647709320695,
|
|
"grad_norm": 9.6500211642715,
|
|
"learning_rate": 1.8769080527109113e-06,
|
|
"loss": 0.48031240701675415,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 2.2314375987361768,
|
|
"grad_norm": 10.833034804804822,
|
|
"learning_rate": 1.8733204836521156e-06,
|
|
"loss": 0.40878814458847046,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 2.2322274881516586,
|
|
"grad_norm": 7.26374801360143,
|
|
"learning_rate": 1.8697355560224679e-06,
|
|
"loss": 0.5350329279899597,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 2.2330173775671405,
|
|
"grad_norm": 7.132708625630138,
|
|
"learning_rate": 1.8661532728505315e-06,
|
|
"loss": 0.12886342406272888,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 2.2338072669826223,
|
|
"grad_norm": 17.58232884596969,
|
|
"learning_rate": 1.8625736371626307e-06,
|
|
"loss": 0.37391525506973267,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 2.234597156398104,
|
|
"grad_norm": 9.783002261543379,
|
|
"learning_rate": 1.8589966519828523e-06,
|
|
"loss": 0.47310611605644226,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 2.235387045813586,
|
|
"grad_norm": 6.751501626334738,
|
|
"learning_rate": 1.8554223203330473e-06,
|
|
"loss": 0.15924470126628876,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 2.236176935229068,
|
|
"grad_norm": 11.510963325216071,
|
|
"learning_rate": 1.8518506452328182e-06,
|
|
"loss": 0.589087188243866,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 2.2369668246445498,
|
|
"grad_norm": 11.300997192020064,
|
|
"learning_rate": 1.848281629699532e-06,
|
|
"loss": 0.29100021719932556,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 2.2377567140600316,
|
|
"grad_norm": 11.201992942679986,
|
|
"learning_rate": 1.8447152767483039e-06,
|
|
"loss": 0.4221479296684265,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 2.2385466034755135,
|
|
"grad_norm": 13.129885124330743,
|
|
"learning_rate": 1.8411515893920002e-06,
|
|
"loss": 0.5831780433654785,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 2.2393364928909953,
|
|
"grad_norm": 8.6407107464604,
|
|
"learning_rate": 1.8375905706412362e-06,
|
|
"loss": 0.1891106814146042,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 2.240126382306477,
|
|
"grad_norm": 11.674843768645822,
|
|
"learning_rate": 1.834032223504371e-06,
|
|
"loss": 0.7293417453765869,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 2.240916271721959,
|
|
"grad_norm": 13.500251067265657,
|
|
"learning_rate": 1.83047655098751e-06,
|
|
"loss": 0.6112027168273926,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 2.241706161137441,
|
|
"grad_norm": 10.949569000417599,
|
|
"learning_rate": 1.8269235560944954e-06,
|
|
"loss": 0.7212045192718506,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 2.242496050552923,
|
|
"grad_norm": 15.306834691971645,
|
|
"learning_rate": 1.823373241826909e-06,
|
|
"loss": 0.3620745539665222,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 2.2432859399684046,
|
|
"grad_norm": 11.812488548682289,
|
|
"learning_rate": 1.819825611184069e-06,
|
|
"loss": 0.6836791038513184,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 2.244075829383886,
|
|
"grad_norm": 12.12932684339931,
|
|
"learning_rate": 1.8162806671630252e-06,
|
|
"loss": 0.6339531540870667,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 2.244865718799368,
|
|
"grad_norm": 17.039607010297708,
|
|
"learning_rate": 1.8127384127585574e-06,
|
|
"loss": 0.6890298128128052,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 2.2456556082148498,
|
|
"grad_norm": 9.659644016427855,
|
|
"learning_rate": 1.8091988509631741e-06,
|
|
"loss": 0.7656424641609192,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 2.2464454976303316,
|
|
"grad_norm": 11.125303849970079,
|
|
"learning_rate": 1.8056619847671087e-06,
|
|
"loss": 0.4176521897315979,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 2.2472353870458135,
|
|
"grad_norm": 10.295484759083562,
|
|
"learning_rate": 1.8021278171583163e-06,
|
|
"loss": 0.4512486457824707,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 2.2480252764612954,
|
|
"grad_norm": 14.130813395934341,
|
|
"learning_rate": 1.7985963511224757e-06,
|
|
"loss": 0.2794567346572876,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 2.248815165876777,
|
|
"grad_norm": 7.986427853036559,
|
|
"learning_rate": 1.7950675896429815e-06,
|
|
"loss": 0.5515426993370056,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 2.249605055292259,
|
|
"grad_norm": 10.914377369413538,
|
|
"learning_rate": 1.7915415357009398e-06,
|
|
"loss": 0.5858911275863647,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 2.250394944707741,
|
|
"grad_norm": 9.057658354905428,
|
|
"learning_rate": 1.7880181922751743e-06,
|
|
"loss": 0.4116283357143402,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 2.251184834123223,
|
|
"grad_norm": 7.630261787099066,
|
|
"learning_rate": 1.7844975623422151e-06,
|
|
"loss": 0.11056342720985413,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 2.2519747235387046,
|
|
"grad_norm": 10.503693991312414,
|
|
"learning_rate": 1.7809796488763058e-06,
|
|
"loss": 0.3872639834880829,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 2.2527646129541865,
|
|
"grad_norm": 12.035802077065455,
|
|
"learning_rate": 1.7774644548493908e-06,
|
|
"loss": 0.6129223108291626,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 2.2535545023696684,
|
|
"grad_norm": 9.46607219708113,
|
|
"learning_rate": 1.7739519832311147e-06,
|
|
"loss": 0.7763924598693848,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 2.2543443917851502,
|
|
"grad_norm": 6.681381059967548,
|
|
"learning_rate": 1.770442236988824e-06,
|
|
"loss": 0.14335396885871887,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 2.255134281200632,
|
|
"grad_norm": 18.45511369704355,
|
|
"learning_rate": 1.7669352190875687e-06,
|
|
"loss": 0.8193086385726929,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 2.2559241706161135,
|
|
"grad_norm": 13.554836834181051,
|
|
"learning_rate": 1.7634309324900872e-06,
|
|
"loss": 0.43514060974121094,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 2.2567140600315954,
|
|
"grad_norm": 7.454792364779305,
|
|
"learning_rate": 1.7599293801568114e-06,
|
|
"loss": 0.2814521789550781,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 2.257503949447077,
|
|
"grad_norm": 10.368313203853804,
|
|
"learning_rate": 1.7564305650458662e-06,
|
|
"loss": 0.41086602210998535,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 2.258293838862559,
|
|
"grad_norm": 13.204982194940747,
|
|
"learning_rate": 1.7529344901130579e-06,
|
|
"loss": 0.8798295259475708,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 2.259083728278041,
|
|
"grad_norm": 7.7989794475969925,
|
|
"learning_rate": 1.749441158311886e-06,
|
|
"loss": 0.5868876576423645,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 2.259873617693523,
|
|
"grad_norm": 11.119432491809754,
|
|
"learning_rate": 1.7459505725935282e-06,
|
|
"loss": 0.737459123134613,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 2.2606635071090047,
|
|
"grad_norm": 8.550440775085379,
|
|
"learning_rate": 1.7424627359068418e-06,
|
|
"loss": 0.5273802280426025,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 2.2614533965244865,
|
|
"grad_norm": 17.591370161982017,
|
|
"learning_rate": 1.738977651198363e-06,
|
|
"loss": 0.33475935459136963,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 2.2622432859399684,
|
|
"grad_norm": 18.24717692489189,
|
|
"learning_rate": 1.7354953214123033e-06,
|
|
"loss": 0.6660683751106262,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 2.2630331753554502,
|
|
"grad_norm": 18.92375097618639,
|
|
"learning_rate": 1.732015749490546e-06,
|
|
"loss": 0.5045152902603149,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 2.263823064770932,
|
|
"grad_norm": 11.503325523793645,
|
|
"learning_rate": 1.7285389383726448e-06,
|
|
"loss": 0.4045574367046356,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 2.264612954186414,
|
|
"grad_norm": 6.49265889702697,
|
|
"learning_rate": 1.7250648909958218e-06,
|
|
"loss": 0.15654566884040833,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 2.265402843601896,
|
|
"grad_norm": 21.090448949983475,
|
|
"learning_rate": 1.7215936102949626e-06,
|
|
"loss": 0.48715391755104065,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 2.2661927330173777,
|
|
"grad_norm": 16.416122390825365,
|
|
"learning_rate": 1.7181250992026177e-06,
|
|
"loss": 0.43794485926628113,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 2.2669826224328595,
|
|
"grad_norm": 9.994750391150157,
|
|
"learning_rate": 1.7146593606489958e-06,
|
|
"loss": 0.24812400341033936,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 2.2677725118483414,
|
|
"grad_norm": 12.269825935627136,
|
|
"learning_rate": 1.7111963975619644e-06,
|
|
"loss": 0.3631921410560608,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 2.2685624012638232,
|
|
"grad_norm": 9.202619856134907,
|
|
"learning_rate": 1.7077362128670471e-06,
|
|
"loss": 0.402981162071228,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 2.269352290679305,
|
|
"grad_norm": 15.450749001959284,
|
|
"learning_rate": 1.7042788094874162e-06,
|
|
"loss": 0.4597552418708801,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 2.270142180094787,
|
|
"grad_norm": 14.523705053291398,
|
|
"learning_rate": 1.7008241903439032e-06,
|
|
"loss": 0.8005387783050537,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 2.2709320695102684,
|
|
"grad_norm": 15.590177412312109,
|
|
"learning_rate": 1.6973723583549773e-06,
|
|
"loss": 0.9203214645385742,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 2.2717219589257502,
|
|
"grad_norm": 10.511158290122776,
|
|
"learning_rate": 1.693923316436758e-06,
|
|
"loss": 0.4327046573162079,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 2.272511848341232,
|
|
"grad_norm": 8.438221638698291,
|
|
"learning_rate": 1.690477067503008e-06,
|
|
"loss": 0.5756062269210815,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 2.273301737756714,
|
|
"grad_norm": 10.785045606484534,
|
|
"learning_rate": 1.6870336144651279e-06,
|
|
"loss": 0.4346384108066559,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 2.274091627172196,
|
|
"grad_norm": 23.099471736106224,
|
|
"learning_rate": 1.6835929602321632e-06,
|
|
"loss": 1.036636233329773,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 2.2748815165876777,
|
|
"grad_norm": 9.814952316628542,
|
|
"learning_rate": 1.6801551077107846e-06,
|
|
"loss": 0.35150665044784546,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 2.2756714060031595,
|
|
"grad_norm": 8.217909844120667,
|
|
"learning_rate": 1.676720059805304e-06,
|
|
"loss": 0.39682185649871826,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 2.2764612954186414,
|
|
"grad_norm": 10.897341078417234,
|
|
"learning_rate": 1.6732878194176583e-06,
|
|
"loss": 0.7163082361221313,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 2.2772511848341233,
|
|
"grad_norm": 9.957341822543633,
|
|
"learning_rate": 1.6698583894474191e-06,
|
|
"loss": 0.30133479833602905,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 2.278041074249605,
|
|
"grad_norm": 13.035107378741438,
|
|
"learning_rate": 1.6664317727917783e-06,
|
|
"loss": 0.35737180709838867,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 2.278830963665087,
|
|
"grad_norm": 7.354777374068423,
|
|
"learning_rate": 1.6630079723455555e-06,
|
|
"loss": 0.44452938437461853,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 2.279620853080569,
|
|
"grad_norm": 15.178155573408157,
|
|
"learning_rate": 1.6595869910011847e-06,
|
|
"loss": 0.4684889614582062,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 2.2804107424960507,
|
|
"grad_norm": 13.101785923414612,
|
|
"learning_rate": 1.6561688316487218e-06,
|
|
"loss": 0.5309923887252808,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 2.2812006319115326,
|
|
"grad_norm": 18.864692186332565,
|
|
"learning_rate": 1.652753497175843e-06,
|
|
"loss": 0.44382137060165405,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 2.2819905213270144,
|
|
"grad_norm": 11.720692846924454,
|
|
"learning_rate": 1.649340990467832e-06,
|
|
"loss": 0.5143547058105469,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 2.282780410742496,
|
|
"grad_norm": 7.264023340638442,
|
|
"learning_rate": 1.6459313144075879e-06,
|
|
"loss": 0.5263423323631287,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 2.2835703001579777,
|
|
"grad_norm": 10.926449502075325,
|
|
"learning_rate": 1.642524471875611e-06,
|
|
"loss": 0.39519965648651123,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 2.2843601895734595,
|
|
"grad_norm": 6.753518451586659,
|
|
"learning_rate": 1.6391204657500175e-06,
|
|
"loss": 0.2852955758571625,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 2.2851500789889414,
|
|
"grad_norm": 17.425208641245916,
|
|
"learning_rate": 1.6357192989065224e-06,
|
|
"loss": 0.6628504991531372,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 2.2859399684044233,
|
|
"grad_norm": 8.821318692301833,
|
|
"learning_rate": 1.632320974218442e-06,
|
|
"loss": 0.3219633102416992,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 2.286729857819905,
|
|
"grad_norm": 9.0269356351622,
|
|
"learning_rate": 1.6289254945566924e-06,
|
|
"loss": 0.4846106171607971,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 2.287519747235387,
|
|
"grad_norm": 13.754083021538255,
|
|
"learning_rate": 1.625532862789786e-06,
|
|
"loss": 0.6081640720367432,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 2.288309636650869,
|
|
"grad_norm": 6.98421162588746,
|
|
"learning_rate": 1.622143081783829e-06,
|
|
"loss": 0.3637077510356903,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 2.2890995260663507,
|
|
"grad_norm": 8.83511177282514,
|
|
"learning_rate": 1.6187561544025198e-06,
|
|
"loss": 0.9372393488883972,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 2.2898894154818326,
|
|
"grad_norm": 14.596564979975735,
|
|
"learning_rate": 1.6153720835071456e-06,
|
|
"loss": 0.5858408212661743,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 2.2906793048973144,
|
|
"grad_norm": 8.954576114923759,
|
|
"learning_rate": 1.6119908719565808e-06,
|
|
"loss": 0.6942223310470581,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 2.2914691943127963,
|
|
"grad_norm": 4.994809407224409,
|
|
"learning_rate": 1.6086125226072841e-06,
|
|
"loss": 0.10130809992551804,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 2.292259083728278,
|
|
"grad_norm": 13.657220031311276,
|
|
"learning_rate": 1.6052370383132959e-06,
|
|
"loss": 0.8644411563873291,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 2.29304897314376,
|
|
"grad_norm": 8.017431818886724,
|
|
"learning_rate": 1.601864421926237e-06,
|
|
"loss": 0.2293100655078888,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 2.293838862559242,
|
|
"grad_norm": 8.496986168821188,
|
|
"learning_rate": 1.5984946762953047e-06,
|
|
"loss": 0.29841262102127075,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 2.2946287519747237,
|
|
"grad_norm": 11.503821635042993,
|
|
"learning_rate": 1.5951278042672703e-06,
|
|
"loss": 0.2735556662082672,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 2.2954186413902056,
|
|
"grad_norm": 12.231463222308765,
|
|
"learning_rate": 1.5917638086864778e-06,
|
|
"loss": 0.22586968541145325,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 2.2962085308056874,
|
|
"grad_norm": 15.045555109566415,
|
|
"learning_rate": 1.5884026923948465e-06,
|
|
"loss": 0.6831101179122925,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 2.296998420221169,
|
|
"grad_norm": 9.987448369916713,
|
|
"learning_rate": 1.585044458231853e-06,
|
|
"loss": 0.4950708746910095,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 2.2977883096366507,
|
|
"grad_norm": 12.921655770502397,
|
|
"learning_rate": 1.5816891090345465e-06,
|
|
"loss": 0.4142148494720459,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 2.2985781990521326,
|
|
"grad_norm": 11.818337429892228,
|
|
"learning_rate": 1.5783366476375355e-06,
|
|
"loss": 0.7360016703605652,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 2.2993680884676144,
|
|
"grad_norm": 11.946060281453493,
|
|
"learning_rate": 1.5749870768729942e-06,
|
|
"loss": 0.43478429317474365,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 2.3001579778830963,
|
|
"grad_norm": 12.943633330633444,
|
|
"learning_rate": 1.5716403995706504e-06,
|
|
"loss": 0.3442850112915039,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 2.300947867298578,
|
|
"grad_norm": 13.412914057941535,
|
|
"learning_rate": 1.5682966185577846e-06,
|
|
"loss": 0.4648604989051819,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 2.30173775671406,
|
|
"grad_norm": 7.870032588866806,
|
|
"learning_rate": 1.564955736659236e-06,
|
|
"loss": 0.25151491165161133,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 2.302527646129542,
|
|
"grad_norm": 12.706146933959786,
|
|
"learning_rate": 1.561617756697391e-06,
|
|
"loss": 0.7445260882377625,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 2.3033175355450237,
|
|
"grad_norm": 14.267775682697732,
|
|
"learning_rate": 1.5582826814921898e-06,
|
|
"loss": 0.4260343313217163,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 2.3041074249605056,
|
|
"grad_norm": 12.34278465900189,
|
|
"learning_rate": 1.5549505138611126e-06,
|
|
"loss": 0.27285605669021606,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 2.3048973143759874,
|
|
"grad_norm": 8.37869237695817,
|
|
"learning_rate": 1.5516212566191874e-06,
|
|
"loss": 0.24186520278453827,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 2.3056872037914693,
|
|
"grad_norm": 16.50332542092082,
|
|
"learning_rate": 1.5482949125789765e-06,
|
|
"loss": 0.7350625395774841,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 2.306477093206951,
|
|
"grad_norm": 9.451218272417687,
|
|
"learning_rate": 1.5449714845505919e-06,
|
|
"loss": 0.48754340410232544,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 2.307266982622433,
|
|
"grad_norm": 8.596522267263161,
|
|
"learning_rate": 1.5416509753416742e-06,
|
|
"loss": 0.4277498126029968,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 2.308056872037915,
|
|
"grad_norm": 9.543021614361946,
|
|
"learning_rate": 1.5383333877574014e-06,
|
|
"loss": 0.7987942099571228,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 2.3088467614533963,
|
|
"grad_norm": 8.956197097997833,
|
|
"learning_rate": 1.5350187246004827e-06,
|
|
"loss": 0.3484036922454834,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 2.309636650868878,
|
|
"grad_norm": 14.554160691726453,
|
|
"learning_rate": 1.5317069886711527e-06,
|
|
"loss": 0.262870728969574,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 2.31042654028436,
|
|
"grad_norm": 10.028179610842129,
|
|
"learning_rate": 1.5283981827671817e-06,
|
|
"loss": 0.4316098093986511,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 2.311216429699842,
|
|
"grad_norm": 8.395938728397983,
|
|
"learning_rate": 1.525092309683857e-06,
|
|
"loss": 0.29740965366363525,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 2.3120063191153237,
|
|
"grad_norm": 13.787916527860657,
|
|
"learning_rate": 1.5217893722139927e-06,
|
|
"loss": 0.3893413841724396,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 2.3127962085308056,
|
|
"grad_norm": 9.912191066457186,
|
|
"learning_rate": 1.51848937314792e-06,
|
|
"loss": 0.32815465331077576,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 2.3135860979462874,
|
|
"grad_norm": 9.266231242165011,
|
|
"learning_rate": 1.5151923152734899e-06,
|
|
"loss": 0.3238118290901184,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 2.3143759873617693,
|
|
"grad_norm": 13.526154892749354,
|
|
"learning_rate": 1.511898201376067e-06,
|
|
"loss": 0.47903206944465637,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 2.315165876777251,
|
|
"grad_norm": 7.3178550699031915,
|
|
"learning_rate": 1.5086070342385301e-06,
|
|
"loss": 0.5913638472557068,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 2.315955766192733,
|
|
"grad_norm": 10.488519945078615,
|
|
"learning_rate": 1.5053188166412675e-06,
|
|
"loss": 0.4839520752429962,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 2.316745655608215,
|
|
"grad_norm": 8.255353237433672,
|
|
"learning_rate": 1.5020335513621765e-06,
|
|
"loss": 0.44816941022872925,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 2.3175355450236967,
|
|
"grad_norm": 9.243776086558636,
|
|
"learning_rate": 1.4987512411766597e-06,
|
|
"loss": 0.2721986472606659,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 2.3183254344391786,
|
|
"grad_norm": 10.746251103777091,
|
|
"learning_rate": 1.4954718888576247e-06,
|
|
"loss": 0.3570769131183624,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 2.3191153238546605,
|
|
"grad_norm": 8.639831387338281,
|
|
"learning_rate": 1.4921954971754783e-06,
|
|
"loss": 0.3641059994697571,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 2.3199052132701423,
|
|
"grad_norm": 11.092618448863458,
|
|
"learning_rate": 1.4889220688981265e-06,
|
|
"loss": 0.2935643196105957,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 2.320695102685624,
|
|
"grad_norm": 12.23607825667408,
|
|
"learning_rate": 1.4856516067909715e-06,
|
|
"loss": 0.6410992741584778,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 2.321484992101106,
|
|
"grad_norm": 16.623306809469454,
|
|
"learning_rate": 1.4823841136169132e-06,
|
|
"loss": 1.6279677152633667,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 2.322274881516588,
|
|
"grad_norm": 12.421816386616667,
|
|
"learning_rate": 1.479119592136341e-06,
|
|
"loss": 0.5754894018173218,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 2.3230647709320698,
|
|
"grad_norm": 8.991638214444569,
|
|
"learning_rate": 1.4758580451071303e-06,
|
|
"loss": 0.5960466861724854,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 2.323854660347551,
|
|
"grad_norm": 12.162545193825485,
|
|
"learning_rate": 1.4725994752846473e-06,
|
|
"loss": 0.3044765889644623,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 2.324644549763033,
|
|
"grad_norm": 19.440244075455958,
|
|
"learning_rate": 1.4693438854217423e-06,
|
|
"loss": 0.34671056270599365,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 2.325434439178515,
|
|
"grad_norm": 11.430703439045205,
|
|
"learning_rate": 1.4660912782687508e-06,
|
|
"loss": 0.43449515104293823,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 2.3262243285939967,
|
|
"grad_norm": 10.157383176145405,
|
|
"learning_rate": 1.4628416565734859e-06,
|
|
"loss": 0.3315383791923523,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 2.3270142180094786,
|
|
"grad_norm": 17.3305490576594,
|
|
"learning_rate": 1.4595950230812362e-06,
|
|
"loss": 0.888778567314148,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 2.3278041074249605,
|
|
"grad_norm": 16.32977159230478,
|
|
"learning_rate": 1.4563513805347672e-06,
|
|
"loss": 0.8644918203353882,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 2.3285939968404423,
|
|
"grad_norm": 14.638072535644602,
|
|
"learning_rate": 1.4531107316743231e-06,
|
|
"loss": 0.47349095344543457,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 2.329383886255924,
|
|
"grad_norm": 9.49689056502259,
|
|
"learning_rate": 1.4498730792376126e-06,
|
|
"loss": 0.5828062295913696,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 2.330173775671406,
|
|
"grad_norm": 11.520314966686117,
|
|
"learning_rate": 1.446638425959817e-06,
|
|
"loss": 0.3956340551376343,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 2.330963665086888,
|
|
"grad_norm": 8.269907045653584,
|
|
"learning_rate": 1.4434067745735792e-06,
|
|
"loss": 0.2273710072040558,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 2.3317535545023698,
|
|
"grad_norm": 19.223537730273925,
|
|
"learning_rate": 1.4401781278090077e-06,
|
|
"loss": 0.2646605968475342,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 2.3325434439178516,
|
|
"grad_norm": 7.308582717188896,
|
|
"learning_rate": 1.436952488393678e-06,
|
|
"loss": 0.25829195976257324,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 2.3333333333333335,
|
|
"grad_norm": 9.018243729685395,
|
|
"learning_rate": 1.4337298590526193e-06,
|
|
"loss": 0.6489487290382385,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 2.3341232227488153,
|
|
"grad_norm": 11.471038810226908,
|
|
"learning_rate": 1.4305102425083195e-06,
|
|
"loss": 0.23926126956939697,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 2.3349131121642968,
|
|
"grad_norm": 8.516369253631089,
|
|
"learning_rate": 1.4272936414807215e-06,
|
|
"loss": 0.7570828199386597,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 2.3357030015797786,
|
|
"grad_norm": 9.3142967522863,
|
|
"learning_rate": 1.4240800586872194e-06,
|
|
"loss": 0.9574933052062988,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 2.3364928909952605,
|
|
"grad_norm": 18.891161628854636,
|
|
"learning_rate": 1.4208694968426594e-06,
|
|
"loss": 1.0689260959625244,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 2.3372827804107423,
|
|
"grad_norm": 11.717390658258914,
|
|
"learning_rate": 1.4176619586593343e-06,
|
|
"loss": 0.5372721552848816,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 2.338072669826224,
|
|
"grad_norm": 12.175691953414255,
|
|
"learning_rate": 1.4144574468469836e-06,
|
|
"loss": 1.1094560623168945,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 2.338862559241706,
|
|
"grad_norm": 10.010738282134813,
|
|
"learning_rate": 1.4112559641127882e-06,
|
|
"loss": 0.3446424603462219,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 2.339652448657188,
|
|
"grad_norm": 13.582629438685526,
|
|
"learning_rate": 1.4080575131613727e-06,
|
|
"loss": 0.3784876763820648,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 2.3404423380726698,
|
|
"grad_norm": 12.572883408640878,
|
|
"learning_rate": 1.4048620966947979e-06,
|
|
"loss": 0.206849604845047,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 2.3412322274881516,
|
|
"grad_norm": 11.77936634392298,
|
|
"learning_rate": 1.4016697174125627e-06,
|
|
"loss": 0.41492050886154175,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 2.3420221169036335,
|
|
"grad_norm": 13.508111308050823,
|
|
"learning_rate": 1.3984803780115997e-06,
|
|
"loss": 0.5502463579177856,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 2.3428120063191153,
|
|
"grad_norm": 9.14917705724563,
|
|
"learning_rate": 1.3952940811862715e-06,
|
|
"loss": 0.31409573554992676,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 2.343601895734597,
|
|
"grad_norm": 12.299355989140265,
|
|
"learning_rate": 1.3921108296283765e-06,
|
|
"loss": 0.562318742275238,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 2.344391785150079,
|
|
"grad_norm": 10.934243215627218,
|
|
"learning_rate": 1.3889306260271318e-06,
|
|
"loss": 0.4536178708076477,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 2.345181674565561,
|
|
"grad_norm": 8.937500050751812,
|
|
"learning_rate": 1.3857534730691857e-06,
|
|
"loss": 0.34480780363082886,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 2.345971563981043,
|
|
"grad_norm": 14.281522624319098,
|
|
"learning_rate": 1.3825793734386074e-06,
|
|
"loss": 1.0019593238830566,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 2.3467614533965246,
|
|
"grad_norm": 12.232172084092497,
|
|
"learning_rate": 1.3794083298168848e-06,
|
|
"loss": 0.8148961663246155,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 2.3475513428120065,
|
|
"grad_norm": 14.943915817149255,
|
|
"learning_rate": 1.376240344882931e-06,
|
|
"loss": 0.294752836227417,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 2.3483412322274884,
|
|
"grad_norm": 10.941172869384477,
|
|
"learning_rate": 1.3730754213130648e-06,
|
|
"loss": 0.793616533279419,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 2.34913112164297,
|
|
"grad_norm": 19.66512269635939,
|
|
"learning_rate": 1.3699135617810266e-06,
|
|
"loss": 0.858728289604187,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 2.3499210110584516,
|
|
"grad_norm": 12.311189719186588,
|
|
"learning_rate": 1.3667547689579636e-06,
|
|
"loss": 0.5237981677055359,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 2.3507109004739335,
|
|
"grad_norm": 27.368898959383774,
|
|
"learning_rate": 1.3635990455124371e-06,
|
|
"loss": 0.45691031217575073,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 2.3515007898894154,
|
|
"grad_norm": 12.118450870594108,
|
|
"learning_rate": 1.3604463941104119e-06,
|
|
"loss": 0.3164505660533905,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 2.352290679304897,
|
|
"grad_norm": 10.789370739328524,
|
|
"learning_rate": 1.3572968174152595e-06,
|
|
"loss": 0.6960369348526001,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 2.353080568720379,
|
|
"grad_norm": 8.417646546087006,
|
|
"learning_rate": 1.3541503180877497e-06,
|
|
"loss": 0.45459938049316406,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 2.353870458135861,
|
|
"grad_norm": 10.279601142928406,
|
|
"learning_rate": 1.3510068987860554e-06,
|
|
"loss": 0.38531941175460815,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 2.354660347551343,
|
|
"grad_norm": 10.475719668500435,
|
|
"learning_rate": 1.3478665621657505e-06,
|
|
"loss": 0.3190561532974243,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 2.3554502369668247,
|
|
"grad_norm": 12.300957016080215,
|
|
"learning_rate": 1.3447293108798005e-06,
|
|
"loss": 0.6847068667411804,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 2.3562401263823065,
|
|
"grad_norm": 18.557938231221407,
|
|
"learning_rate": 1.3415951475785666e-06,
|
|
"loss": 0.6536878347396851,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 2.3570300157977884,
|
|
"grad_norm": 9.51452055614094,
|
|
"learning_rate": 1.338464074909796e-06,
|
|
"loss": 1.2326545715332031,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 2.3578199052132702,
|
|
"grad_norm": 7.117469162686578,
|
|
"learning_rate": 1.3353360955186346e-06,
|
|
"loss": 0.2746868431568146,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 2.358609794628752,
|
|
"grad_norm": 8.473831273234493,
|
|
"learning_rate": 1.3322112120476066e-06,
|
|
"loss": 0.7424836754798889,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 2.359399684044234,
|
|
"grad_norm": 8.174865295399881,
|
|
"learning_rate": 1.3290894271366251e-06,
|
|
"loss": 0.26962894201278687,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 2.360189573459716,
|
|
"grad_norm": 9.674308066012069,
|
|
"learning_rate": 1.3259707434229834e-06,
|
|
"loss": 0.24160385131835938,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 2.3609794628751977,
|
|
"grad_norm": 15.484285096363958,
|
|
"learning_rate": 1.3228551635413567e-06,
|
|
"loss": 0.2914643883705139,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 2.361769352290679,
|
|
"grad_norm": 6.8344064852954425,
|
|
"learning_rate": 1.3197426901237965e-06,
|
|
"loss": 0.6412686109542847,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 2.362559241706161,
|
|
"grad_norm": 10.510405239917262,
|
|
"learning_rate": 1.3166333257997305e-06,
|
|
"loss": 0.23676377534866333,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 2.363349131121643,
|
|
"grad_norm": 18.178644105487813,
|
|
"learning_rate": 1.3135270731959599e-06,
|
|
"loss": 0.6876839995384216,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 2.3641390205371247,
|
|
"grad_norm": 14.269241469381564,
|
|
"learning_rate": 1.3104239349366577e-06,
|
|
"loss": 0.4241482615470886,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 2.3649289099526065,
|
|
"grad_norm": 12.211602110609782,
|
|
"learning_rate": 1.3073239136433651e-06,
|
|
"loss": 0.5298961997032166,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 2.3657187993680884,
|
|
"grad_norm": 8.26732985074683,
|
|
"learning_rate": 1.3042270119349903e-06,
|
|
"loss": 0.5399200320243835,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 2.3665086887835702,
|
|
"grad_norm": 12.593489559566338,
|
|
"learning_rate": 1.3011332324278054e-06,
|
|
"loss": 0.28332608938217163,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 2.367298578199052,
|
|
"grad_norm": 13.460677136621092,
|
|
"learning_rate": 1.2980425777354466e-06,
|
|
"loss": 0.8239960670471191,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 2.368088467614534,
|
|
"grad_norm": 11.353725107540342,
|
|
"learning_rate": 1.2949550504689084e-06,
|
|
"loss": 0.3334651589393616,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 2.368878357030016,
|
|
"grad_norm": 10.254492835832023,
|
|
"learning_rate": 1.2918706532365427e-06,
|
|
"loss": 0.3909056782722473,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 2.3696682464454977,
|
|
"grad_norm": 14.050213628495184,
|
|
"learning_rate": 1.2887893886440634e-06,
|
|
"loss": 0.42461708188056946,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 2.3704581358609795,
|
|
"grad_norm": 9.514180124670807,
|
|
"learning_rate": 1.2857112592945277e-06,
|
|
"loss": 0.3221348822116852,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 2.3712480252764614,
|
|
"grad_norm": 13.534122595465016,
|
|
"learning_rate": 1.2826362677883509e-06,
|
|
"loss": 0.31083306670188904,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 2.3720379146919433,
|
|
"grad_norm": 10.83312836661239,
|
|
"learning_rate": 1.279564416723295e-06,
|
|
"loss": 0.6842789649963379,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 2.372827804107425,
|
|
"grad_norm": 13.613162216619145,
|
|
"learning_rate": 1.2764957086944729e-06,
|
|
"loss": 0.2826170325279236,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 2.373617693522907,
|
|
"grad_norm": 7.054250032981525,
|
|
"learning_rate": 1.2734301462943393e-06,
|
|
"loss": 0.3503812253475189,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 2.374407582938389,
|
|
"grad_norm": 23.530290090188814,
|
|
"learning_rate": 1.2703677321126878e-06,
|
|
"loss": 0.9787487983703613,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 2.3751974723538707,
|
|
"grad_norm": 9.116900547516696,
|
|
"learning_rate": 1.2673084687366577e-06,
|
|
"loss": 0.5961554050445557,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 2.375987361769352,
|
|
"grad_norm": 12.370457479829778,
|
|
"learning_rate": 1.264252358750724e-06,
|
|
"loss": 0.4279392957687378,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 2.376777251184834,
|
|
"grad_norm": 16.664525922372817,
|
|
"learning_rate": 1.2611994047367004e-06,
|
|
"loss": 2.003368854522705,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 2.377567140600316,
|
|
"grad_norm": 9.89227361650676,
|
|
"learning_rate": 1.2581496092737315e-06,
|
|
"loss": 0.36015745997428894,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 2.3783570300157977,
|
|
"grad_norm": 7.456303625403205,
|
|
"learning_rate": 1.2551029749382966e-06,
|
|
"loss": 0.38664817810058594,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 2.3791469194312795,
|
|
"grad_norm": 10.16486281921813,
|
|
"learning_rate": 1.2520595043041967e-06,
|
|
"loss": 0.1998920887708664,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 2.3799368088467614,
|
|
"grad_norm": 12.477795753447527,
|
|
"learning_rate": 1.249019199942571e-06,
|
|
"loss": 0.5245856046676636,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 2.3807266982622433,
|
|
"grad_norm": 15.497688569467634,
|
|
"learning_rate": 1.2459820644218772e-06,
|
|
"loss": 0.5648780465126038,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 2.381516587677725,
|
|
"grad_norm": 9.766778610490318,
|
|
"learning_rate": 1.242948100307898e-06,
|
|
"loss": 0.3348008990287781,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 2.382306477093207,
|
|
"grad_norm": 9.459152445457095,
|
|
"learning_rate": 1.2399173101637362e-06,
|
|
"loss": 0.28159070014953613,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 2.383096366508689,
|
|
"grad_norm": 10.550046979381259,
|
|
"learning_rate": 1.23688969654981e-06,
|
|
"loss": 0.5117573738098145,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 2.3838862559241707,
|
|
"grad_norm": 15.689874055565582,
|
|
"learning_rate": 1.2338652620238617e-06,
|
|
"loss": 0.40390482544898987,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 2.3846761453396526,
|
|
"grad_norm": 13.629010440932046,
|
|
"learning_rate": 1.2308440091409418e-06,
|
|
"loss": 0.255649209022522,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 2.3854660347551344,
|
|
"grad_norm": 12.962233890538123,
|
|
"learning_rate": 1.2278259404534148e-06,
|
|
"loss": 0.7356714010238647,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 2.3862559241706163,
|
|
"grad_norm": 12.685586645489847,
|
|
"learning_rate": 1.2248110585109546e-06,
|
|
"loss": 0.30903008580207825,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 2.387045813586098,
|
|
"grad_norm": 9.857261027285547,
|
|
"learning_rate": 1.2217993658605442e-06,
|
|
"loss": 0.19482699036598206,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 2.3878357030015795,
|
|
"grad_norm": 10.203386869299976,
|
|
"learning_rate": 1.2187908650464713e-06,
|
|
"loss": 0.777467668056488,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 2.3886255924170614,
|
|
"grad_norm": 13.960859010293253,
|
|
"learning_rate": 1.2157855586103268e-06,
|
|
"loss": 0.42737501859664917,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 2.3894154818325433,
|
|
"grad_norm": 9.02890660772045,
|
|
"learning_rate": 1.212783449091004e-06,
|
|
"loss": 0.19785018265247345,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 2.390205371248025,
|
|
"grad_norm": 16.20027560463624,
|
|
"learning_rate": 1.2097845390246944e-06,
|
|
"loss": 0.4012932777404785,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 2.390995260663507,
|
|
"grad_norm": 13.935718162104166,
|
|
"learning_rate": 1.2067888309448872e-06,
|
|
"loss": 1.0133466720581055,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 2.391785150078989,
|
|
"grad_norm": 17.86275453891239,
|
|
"learning_rate": 1.2037963273823667e-06,
|
|
"loss": 0.7729724049568176,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 2.3925750394944707,
|
|
"grad_norm": 19.729430525861485,
|
|
"learning_rate": 1.2008070308652097e-06,
|
|
"loss": 0.6262521147727966,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 2.3933649289099526,
|
|
"grad_norm": 9.043064178522485,
|
|
"learning_rate": 1.1978209439187843e-06,
|
|
"loss": 0.33055561780929565,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 2.3941548183254344,
|
|
"grad_norm": 10.08632808389374,
|
|
"learning_rate": 1.194838069065744e-06,
|
|
"loss": 0.3754257559776306,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 2.3949447077409163,
|
|
"grad_norm": 16.377512511153483,
|
|
"learning_rate": 1.191858408826036e-06,
|
|
"loss": 0.7418103218078613,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 2.395734597156398,
|
|
"grad_norm": 13.891209536245656,
|
|
"learning_rate": 1.1888819657168832e-06,
|
|
"loss": 0.6630780100822449,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 2.39652448657188,
|
|
"grad_norm": 9.70934891051542,
|
|
"learning_rate": 1.185908742252796e-06,
|
|
"loss": 0.6031774878501892,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 2.397314375987362,
|
|
"grad_norm": 7.781623949513264,
|
|
"learning_rate": 1.1829387409455628e-06,
|
|
"loss": 0.3950078785419464,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 2.3981042654028437,
|
|
"grad_norm": 14.63680014242443,
|
|
"learning_rate": 1.1799719643042494e-06,
|
|
"loss": 1.1075937747955322,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 2.3988941548183256,
|
|
"grad_norm": 9.35495088180136,
|
|
"learning_rate": 1.1770084148352013e-06,
|
|
"loss": 0.5169080495834351,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 2.3996840442338074,
|
|
"grad_norm": 12.0333389179042,
|
|
"learning_rate": 1.1740480950420346e-06,
|
|
"loss": 0.6824804544448853,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 2.4004739336492893,
|
|
"grad_norm": 10.918655037249678,
|
|
"learning_rate": 1.1710910074256353e-06,
|
|
"loss": 0.21143901348114014,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 2.401263823064771,
|
|
"grad_norm": 12.024208404396846,
|
|
"learning_rate": 1.1681371544841596e-06,
|
|
"loss": 0.22546377778053284,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 2.402053712480253,
|
|
"grad_norm": 7.956119102031276,
|
|
"learning_rate": 1.165186538713035e-06,
|
|
"loss": 0.4426957070827484,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 2.4028436018957344,
|
|
"grad_norm": 14.526088548416308,
|
|
"learning_rate": 1.1622391626049512e-06,
|
|
"loss": 0.46725398302078247,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 2.4036334913112163,
|
|
"grad_norm": 15.015374102906868,
|
|
"learning_rate": 1.1592950286498617e-06,
|
|
"loss": 0.3960593044757843,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 2.404423380726698,
|
|
"grad_norm": 8.595894150813733,
|
|
"learning_rate": 1.156354139334978e-06,
|
|
"loss": 0.34023603796958923,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 2.40521327014218,
|
|
"grad_norm": 10.668250389515402,
|
|
"learning_rate": 1.153416497144773e-06,
|
|
"loss": 0.29296159744262695,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 2.406003159557662,
|
|
"grad_norm": 12.422225116517867,
|
|
"learning_rate": 1.1504821045609793e-06,
|
|
"loss": 0.3671707510948181,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 2.4067930489731437,
|
|
"grad_norm": 10.146132230382701,
|
|
"learning_rate": 1.1475509640625803e-06,
|
|
"loss": 0.3341038227081299,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 2.4075829383886256,
|
|
"grad_norm": 15.595249640220638,
|
|
"learning_rate": 1.1446230781258126e-06,
|
|
"loss": 0.4406832456588745,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 2.4083728278041074,
|
|
"grad_norm": 15.105385899136605,
|
|
"learning_rate": 1.1416984492241651e-06,
|
|
"loss": 0.3876189589500427,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 2.4091627172195893,
|
|
"grad_norm": 12.938838417300977,
|
|
"learning_rate": 1.138777079828372e-06,
|
|
"loss": 0.3145609498023987,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 2.409952606635071,
|
|
"grad_norm": 8.358585121545957,
|
|
"learning_rate": 1.1358589724064172e-06,
|
|
"loss": 0.3923751711845398,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 2.410742496050553,
|
|
"grad_norm": 12.197191053772348,
|
|
"learning_rate": 1.1329441294235271e-06,
|
|
"loss": 0.4876922369003296,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 2.411532385466035,
|
|
"grad_norm": 22.74826926426158,
|
|
"learning_rate": 1.1300325533421708e-06,
|
|
"loss": 1.1759089231491089,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 2.4123222748815167,
|
|
"grad_norm": 9.507426051564869,
|
|
"learning_rate": 1.1271242466220566e-06,
|
|
"loss": 0.7912863492965698,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 2.4131121642969986,
|
|
"grad_norm": 28.164313247406366,
|
|
"learning_rate": 1.1242192117201329e-06,
|
|
"loss": 2.359377384185791,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 2.41390205371248,
|
|
"grad_norm": 6.939852525178187,
|
|
"learning_rate": 1.1213174510905818e-06,
|
|
"loss": 0.1508610099554062,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 2.414691943127962,
|
|
"grad_norm": 8.079273130567692,
|
|
"learning_rate": 1.1184189671848205e-06,
|
|
"loss": 0.26179447770118713,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 2.4154818325434437,
|
|
"grad_norm": 8.63717491884411,
|
|
"learning_rate": 1.1155237624514975e-06,
|
|
"loss": 0.3997868001461029,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 2.4162717219589256,
|
|
"grad_norm": 12.57415659403395,
|
|
"learning_rate": 1.1126318393364905e-06,
|
|
"loss": 0.31383663415908813,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 2.4170616113744074,
|
|
"grad_norm": 10.339245467585227,
|
|
"learning_rate": 1.109743200282909e-06,
|
|
"loss": 0.3091076612472534,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 2.4178515007898893,
|
|
"grad_norm": 9.227956180667832,
|
|
"learning_rate": 1.106857847731081e-06,
|
|
"loss": 0.28240686655044556,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 2.418641390205371,
|
|
"grad_norm": 14.195925477576337,
|
|
"learning_rate": 1.1039757841185638e-06,
|
|
"loss": 0.58428555727005,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 2.419431279620853,
|
|
"grad_norm": 12.117805874205489,
|
|
"learning_rate": 1.1010970118801335e-06,
|
|
"loss": 0.36473608016967773,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 2.420221169036335,
|
|
"grad_norm": 8.135434340440062,
|
|
"learning_rate": 1.0982215334477852e-06,
|
|
"loss": 0.5204439163208008,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 2.4210110584518167,
|
|
"grad_norm": 11.478951662361853,
|
|
"learning_rate": 1.0953493512507369e-06,
|
|
"loss": 0.7073631882667542,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 2.4218009478672986,
|
|
"grad_norm": 8.874708226579822,
|
|
"learning_rate": 1.0924804677154132e-06,
|
|
"loss": 0.3390922546386719,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 2.4225908372827805,
|
|
"grad_norm": 8.771829165557099,
|
|
"learning_rate": 1.0896148852654576e-06,
|
|
"loss": 0.28562629222869873,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 2.4233807266982623,
|
|
"grad_norm": 12.196152581610507,
|
|
"learning_rate": 1.0867526063217225e-06,
|
|
"loss": 0.7520745396614075,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 2.424170616113744,
|
|
"grad_norm": 15.941013046283597,
|
|
"learning_rate": 1.0838936333022732e-06,
|
|
"loss": 0.7924416661262512,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 2.424960505529226,
|
|
"grad_norm": 16.208712282712163,
|
|
"learning_rate": 1.0810379686223782e-06,
|
|
"loss": 0.4997054934501648,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 2.425750394944708,
|
|
"grad_norm": 12.626153196177519,
|
|
"learning_rate": 1.0781856146945135e-06,
|
|
"loss": 0.31034407019615173,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 2.4265402843601898,
|
|
"grad_norm": 13.13529853421417,
|
|
"learning_rate": 1.075336573928355e-06,
|
|
"loss": 1.2663923501968384,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 2.4273301737756716,
|
|
"grad_norm": 11.619655409461624,
|
|
"learning_rate": 1.0724908487307812e-06,
|
|
"loss": 0.4666603207588196,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 2.4281200631911535,
|
|
"grad_norm": 11.29095734565805,
|
|
"learning_rate": 1.0696484415058732e-06,
|
|
"loss": 0.6091010570526123,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 2.428909952606635,
|
|
"grad_norm": 12.496992069841568,
|
|
"learning_rate": 1.066809354654904e-06,
|
|
"loss": 0.3018745183944702,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 2.4296998420221168,
|
|
"grad_norm": 10.57772764970064,
|
|
"learning_rate": 1.063973590576346e-06,
|
|
"loss": 0.7717863917350769,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 2.4304897314375986,
|
|
"grad_norm": 14.741370815856945,
|
|
"learning_rate": 1.0611411516658566e-06,
|
|
"loss": 0.47512930631637573,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 2.4312796208530805,
|
|
"grad_norm": 14.21167289494127,
|
|
"learning_rate": 1.0583120403162943e-06,
|
|
"loss": 0.6081647872924805,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 2.4320695102685623,
|
|
"grad_norm": 9.193770485318598,
|
|
"learning_rate": 1.0554862589177007e-06,
|
|
"loss": 0.34148019552230835,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 2.432859399684044,
|
|
"grad_norm": 9.062778818641565,
|
|
"learning_rate": 1.0526638098573045e-06,
|
|
"loss": 0.3882153630256653,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 2.433649289099526,
|
|
"grad_norm": 18.929160213629444,
|
|
"learning_rate": 1.0498446955195202e-06,
|
|
"loss": 0.3846644163131714,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 2.434439178515008,
|
|
"grad_norm": 8.998677826427556,
|
|
"learning_rate": 1.047028918285945e-06,
|
|
"loss": 0.3000609576702118,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 2.4352290679304898,
|
|
"grad_norm": 10.101377476509365,
|
|
"learning_rate": 1.0442164805353565e-06,
|
|
"loss": 0.3316442370414734,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 2.4360189573459716,
|
|
"grad_norm": 15.80218064139609,
|
|
"learning_rate": 1.0414073846437122e-06,
|
|
"loss": 0.15290002524852753,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 2.4368088467614535,
|
|
"grad_norm": 9.603248142881998,
|
|
"learning_rate": 1.0386016329841448e-06,
|
|
"loss": 0.30186790227890015,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 2.4375987361769353,
|
|
"grad_norm": 14.04175823933125,
|
|
"learning_rate": 1.0357992279269623e-06,
|
|
"loss": 0.4339219033718109,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 2.438388625592417,
|
|
"grad_norm": 18.009198275639264,
|
|
"learning_rate": 1.033000171839646e-06,
|
|
"loss": 1.9742562770843506,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 2.439178515007899,
|
|
"grad_norm": 22.24916433433492,
|
|
"learning_rate": 1.0302044670868483e-06,
|
|
"loss": 0.504414439201355,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 2.4399684044233805,
|
|
"grad_norm": 10.361343950481873,
|
|
"learning_rate": 1.027412116030389e-06,
|
|
"loss": 1.0488673448562622,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 2.4407582938388623,
|
|
"grad_norm": 10.797759458558541,
|
|
"learning_rate": 1.0246231210292557e-06,
|
|
"loss": 0.3521267771720886,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 2.441548183254344,
|
|
"grad_norm": 12.14235046863475,
|
|
"learning_rate": 1.0218374844396011e-06,
|
|
"loss": 0.3208717703819275,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 2.442338072669826,
|
|
"grad_norm": 14.3011282222263,
|
|
"learning_rate": 1.0190552086147393e-06,
|
|
"loss": 0.7246259450912476,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 2.443127962085308,
|
|
"grad_norm": 14.472676250973395,
|
|
"learning_rate": 1.0162762959051464e-06,
|
|
"loss": 0.3144262433052063,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 2.4439178515007898,
|
|
"grad_norm": 8.531330890483545,
|
|
"learning_rate": 1.013500748658457e-06,
|
|
"loss": 0.4461020231246948,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 2.4447077409162716,
|
|
"grad_norm": 14.749179854381705,
|
|
"learning_rate": 1.010728569219463e-06,
|
|
"loss": 0.37278565764427185,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 2.4454976303317535,
|
|
"grad_norm": 9.553911314657903,
|
|
"learning_rate": 1.0079597599301088e-06,
|
|
"loss": 0.5785685777664185,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 2.4462875197472354,
|
|
"grad_norm": 11.059506181609464,
|
|
"learning_rate": 1.0051943231294965e-06,
|
|
"loss": 1.0506287813186646,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 2.447077409162717,
|
|
"grad_norm": 15.548120320030591,
|
|
"learning_rate": 1.0024322611538762e-06,
|
|
"loss": 0.4273751974105835,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 2.447867298578199,
|
|
"grad_norm": 9.759199951892318,
|
|
"learning_rate": 9.996735763366444e-07,
|
|
"loss": 0.37685832381248474,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 2.448657187993681,
|
|
"grad_norm": 11.701020741919134,
|
|
"learning_rate": 9.96918271008348e-07,
|
|
"loss": 0.4007868766784668,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 2.449447077409163,
|
|
"grad_norm": 11.350172953468489,
|
|
"learning_rate": 9.941663474966772e-07,
|
|
"loss": 0.5525588989257812,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 2.4502369668246446,
|
|
"grad_norm": 15.30001249449009,
|
|
"learning_rate": 9.914178081264685e-07,
|
|
"loss": 0.6951796412467957,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 2.4510268562401265,
|
|
"grad_norm": 7.995616395973255,
|
|
"learning_rate": 9.886726552196974e-07,
|
|
"loss": 0.47238144278526306,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 2.4518167456556084,
|
|
"grad_norm": 10.984193552146008,
|
|
"learning_rate": 9.859308910954745e-07,
|
|
"loss": 0.32252442836761475,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 2.4526066350710902,
|
|
"grad_norm": 8.04105675908032,
|
|
"learning_rate": 9.831925180700525e-07,
|
|
"loss": 0.21019114553928375,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 2.453396524486572,
|
|
"grad_norm": 27.322091310870565,
|
|
"learning_rate": 9.804575384568194e-07,
|
|
"loss": 1.3946754932403564,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 2.454186413902054,
|
|
"grad_norm": 8.839342829542206,
|
|
"learning_rate": 9.777259545662944e-07,
|
|
"loss": 0.5038160085678101,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 2.4549763033175354,
|
|
"grad_norm": 21.324783217152557,
|
|
"learning_rate": 9.749977687061279e-07,
|
|
"loss": 0.518517017364502,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 2.455766192733017,
|
|
"grad_norm": 8.481164678205802,
|
|
"learning_rate": 9.722729831811007e-07,
|
|
"loss": 0.8147182464599609,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 2.456556082148499,
|
|
"grad_norm": 6.780024339611966,
|
|
"learning_rate": 9.695516002931204e-07,
|
|
"loss": 0.3741002380847931,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 2.457345971563981,
|
|
"grad_norm": 11.801344448984926,
|
|
"learning_rate": 9.668336223412207e-07,
|
|
"loss": 0.3355671763420105,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 2.458135860979463,
|
|
"grad_norm": 18.426072152940076,
|
|
"learning_rate": 9.641190516215583e-07,
|
|
"loss": 0.5405136346817017,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 2.4589257503949447,
|
|
"grad_norm": 8.59802906534028,
|
|
"learning_rate": 9.614078904274105e-07,
|
|
"loss": 0.4377972483634949,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 2.4597156398104265,
|
|
"grad_norm": 14.687076152913516,
|
|
"learning_rate": 9.587001410491764e-07,
|
|
"loss": 0.46500271558761597,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 2.4605055292259084,
|
|
"grad_norm": 11.107539453005398,
|
|
"learning_rate": 9.559958057743712e-07,
|
|
"loss": 0.5390537977218628,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 2.4612954186413902,
|
|
"grad_norm": 16.500533704000823,
|
|
"learning_rate": 9.532948868876258e-07,
|
|
"loss": 0.21158595383167267,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 2.462085308056872,
|
|
"grad_norm": 13.41292359415877,
|
|
"learning_rate": 9.505973866706858e-07,
|
|
"loss": 0.5755499601364136,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 2.462875197472354,
|
|
"grad_norm": 21.715368481608397,
|
|
"learning_rate": 9.47903307402408e-07,
|
|
"loss": 1.7295933961868286,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 2.463665086887836,
|
|
"grad_norm": 16.12344079053901,
|
|
"learning_rate": 9.452126513587601e-07,
|
|
"loss": 0.6120498180389404,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 2.4644549763033177,
|
|
"grad_norm": 6.872808233122644,
|
|
"learning_rate": 9.42525420812816e-07,
|
|
"loss": 0.20094534754753113,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 2.4652448657187995,
|
|
"grad_norm": 13.205846064827124,
|
|
"learning_rate": 9.398416180347581e-07,
|
|
"loss": 0.3877865672111511,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 2.4660347551342814,
|
|
"grad_norm": 32.63764306543489,
|
|
"learning_rate": 9.371612452918711e-07,
|
|
"loss": 1.0538610219955444,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 2.466824644549763,
|
|
"grad_norm": 10.156083117031175,
|
|
"learning_rate": 9.34484304848543e-07,
|
|
"loss": 0.6839322447776794,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 2.4676145339652447,
|
|
"grad_norm": 10.388055079887849,
|
|
"learning_rate": 9.318107989662611e-07,
|
|
"loss": 0.19889391958713531,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 2.4684044233807265,
|
|
"grad_norm": 7.682004289712716,
|
|
"learning_rate": 9.291407299036148e-07,
|
|
"loss": 0.29008305072784424,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 2.4691943127962084,
|
|
"grad_norm": 8.701364307694792,
|
|
"learning_rate": 9.264740999162836e-07,
|
|
"loss": 0.28695201873779297,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 2.4699842022116902,
|
|
"grad_norm": 17.465630884728238,
|
|
"learning_rate": 9.238109112570475e-07,
|
|
"loss": 0.29782527685165405,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 2.470774091627172,
|
|
"grad_norm": 8.305085250067776,
|
|
"learning_rate": 9.21151166175776e-07,
|
|
"loss": 0.21786969900131226,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 2.471563981042654,
|
|
"grad_norm": 9.419126772200544,
|
|
"learning_rate": 9.184948669194299e-07,
|
|
"loss": 0.6029007434844971,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 2.472353870458136,
|
|
"grad_norm": 20.242949144706834,
|
|
"learning_rate": 9.158420157320613e-07,
|
|
"loss": 0.539847731590271,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 2.4731437598736177,
|
|
"grad_norm": 14.865152633768327,
|
|
"learning_rate": 9.131926148548087e-07,
|
|
"loss": 0.6652476787567139,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 2.4739336492890995,
|
|
"grad_norm": 10.195973961976483,
|
|
"learning_rate": 9.105466665258916e-07,
|
|
"loss": 0.6767930388450623,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 2.4747235387045814,
|
|
"grad_norm": 12.913760166214779,
|
|
"learning_rate": 9.07904172980616e-07,
|
|
"loss": 0.30187326669692993,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 2.4755134281200633,
|
|
"grad_norm": 7.210724493670894,
|
|
"learning_rate": 9.052651364513709e-07,
|
|
"loss": 0.2819286584854126,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 2.476303317535545,
|
|
"grad_norm": 9.694770018852635,
|
|
"learning_rate": 9.026295591676232e-07,
|
|
"loss": 1.3492515087127686,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 2.477093206951027,
|
|
"grad_norm": 10.663258311074385,
|
|
"learning_rate": 8.999974433559172e-07,
|
|
"loss": 0.6058721542358398,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 2.477883096366509,
|
|
"grad_norm": 8.609940369345201,
|
|
"learning_rate": 8.973687912398698e-07,
|
|
"loss": 0.3588424324989319,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 2.4786729857819907,
|
|
"grad_norm": 10.99741800368398,
|
|
"learning_rate": 8.947436050401792e-07,
|
|
"loss": 0.9377810955047607,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 2.4794628751974725,
|
|
"grad_norm": 13.297554259312957,
|
|
"learning_rate": 8.92121886974609e-07,
|
|
"loss": 0.3509410619735718,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 2.4802527646129544,
|
|
"grad_norm": 25.874105611022717,
|
|
"learning_rate": 8.895036392579965e-07,
|
|
"loss": 1.956540822982788,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 2.4810426540284363,
|
|
"grad_norm": 13.02959939002725,
|
|
"learning_rate": 8.868888641022449e-07,
|
|
"loss": 0.32808127999305725,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 2.4818325434439177,
|
|
"grad_norm": 22.382659848118195,
|
|
"learning_rate": 8.842775637163259e-07,
|
|
"loss": 1.4966964721679688,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 2.4826224328593995,
|
|
"grad_norm": 6.597640303742646,
|
|
"learning_rate": 8.816697403062736e-07,
|
|
"loss": 0.2421848326921463,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 2.4834123222748814,
|
|
"grad_norm": 10.171804451221856,
|
|
"learning_rate": 8.790653960751861e-07,
|
|
"loss": 0.3181907534599304,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 2.4842022116903633,
|
|
"grad_norm": 12.678509259303947,
|
|
"learning_rate": 8.764645332232225e-07,
|
|
"loss": 2.06329345703125,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 2.484992101105845,
|
|
"grad_norm": 8.062598004826272,
|
|
"learning_rate": 8.738671539476001e-07,
|
|
"loss": 0.40570273995399475,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 2.485781990521327,
|
|
"grad_norm": 8.067583181720805,
|
|
"learning_rate": 8.712732604425933e-07,
|
|
"loss": 0.2615022361278534,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 2.486571879936809,
|
|
"grad_norm": 11.514174208308086,
|
|
"learning_rate": 8.686828548995318e-07,
|
|
"loss": 0.3940156102180481,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 2.4873617693522907,
|
|
"grad_norm": 13.106691868280555,
|
|
"learning_rate": 8.660959395067991e-07,
|
|
"loss": 0.33123183250427246,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 2.4881516587677726,
|
|
"grad_norm": 6.521694530585903,
|
|
"learning_rate": 8.635125164498293e-07,
|
|
"loss": 0.1495874524116516,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 2.4889415481832544,
|
|
"grad_norm": 8.547476041571496,
|
|
"learning_rate": 8.609325879111069e-07,
|
|
"loss": 0.5456651449203491,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 2.4897314375987363,
|
|
"grad_norm": 10.933870809001716,
|
|
"learning_rate": 8.583561560701647e-07,
|
|
"loss": 0.3192444145679474,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 2.490521327014218,
|
|
"grad_norm": 28.363445895171516,
|
|
"learning_rate": 8.557832231035796e-07,
|
|
"loss": 0.5993216037750244,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 2.4913112164297,
|
|
"grad_norm": 10.933363522689742,
|
|
"learning_rate": 8.532137911849747e-07,
|
|
"loss": 0.3621593117713928,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 2.492101105845182,
|
|
"grad_norm": 7.744021564422991,
|
|
"learning_rate": 8.506478624850145e-07,
|
|
"loss": 0.20899435877799988,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 2.4928909952606633,
|
|
"grad_norm": 16.455158224717778,
|
|
"learning_rate": 8.480854391714039e-07,
|
|
"loss": 0.5033141374588013,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 2.493680884676145,
|
|
"grad_norm": 8.898300886169093,
|
|
"learning_rate": 8.45526523408885e-07,
|
|
"loss": 0.4480706751346588,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 2.494470774091627,
|
|
"grad_norm": 7.0747670856799045,
|
|
"learning_rate": 8.42971117359242e-07,
|
|
"loss": 0.21309760212898254,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 2.495260663507109,
|
|
"grad_norm": 9.196654852828132,
|
|
"learning_rate": 8.404192231812875e-07,
|
|
"loss": 0.3876749873161316,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 2.4960505529225907,
|
|
"grad_norm": 8.41438123486234,
|
|
"learning_rate": 8.378708430308702e-07,
|
|
"loss": 0.3482446074485779,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 2.4968404423380726,
|
|
"grad_norm": 9.952515369779197,
|
|
"learning_rate": 8.353259790608698e-07,
|
|
"loss": 0.19175337255001068,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 2.4976303317535544,
|
|
"grad_norm": 15.820391137731752,
|
|
"learning_rate": 8.32784633421197e-07,
|
|
"loss": 1.1132574081420898,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 2.4984202211690363,
|
|
"grad_norm": 9.122774774351079,
|
|
"learning_rate": 8.302468082587906e-07,
|
|
"loss": 0.39598995447158813,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 2.499210110584518,
|
|
"grad_norm": 10.71112514476097,
|
|
"learning_rate": 8.277125057176095e-07,
|
|
"loss": 0.37614136934280396,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 15.433700062046594,
|
|
"learning_rate": 8.25181727938642e-07,
|
|
"loss": 0.4549494981765747,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 2.500789889415482,
|
|
"grad_norm": 6.775255209163935,
|
|
"learning_rate": 8.226544770598993e-07,
|
|
"loss": 0.38162803649902344,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 2.5015797788309637,
|
|
"grad_norm": 9.07190974562414,
|
|
"learning_rate": 8.201307552164106e-07,
|
|
"loss": 0.4735422730445862,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 2.5023696682464456,
|
|
"grad_norm": 9.713947704476643,
|
|
"learning_rate": 8.176105645402244e-07,
|
|
"loss": 0.3699314594268799,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 2.5031595576619274,
|
|
"grad_norm": 11.905314094509981,
|
|
"learning_rate": 8.150939071604069e-07,
|
|
"loss": 0.2857322692871094,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 2.5039494470774093,
|
|
"grad_norm": 11.738198518074968,
|
|
"learning_rate": 8.125807852030349e-07,
|
|
"loss": 0.8294199109077454,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 2.504739336492891,
|
|
"grad_norm": 23.694230717994056,
|
|
"learning_rate": 8.10071200791206e-07,
|
|
"loss": 0.8081961870193481,
|
|
"step": 3171
|
|
},
|
|
{
|
|
"epoch": 2.505529225908373,
|
|
"grad_norm": 11.356117033254058,
|
|
"learning_rate": 8.075651560450237e-07,
|
|
"loss": 0.45390763878822327,
|
|
"step": 3172
|
|
},
|
|
{
|
|
"epoch": 2.506319115323855,
|
|
"grad_norm": 9.254162743946639,
|
|
"learning_rate": 8.050626530816036e-07,
|
|
"loss": 0.39210453629493713,
|
|
"step": 3173
|
|
},
|
|
{
|
|
"epoch": 2.5071090047393367,
|
|
"grad_norm": 14.960564989579739,
|
|
"learning_rate": 8.025636940150677e-07,
|
|
"loss": 0.4236029386520386,
|
|
"step": 3174
|
|
},
|
|
{
|
|
"epoch": 2.5078988941548186,
|
|
"grad_norm": 9.28804391772842,
|
|
"learning_rate": 8.000682809565457e-07,
|
|
"loss": 0.38558968901634216,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 2.5086887835703,
|
|
"grad_norm": 9.760069575355349,
|
|
"learning_rate": 7.975764160141702e-07,
|
|
"loss": 0.197739839553833,
|
|
"step": 3176
|
|
},
|
|
{
|
|
"epoch": 2.509478672985782,
|
|
"grad_norm": 8.394775554288064,
|
|
"learning_rate": 7.950881012930766e-07,
|
|
"loss": 0.29299861192703247,
|
|
"step": 3177
|
|
},
|
|
{
|
|
"epoch": 2.5102685624012637,
|
|
"grad_norm": 13.432882997987198,
|
|
"learning_rate": 7.926033388954019e-07,
|
|
"loss": 0.6216456294059753,
|
|
"step": 3178
|
|
},
|
|
{
|
|
"epoch": 2.5110584518167456,
|
|
"grad_norm": 11.340453335100433,
|
|
"learning_rate": 7.901221309202817e-07,
|
|
"loss": 0.4832969307899475,
|
|
"step": 3179
|
|
},
|
|
{
|
|
"epoch": 2.5118483412322274,
|
|
"grad_norm": 12.980596557657627,
|
|
"learning_rate": 7.876444794638477e-07,
|
|
"loss": 0.3053157329559326,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 2.5126382306477093,
|
|
"grad_norm": 10.678528762002763,
|
|
"learning_rate": 7.851703866192295e-07,
|
|
"loss": 0.5949424505233765,
|
|
"step": 3181
|
|
},
|
|
{
|
|
"epoch": 2.513428120063191,
|
|
"grad_norm": 8.615764477300809,
|
|
"learning_rate": 7.826998544765473e-07,
|
|
"loss": 0.3822169005870819,
|
|
"step": 3182
|
|
},
|
|
{
|
|
"epoch": 2.514218009478673,
|
|
"grad_norm": 12.219849039480954,
|
|
"learning_rate": 7.802328851229163e-07,
|
|
"loss": 0.35568392276763916,
|
|
"step": 3183
|
|
},
|
|
{
|
|
"epoch": 2.515007898894155,
|
|
"grad_norm": 12.387025000660175,
|
|
"learning_rate": 7.77769480642439e-07,
|
|
"loss": 0.386673241853714,
|
|
"step": 3184
|
|
},
|
|
{
|
|
"epoch": 2.5157977883096367,
|
|
"grad_norm": 8.52886409021774,
|
|
"learning_rate": 7.753096431162083e-07,
|
|
"loss": 0.24550145864486694,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 2.5165876777251186,
|
|
"grad_norm": 8.816997119450642,
|
|
"learning_rate": 7.728533746223032e-07,
|
|
"loss": 0.3060890734195709,
|
|
"step": 3186
|
|
},
|
|
{
|
|
"epoch": 2.5173775671406005,
|
|
"grad_norm": 12.72993681147683,
|
|
"learning_rate": 7.70400677235787e-07,
|
|
"loss": 0.4496222734451294,
|
|
"step": 3187
|
|
},
|
|
{
|
|
"epoch": 2.518167456556082,
|
|
"grad_norm": 14.450644987050909,
|
|
"learning_rate": 7.679515530287068e-07,
|
|
"loss": 0.3027800917625427,
|
|
"step": 3188
|
|
},
|
|
{
|
|
"epoch": 2.5189573459715637,
|
|
"grad_norm": 6.8289260496500335,
|
|
"learning_rate": 7.655060040700895e-07,
|
|
"loss": 0.21853289008140564,
|
|
"step": 3189
|
|
},
|
|
{
|
|
"epoch": 2.5197472353870456,
|
|
"grad_norm": 16.18749221673546,
|
|
"learning_rate": 7.630640324259453e-07,
|
|
"loss": 0.3517822027206421,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 2.5205371248025275,
|
|
"grad_norm": 7.317050751605334,
|
|
"learning_rate": 7.606256401592599e-07,
|
|
"loss": 0.20418161153793335,
|
|
"step": 3191
|
|
},
|
|
{
|
|
"epoch": 2.5213270142180093,
|
|
"grad_norm": 11.575307062369431,
|
|
"learning_rate": 7.581908293299923e-07,
|
|
"loss": 0.29537758231163025,
|
|
"step": 3192
|
|
},
|
|
{
|
|
"epoch": 2.522116903633491,
|
|
"grad_norm": 13.330694266343132,
|
|
"learning_rate": 7.557596019950797e-07,
|
|
"loss": 0.24356764554977417,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 2.522906793048973,
|
|
"grad_norm": 12.770819836355157,
|
|
"learning_rate": 7.533319602084321e-07,
|
|
"loss": 0.9563419818878174,
|
|
"step": 3194
|
|
},
|
|
{
|
|
"epoch": 2.523696682464455,
|
|
"grad_norm": 9.323681305876523,
|
|
"learning_rate": 7.50907906020929e-07,
|
|
"loss": 0.4368267059326172,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 2.5244865718799367,
|
|
"grad_norm": 12.737056250718048,
|
|
"learning_rate": 7.484874414804206e-07,
|
|
"loss": 0.8397213816642761,
|
|
"step": 3196
|
|
},
|
|
{
|
|
"epoch": 2.5252764612954186,
|
|
"grad_norm": 11.13269574815902,
|
|
"learning_rate": 7.460705686317205e-07,
|
|
"loss": 0.6406710743904114,
|
|
"step": 3197
|
|
},
|
|
{
|
|
"epoch": 2.5260663507109005,
|
|
"grad_norm": 15.377479065191766,
|
|
"learning_rate": 7.43657289516611e-07,
|
|
"loss": 0.5515921115875244,
|
|
"step": 3198
|
|
},
|
|
{
|
|
"epoch": 2.5268562401263823,
|
|
"grad_norm": 10.942333674945784,
|
|
"learning_rate": 7.412476061738405e-07,
|
|
"loss": 0.47541213035583496,
|
|
"step": 3199
|
|
},
|
|
{
|
|
"epoch": 2.527646129541864,
|
|
"grad_norm": 11.44586983157694,
|
|
"learning_rate": 7.388415206391164e-07,
|
|
"loss": 0.2634442150592804,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 2.528436018957346,
|
|
"grad_norm": 12.935860176811287,
|
|
"learning_rate": 7.364390349451073e-07,
|
|
"loss": 1.6421760320663452,
|
|
"step": 3201
|
|
},
|
|
{
|
|
"epoch": 2.529225908372828,
|
|
"grad_norm": 6.618453794857769,
|
|
"learning_rate": 7.340401511214418e-07,
|
|
"loss": 0.18729032576084137,
|
|
"step": 3202
|
|
},
|
|
{
|
|
"epoch": 2.5300157977883098,
|
|
"grad_norm": 21.158575104978276,
|
|
"learning_rate": 7.316448711947038e-07,
|
|
"loss": 0.6561132073402405,
|
|
"step": 3203
|
|
},
|
|
{
|
|
"epoch": 2.5308056872037916,
|
|
"grad_norm": 17.41414326406974,
|
|
"learning_rate": 7.292531971884348e-07,
|
|
"loss": 1.311091423034668,
|
|
"step": 3204
|
|
},
|
|
{
|
|
"epoch": 2.5315955766192735,
|
|
"grad_norm": 9.054118102670634,
|
|
"learning_rate": 7.268651311231278e-07,
|
|
"loss": 0.37653642892837524,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 2.5323854660347553,
|
|
"grad_norm": 7.875373579401623,
|
|
"learning_rate": 7.244806750162298e-07,
|
|
"loss": 0.13674522936344147,
|
|
"step": 3206
|
|
},
|
|
{
|
|
"epoch": 2.533175355450237,
|
|
"grad_norm": 7.790372139492247,
|
|
"learning_rate": 7.22099830882137e-07,
|
|
"loss": 0.37409287691116333,
|
|
"step": 3207
|
|
},
|
|
{
|
|
"epoch": 2.533965244865719,
|
|
"grad_norm": 21.5175970885638,
|
|
"learning_rate": 7.197226007321939e-07,
|
|
"loss": 0.5270158052444458,
|
|
"step": 3208
|
|
},
|
|
{
|
|
"epoch": 2.5347551342812005,
|
|
"grad_norm": 9.240238834016377,
|
|
"learning_rate": 7.173489865746924e-07,
|
|
"loss": 0.57960045337677,
|
|
"step": 3209
|
|
},
|
|
{
|
|
"epoch": 2.5355450236966823,
|
|
"grad_norm": 17.71089132438801,
|
|
"learning_rate": 7.149789904148696e-07,
|
|
"loss": 0.9655189514160156,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 2.536334913112164,
|
|
"grad_norm": 7.9060126053659685,
|
|
"learning_rate": 7.126126142549067e-07,
|
|
"loss": 0.8784974217414856,
|
|
"step": 3211
|
|
},
|
|
{
|
|
"epoch": 2.537124802527646,
|
|
"grad_norm": 15.460935801679469,
|
|
"learning_rate": 7.102498600939256e-07,
|
|
"loss": 0.9071961641311646,
|
|
"step": 3212
|
|
},
|
|
{
|
|
"epoch": 2.537914691943128,
|
|
"grad_norm": 9.491362341174666,
|
|
"learning_rate": 7.078907299279886e-07,
|
|
"loss": 0.99891197681427,
|
|
"step": 3213
|
|
},
|
|
{
|
|
"epoch": 2.5387045813586098,
|
|
"grad_norm": 13.553645359898077,
|
|
"learning_rate": 7.05535225750097e-07,
|
|
"loss": 0.4606255292892456,
|
|
"step": 3214
|
|
},
|
|
{
|
|
"epoch": 2.5394944707740916,
|
|
"grad_norm": 11.434760827242846,
|
|
"learning_rate": 7.031833495501878e-07,
|
|
"loss": 0.38194912672042847,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 2.5402843601895735,
|
|
"grad_norm": 8.816989170036669,
|
|
"learning_rate": 7.008351033151345e-07,
|
|
"loss": 0.5898439884185791,
|
|
"step": 3216
|
|
},
|
|
{
|
|
"epoch": 2.5410742496050553,
|
|
"grad_norm": 8.941678431636664,
|
|
"learning_rate": 6.984904890287419e-07,
|
|
"loss": 0.6821322441101074,
|
|
"step": 3217
|
|
},
|
|
{
|
|
"epoch": 2.541864139020537,
|
|
"grad_norm": 11.879397851452923,
|
|
"learning_rate": 6.961495086717518e-07,
|
|
"loss": 0.30271491408348083,
|
|
"step": 3218
|
|
},
|
|
{
|
|
"epoch": 2.542654028436019,
|
|
"grad_norm": 11.904426225856726,
|
|
"learning_rate": 6.938121642218277e-07,
|
|
"loss": 0.8798356056213379,
|
|
"step": 3219
|
|
},
|
|
{
|
|
"epoch": 2.543443917851501,
|
|
"grad_norm": 9.833714180030546,
|
|
"learning_rate": 6.914784576535671e-07,
|
|
"loss": 0.4576849341392517,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 2.544233807266983,
|
|
"grad_norm": 7.60568754755486,
|
|
"learning_rate": 6.891483909384927e-07,
|
|
"loss": 0.39021506905555725,
|
|
"step": 3221
|
|
},
|
|
{
|
|
"epoch": 2.545023696682464,
|
|
"grad_norm": 13.385710625425459,
|
|
"learning_rate": 6.868219660450542e-07,
|
|
"loss": 0.6809737682342529,
|
|
"step": 3222
|
|
},
|
|
{
|
|
"epoch": 2.545813586097946,
|
|
"grad_norm": 14.296223483394565,
|
|
"learning_rate": 6.844991849386234e-07,
|
|
"loss": 0.354898601770401,
|
|
"step": 3223
|
|
},
|
|
{
|
|
"epoch": 2.546603475513428,
|
|
"grad_norm": 7.507430954335532,
|
|
"learning_rate": 6.821800495814906e-07,
|
|
"loss": 0.3235066831111908,
|
|
"step": 3224
|
|
},
|
|
{
|
|
"epoch": 2.5473933649289098,
|
|
"grad_norm": 8.831847194569248,
|
|
"learning_rate": 6.798645619328709e-07,
|
|
"loss": 0.6501250267028809,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 2.5481832543443916,
|
|
"grad_norm": 11.08888965436733,
|
|
"learning_rate": 6.775527239488939e-07,
|
|
"loss": 0.3581928312778473,
|
|
"step": 3226
|
|
},
|
|
{
|
|
"epoch": 2.5489731437598735,
|
|
"grad_norm": 14.993261176629105,
|
|
"learning_rate": 6.752445375826111e-07,
|
|
"loss": 0.47264599800109863,
|
|
"step": 3227
|
|
},
|
|
{
|
|
"epoch": 2.5497630331753554,
|
|
"grad_norm": 12.12552691044683,
|
|
"learning_rate": 6.729400047839834e-07,
|
|
"loss": 1.0418339967727661,
|
|
"step": 3228
|
|
},
|
|
{
|
|
"epoch": 2.550552922590837,
|
|
"grad_norm": 9.816976412231398,
|
|
"learning_rate": 6.706391274998908e-07,
|
|
"loss": 0.40765923261642456,
|
|
"step": 3229
|
|
},
|
|
{
|
|
"epoch": 2.551342812006319,
|
|
"grad_norm": 8.814268750339222,
|
|
"learning_rate": 6.683419076741166e-07,
|
|
"loss": 0.659870982170105,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 2.552132701421801,
|
|
"grad_norm": 12.387930895767527,
|
|
"learning_rate": 6.660483472473644e-07,
|
|
"loss": 0.6081492900848389,
|
|
"step": 3231
|
|
},
|
|
{
|
|
"epoch": 2.552922590837283,
|
|
"grad_norm": 17.021655475649457,
|
|
"learning_rate": 6.637584481572407e-07,
|
|
"loss": 0.8503941297531128,
|
|
"step": 3232
|
|
},
|
|
{
|
|
"epoch": 2.5537124802527646,
|
|
"grad_norm": 10.612469129748503,
|
|
"learning_rate": 6.614722123382583e-07,
|
|
"loss": 0.5053238868713379,
|
|
"step": 3233
|
|
},
|
|
{
|
|
"epoch": 2.5545023696682465,
|
|
"grad_norm": 11.199796923866582,
|
|
"learning_rate": 6.591896417218391e-07,
|
|
"loss": 0.5718584656715393,
|
|
"step": 3234
|
|
},
|
|
{
|
|
"epoch": 2.5552922590837284,
|
|
"grad_norm": 24.003473547596283,
|
|
"learning_rate": 6.569107382363027e-07,
|
|
"loss": 0.8553175330162048,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 2.5560821484992102,
|
|
"grad_norm": 8.724785188614117,
|
|
"learning_rate": 6.546355038068774e-07,
|
|
"loss": 0.2981413006782532,
|
|
"step": 3236
|
|
},
|
|
{
|
|
"epoch": 2.556872037914692,
|
|
"grad_norm": 8.108209067959447,
|
|
"learning_rate": 6.523639403556875e-07,
|
|
"loss": 0.27729976177215576,
|
|
"step": 3237
|
|
},
|
|
{
|
|
"epoch": 2.557661927330174,
|
|
"grad_norm": 7.579505693226836,
|
|
"learning_rate": 6.500960498017578e-07,
|
|
"loss": 0.3648611903190613,
|
|
"step": 3238
|
|
},
|
|
{
|
|
"epoch": 2.558451816745656,
|
|
"grad_norm": 13.670618984625548,
|
|
"learning_rate": 6.478318340610091e-07,
|
|
"loss": 0.4756515324115753,
|
|
"step": 3239
|
|
},
|
|
{
|
|
"epoch": 2.5592417061611377,
|
|
"grad_norm": 16.165515463439487,
|
|
"learning_rate": 6.45571295046259e-07,
|
|
"loss": 0.6007115840911865,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 2.5600315955766195,
|
|
"grad_norm": 9.184975673193794,
|
|
"learning_rate": 6.433144346672177e-07,
|
|
"loss": 0.22796334326267242,
|
|
"step": 3241
|
|
},
|
|
{
|
|
"epoch": 2.5608214849921014,
|
|
"grad_norm": 11.242675742774068,
|
|
"learning_rate": 6.410612548304884e-07,
|
|
"loss": 0.5558523535728455,
|
|
"step": 3242
|
|
},
|
|
{
|
|
"epoch": 2.561611374407583,
|
|
"grad_norm": 17.912335926244754,
|
|
"learning_rate": 6.388117574395652e-07,
|
|
"loss": 0.45684516429901123,
|
|
"step": 3243
|
|
},
|
|
{
|
|
"epoch": 2.5624012638230647,
|
|
"grad_norm": 6.42831978379293,
|
|
"learning_rate": 6.365659443948307e-07,
|
|
"loss": 0.37593698501586914,
|
|
"step": 3244
|
|
},
|
|
{
|
|
"epoch": 2.5631911532385465,
|
|
"grad_norm": 14.380203287837352,
|
|
"learning_rate": 6.343238175935551e-07,
|
|
"loss": 1.0076820850372314,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 2.5639810426540284,
|
|
"grad_norm": 11.649489711574187,
|
|
"learning_rate": 6.320853789298942e-07,
|
|
"loss": 0.6352476477622986,
|
|
"step": 3246
|
|
},
|
|
{
|
|
"epoch": 2.5647709320695102,
|
|
"grad_norm": 11.141609677532234,
|
|
"learning_rate": 6.298506302948886e-07,
|
|
"loss": 0.4481988549232483,
|
|
"step": 3247
|
|
},
|
|
{
|
|
"epoch": 2.565560821484992,
|
|
"grad_norm": 11.697367831752132,
|
|
"learning_rate": 6.276195735764617e-07,
|
|
"loss": 0.5129117965698242,
|
|
"step": 3248
|
|
},
|
|
{
|
|
"epoch": 2.566350710900474,
|
|
"grad_norm": 10.308816622041837,
|
|
"learning_rate": 6.253922106594162e-07,
|
|
"loss": 0.6752769351005554,
|
|
"step": 3249
|
|
},
|
|
{
|
|
"epoch": 2.567140600315956,
|
|
"grad_norm": 10.354709362911104,
|
|
"learning_rate": 6.231685434254375e-07,
|
|
"loss": 0.22525343298912048,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 2.5679304897314377,
|
|
"grad_norm": 11.05644990012399,
|
|
"learning_rate": 6.209485737530873e-07,
|
|
"loss": 0.35317540168762207,
|
|
"step": 3251
|
|
},
|
|
{
|
|
"epoch": 2.5687203791469195,
|
|
"grad_norm": 6.477124764043402,
|
|
"learning_rate": 6.187323035178012e-07,
|
|
"loss": 0.22543203830718994,
|
|
"step": 3252
|
|
},
|
|
{
|
|
"epoch": 2.5695102685624014,
|
|
"grad_norm": 14.30255153474572,
|
|
"learning_rate": 6.165197345918927e-07,
|
|
"loss": 0.46911317110061646,
"step": 3253
},
{
"epoch": 2.5703001579778832,
"grad_norm": 9.552175646905189,
"learning_rate": 6.14310868844546e-07,
"loss": 0.33642643690109253,
"step": 3254
},
{
"epoch": 2.5710900473933647,
"grad_norm": 6.838188468089331,
"learning_rate": 6.121057081418202e-07,
"loss": 0.33152109384536743,
"step": 3255
},
{
"epoch": 2.5718799368088465,
"grad_norm": 10.0723559433558,
|
|
"learning_rate": 6.099042543466427e-07,
|
|
"loss": 0.4825100898742676,
|
|
"step": 3256
|
|
},
|
|
{
|
|
"epoch": 2.5726698262243284,
|
|
"grad_norm": 9.82952412822834,
|
|
"learning_rate": 6.077065093188062e-07,
|
|
"loss": 0.3012405037879944,
|
|
"step": 3257
|
|
},
|
|
{
|
|
"epoch": 2.5734597156398102,
|
|
"grad_norm": 9.586040497432494,
|
|
"learning_rate": 6.055124749149738e-07,
|
|
"loss": 0.23892341554164886,
|
|
"step": 3258
|
|
},
|
|
{
|
|
"epoch": 2.574249605055292,
|
|
"grad_norm": 12.603578521658129,
|
|
"learning_rate": 6.033221529886745e-07,
|
|
"loss": 0.6413030624389648,
|
|
"step": 3259
|
|
},
|
|
{
|
|
"epoch": 2.575039494470774,
|
|
"grad_norm": 8.672198769457234,
|
|
"learning_rate": 6.011355453902984e-07,
|
|
"loss": 0.3947061598300934,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 2.575829383886256,
|
|
"grad_norm": 15.44081680860732,
|
|
"learning_rate": 5.989526539670992e-07,
|
|
"loss": 1.9373308420181274,
|
|
"step": 3261
|
|
},
|
|
{
|
|
"epoch": 2.5766192733017377,
|
|
"grad_norm": 15.004738901323284,
|
|
"learning_rate": 5.967734805631913e-07,
|
|
"loss": 0.90089350938797,
|
|
"step": 3262
|
|
},
|
|
{
|
|
"epoch": 2.5774091627172195,
|
|
"grad_norm": 11.506465084275762,
|
|
"learning_rate": 5.945980270195451e-07,
|
|
"loss": 0.742828369140625,
|
|
"step": 3263
|
|
},
|
|
{
|
|
"epoch": 2.5781990521327014,
|
|
"grad_norm": 13.32230582597808,
|
|
"learning_rate": 5.924262951739929e-07,
|
|
"loss": 0.24065065383911133,
|
|
"step": 3264
|
|
},
|
|
{
|
|
"epoch": 2.5789889415481833,
|
|
"grad_norm": 9.779711772862912,
|
|
"learning_rate": 5.902582868612211e-07,
|
|
"loss": 0.5692986249923706,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 2.579778830963665,
|
|
"grad_norm": 8.091630298353223,
|
|
"learning_rate": 5.880940039127703e-07,
|
|
"loss": 0.3704443573951721,
|
|
"step": 3266
|
|
},
|
|
{
|
|
"epoch": 2.580568720379147,
|
|
"grad_norm": 10.939765166542545,
|
|
"learning_rate": 5.859334481570328e-07,
|
|
"loss": 0.5513951778411865,
|
|
"step": 3267
|
|
},
|
|
{
|
|
"epoch": 2.581358609794629,
|
|
"grad_norm": 12.859593353523778,
|
|
"learning_rate": 5.837766214192536e-07,
|
|
"loss": 0.3680616319179535,
|
|
"step": 3268
|
|
},
|
|
{
|
|
"epoch": 2.5821484992101107,
|
|
"grad_norm": 13.927295492601075,
|
|
"learning_rate": 5.816235255215275e-07,
|
|
"loss": 0.25186580419540405,
|
|
"step": 3269
|
|
},
|
|
{
|
|
"epoch": 2.5829383886255926,
|
|
"grad_norm": 9.056287052188528,
|
|
"learning_rate": 5.794741622827966e-07,
|
|
"loss": 0.361020565032959,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 2.5837282780410744,
|
|
"grad_norm": 9.11489390301563,
|
|
"learning_rate": 5.773285335188499e-07,
|
|
"loss": 0.567996084690094,
|
|
"step": 3271
|
|
},
|
|
{
|
|
"epoch": 2.5845181674565563,
|
|
"grad_norm": 7.932039650484051,
|
|
"learning_rate": 5.751866410423224e-07,
|
|
"loss": 0.4094735085964203,
|
|
"step": 3272
|
|
},
|
|
{
|
|
"epoch": 2.585308056872038,
|
|
"grad_norm": 11.984895953206886,
|
|
"learning_rate": 5.730484866626912e-07,
|
|
"loss": 0.30167537927627563,
|
|
"step": 3273
|
|
},
|
|
{
|
|
"epoch": 2.58609794628752,
|
|
"grad_norm": 9.289484227401946,
|
|
"learning_rate": 5.70914072186276e-07,
|
|
"loss": 0.32061922550201416,
|
|
"step": 3274
|
|
},
|
|
{
|
|
"epoch": 2.586887835703002,
|
|
"grad_norm": 9.622010065894505,
|
|
"learning_rate": 5.687833994162378e-07,
|
|
"loss": 0.32422295212745667,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 2.5876777251184833,
|
|
"grad_norm": 14.227211464394562,
|
|
"learning_rate": 5.666564701525762e-07,
|
|
"loss": 0.27812737226486206,
|
|
"step": 3276
|
|
},
|
|
{
|
|
"epoch": 2.588467614533965,
|
|
"grad_norm": 10.998826927820959,
|
|
"learning_rate": 5.64533286192126e-07,
|
|
"loss": 0.4728177785873413,
|
|
"step": 3277
|
|
},
|
|
{
|
|
"epoch": 2.589257503949447,
|
|
"grad_norm": 13.428356160162433,
|
|
"learning_rate": 5.624138493285636e-07,
|
|
"loss": 0.36791884899139404,
|
|
"step": 3278
|
|
},
|
|
{
|
|
"epoch": 2.590047393364929,
|
|
"grad_norm": 10.44768694966881,
|
|
"learning_rate": 5.602981613523933e-07,
|
|
"loss": 0.5437135100364685,
|
|
"step": 3279
|
|
},
|
|
{
|
|
"epoch": 2.5908372827804107,
|
|
"grad_norm": 8.729760092755804,
|
|
"learning_rate": 5.581862240509561e-07,
|
|
"loss": 0.3248332440853119,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 2.5916271721958926,
|
|
"grad_norm": 7.7718729951563725,
|
|
"learning_rate": 5.560780392084236e-07,
|
|
"loss": 0.30842357873916626,
|
|
"step": 3281
|
|
},
|
|
{
|
|
"epoch": 2.5924170616113744,
|
|
"grad_norm": 8.64870834436161,
|
|
"learning_rate": 5.539736086057968e-07,
|
|
"loss": 0.5332027673721313,
|
|
"step": 3282
|
|
},
|
|
{
|
|
"epoch": 2.5932069510268563,
|
|
"grad_norm": 12.471905432522792,
|
|
"learning_rate": 5.518729340209067e-07,
|
|
"loss": 0.3546110689640045,
|
|
"step": 3283
|
|
},
|
|
{
|
|
"epoch": 2.593996840442338,
|
|
"grad_norm": 10.520884370675867,
|
|
"learning_rate": 5.497760172284105e-07,
|
|
"loss": 0.5727818012237549,
|
|
"step": 3284
|
|
},
|
|
{
|
|
"epoch": 2.59478672985782,
|
|
"grad_norm": 21.842408865407872,
|
|
"learning_rate": 5.476828599997891e-07,
|
|
"loss": 0.9324047565460205,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 2.595576619273302,
|
|
"grad_norm": 15.695330799147337,
|
|
"learning_rate": 5.455934641033473e-07,
|
|
"loss": 0.7432706952095032,
|
|
"step": 3286
|
|
},
|
|
{
|
|
"epoch": 2.5963665086887837,
|
|
"grad_norm": 8.672936031824056,
|
|
"learning_rate": 5.43507831304217e-07,
|
|
"loss": 0.32435929775238037,
|
|
"step": 3287
|
|
},
|
|
{
|
|
"epoch": 2.597156398104265,
|
|
"grad_norm": 12.82117590286232,
|
|
"learning_rate": 5.414259633643454e-07,
|
|
"loss": 0.31739306449890137,
|
|
"step": 3288
|
|
},
|
|
{
|
|
"epoch": 2.597946287519747,
|
|
"grad_norm": 8.894198702503063,
|
|
"learning_rate": 5.39347862042503e-07,
|
|
"loss": 0.23482373356819153,
|
|
"step": 3289
|
|
},
|
|
{
|
|
"epoch": 2.598736176935229,
|
|
"grad_norm": 9.107176351218104,
|
|
"learning_rate": 5.372735290942749e-07,
|
|
"loss": 0.37216585874557495,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 2.5995260663507107,
|
|
"grad_norm": 12.065946085010948,
|
|
"learning_rate": 5.352029662720643e-07,
|
|
"loss": 0.45027846097946167,
|
|
"step": 3291
|
|
},
|
|
{
|
|
"epoch": 2.6003159557661926,
|
|
"grad_norm": 9.353380690591916,
|
|
"learning_rate": 5.331361753250908e-07,
|
|
"loss": 0.5409231781959534,
|
|
"step": 3292
|
|
},
|
|
{
|
|
"epoch": 2.6011058451816744,
|
|
"grad_norm": 9.465254647955879,
|
|
"learning_rate": 5.31073157999386e-07,
|
|
"loss": 0.30727618932724,
|
|
"step": 3293
|
|
},
|
|
{
|
|
"epoch": 2.6018957345971563,
|
|
"grad_norm": 9.607201625715247,
|
|
"learning_rate": 5.290139160377944e-07,
|
|
"loss": 0.31384027004241943,
|
|
"step": 3294
|
|
},
|
|
{
|
|
"epoch": 2.602685624012638,
|
|
"grad_norm": 14.736505951040836,
|
|
"learning_rate": 5.269584511799674e-07,
|
|
"loss": 0.9769009351730347,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 2.60347551342812,
|
|
"grad_norm": 11.551220516709076,
|
|
"learning_rate": 5.249067651623713e-07,
|
|
"loss": 0.4081469178199768,
|
|
"step": 3296
|
|
},
|
|
{
|
|
"epoch": 2.604265402843602,
|
|
"grad_norm": 11.149694766419422,
|
|
"learning_rate": 5.228588597182771e-07,
|
|
"loss": 0.31222042441368103,
|
|
"step": 3297
|
|
},
|
|
{
|
|
"epoch": 2.6050552922590837,
|
|
"grad_norm": 12.284974464013962,
|
|
"learning_rate": 5.208147365777605e-07,
|
|
"loss": 0.3729371428489685,
|
|
"step": 3298
|
|
},
|
|
{
|
|
"epoch": 2.6058451816745656,
|
|
"grad_norm": 6.66143671120458,
|
|
"learning_rate": 5.187743974677051e-07,
|
|
"loss": 0.23765571415424347,
|
|
"step": 3299
|
|
},
|
|
{
|
|
"epoch": 2.6066350710900474,
|
|
"grad_norm": 10.189036379966337,
|
|
"learning_rate": 5.167378441117948e-07,
|
|
"loss": 0.5407176613807678,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 2.6074249605055293,
|
|
"grad_norm": 10.508990210906765,
|
|
"learning_rate": 5.147050782305174e-07,
|
|
"loss": 0.4038906693458557,
|
|
"step": 3301
|
|
},
|
|
{
|
|
"epoch": 2.608214849921011,
|
|
"grad_norm": 10.671951482144292,
|
|
"learning_rate": 5.126761015411602e-07,
|
|
"loss": 0.3577304482460022,
|
|
"step": 3302
|
|
},
|
|
{
|
|
"epoch": 2.609004739336493,
|
|
"grad_norm": 16.67545595391455,
|
|
"learning_rate": 5.106509157578088e-07,
|
|
"loss": 0.6719971895217896,
|
|
"step": 3303
|
|
},
|
|
{
|
|
"epoch": 2.609794628751975,
|
|
"grad_norm": 8.313398028205375,
|
|
"learning_rate": 5.086295225913468e-07,
|
|
"loss": 0.417365163564682,
|
|
"step": 3304
|
|
},
|
|
{
|
|
"epoch": 2.6105845181674567,
|
|
"grad_norm": 6.802692561793048,
|
|
"learning_rate": 5.066119237494543e-07,
|
|
"loss": 0.20259469747543335,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 2.6113744075829386,
|
|
"grad_norm": 14.896512148391754,
|
|
"learning_rate": 5.045981209366058e-07,
|
|
"loss": 0.6620730757713318,
|
|
"step": 3306
|
|
},
|
|
{
|
|
"epoch": 2.6121642969984205,
|
|
"grad_norm": 10.671850166031216,
|
|
"learning_rate": 5.025881158540674e-07,
|
|
"loss": 0.6920949220657349,
|
|
"step": 3307
|
|
},
|
|
{
|
|
"epoch": 2.6129541864139023,
|
|
"grad_norm": 9.483878722714492,
|
|
"learning_rate": 5.005819101998993e-07,
|
|
"loss": 0.24593792855739594,
|
|
"step": 3308
|
|
},
|
|
{
|
|
"epoch": 2.6137440758293837,
|
|
"grad_norm": 12.335044317508292,
|
|
"learning_rate": 4.985795056689496e-07,
|
|
"loss": 0.38339120149612427,
|
|
"step": 3309
|
|
},
|
|
{
|
|
"epoch": 2.6145339652448656,
|
|
"grad_norm": 11.267803688952569,
|
|
"learning_rate": 4.965809039528557e-07,
|
|
"loss": 0.7271929383277893,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 2.6153238546603474,
|
|
"grad_norm": 9.841740710130463,
|
|
"learning_rate": 4.945861067400459e-07,
|
|
"loss": 0.33051010966300964,
|
|
"step": 3311
|
|
},
|
|
{
|
|
"epoch": 2.6161137440758293,
|
|
"grad_norm": 8.575258139149547,
|
|
"learning_rate": 4.925951157157282e-07,
|
|
"loss": 0.40669572353363037,
|
|
"step": 3312
|
|
},
|
|
{
|
|
"epoch": 2.616903633491311,
|
|
"grad_norm": 14.064126439379152,
|
|
"learning_rate": 4.906079325618995e-07,
|
|
"loss": 0.4142283797264099,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 2.617693522906793,
|
|
"grad_norm": 10.926203888261908,
|
|
"learning_rate": 4.886245589573379e-07,
|
|
"loss": 0.3864701986312866,
|
|
"step": 3314
|
|
},
|
|
{
|
|
"epoch": 2.618483412322275,
|
|
"grad_norm": 8.095460272657196,
|
|
"learning_rate": 4.86644996577606e-07,
|
|
"loss": 0.6239743828773499,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 2.6192733017377567,
|
|
"grad_norm": 9.5988025752161,
|
|
"learning_rate": 4.846692470950442e-07,
|
|
"loss": 0.44168534874916077,
|
|
"step": 3316
|
|
},
|
|
{
|
|
"epoch": 2.6200631911532386,
|
|
"grad_norm": 12.25520966680489,
|
|
"learning_rate": 4.826973121787704e-07,
|
|
"loss": 0.7248414754867554,
|
|
"step": 3317
|
|
},
|
|
{
|
|
"epoch": 2.6208530805687205,
|
|
"grad_norm": 11.504118164211533,
|
|
"learning_rate": 4.807291934946828e-07,
|
|
"loss": 0.388072669506073,
|
|
"step": 3318
|
|
},
|
|
{
|
|
"epoch": 2.6216429699842023,
|
|
"grad_norm": 11.714875357297148,
|
|
"learning_rate": 4.787648927054534e-07,
|
|
"loss": 0.31926900148391724,
|
|
"step": 3319
|
|
},
|
|
{
|
|
"epoch": 2.622432859399684,
|
|
"grad_norm": 9.583077187413663,
|
|
"learning_rate": 4.7680441147053225e-07,
|
|
"loss": 0.25406989455223083,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 2.623222748815166,
|
|
"grad_norm": 12.602149801934326,
|
|
"learning_rate": 4.748477514461386e-07,
|
|
"loss": 1.1567286252975464,
|
|
"step": 3321
|
|
},
|
|
{
|
|
"epoch": 2.6240126382306475,
|
|
"grad_norm": 12.380509221098798,
|
|
"learning_rate": 4.728949142852668e-07,
|
|
"loss": 0.2009027898311615,
|
|
"step": 3322
|
|
},
|
|
{
|
|
"epoch": 2.6248025276461293,
|
|
"grad_norm": 37.105558180160095,
|
|
"learning_rate": 4.709459016376777e-07,
|
|
"loss": 0.7523494362831116,
|
|
"step": 3323
|
|
},
|
|
{
|
|
"epoch": 2.625592417061611,
|
|
"grad_norm": 8.134017867193721,
|
|
"learning_rate": 4.6900071514990543e-07,
|
|
"loss": 0.2318010777235031,
|
|
"step": 3324
|
|
},
|
|
{
|
|
"epoch": 2.626382306477093,
|
|
"grad_norm": 8.83131524505039,
|
|
"learning_rate": 4.670593564652498e-07,
|
|
"loss": 0.43371960520744324,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 2.627172195892575,
|
|
"grad_norm": 10.963397183621947,
|
|
"learning_rate": 4.6512182722377677e-07,
|
|
"loss": 0.36909347772598267,
|
|
"step": 3326
|
|
},
|
|
{
|
|
"epoch": 2.6279620853080567,
|
|
"grad_norm": 12.084093646962547,
|
|
"learning_rate": 4.63188129062318e-07,
|
|
"loss": 0.388027161359787,
|
|
"step": 3327
|
|
},
|
|
{
|
|
"epoch": 2.6287519747235386,
|
|
"grad_norm": 9.478263087592277,
|
|
"learning_rate": 4.6125826361446633e-07,
|
|
"loss": 0.3623710870742798,
|
|
"step": 3328
|
|
},
|
|
{
|
|
"epoch": 2.6295418641390205,
|
|
"grad_norm": 18.041943384926824,
|
|
"learning_rate": 4.593322325105798e-07,
|
|
"loss": 2.725191831588745,
|
|
"step": 3329
|
|
},
|
|
{
|
|
"epoch": 2.6303317535545023,
|
|
"grad_norm": 10.053745549005518,
|
|
"learning_rate": 4.574100373777762e-07,
|
|
"loss": 0.9186097383499146,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 2.631121642969984,
|
|
"grad_norm": 16.56032999257091,
|
|
"learning_rate": 4.554916798399311e-07,
|
|
"loss": 0.7610374689102173,
|
|
"step": 3331
|
|
},
|
|
{
|
|
"epoch": 2.631911532385466,
|
|
"grad_norm": 9.799384700311915,
|
|
"learning_rate": 4.5357716151768037e-07,
|
|
"loss": 0.22280161082744598,
|
|
"step": 3332
|
|
},
|
|
{
|
|
"epoch": 2.632701421800948,
|
|
"grad_norm": 8.292734812546973,
|
|
"learning_rate": 4.5166648402841464e-07,
|
|
"loss": 0.3859997093677521,
|
|
"step": 3333
|
|
},
|
|
{
|
|
"epoch": 2.6334913112164298,
|
|
"grad_norm": 8.969496231310965,
|
|
"learning_rate": 4.49759648986281e-07,
|
|
"loss": 1.129380226135254,
|
|
"step": 3334
|
|
},
|
|
{
|
|
"epoch": 2.6342812006319116,
|
|
"grad_norm": 14.025626351449633,
|
|
"learning_rate": 4.4785665800217925e-07,
|
|
"loss": 0.7709635496139526,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 2.6350710900473935,
|
|
"grad_norm": 12.132670588201732,
|
|
"learning_rate": 4.459575126837634e-07,
|
|
"loss": 0.31990846991539,
|
|
"step": 3336
|
|
},
|
|
{
|
|
"epoch": 2.6358609794628753,
|
|
"grad_norm": 12.500867440865886,
|
|
"learning_rate": 4.440622146354373e-07,
|
|
"loss": 0.7797756195068359,
|
|
"step": 3337
|
|
},
|
|
{
|
|
"epoch": 2.636650868878357,
|
|
"grad_norm": 13.48812236217527,
|
|
"learning_rate": 4.421707654583546e-07,
|
|
"loss": 0.7479414939880371,
|
|
"step": 3338
|
|
},
|
|
{
|
|
"epoch": 2.637440758293839,
|
|
"grad_norm": 19.89516186489078,
|
|
"learning_rate": 4.402831667504187e-07,
|
|
"loss": 0.7601022720336914,
|
|
"step": 3339
|
|
},
|
|
{
|
|
"epoch": 2.638230647709321,
|
|
"grad_norm": 11.98723585736858,
|
|
"learning_rate": 4.38399420106278e-07,
|
|
"loss": 0.5701296329498291,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 2.639020537124803,
|
|
"grad_norm": 5.891135933647762,
|
|
"learning_rate": 4.365195271173289e-07,
|
|
"loss": 0.18816259503364563,
|
|
"step": 3341
|
|
},
|
|
{
|
|
"epoch": 2.639810426540284,
|
|
"grad_norm": 14.919973147368829,
|
|
"learning_rate": 4.3464348937170996e-07,
|
|
"loss": 0.6145678162574768,
|
|
"step": 3342
|
|
},
|
|
{
|
|
"epoch": 2.640600315955766,
|
|
"grad_norm": 13.275675106320598,
|
|
"learning_rate": 4.327713084543056e-07,
|
|
"loss": 0.7048325538635254,
|
|
"step": 3343
|
|
},
|
|
{
|
|
"epoch": 2.641390205371248,
|
|
"grad_norm": 12.503928032714008,
|
|
"learning_rate": 4.3090298594674006e-07,
|
|
"loss": 0.3908374607563019,
|
|
"step": 3344
|
|
},
|
|
{
|
|
"epoch": 2.6421800947867298,
|
|
"grad_norm": 11.72770662305032,
|
|
"learning_rate": 4.290385234273775e-07,
|
|
"loss": 0.505962610244751,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 2.6429699842022116,
|
|
"grad_norm": 12.690173043224384,
|
|
"learning_rate": 4.2717792247132293e-07,
|
|
"loss": 0.4017457962036133,
|
|
"step": 3346
|
|
},
|
|
{
|
|
"epoch": 2.6437598736176935,
|
|
"grad_norm": 8.36874609608357,
|
|
"learning_rate": 4.253211846504163e-07,
|
|
"loss": 0.22178924083709717,
|
|
"step": 3347
|
|
},
|
|
{
|
|
"epoch": 2.6445497630331753,
|
|
"grad_norm": 10.103545343043429,
|
|
"learning_rate": 4.234683115332383e-07,
|
|
"loss": 0.2969557046890259,
|
|
"step": 3348
|
|
},
|
|
{
|
|
"epoch": 2.645339652448657,
|
|
"grad_norm": 13.225582438481474,
|
|
"learning_rate": 4.216193046851019e-07,
|
|
"loss": 0.37480732798576355,
|
|
"step": 3349
|
|
},
|
|
{
|
|
"epoch": 2.646129541864139,
|
|
"grad_norm": 10.84422500520785,
|
|
"learning_rate": 4.1977416566805264e-07,
|
|
"loss": 0.9092705845832825,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 2.646919431279621,
|
|
"grad_norm": 9.789088606323828,
|
|
"learning_rate": 4.179328960408696e-07,
|
|
"loss": 0.31875336170196533,
|
|
"step": 3351
|
|
},
|
|
{
|
|
"epoch": 2.647709320695103,
|
|
"grad_norm": 10.486891620887812,
|
|
"learning_rate": 4.160954973590664e-07,
|
|
"loss": 0.3058662414550781,
|
|
"step": 3352
|
|
},
|
|
{
|
|
"epoch": 2.6484992101105846,
|
|
"grad_norm": 13.733159335396302,
|
|
"learning_rate": 4.1426197117488134e-07,
|
|
"loss": 2.2363317012786865,
|
|
"step": 3353
|
|
},
|
|
{
|
|
"epoch": 2.6492890995260665,
|
|
"grad_norm": 6.946032041163559,
|
|
"learning_rate": 4.1243231903728363e-07,
|
|
"loss": 0.2704191505908966,
|
|
"step": 3354
|
|
},
|
|
{
|
|
"epoch": 2.650078988941548,
|
|
"grad_norm": 11.504621566189082,
|
|
"learning_rate": 4.106065424919703e-07,
|
|
"loss": 0.3017812967300415,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 2.65086887835703,
|
|
"grad_norm": 10.321032506154271,
|
|
"learning_rate": 4.087846430813613e-07,
|
|
"loss": 1.0040827989578247,
|
|
"step": 3356
|
|
},
|
|
{
|
|
"epoch": 2.6516587677725116,
|
|
"grad_norm": 11.294039143603817,
|
|
"learning_rate": 4.069666223446056e-07,
|
|
"loss": 0.4513833522796631,
|
|
"step": 3357
|
|
},
|
|
{
|
|
"epoch": 2.6524486571879935,
|
|
"grad_norm": 14.456974047266456,
|
|
"learning_rate": 4.051524818175723e-07,
|
|
"loss": 0.23808935284614563,
|
|
"step": 3358
|
|
},
|
|
{
|
|
"epoch": 2.6532385466034754,
|
|
"grad_norm": 7.5143794681952745,
|
|
"learning_rate": 4.033422230328526e-07,
|
|
"loss": 0.2904347777366638,
|
|
"step": 3359
|
|
},
|
|
{
|
|
"epoch": 2.654028436018957,
|
|
"grad_norm": 7.991560366796248,
|
|
"learning_rate": 4.0153584751976007e-07,
|
|
"loss": 0.4038187563419342,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 2.654818325434439,
|
|
"grad_norm": 9.497887673841467,
|
|
"learning_rate": 3.99733356804326e-07,
|
|
"loss": 0.31666648387908936,
|
|
"step": 3361
|
|
},
|
|
{
|
|
"epoch": 2.655608214849921,
|
|
"grad_norm": 10.401304732363979,
|
|
"learning_rate": 3.9793475240930077e-07,
|
|
"loss": 0.4911503493785858,
|
|
"step": 3362
|
|
},
|
|
{
|
|
"epoch": 2.656398104265403,
|
|
"grad_norm": 12.221183465688934,
|
|
"learning_rate": 3.9614003585415117e-07,
|
|
"loss": 0.8834859728813171,
|
|
"step": 3363
|
|
},
|
|
{
|
|
"epoch": 2.6571879936808847,
|
|
"grad_norm": 11.390696610946822,
|
|
"learning_rate": 3.943492086550599e-07,
|
|
"loss": 0.6366713047027588,
|
|
"step": 3364
|
|
},
|
|
{
|
|
"epoch": 2.6579778830963665,
|
|
"grad_norm": 15.422428455701008,
|
|
"learning_rate": 3.9256227232492337e-07,
|
|
"loss": 0.3374771773815155,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 2.6587677725118484,
|
|
"grad_norm": 9.583693820140757,
|
|
"learning_rate": 3.907792283733514e-07,
|
|
"loss": 0.7819290161132812,
|
|
"step": 3366
|
|
},
|
|
{
|
|
"epoch": 2.6595576619273302,
|
|
"grad_norm": 12.981199540184159,
|
|
"learning_rate": 3.8900007830666555e-07,
|
|
"loss": 0.5065968036651611,
|
|
"step": 3367
|
|
},
|
|
{
|
|
"epoch": 2.660347551342812,
|
|
"grad_norm": 7.236244572582725,
|
|
"learning_rate": 3.872248236278975e-07,
|
|
"loss": 0.6708056926727295,
|
|
"step": 3368
|
|
},
|
|
{
|
|
"epoch": 2.661137440758294,
|
|
"grad_norm": 8.694974340614756,
|
|
"learning_rate": 3.854534658367881e-07,
|
|
"loss": 0.26976072788238525,
|
|
"step": 3369
|
|
},
|
|
{
|
|
"epoch": 2.661927330173776,
|
|
"grad_norm": 11.235824129714906,
|
|
"learning_rate": 3.836860064297854e-07,
|
|
"loss": 0.3245980441570282,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 2.6627172195892577,
|
|
"grad_norm": 16.011419931261926,
|
|
"learning_rate": 3.819224469000482e-07,
|
|
"loss": 0.7259745597839355,
|
|
"step": 3371
|
|
},
|
|
{
|
|
"epoch": 2.6635071090047395,
|
|
"grad_norm": 19.22540678332816,
|
|
"learning_rate": 3.8016278873743375e-07,
|
|
"loss": 0.45876407623291016,
|
|
"step": 3372
|
|
},
|
|
{
|
|
"epoch": 2.6642969984202214,
|
|
"grad_norm": 8.780171480326908,
|
|
"learning_rate": 3.7840703342850893e-07,
|
|
"loss": 0.278285413980484,
|
|
"step": 3373
|
|
},
|
|
{
|
|
"epoch": 2.6650868878357032,
|
|
"grad_norm": 17.47655948792051,
|
|
"learning_rate": 3.766551824565406e-07,
|
|
"loss": 0.5746378898620605,
|
|
"step": 3374
|
|
},
|
|
{
|
|
"epoch": 2.665876777251185,
|
|
"grad_norm": 23.25805650807008,
|
|
"learning_rate": 3.7490723730149836e-07,
|
|
"loss": 0.507803201675415,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 2.6666666666666665,
|
|
"grad_norm": 9.306113512629594,
|
|
"learning_rate": 3.731631994400536e-07,
|
|
"loss": 0.18884757161140442,
|
|
"step": 3376
|
|
},
|
|
{
|
|
"epoch": 2.6674565560821484,
|
|
"grad_norm": 8.348124341525704,
|
|
"learning_rate": 3.7142307034557345e-07,
|
|
"loss": 0.29510441422462463,
|
|
"step": 3377
|
|
},
|
|
{
|
|
"epoch": 2.6682464454976302,
|
|
"grad_norm": 10.578489755562448,
|
|
"learning_rate": 3.696868514881258e-07,
|
|
"loss": 0.33782392740249634,
|
|
"step": 3378
|
|
},
|
|
{
|
|
"epoch": 2.669036334913112,
|
|
"grad_norm": 14.26143918553158,
|
|
"learning_rate": 3.679545443344723e-07,
|
|
"loss": 0.9008026719093323,
|
|
"step": 3379
|
|
},
|
|
{
|
|
"epoch": 2.669826224328594,
|
|
"grad_norm": 11.857039310446325,
|
|
"learning_rate": 3.662261503480741e-07,
|
|
"loss": 0.5593395233154297,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 2.670616113744076,
|
|
"grad_norm": 10.652642297722368,
|
|
"learning_rate": 3.6450167098908253e-07,
|
|
"loss": 0.31769973039627075,
|
|
"step": 3381
|
|
},
|
|
{
|
|
"epoch": 2.6714060031595577,
|
|
"grad_norm": 9.458143348474115,
|
|
"learning_rate": 3.6278110771434504e-07,
|
|
"loss": 0.2788546681404114,
|
|
"step": 3382
|
|
},
|
|
{
|
|
"epoch": 2.6721958925750395,
|
|
"grad_norm": 13.225514562775611,
|
|
"learning_rate": 3.610644619773973e-07,
|
|
"loss": 0.6898187398910522,
|
|
"step": 3383
|
|
},
|
|
{
|
|
"epoch": 2.6729857819905214,
|
|
"grad_norm": 10.118377023292018,
|
|
"learning_rate": 3.59351735228467e-07,
|
|
"loss": 0.2825648784637451,
|
|
"step": 3384
|
|
},
|
|
{
|
|
"epoch": 2.6737756714060033,
|
|
"grad_norm": 5.075483514495616,
|
|
"learning_rate": 3.576429289144734e-07,
|
|
"loss": 0.23227611184120178,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 2.674565560821485,
|
|
"grad_norm": 11.943311728775157,
|
|
"learning_rate": 3.559380444790206e-07,
|
|
"loss": 0.8256229758262634,
|
|
"step": 3386
|
|
},
|
|
{
|
|
"epoch": 2.675355450236967,
|
|
"grad_norm": 8.112570938757484,
|
|
"learning_rate": 3.54237083362402e-07,
|
|
"loss": 0.5945410132408142,
|
|
"step": 3387
|
|
},
|
|
{
|
|
"epoch": 2.6761453396524484,
|
|
"grad_norm": 15.634285640069708,
|
|
"learning_rate": 3.525400470015916e-07,
|
|
"loss": 0.6739033460617065,
|
|
"step": 3388
|
|
},
|
|
{
|
|
"epoch": 2.6769352290679302,
|
|
"grad_norm": 20.357153137616915,
|
|
"learning_rate": 3.508469368302542e-07,
|
|
"loss": 0.45265576243400574,
|
|
"step": 3389
|
|
},
|
|
{
|
|
"epoch": 2.677725118483412,
|
|
"grad_norm": 9.899660066154995,
|
|
"learning_rate": 3.4915775427873445e-07,
|
|
"loss": 0.4432603120803833,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 2.678515007898894,
|
|
"grad_norm": 8.138889689930242,
|
|
"learning_rate": 3.4747250077405925e-07,
|
|
"loss": 0.19528892636299133,
|
|
"step": 3391
|
|
},
|
|
{
|
|
"epoch": 2.679304897314376,
|
|
"grad_norm": 18.23444496802974,
|
|
"learning_rate": 3.4579117773993586e-07,
|
|
"loss": 0.5927311778068542,
|
|
"step": 3392
|
|
},
|
|
{
|
|
"epoch": 2.6800947867298577,
|
|
"grad_norm": 10.188403198684156,
|
|
"learning_rate": 3.4411378659675197e-07,
|
|
"loss": 0.2367173135280609,
|
|
"step": 3393
|
|
},
|
|
{
|
|
"epoch": 2.6808846761453395,
|
|
"grad_norm": 11.657906732572709,
|
|
"learning_rate": 3.424403287615724e-07,
|
|
"loss": 0.23523610830307007,
|
|
"step": 3394
|
|
},
|
|
{
|
|
"epoch": 2.6816745655608214,
|
|
"grad_norm": 15.05635601715627,
|
|
"learning_rate": 3.4077080564814126e-07,
|
|
"loss": 0.5322354435920715,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 2.6824644549763033,
|
|
"grad_norm": 12.057487107339211,
|
|
"learning_rate": 3.391052186668753e-07,
|
|
"loss": 0.7865498661994934,
|
|
"step": 3396
|
|
},
|
|
{
|
|
"epoch": 2.683254344391785,
|
|
"grad_norm": 8.968136456469345,
|
|
"learning_rate": 3.374435692248695e-07,
|
|
"loss": 0.2981482148170471,
|
|
"step": 3397
|
|
},
|
|
{
|
|
"epoch": 2.684044233807267,
|
|
"grad_norm": 7.657461394448841,
|
|
"learning_rate": 3.3578585872589e-07,
|
|
"loss": 0.39448219537734985,
|
|
"step": 3398
|
|
},
|
|
{
|
|
"epoch": 2.684834123222749,
|
|
"grad_norm": 13.924417235813834,
|
|
"learning_rate": 3.3413208857037636e-07,
|
|
"loss": 0.33557915687561035,
|
|
"step": 3399
|
|
},
|
|
{
|
|
"epoch": 2.6856240126382307,
|
|
"grad_norm": 8.80765991011228,
|
|
"learning_rate": 3.324822601554389e-07,
|
|
"loss": 0.4689452350139618,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 2.6864139020537126,
|
|
"grad_norm": 10.376386874091963,
|
|
"learning_rate": 3.308363748748583e-07,
|
|
"loss": 0.5030757188796997,
|
|
"step": 3401
|
|
},
|
|
{
|
|
"epoch": 2.6872037914691944,
|
|
"grad_norm": 7.506351165902702,
|
|
"learning_rate": 3.2919443411908335e-07,
|
|
"loss": 0.4964003562927246,
|
|
"step": 3402
|
|
},
|
|
{
|
|
"epoch": 2.6879936808846763,
|
|
"grad_norm": 8.750015580357847,
|
|
"learning_rate": 3.275564392752306e-07,
|
|
"loss": 0.6735177040100098,
|
|
"step": 3403
|
|
},
|
|
{
|
|
"epoch": 2.688783570300158,
|
|
"grad_norm": 8.400292003551172,
|
|
"learning_rate": 3.25922391727086e-07,
|
|
"loss": 0.5502939224243164,
|
|
"step": 3404
|
|
},
|
|
{
|
|
"epoch": 2.68957345971564,
|
|
"grad_norm": 17.77933866658459,
|
|
"learning_rate": 3.2429229285509565e-07,
|
|
"loss": 0.561028003692627,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 2.690363349131122,
|
|
"grad_norm": 8.989376021851474,
|
|
"learning_rate": 3.226661440363732e-07,
|
|
"loss": 0.4054949879646301,
|
|
"step": 3406
|
|
},
|
|
{
|
|
"epoch": 2.6911532385466037,
|
|
"grad_norm": 8.662923922835425,
|
|
"learning_rate": 3.210439466446941e-07,
|
|
"loss": 0.18396064639091492,
|
|
"step": 3407
|
|
},
|
|
{
|
|
"epoch": 2.6919431279620856,
|
|
"grad_norm": 13.879543447285586,
|
|
"learning_rate": 3.194257020504976e-07,
|
|
"loss": 0.49747684597969055,
|
|
"step": 3408
|
|
},
|
|
{
|
|
"epoch": 2.692733017377567,
|
|
"grad_norm": 10.218308650070451,
|
|
"learning_rate": 3.178114116208819e-07,
|
|
"loss": 0.2940269708633423,
|
|
"step": 3409
|
|
},
|
|
{
|
|
"epoch": 2.693522906793049,
|
|
"grad_norm": 11.95090160706378,
|
|
"learning_rate": 3.1620107671960274e-07,
|
|
"loss": 0.5844002366065979,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 2.6943127962085307,
|
|
"grad_norm": 12.15425735480325,
|
|
"learning_rate": 3.14594698707078e-07,
|
|
"loss": 0.3902283012866974,
|
|
"step": 3411
|
|
},
|
|
{
|
|
"epoch": 2.6951026856240126,
|
|
"grad_norm": 7.527465679424551,
|
|
"learning_rate": 3.1299227894038e-07,
|
|
"loss": 0.25770941376686096,
|
|
"step": 3412
|
|
},
|
|
{
|
|
"epoch": 2.6958925750394944,
|
|
"grad_norm": 19.33893813349674,
|
|
"learning_rate": 3.113938187732396e-07,
|
|
"loss": 0.4086335003376007,
|
|
"step": 3413
|
|
},
|
|
{
|
|
"epoch": 2.6966824644549763,
|
|
"grad_norm": 11.241807816989384,
|
|
"learning_rate": 3.097993195560406e-07,
|
|
"loss": 0.28467923402786255,
|
|
"step": 3414
|
|
},
|
|
{
|
|
"epoch": 2.697472353870458,
|
|
"grad_norm": 6.848242031731855,
|
|
"learning_rate": 3.082087826358221e-07,
|
|
"loss": 0.460833340883255,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 2.69826224328594,
|
|
"grad_norm": 13.881441071873525,
|
|
"learning_rate": 3.0662220935627264e-07,
|
|
"loss": 0.5943915843963623,
|
|
"step": 3416
|
|
},
|
|
{
|
|
"epoch": 2.699052132701422,
|
|
"grad_norm": 13.318701652240515,
|
|
"learning_rate": 3.0503960105773664e-07,
|
|
"loss": 0.3852251470088959,
|
|
"step": 3417
|
|
},
|
|
{
|
|
"epoch": 2.6998420221169037,
|
|
"grad_norm": 11.40666777900844,
|
|
"learning_rate": 3.034609590772064e-07,
|
|
"loss": 0.43630069494247437,
|
|
"step": 3418
|
|
},
|
|
{
|
|
"epoch": 2.7006319115323856,
|
|
"grad_norm": 17.06726177611801,
|
|
"learning_rate": 3.0188628474832283e-07,
|
|
"loss": 0.3444702625274658,
|
|
"step": 3419
|
|
},
|
|
{
|
|
"epoch": 2.7014218009478674,
|
|
"grad_norm": 11.317050973138297,
|
|
"learning_rate": 3.0031557940137846e-07,
|
|
"loss": 0.5595932006835938,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 2.7022116903633493,
|
|
"grad_norm": 6.595748201513425,
|
|
"learning_rate": 2.987488443633063e-07,
|
|
"loss": 0.3122694492340088,
|
|
"step": 3421
|
|
},
|
|
{
|
|
"epoch": 2.7030015797788307,
|
|
"grad_norm": 9.417440233234284,
|
|
"learning_rate": 2.971860809576926e-07,
|
|
"loss": 0.602641761302948,
|
|
"step": 3422
|
|
},
|
|
{
|
|
"epoch": 2.7037914691943126,
|
|
"grad_norm": 17.899529937935377,
|
|
"learning_rate": 2.956272905047641e-07,
|
|
"loss": 1.1811764240264893,
|
|
"step": 3423
|
|
},
|
|
{
|
|
"epoch": 2.7045813586097944,
|
|
"grad_norm": 21.27161915680074,
|
|
"learning_rate": 2.9407247432139184e-07,
|
|
"loss": 0.5429356694221497,
|
|
"step": 3424
|
|
},
|
|
{
|
|
"epoch": 2.7053712480252763,
|
|
"grad_norm": 14.45817636342026,
|
|
"learning_rate": 2.9252163372109013e-07,
|
|
"loss": 0.2626524567604065,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 2.706161137440758,
|
|
"grad_norm": 11.966013741915225,
|
|
"learning_rate": 2.9097477001401364e-07,
|
|
"loss": 0.3152087926864624,
|
|
"step": 3426
|
|
},
|
|
{
|
|
"epoch": 2.70695102685624,
|
|
"grad_norm": 9.01180406739463,
|
|
"learning_rate": 2.8943188450695824e-07,
|
|
"loss": 0.5591740608215332,
|
|
"step": 3427
|
|
},
|
|
{
|
|
"epoch": 2.707740916271722,
|
|
"grad_norm": 15.155219955926972,
|
|
"learning_rate": 2.878929785033585e-07,
|
|
"loss": 0.8030872941017151,
|
|
"step": 3428
|
|
},
|
|
{
|
|
"epoch": 2.7085308056872037,
|
|
"grad_norm": 9.0050236739937,
|
|
"learning_rate": 2.8635805330328783e-07,
|
|
"loss": 0.6855502128601074,
|
|
"step": 3429
|
|
},
|
|
{
|
|
"epoch": 2.7093206951026856,
|
|
"grad_norm": 9.140820391889852,
|
|
"learning_rate": 2.8482711020345556e-07,
|
|
"loss": 0.8755874037742615,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 2.7101105845181674,
|
|
"grad_norm": 15.402621823538889,
|
|
"learning_rate": 2.833001504972077e-07,
|
|
"loss": 0.429756760597229,
|
|
"step": 3431
|
|
},
|
|
{
|
|
"epoch": 2.7109004739336493,
|
|
"grad_norm": 9.610037912675201,
|
|
"learning_rate": 2.8177717547452463e-07,
|
|
"loss": 0.405164510011673,
|
|
"step": 3432
|
|
},
|
|
{
|
|
"epoch": 2.711690363349131,
|
|
"grad_norm": 13.695365189160531,
|
|
"learning_rate": 2.8025818642202054e-07,
|
|
"loss": 0.6108412742614746,
|
|
"step": 3433
|
|
},
|
|
{
|
|
"epoch": 2.712480252764613,
|
|
"grad_norm": 13.619752565690785,
|
|
"learning_rate": 2.787431846229427e-07,
|
|
"loss": 0.454412043094635,
|
|
"step": 3434
|
|
},
|
|
{
|
|
"epoch": 2.713270142180095,
|
|
"grad_norm": 11.53614981565061,
|
|
"learning_rate": 2.7723217135716906e-07,
|
|
"loss": 0.4228717088699341,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 2.7140600315955767,
|
|
"grad_norm": 6.706916975241827,
|
|
"learning_rate": 2.757251479012102e-07,
|
|
"loss": 0.2297818660736084,
|
|
"step": 3436
|
|
},
|
|
{
|
|
"epoch": 2.7148499210110586,
|
|
"grad_norm": 15.153915751186066,
|
|
"learning_rate": 2.742221155282027e-07,
|
|
"loss": 0.441479355096817,
|
|
"step": 3437
|
|
},
|
|
{
|
|
"epoch": 2.7156398104265405,
|
|
"grad_norm": 12.736343688491294,
|
|
"learning_rate": 2.727230755079141e-07,
|
|
"loss": 0.3135189712047577,
|
|
"step": 3438
|
|
},
|
|
{
|
|
"epoch": 2.7164296998420223,
|
|
"grad_norm": 10.469524983647013,
|
|
"learning_rate": 2.712280291067382e-07,
|
|
"loss": 0.2524583637714386,
|
|
"step": 3439
|
|
},
|
|
{
|
|
"epoch": 2.717219589257504,
|
|
"grad_norm": 10.21269953062072,
|
|
"learning_rate": 2.6973697758769404e-07,
|
|
"loss": 0.6804049015045166,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 2.718009478672986,
|
|
"grad_norm": 13.439002602740715,
|
|
"learning_rate": 2.68249922210429e-07,
|
|
"loss": 1.7227437496185303,
|
|
"step": 3441
|
|
},
|
|
{
|
|
"epoch": 2.7187993680884674,
|
|
"grad_norm": 9.105196897219319,
|
|
"learning_rate": 2.667668642312121e-07,
|
|
"loss": 0.24244064092636108,
|
|
"step": 3442
|
|
},
|
|
{
|
|
"epoch": 2.7195892575039493,
|
|
"grad_norm": 17.032916970105017,
|
|
"learning_rate": 2.6528780490293394e-07,
|
|
"loss": 0.42305219173431396,
|
|
"step": 3443
|
|
},
|
|
{
|
|
"epoch": 2.720379146919431,
|
|
"grad_norm": 8.341873292787225,
|
|
"learning_rate": 2.638127454751083e-07,
|
|
"loss": 0.3290414810180664,
|
|
"step": 3444
|
|
},
|
|
{
|
|
"epoch": 2.721169036334913,
|
|
"grad_norm": 14.438304518877404,
|
|
"learning_rate": 2.6234168719387275e-07,
|
|
"loss": 0.7699002027511597,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 2.721958925750395,
|
|
"grad_norm": 11.533451670720245,
|
|
"learning_rate": 2.6087463130198053e-07,
|
|
"loss": 0.39063939452171326,
|
|
"step": 3446
|
|
},
|
|
{
|
|
"epoch": 2.7227488151658767,
|
|
"grad_norm": 7.533668401408119,
|
|
"learning_rate": 2.594115790388069e-07,
|
|
"loss": 0.30158624053001404,
|
|
"step": 3447
|
|
},
|
|
{
|
|
"epoch": 2.7235387045813586,
|
|
"grad_norm": 9.966662834105735,
|
|
"learning_rate": 2.5795253164034084e-07,
|
|
"loss": 0.7435629367828369,
|
|
"step": 3448
|
|
},
|
|
{
|
|
"epoch": 2.7243285939968405,
|
|
"grad_norm": 14.725256543363951,
|
|
"learning_rate": 2.564974903391915e-07,
|
|
"loss": 0.22876577079296112,
|
|
"step": 3449
|
|
},
|
|
{
|
|
"epoch": 2.7251184834123223,
|
|
"grad_norm": 11.42689474920191,
|
|
"learning_rate": 2.550464563645827e-07,
|
|
"loss": 0.48019081354141235,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 2.725908372827804,
|
|
"grad_norm": 6.525526730403295,
|
|
"learning_rate": 2.5359943094235284e-07,
|
|
"loss": 0.3264992833137512,
|
|
"step": 3451
|
|
},
|
|
{
|
|
"epoch": 2.726698262243286,
|
|
"grad_norm": 12.867989920941069,
|
|
"learning_rate": 2.521564152949535e-07,
|
|
"loss": 0.6327470541000366,
|
|
"step": 3452
|
|
},
|
|
{
|
|
"epoch": 2.727488151658768,
|
|
"grad_norm": 33.14582147619613,
|
|
"learning_rate": 2.5071741064144893e-07,
|
|
"loss": 0.7746727466583252,
|
|
"step": 3453
|
|
},
|
|
{
|
|
"epoch": 2.7282780410742498,
|
|
"grad_norm": 11.842666297046986,
|
|
"learning_rate": 2.4928241819751506e-07,
|
|
"loss": 0.41545653343200684,
|
|
"step": 3454
|
|
},
|
|
{
|
|
"epoch": 2.729067930489731,
|
|
"grad_norm": 10.904910444914098,
|
|
"learning_rate": 2.4785143917543886e-07,
|
|
"loss": 0.30271655321121216,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 2.729857819905213,
|
|
"grad_norm": 7.237418545407075,
|
|
"learning_rate": 2.464244747841155e-07,
|
|
"loss": 0.2641463577747345,
|
|
"step": 3456
|
|
},
|
|
{
|
|
"epoch": 2.730647709320695,
|
|
"grad_norm": 12.797520847903757,
|
|
"learning_rate": 2.4500152622904895e-07,
|
|
"loss": 0.18360668420791626,
|
|
"step": 3457
|
|
},
|
|
{
|
|
"epoch": 2.7314375987361768,
|
|
"grad_norm": 6.889781589750479,
|
|
"learning_rate": 2.435825947123516e-07,
|
|
"loss": 0.2461070865392685,
|
|
"step": 3458
|
|
},
|
|
{
|
|
"epoch": 2.7322274881516586,
|
|
"grad_norm": 9.695805170509885,
|
|
"learning_rate": 2.4216768143274115e-07,
|
|
"loss": 0.5353419780731201,
|
|
"step": 3459
|
|
},
|
|
{
|
|
"epoch": 2.7330173775671405,
|
|
"grad_norm": 10.343918221842735,
|
|
"learning_rate": 2.4075678758554047e-07,
|
|
"loss": 0.6046707630157471,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 2.7338072669826223,
|
|
"grad_norm": 9.007895090494918,
|
|
"learning_rate": 2.3934991436267816e-07,
|
|
"loss": 0.2909160852432251,
|
|
"step": 3461
|
|
},
|
|
{
|
|
"epoch": 2.734597156398104,
|
|
"grad_norm": 10.62496325664592,
|
|
"learning_rate": 2.3794706295268476e-07,
|
|
"loss": 0.30107566714286804,
|
|
"step": 3462
|
|
},
|
|
{
|
|
"epoch": 2.735387045813586,
|
|
"grad_norm": 13.67484568966304,
|
|
"learning_rate": 2.365482345406933e-07,
|
|
"loss": 0.3950386643409729,
|
|
"step": 3463
|
|
},
|
|
{
|
|
"epoch": 2.736176935229068,
|
|
"grad_norm": 21.77362986226219,
|
|
"learning_rate": 2.3515343030844073e-07,
|
|
"loss": 1.0076971054077148,
|
|
"step": 3464
|
|
},
|
|
{
|
|
"epoch": 2.7369668246445498,
|
|
"grad_norm": 7.730693964041083,
|
|
"learning_rate": 2.3376265143426003e-07,
|
|
"loss": 0.24211300909519196,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 2.7377567140600316,
|
|
"grad_norm": 9.296217895983373,
|
|
"learning_rate": 2.3237589909308632e-07,
|
|
"loss": 0.2887963056564331,
|
|
"step": 3466
|
|
},
|
|
{
|
|
"epoch": 2.7385466034755135,
|
|
"grad_norm": 10.840522993677942,
|
|
"learning_rate": 2.309931744564531e-07,
|
|
"loss": 0.41945865750312805,
|
|
"step": 3467
|
|
},
|
|
{
|
|
"epoch": 2.7393364928909953,
|
|
"grad_norm": 8.917010556292599,
|
|
"learning_rate": 2.2961447869248977e-07,
|
|
"loss": 0.20327429473400116,
|
|
"step": 3468
|
|
},
|
|
{
|
|
"epoch": 2.740126382306477,
|
|
"grad_norm": 16.489207308906405,
|
|
"learning_rate": 2.2823981296592468e-07,
|
|
"loss": 0.445822149515152,
|
|
"step": 3469
|
|
},
|
|
{
|
|
"epoch": 2.740916271721959,
|
|
"grad_norm": 13.177843752844728,
|
|
"learning_rate": 2.2686917843807832e-07,
|
|
"loss": 0.4169418513774872,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 2.741706161137441,
|
|
"grad_norm": 18.679760356479285,
|
|
"learning_rate": 2.2550257626686835e-07,
|
|
"loss": 0.29727548360824585,
|
|
"step": 3471
|
|
},
|
|
{
|
|
"epoch": 2.742496050552923,
|
|
"grad_norm": 17.416569754051274,
|
|
"learning_rate": 2.2414000760680344e-07,
|
|
"loss": 0.9264640808105469,
|
|
"step": 3472
|
|
},
|
|
{
|
|
"epoch": 2.7432859399684046,
|
|
"grad_norm": 11.51848657708186,
|
|
"learning_rate": 2.2278147360898726e-07,
|
|
"loss": 0.31270313262939453,
|
|
"step": 3473
|
|
},
|
|
{
|
|
"epoch": 2.7440758293838865,
|
|
"grad_norm": 9.677231027214653,
|
|
"learning_rate": 2.2142697542111403e-07,
|
|
"loss": 0.519598126411438,
|
|
"step": 3474
|
|
},
|
|
{
|
|
"epoch": 2.7448657187993684,
|
|
"grad_norm": 8.89666307610365,
|
|
"learning_rate": 2.2007651418746777e-07,
|
|
"loss": 0.26155680418014526,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 2.7456556082148498,
|
|
"grad_norm": 8.068195381347717,
|
|
"learning_rate": 2.1873009104892207e-07,
|
|
"loss": 0.26666006445884705,
|
|
"step": 3476
|
|
},
|
|
{
|
|
"epoch": 2.7464454976303316,
|
|
"grad_norm": 9.967802346446724,
|
|
"learning_rate": 2.1738770714293978e-07,
|
|
"loss": 0.4388732612133026,
|
|
"step": 3477
|
|
},
|
|
{
|
|
"epoch": 2.7472353870458135,
|
|
"grad_norm": 12.682142726744404,
|
|
"learning_rate": 2.160493636035721e-07,
|
|
"loss": 1.5601736307144165,
|
|
"step": 3478
|
|
},
|
|
{
|
|
"epoch": 2.7480252764612954,
|
|
"grad_norm": 8.216179392592059,
|
|
"learning_rate": 2.1471506156145572e-07,
|
|
"loss": 0.34426096081733704,
|
|
"step": 3479
|
|
},
|
|
{
|
|
"epoch": 2.748815165876777,
|
|
"grad_norm": 12.700475916611518,
|
|
"learning_rate": 2.133848021438134e-07,
|
|
"loss": 0.3579084873199463,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 2.749605055292259,
|
|
"grad_norm": 8.827090645854712,
|
|
"learning_rate": 2.1205858647445175e-07,
|
|
"loss": 0.3631330728530884,
|
|
"step": 3481
|
|
},
|
|
{
|
|
"epoch": 2.750394944707741,
|
|
"grad_norm": 9.001270743632745,
|
|
"learning_rate": 2.107364156737629e-07,
|
|
"loss": 0.30411210656166077,
|
|
"step": 3482
|
|
},
|
|
{
|
|
"epoch": 2.751184834123223,
|
|
"grad_norm": 9.409837583682123,
|
|
"learning_rate": 2.0941829085872168e-07,
|
|
"loss": 0.23499026894569397,
|
|
"step": 3483
|
|
},
|
|
{
|
|
"epoch": 2.7519747235387046,
|
|
"grad_norm": 12.705050308923921,
|
|
"learning_rate": 2.0810421314288342e-07,
|
|
"loss": 1.2970982789993286,
|
|
"step": 3484
|
|
},
|
|
{
|
|
"epoch": 2.7527646129541865,
|
|
"grad_norm": 8.03711225808964,
|
|
"learning_rate": 2.067941836363857e-07,
|
|
"loss": 0.5299091339111328,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 2.7535545023696684,
|
|
"grad_norm": 11.220356334155515,
|
|
"learning_rate": 2.0548820344594544e-07,
|
|
"loss": 0.4197993278503418,
|
|
"step": 3486
|
|
},
|
|
{
|
|
"epoch": 2.7543443917851502,
|
|
"grad_norm": 12.91965843833499,
|
|
"learning_rate": 2.04186273674859e-07,
|
|
"loss": 0.6132807731628418,
|
|
"step": 3487
|
|
},
|
|
{
|
|
"epoch": 2.7551342812006316,
|
|
"grad_norm": 18.240887411131222,
|
|
"learning_rate": 2.02888395423001e-07,
|
|
"loss": 1.271854281425476,
|
|
"step": 3488
|
|
},
|
|
{
|
|
"epoch": 2.7559241706161135,
|
|
"grad_norm": 10.732674912361663,
|
|
"learning_rate": 2.0159456978682378e-07,
|
|
"loss": 0.3073996901512146,
|
|
"step": 3489
|
|
},
|
|
{
|
|
"epoch": 2.7567140600315954,
|
|
"grad_norm": 9.252251075885143,
|
|
"learning_rate": 2.0030479785935532e-07,
|
|
"loss": 0.2578376531600952,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 2.757503949447077,
|
|
"grad_norm": 8.516995882822272,
|
|
"learning_rate": 1.9901908073019837e-07,
|
|
"loss": 0.519225001335144,
|
|
"step": 3491
|
|
},
|
|
{
|
|
"epoch": 2.758293838862559,
|
|
"grad_norm": 8.43300426338786,
|
|
"learning_rate": 1.9773741948553194e-07,
|
|
"loss": 0.199580118060112,
|
|
"step": 3492
|
|
},
|
|
{
|
|
"epoch": 2.759083728278041,
|
|
"grad_norm": 9.32908519660615,
|
|
"learning_rate": 1.964598152081071e-07,
|
|
"loss": 0.4582338333129883,
|
|
"step": 3493
|
|
},
|
|
{
|
|
"epoch": 2.759873617693523,
|
|
"grad_norm": 9.046945945787343,
|
|
"learning_rate": 1.9518626897724878e-07,
|
|
"loss": 0.25943028926849365,
|
|
"step": 3494
|
|
},
|
|
{
|
|
"epoch": 2.7606635071090047,
|
|
"grad_norm": 11.314219665735752,
|
|
"learning_rate": 1.93916781868852e-07,
|
|
"loss": 0.4258866608142853,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 2.7614533965244865,
|
|
"grad_norm": 10.20689716332431,
|
|
"learning_rate": 1.9265135495538488e-07,
|
|
"loss": 0.271173357963562,
|
|
"step": 3496
|
|
},
|
|
{
|
|
"epoch": 2.7622432859399684,
|
|
"grad_norm": 16.200438350903056,
|
|
"learning_rate": 1.9138998930588348e-07,
|
|
"loss": 0.4719555377960205,
|
|
"step": 3497
|
|
},
|
|
{
|
|
"epoch": 2.7630331753554502,
|
|
"grad_norm": 11.738594703612247,
|
|
"learning_rate": 1.901326859859537e-07,
|
|
"loss": 0.980524480342865,
|
|
"step": 3498
|
|
},
|
|
{
|
|
"epoch": 2.763823064770932,
|
|
"grad_norm": 17.919465561173997,
|
|
"learning_rate": 1.888794460577692e-07,
|
|
"loss": 0.9541090726852417,
|
|
"step": 3499
|
|
},
|
|
{
|
|
"epoch": 2.764612954186414,
|
|
"grad_norm": 13.890459920478602,
|
|
"learning_rate": 1.8763027058007145e-07,
|
|
"loss": 0.69322669506073,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 2.765402843601896,
|
|
"grad_norm": 16.321197407525634,
|
|
"learning_rate": 1.8638516060816903e-07,
|
|
"loss": 0.42115840315818787,
|
|
"step": 3501
|
|
},
|
|
{
|
|
"epoch": 2.7661927330173777,
|
|
"grad_norm": 10.96242137226945,
|
|
"learning_rate": 1.8514411719393445e-07,
|
|
"loss": 0.46676504611968994,
|
|
"step": 3502
|
|
},
|
|
{
|
|
"epoch": 2.7669826224328595,
|
|
"grad_norm": 8.812275087497374,
|
|
"learning_rate": 1.8390714138580457e-07,
|
|
"loss": 0.5044353604316711,
|
|
"step": 3503
|
|
},
|
|
{
|
|
"epoch": 2.7677725118483414,
|
|
"grad_norm": 17.943245247505377,
|
|
"learning_rate": 1.8267423422878073e-07,
|
|
"loss": 0.41426870226860046,
|
|
"step": 3504
|
|
},
|
|
{
|
|
"epoch": 2.7685624012638232,
|
|
"grad_norm": 10.71591258305487,
|
|
"learning_rate": 1.814453967644264e-07,
|
|
"loss": 0.6353201270103455,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 2.769352290679305,
|
|
"grad_norm": 10.317665072167928,
|
|
"learning_rate": 1.8022063003086952e-07,
|
|
"loss": 0.20802390575408936,
|
|
"step": 3506
|
|
},
|
|
{
|
|
"epoch": 2.770142180094787,
|
|
"grad_norm": 9.95179330655714,
|
|
"learning_rate": 1.7899993506279577e-07,
|
|
"loss": 0.38674095273017883,
|
|
"step": 3507
|
|
},
|
|
{
|
|
"epoch": 2.770932069510269,
|
|
"grad_norm": 9.564138951257297,
|
|
"learning_rate": 1.7778331289145246e-07,
|
|
"loss": 0.37485527992248535,
|
|
"step": 3508
|
|
},
|
|
{
|
|
"epoch": 2.7717219589257502,
|
|
"grad_norm": 11.837113131259644,
|
|
"learning_rate": 1.765707645446446e-07,
|
|
"loss": 1.245941162109375,
|
|
"step": 3509
|
|
},
|
|
{
|
|
"epoch": 2.772511848341232,
|
|
"grad_norm": 16.993616371219062,
|
|
"learning_rate": 1.7536229104673952e-07,
|
|
"loss": 1.4415150880813599,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 2.773301737756714,
|
|
"grad_norm": 18.46978395432566,
|
|
"learning_rate": 1.741578934186583e-07,
|
|
"loss": 0.48280513286590576,
|
|
"step": 3511
|
|
},
|
|
{
|
|
"epoch": 2.774091627172196,
|
|
"grad_norm": 19.046571471804004,
|
|
"learning_rate": 1.7295757267787982e-07,
|
|
"loss": 0.6890305280685425,
|
|
"step": 3512
|
|
},
|
|
{
|
|
"epoch": 2.7748815165876777,
|
|
"grad_norm": 8.374569669050457,
|
|
"learning_rate": 1.717613298384402e-07,
|
|
"loss": 0.17919717729091644,
|
|
"step": 3513
|
|
},
|
|
{
|
|
"epoch": 2.7756714060031595,
|
|
"grad_norm": 14.297372965397592,
|
|
"learning_rate": 1.7056916591092765e-07,
|
|
"loss": 0.9247697591781616,
|
|
"step": 3514
|
|
},
|
|
{
|
|
"epoch": 2.7764612954186414,
|
|
"grad_norm": 12.33201213532176,
|
|
"learning_rate": 1.6938108190248714e-07,
|
|
"loss": 0.7695714831352234,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 2.7772511848341233,
|
|
"grad_norm": 11.395421501521598,
|
|
"learning_rate": 1.681970788168158e-07,
|
|
"loss": 0.5965884923934937,
|
|
"step": 3516
|
|
},
|
|
{
|
|
"epoch": 2.778041074249605,
|
|
"grad_norm": 11.322776826507724,
|
|
"learning_rate": 1.670171576541635e-07,
|
|
"loss": 0.28291648626327515,
|
|
"step": 3517
|
|
},
|
|
{
|
|
"epoch": 2.778830963665087,
|
|
"grad_norm": 9.052162465156956,
|
|
"learning_rate": 1.658413194113312e-07,
|
|
"loss": 0.26967617869377136,
|
|
"step": 3518
|
|
},
|
|
{
|
|
"epoch": 2.779620853080569,
|
|
"grad_norm": 8.85381583315956,
|
|
"learning_rate": 1.6466956508167098e-07,
|
|
"loss": 0.27431443333625793,
|
|
"step": 3519
|
|
},
|
|
{
|
|
"epoch": 2.7804107424960507,
|
|
"grad_norm": 10.26241792481637,
|
|
"learning_rate": 1.635018956550849e-07,
|
|
"loss": 0.5575605630874634,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 2.7812006319115326,
|
|
"grad_norm": 19.04805022049209,
|
|
"learning_rate": 1.6233831211802443e-07,
|
|
"loss": 0.507323145866394,
|
|
"step": 3521
|
|
},
|
|
{
|
|
"epoch": 2.781990521327014,
|
|
"grad_norm": 12.25593303035708,
|
|
"learning_rate": 1.6117881545348768e-07,
|
|
"loss": 0.7778584957122803,
|
|
"step": 3522
|
|
},
|
|
{
|
|
"epoch": 2.782780410742496,
|
|
"grad_norm": 10.716935186355487,
|
|
"learning_rate": 1.6002340664102222e-07,
|
|
"loss": 0.4240133762359619,
|
|
"step": 3523
|
|
},
|
|
{
|
|
"epoch": 2.7835703001579777,
|
|
"grad_norm": 10.691328997589723,
|
|
"learning_rate": 1.588720866567206e-07,
|
|
"loss": 0.3414255976676941,
|
|
"step": 3524
|
|
},
|
|
{
|
|
"epoch": 2.7843601895734595,
|
|
"grad_norm": 11.768217491281503,
|
|
"learning_rate": 1.57724856473222e-07,
|
|
"loss": 0.41171273589134216,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 2.7851500789889414,
|
|
"grad_norm": 11.135806497121866,
|
|
"learning_rate": 1.5658171705971002e-07,
|
|
"loss": 0.44822290539741516,
|
|
"step": 3526
|
|
},
|
|
{
|
|
"epoch": 2.7859399684044233,
|
|
"grad_norm": 12.802286130199466,
|
|
"learning_rate": 1.5544266938191277e-07,
|
|
"loss": 0.5907123684883118,
|
|
"step": 3527
|
|
},
|
|
{
|
|
"epoch": 2.786729857819905,
|
|
"grad_norm": 9.291881799567307,
|
|
"learning_rate": 1.5430771440210102e-07,
|
|
"loss": 0.5149095058441162,
|
|
"step": 3528
|
|
},
|
|
{
|
|
"epoch": 2.787519747235387,
|
|
"grad_norm": 11.903535206167968,
|
|
"learning_rate": 1.5317685307909003e-07,
|
|
"loss": 0.3978126645088196,
|
|
"step": 3529
|
|
},
|
|
{
|
|
"epoch": 2.788309636650869,
|
|
"grad_norm": 11.87266940877088,
|
|
"learning_rate": 1.5205008636823392e-07,
|
|
"loss": 0.7672142386436462,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 2.7890995260663507,
|
|
"grad_norm": 12.664993847505396,
|
|
"learning_rate": 1.509274152214285e-07,
|
|
"loss": 0.6181859970092773,
|
|
"step": 3531
|
|
},
|
|
{
|
|
"epoch": 2.7898894154818326,
|
|
"grad_norm": 11.706302565544545,
|
|
"learning_rate": 1.4980884058711122e-07,
|
|
"loss": 0.2257220596075058,
|
|
"step": 3532
|
|
},
|
|
{
|
|
"epoch": 2.7906793048973144,
|
|
"grad_norm": 11.269026746440312,
|
|
"learning_rate": 1.486943634102561e-07,
|
|
"loss": 0.411458283662796,
|
|
"step": 3533
|
|
},
|
|
{
|
|
"epoch": 2.7914691943127963,
|
|
"grad_norm": 11.770992911424287,
|
|
"learning_rate": 1.4758398463237844e-07,
|
|
"loss": 0.391770601272583,
|
|
"step": 3534
|
|
},
|
|
{
|
|
"epoch": 2.792259083728278,
|
|
"grad_norm": 12.179827825593687,
|
|
"learning_rate": 1.464777051915306e-07,
|
|
"loss": 0.7870375514030457,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 2.79304897314376,
|
|
"grad_norm": 10.205085198463788,
|
|
"learning_rate": 1.4537552602229888e-07,
|
|
"loss": 0.24328409135341644,
|
|
"step": 3536
|
|
},
|
|
{
|
|
"epoch": 2.793838862559242,
|
|
"grad_norm": 11.707290493646688,
|
|
"learning_rate": 1.4427744805580902e-07,
|
|
"loss": 0.7062838077545166,
|
|
"step": 3537
|
|
},
|
|
{
|
|
"epoch": 2.7946287519747237,
|
|
"grad_norm": 9.449178364796186,
|
|
"learning_rate": 1.4318347221972118e-07,
|
|
"loss": 0.4576803743839264,
|
|
"step": 3538
|
|
},
|
|
{
|
|
"epoch": 2.7954186413902056,
|
|
"grad_norm": 8.800231574547174,
|
|
"learning_rate": 1.420935994382294e-07,
|
|
"loss": 0.6782954931259155,
|
|
"step": 3539
|
|
},
|
|
{
|
|
"epoch": 2.7962085308056874,
|
|
"grad_norm": 20.41988700193614,
|
|
"learning_rate": 1.4100783063206224e-07,
|
|
"loss": 0.5219430923461914,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 2.7969984202211693,
|
|
"grad_norm": 7.5166874724474715,
|
|
"learning_rate": 1.3992616671847981e-07,
|
|
"loss": 0.3515085279941559,
|
|
"step": 3541
|
|
},
|
|
{
|
|
"epoch": 2.7977883096366507,
|
|
"grad_norm": 6.932947866396507,
|
|
"learning_rate": 1.3884860861127558e-07,
|
|
"loss": 0.47976720333099365,
|
|
"step": 3542
|
|
},
|
|
{
|
|
"epoch": 2.7985781990521326,
|
|
"grad_norm": 9.491598002493626,
|
|
"learning_rate": 1.377751572207753e-07,
|
|
"loss": 0.6151677370071411,
|
|
"step": 3543
|
|
},
|
|
{
|
|
"epoch": 2.7993680884676144,
|
|
"grad_norm": 9.194411344753396,
|
|
"learning_rate": 1.3670581345383294e-07,
|
|
"loss": 0.26150283217430115,
|
|
"step": 3544
|
|
},
|
|
{
|
|
"epoch": 2.8001579778830963,
|
|
"grad_norm": 17.758549126307468,
|
|
"learning_rate": 1.3564057821383426e-07,
|
|
"loss": 0.48430135846138,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 2.800947867298578,
|
|
"grad_norm": 10.109089794707371,
|
|
"learning_rate": 1.345794524006938e-07,
|
|
"loss": 0.4719211459159851,
|
|
"step": 3546
|
|
},
|
|
{
|
|
"epoch": 2.80173775671406,
|
|
"grad_norm": 6.966698821581658,
|
|
"learning_rate": 1.3352243691085343e-07,
|
|
"loss": 0.34015411138534546,
|
|
"step": 3547
|
|
},
|
|
{
|
|
"epoch": 2.802527646129542,
|
|
"grad_norm": 17.133059090598415,
|
|
"learning_rate": 1.3246953263728323e-07,
|
|
"loss": 0.662192165851593,
|
|
"step": 3548
|
|
},
|
|
{
|
|
"epoch": 2.8033175355450237,
|
|
"grad_norm": 13.801086934095315,
|
|
"learning_rate": 1.3142074046948117e-07,
|
|
"loss": 0.48745739459991455,
|
|
"step": 3549
|
|
},
|
|
{
|
|
"epoch": 2.8041074249605056,
|
|
"grad_norm": 9.455351798236373,
|
|
"learning_rate": 1.3037606129346903e-07,
|
|
"loss": 0.4470326602458954,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 2.8048973143759874,
|
|
"grad_norm": 16.270417938160158,
|
|
"learning_rate": 1.293354959917964e-07,
|
|
"loss": 0.5577215552330017,
|
|
"step": 3551
|
|
},
|
|
{
|
|
"epoch": 2.8056872037914693,
|
|
"grad_norm": 7.398103500346509,
|
|
"learning_rate": 1.282990454435362e-07,
|
|
"loss": 0.33731377124786377,
|
|
"step": 3552
|
|
},
|
|
{
|
|
"epoch": 2.806477093206951,
|
|
"grad_norm": 11.993082049967715,
|
|
"learning_rate": 1.2726671052428418e-07,
|
|
"loss": 0.3734150528907776,
|
|
"step": 3553
|
|
},
|
|
{
|
|
"epoch": 2.807266982622433,
|
|
"grad_norm": 13.661721906893305,
|
|
"learning_rate": 1.26238492106161e-07,
|
|
"loss": 0.2888742685317993,
|
|
"step": 3554
|
|
},
|
|
{
|
|
"epoch": 2.8080568720379144,
|
|
"grad_norm": 10.344916708225794,
|
|
"learning_rate": 1.2521439105780909e-07,
|
|
"loss": 0.3093745708465576,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 2.8088467614533963,
|
|
"grad_norm": 13.865074941166652,
|
|
"learning_rate": 1.2419440824439188e-07,
|
|
"loss": 0.4877317547798157,
|
|
"step": 3556
|
|
},
|
|
{
|
|
"epoch": 2.809636650868878,
|
|
"grad_norm": 10.788397713560993,
|
|
"learning_rate": 1.231785445275957e-07,
|
|
"loss": 0.4557979702949524,
|
|
"step": 3557
|
|
},
|
|
{
|
|
"epoch": 2.81042654028436,
|
|
"grad_norm": 10.285465020965095,
|
|
"learning_rate": 1.2216680076562347e-07,
|
|
"loss": 0.3419281840324402,
|
|
"step": 3558
|
|
},
|
|
{
|
|
"epoch": 2.811216429699842,
|
|
"grad_norm": 8.877423938913571,
|
|
"learning_rate": 1.2115917781320096e-07,
|
|
"loss": 0.4035925269126892,
|
|
"step": 3559
|
|
},
|
|
{
|
|
"epoch": 2.8120063191153237,
|
|
"grad_norm": 13.435791700322584,
|
|
"learning_rate": 1.2015567652157057e-07,
|
|
"loss": 0.6396586298942566,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 2.8127962085308056,
|
|
"grad_norm": 19.702185429939885,
|
|
"learning_rate": 1.1915629773849358e-07,
|
|
"loss": 1.082403540611267,
|
|
"step": 3561
|
|
},
|
|
{
|
|
"epoch": 2.8135860979462874,
|
|
"grad_norm": 11.822298832725835,
|
|
"learning_rate": 1.1816104230825021e-07,
|
|
"loss": 0.3863110840320587,
|
|
"step": 3562
|
|
},
|
|
{
|
|
"epoch": 2.8143759873617693,
|
|
"grad_norm": 8.295210827383162,
|
|
"learning_rate": 1.1716991107163289e-07,
|
|
"loss": 0.58207106590271,
|
|
"step": 3563
|
|
},
|
|
{
|
|
"epoch": 2.815165876777251,
|
|
"grad_norm": 14.2016386137118,
|
|
"learning_rate": 1.1618290486595463e-07,
|
|
"loss": 0.2098863422870636,
|
|
"step": 3564
|
|
},
|
|
{
|
|
"epoch": 2.815955766192733,
|
|
"grad_norm": 10.082988120031807,
|
|
"learning_rate": 1.1520002452503953e-07,
|
|
"loss": 0.27094680070877075,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 2.816745655608215,
|
|
"grad_norm": 5.875570359193218,
|
|
"learning_rate": 1.1422127087923007e-07,
|
|
"loss": 0.21748504042625427,
|
|
"step": 3566
|
|
},
|
|
{
|
|
"epoch": 2.8175355450236967,
|
|
"grad_norm": 12.438687688635737,
|
|
"learning_rate": 1.1324664475538038e-07,
|
|
"loss": 0.5272513031959534,
|
|
"step": 3567
|
|
},
|
|
{
|
|
"epoch": 2.8183254344391786,
|
|
"grad_norm": 10.889129102864676,
|
|
"learning_rate": 1.1227614697685629e-07,
|
|
"loss": 0.9930387139320374,
|
|
"step": 3568
|
|
},
|
|
{
|
|
"epoch": 2.8191153238546605,
|
|
"grad_norm": 12.382867902327071,
|
|
"learning_rate": 1.1130977836353862e-07,
|
|
"loss": 0.3840116858482361,
|
|
"step": 3569
|
|
},
|
|
{
|
|
"epoch": 2.8199052132701423,
|
|
"grad_norm": 9.33063650473277,
|
|
"learning_rate": 1.1034753973181877e-07,
|
|
"loss": 0.24336904287338257,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 2.820695102685624,
|
|
"grad_norm": 15.374029447353267,
|
|
"learning_rate": 1.0938943189459872e-07,
|
|
"loss": 0.3328930735588074,
|
|
"step": 3571
|
|
},
|
|
{
|
|
"epoch": 2.821484992101106,
|
|
"grad_norm": 10.569256308523215,
|
|
"learning_rate": 1.0843545566129154e-07,
|
|
"loss": 0.40318596363067627,
|
|
"step": 3572
|
|
},
|
|
{
|
|
"epoch": 2.822274881516588,
|
|
"grad_norm": 11.764490990287323,
|
|
"learning_rate": 1.074856118378198e-07,
|
|
"loss": 0.2967602014541626,
|
|
"step": 3573
|
|
},
|
|
{
|
|
"epoch": 2.8230647709320698,
|
|
"grad_norm": 11.711306728260894,
|
|
"learning_rate": 1.0653990122661273e-07,
|
|
"loss": 0.20409056544303894,
|
|
"step": 3574
|
|
},
|
|
{
|
|
"epoch": 2.8238546603475516,
|
|
"grad_norm": 8.980141255462513,
|
|
"learning_rate": 1.0559832462661234e-07,
|
|
"loss": 0.4850717782974243,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 2.824644549763033,
|
|
"grad_norm": 17.31606580117272,
|
|
"learning_rate": 1.0466088283326404e-07,
|
|
"loss": 0.6416319608688354,
|
|
"step": 3576
|
|
},
|
|
{
|
|
"epoch": 2.825434439178515,
|
|
"grad_norm": 15.344712637864934,
|
|
"learning_rate": 1.0372757663852262e-07,
|
|
"loss": 0.46508005261421204,
|
|
"step": 3577
|
|
},
|
|
{
|
|
"epoch": 2.8262243285939967,
|
|
"grad_norm": 8.57007968363552,
|
|
"learning_rate": 1.0279840683084741e-07,
|
|
"loss": 0.8428210616111755,
|
|
"step": 3578
|
|
},
|
|
{
|
|
"epoch": 2.8270142180094786,
|
|
"grad_norm": 8.550821162807685,
|
|
"learning_rate": 1.0187337419520493e-07,
|
|
"loss": 0.682414710521698,
|
|
"step": 3579
|
|
},
|
|
{
|
|
"epoch": 2.8278041074249605,
|
|
"grad_norm": 7.199168718842685,
|
|
"learning_rate": 1.0095247951306508e-07,
|
|
"loss": 0.25026553869247437,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 2.8285939968404423,
|
|
"grad_norm": 12.81831188890812,
|
|
"learning_rate": 1.0003572356240333e-07,
|
|
"loss": 0.933559238910675,
|
|
"step": 3581
|
|
},
|
|
{
|
|
"epoch": 2.829383886255924,
|
|
"grad_norm": 7.6333393554744156,
|
|
"learning_rate": 9.912310711769734e-08,
|
|
"loss": 0.26869896054267883,
|
|
"step": 3582
|
|
},
|
|
{
|
|
"epoch": 2.830173775671406,
|
|
"grad_norm": 9.048728044949662,
|
|
"learning_rate": 9.821463094992934e-08,
|
|
"loss": 0.44904714822769165,
|
|
"step": 3583
|
|
},
|
|
{
|
|
"epoch": 2.830963665086888,
|
|
"grad_norm": 10.975379290838536,
|
|
"learning_rate": 9.731029582658258e-08,
|
|
"loss": 0.7174665927886963,
|
|
"step": 3584
|
|
},
|
|
{
|
|
"epoch": 2.8317535545023698,
|
|
"grad_norm": 9.80556755933507,
|
|
"learning_rate": 9.641010251164263e-08,
|
|
"loss": 0.47703051567077637,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 2.8325434439178516,
|
|
"grad_norm": 9.546590438112691,
|
|
"learning_rate": 9.551405176559558e-08,
|
|
"loss": 0.32327979803085327,
|
|
"step": 3586
|
|
},
|
|
{
|
|
"epoch": 2.8333333333333335,
|
|
"grad_norm": 11.700829296565805,
|
|
"learning_rate": 9.46221443454276e-08,
|
|
"loss": 0.7538062334060669,
|
|
"step": 3587
|
|
},
|
|
    {
      "epoch": 2.834123222748815,
      "grad_norm": 13.33437853882375,
      "learning_rate": 9.37343810046254e-08,
      "loss": 0.5941387414932251,
      "step": 3588
    },
    {
      "epoch": 2.8349131121642968,
      "grad_norm": 5.894312748053277,
      "learning_rate": 9.28507624931746e-08,
      "loss": 0.14909735321998596,
      "step": 3589
    },
    {
      "epoch": 2.8357030015797786,
      "grad_norm": 11.328914671419902,
      "learning_rate": 9.197128955755919e-08,
      "loss": 0.6000841856002808,
      "step": 3590
    },
    {
      "epoch": 2.8364928909952605,
      "grad_norm": 14.987543569871976,
      "learning_rate": 9.109596294075983e-08,
      "loss": 0.4695100784301758,
      "step": 3591
    },
    {
      "epoch": 2.8372827804107423,
      "grad_norm": 7.157405951875022,
      "learning_rate": 9.022478338225616e-08,
      "loss": 0.2676977217197418,
      "step": 3592
    },
    {
      "epoch": 2.838072669826224,
      "grad_norm": 10.025275842504612,
      "learning_rate": 8.935775161802274e-08,
      "loss": 0.6638664603233337,
      "step": 3593
    },
    {
      "epoch": 2.838862559241706,
      "grad_norm": 7.7262320503218005,
      "learning_rate": 8.849486838053201e-08,
      "loss": 0.2409912347793579,
      "step": 3594
    },
    {
      "epoch": 2.839652448657188,
      "grad_norm": 12.117641612900385,
      "learning_rate": 8.763613439875085e-08,
      "loss": 0.4687439203262329,
      "step": 3595
    },
    {
      "epoch": 2.8404423380726698,
      "grad_norm": 10.35658710588346,
      "learning_rate": 8.67815503981384e-08,
      "loss": 0.8544546961784363,
      "step": 3596
    },
    {
      "epoch": 2.8412322274881516,
      "grad_norm": 11.638767331627365,
      "learning_rate": 8.59311171006516e-08,
      "loss": 0.8178413510322571,
      "step": 3597
    },
    {
      "epoch": 2.8420221169036335,
      "grad_norm": 8.669410862880607,
      "learning_rate": 8.508483522473909e-08,
      "loss": 0.4358055293560028,
      "step": 3598
    },
    {
      "epoch": 2.8428120063191153,
      "grad_norm": 15.987554618443884,
      "learning_rate": 8.424270548534286e-08,
      "loss": 0.5043225884437561,
      "step": 3599
    },
    {
      "epoch": 2.843601895734597,
      "grad_norm": 33.576463764455724,
      "learning_rate": 8.340472859389714e-08,
      "loss": 0.38993752002716064,
      "step": 3600
    },
    {
      "epoch": 2.844391785150079,
      "grad_norm": 12.91658376574621,
      "learning_rate": 8.257090525832623e-08,
      "loss": 0.8641099333763123,
      "step": 3601
    },
    {
      "epoch": 2.845181674565561,
      "grad_norm": 7.742223493684328,
      "learning_rate": 8.17412361830472e-08,
      "loss": 0.36251041293144226,
      "step": 3602
    },
    {
      "epoch": 2.845971563981043,
      "grad_norm": 9.525833528037221,
      "learning_rate": 8.091572206896769e-08,
      "loss": 0.4047582745552063,
      "step": 3603
    },
    {
      "epoch": 2.8467614533965246,
      "grad_norm": 12.327238364324716,
      "learning_rate": 8.009436361348543e-08,
      "loss": 0.22032329440116882,
      "step": 3604
    },
    {
      "epoch": 2.8475513428120065,
      "grad_norm": 8.279880501050302,
      "learning_rate": 7.927716151048537e-08,
      "loss": 0.4643440246582031,
      "step": 3605
    },
    {
      "epoch": 2.8483412322274884,
      "grad_norm": 11.646960594230466,
      "learning_rate": 7.846411645034302e-08,
      "loss": 0.20467007160186768,
      "step": 3606
    },
    {
      "epoch": 2.84913112164297,
      "grad_norm": 11.370802049937053,
      "learning_rate": 7.765522911992229e-08,
      "loss": 0.27269160747528076,
      "step": 3607
    },
    {
      "epoch": 2.849921011058452,
      "grad_norm": 23.699981990614827,
      "learning_rate": 7.685050020257267e-08,
      "loss": 0.8710867762565613,
      "step": 3608
    },
    {
      "epoch": 2.8507109004739335,
      "grad_norm": 13.69662146387405,
      "learning_rate": 7.604993037813257e-08,
      "loss": 0.7823088765144348,
      "step": 3609
    },
    {
      "epoch": 2.8515007898894154,
      "grad_norm": 11.238494785694472,
      "learning_rate": 7.525352032292599e-08,
      "loss": 0.47667503356933594,
      "step": 3610
    },
    {
      "epoch": 2.852290679304897,
      "grad_norm": 11.90408448356978,
      "learning_rate": 7.44612707097625e-08,
      "loss": 0.4056919515132904,
      "step": 3611
    },
    {
      "epoch": 2.853080568720379,
      "grad_norm": 14.64818168207235,
      "learning_rate": 7.367318220793673e-08,
      "loss": 0.31270867586135864,
      "step": 3612
    },
    {
      "epoch": 2.853870458135861,
      "grad_norm": 8.786247946279532,
      "learning_rate": 7.288925548322945e-08,
      "loss": 0.18290819227695465,
      "step": 3613
    },
    {
      "epoch": 2.854660347551343,
      "grad_norm": 9.827541412175302,
      "learning_rate": 7.210949119790423e-08,
      "loss": 1.236111044883728,
      "step": 3614
    },
    {
      "epoch": 2.8554502369668247,
      "grad_norm": 7.902486340561014,
      "learning_rate": 7.133389001070801e-08,
      "loss": 0.19697430729866028,
      "step": 3615
    },
    {
      "epoch": 2.8562401263823065,
      "grad_norm": 10.36680879415055,
      "learning_rate": 7.056245257687166e-08,
      "loss": 0.4995325207710266,
      "step": 3616
    },
    {
      "epoch": 2.8570300157977884,
      "grad_norm": 8.029144851923272,
      "learning_rate": 6.979517954810777e-08,
      "loss": 0.35892802476882935,
      "step": 3617
    },
    {
      "epoch": 2.8578199052132702,
      "grad_norm": 10.443143901118459,
      "learning_rate": 6.903207157261116e-08,
      "loss": 0.6018155813217163,
      "step": 3618
    },
    {
      "epoch": 2.858609794628752,
      "grad_norm": 9.972612916820504,
      "learning_rate": 6.827312929505837e-08,
      "loss": 0.3768244683742523,
      "step": 3619
    },
    {
      "epoch": 2.859399684044234,
      "grad_norm": 9.967954749916855,
      "learning_rate": 6.75183533566065e-08,
      "loss": 0.6008504033088684,
      "step": 3620
    },
    {
      "epoch": 2.860189573459716,
      "grad_norm": 11.826572647957775,
      "learning_rate": 6.676774439489109e-08,
      "loss": 0.9027575254440308,
      "step": 3621
    },
    {
      "epoch": 2.860979462875197,
      "grad_norm": 7.566116639922751,
      "learning_rate": 6.602130304403153e-08,
      "loss": 0.3454285264015198,
      "step": 3622
    },
    {
      "epoch": 2.861769352290679,
      "grad_norm": 11.334110441287553,
      "learning_rate": 6.527902993462232e-08,
      "loss": 0.3822672963142395,
      "step": 3623
    },
    {
      "epoch": 2.862559241706161,
      "grad_norm": 30.42379993258823,
      "learning_rate": 6.454092569373849e-08,
      "loss": 1.1499581336975098,
      "step": 3624
    },
    {
      "epoch": 2.863349131121643,
      "grad_norm": 12.189705870905884,
      "learning_rate": 6.380699094493292e-08,
      "loss": 0.28113341331481934,
      "step": 3625
    },
    {
      "epoch": 2.8641390205371247,
      "grad_norm": 10.306143494702912,
      "learning_rate": 6.307722630823631e-08,
      "loss": 0.6603707671165466,
      "step": 3626
    },
    {
      "epoch": 2.8649289099526065,
      "grad_norm": 12.679232897956698,
      "learning_rate": 6.235163240015608e-08,
      "loss": 0.6723718047142029,
      "step": 3627
    },
    {
      "epoch": 2.8657187993680884,
      "grad_norm": 11.11612011580192,
      "learning_rate": 6.163020983367685e-08,
      "loss": 0.28444093465805054,
      "step": 3628
    },
    {
      "epoch": 2.8665086887835702,
      "grad_norm": 10.621322926022057,
      "learning_rate": 6.091295921825779e-08,
      "loss": 0.2627624571323395,
      "step": 3629
    },
    {
      "epoch": 2.867298578199052,
      "grad_norm": 9.584051079895877,
      "learning_rate": 6.019988115983533e-08,
      "loss": 0.6522977948188782,
      "step": 3630
    },
    {
      "epoch": 2.868088467614534,
      "grad_norm": 10.104067057656607,
      "learning_rate": 5.949097626081979e-08,
      "loss": 0.806284487247467,
      "step": 3631
    },
    {
      "epoch": 2.868878357030016,
      "grad_norm": 9.3150959160621,
      "learning_rate": 5.878624512009712e-08,
      "loss": 0.49238792061805725,
      "step": 3632
    },
    {
      "epoch": 2.8696682464454977,
      "grad_norm": 11.064857677412675,
      "learning_rate": 5.808568833302552e-08,
      "loss": 0.3209676146507263,
      "step": 3633
    },
    {
      "epoch": 2.8704581358609795,
      "grad_norm": 9.937366426134174,
      "learning_rate": 5.73893064914377e-08,
      "loss": 0.5651168823242188,
      "step": 3634
    },
    {
      "epoch": 2.8712480252764614,
      "grad_norm": 9.43706524762976,
      "learning_rate": 5.669710018364028e-08,
      "loss": 0.25006648898124695,
      "step": 3635
    },
    {
      "epoch": 2.8720379146919433,
      "grad_norm": 8.086894664969703,
      "learning_rate": 5.600906999441047e-08,
      "loss": 0.289908766746521,
      "step": 3636
    },
    {
      "epoch": 2.872827804107425,
      "grad_norm": 11.049864934040414,
      "learning_rate": 5.5325216504999445e-08,
      "loss": 0.2839067578315735,
      "step": 3637
    },
    {
      "epoch": 2.873617693522907,
      "grad_norm": 11.326901492566288,
      "learning_rate": 5.464554029312785e-08,
      "loss": 0.4851897358894348,
      "step": 3638
    },
    {
      "epoch": 2.874407582938389,
      "grad_norm": 9.870517653518927,
      "learning_rate": 5.39700419329886e-08,
      "loss": 0.3466881215572357,
      "step": 3639
    },
    {
      "epoch": 2.8751974723538707,
      "grad_norm": 9.471904879535721,
      "learning_rate": 5.329872199524577e-08,
      "loss": 0.41402703523635864,
      "step": 3640
    },
    {
      "epoch": 2.8759873617693525,
      "grad_norm": 11.444565675892793,
      "learning_rate": 5.263158104703125e-08,
      "loss": 0.7139902710914612,
      "step": 3641
    },
    {
      "epoch": 2.876777251184834,
      "grad_norm": 9.863961110827145,
      "learning_rate": 5.196861965194922e-08,
      "loss": 0.4099463224411011,
      "step": 3642
    },
    {
      "epoch": 2.877567140600316,
      "grad_norm": 14.395860031992335,
      "learning_rate": 5.1309838370071086e-08,
      "loss": 0.635336697101593,
      "step": 3643
    },
    {
      "epoch": 2.8783570300157977,
      "grad_norm": 11.549260196243356,
      "learning_rate": 5.0655237757937236e-08,
      "loss": 0.5518008470535278,
      "step": 3644
    },
    {
      "epoch": 2.8791469194312795,
      "grad_norm": 16.914171209055556,
      "learning_rate": 5.000481836855697e-08,
      "loss": 0.5935429930686951,
      "step": 3645
    },
    {
      "epoch": 2.8799368088467614,
      "grad_norm": 10.519627347888093,
      "learning_rate": 4.935858075140631e-08,
      "loss": 0.2353929877281189,
      "step": 3646
    },
    {
      "epoch": 2.8807266982622433,
      "grad_norm": 10.147294910154294,
      "learning_rate": 4.8716525452428556e-08,
      "loss": 0.5117212533950806,
      "step": 3647
    },
    {
      "epoch": 2.881516587677725,
      "grad_norm": 7.79219575426103,
      "learning_rate": 4.807865301403536e-08,
      "loss": 0.29170703887939453,
      "step": 3648
    },
    {
      "epoch": 2.882306477093207,
      "grad_norm": 9.052236405274959,
      "learning_rate": 4.744496397510234e-08,
      "loss": 0.34299367666244507,
      "step": 3649
    },
    {
      "epoch": 2.883096366508689,
      "grad_norm": 12.457687363908496,
      "learning_rate": 4.681545887097239e-08,
      "loss": 0.33234238624572754,
      "step": 3650
    },
    {
      "epoch": 2.8838862559241707,
      "grad_norm": 13.05592857803276,
      "learning_rate": 4.6190138233453976e-08,
      "loss": 0.27858805656433105,
      "step": 3651
    },
    {
      "epoch": 2.8846761453396526,
      "grad_norm": 10.916182057073014,
      "learning_rate": 4.556900259081898e-08,
      "loss": 0.6553702354431152,
      "step": 3652
    },
    {
      "epoch": 2.8854660347551344,
      "grad_norm": 9.39192402230028,
      "learning_rate": 4.495205246780543e-08,
      "loss": 0.5300555229187012,
      "step": 3653
    },
    {
      "epoch": 2.8862559241706163,
      "grad_norm": 5.7960707776723295,
      "learning_rate": 4.43392883856153e-08,
      "loss": 0.24335479736328125,
      "step": 3654
    },
    {
      "epoch": 2.8870458135860977,
      "grad_norm": 7.039387389488856,
      "learning_rate": 4.373071086191338e-08,
      "loss": 0.3162982761859894,
      "step": 3655
    },
    {
      "epoch": 2.8878357030015795,
      "grad_norm": 10.4351163530241,
      "learning_rate": 4.312632041082787e-08,
      "loss": 0.36064600944519043,
      "step": 3656
    },
    {
      "epoch": 2.8886255924170614,
      "grad_norm": 11.795988539884263,
      "learning_rate": 4.2526117542949774e-08,
      "loss": 0.343291312456131,
      "step": 3657
    },
    {
      "epoch": 2.8894154818325433,
      "grad_norm": 10.393934212831047,
      "learning_rate": 4.193010276533183e-08,
      "loss": 0.6907198429107666,
      "step": 3658
    },
    {
      "epoch": 2.890205371248025,
      "grad_norm": 9.56184934798155,
      "learning_rate": 4.13382765814907e-08,
      "loss": 0.7030273675918579,
      "step": 3659
    },
    {
      "epoch": 2.890995260663507,
      "grad_norm": 7.063388291601224,
      "learning_rate": 4.075063949140201e-08,
      "loss": 0.17387212812900543,
      "step": 3660
    },
    {
      "epoch": 2.891785150078989,
      "grad_norm": 8.661731775267791,
      "learning_rate": 4.016719199150365e-08,
      "loss": 0.5848299264907837,
      "step": 3661
    },
    {
      "epoch": 2.8925750394944707,
      "grad_norm": 16.72243976052531,
      "learning_rate": 3.958793457469412e-08,
      "loss": 0.4991316795349121,
      "step": 3662
    },
    {
      "epoch": 2.8933649289099526,
      "grad_norm": 14.820335568585508,
      "learning_rate": 3.901286773033253e-08,
      "loss": 0.30617228150367737,
      "step": 3663
    },
    {
      "epoch": 2.8941548183254344,
      "grad_norm": 13.043213388040584,
      "learning_rate": 3.8441991944236365e-08,
      "loss": 0.35002079606056213,
      "step": 3664
    },
    {
      "epoch": 2.8949447077409163,
      "grad_norm": 7.380501973738135,
      "learning_rate": 3.787530769868431e-08,
      "loss": 0.14187008142471313,
      "step": 3665
    },
    {
      "epoch": 2.895734597156398,
      "grad_norm": 9.224074386705713,
      "learning_rate": 3.731281547241228e-08,
      "loss": 0.1989508867263794,
      "step": 3666
    },
    {
      "epoch": 2.89652448657188,
      "grad_norm": 10.063244268865743,
      "learning_rate": 3.675451574061517e-08,
      "loss": 0.33559074997901917,
      "step": 3667
    },
    {
      "epoch": 2.897314375987362,
      "grad_norm": 17.367117559815597,
      "learning_rate": 3.620040897494737e-08,
      "loss": 0.7757275104522705,
      "step": 3668
    },
    {
      "epoch": 2.8981042654028437,
      "grad_norm": 14.80922413991561,
      "learning_rate": 3.565049564351997e-08,
      "loss": 0.5378328561782837,
      "step": 3669
    },
    {
      "epoch": 2.8988941548183256,
      "grad_norm": 11.674232729531763,
      "learning_rate": 3.510477621090192e-08,
      "loss": 0.5047122240066528,
      "step": 3670
    },
    {
      "epoch": 2.8996840442338074,
      "grad_norm": 9.61930347404772,
      "learning_rate": 3.456325113811776e-08,
      "loss": 0.22347621619701385,
      "step": 3671
    },
    {
      "epoch": 2.9004739336492893,
      "grad_norm": 12.428464114281123,
      "learning_rate": 3.4025920882649886e-08,
      "loss": 0.2939353585243225,
      "step": 3672
    },
    {
      "epoch": 2.901263823064771,
      "grad_norm": 11.787785169855981,
      "learning_rate": 3.3492785898437407e-08,
      "loss": 0.6785522699356079,
      "step": 3673
    },
    {
      "epoch": 2.902053712480253,
      "grad_norm": 10.424690477560073,
      "learning_rate": 3.296384663587338e-08,
      "loss": 0.9920533895492554,
      "step": 3674
    },
    {
      "epoch": 2.902843601895735,
      "grad_norm": 9.203768626957741,
      "learning_rate": 3.243910354180868e-08,
      "loss": 0.6544739007949829,
      "step": 3675
    },
    {
      "epoch": 2.9036334913112163,
      "grad_norm": 8.198475817478249,
      "learning_rate": 3.1918557059547605e-08,
      "loss": 0.2281288057565689,
      "step": 3676
    },
    {
      "epoch": 2.904423380726698,
      "grad_norm": 10.800632093569277,
      "learning_rate": 3.1402207628848935e-08,
      "loss": 0.2759685218334198,
      "step": 3677
    },
    {
      "epoch": 2.90521327014218,
      "grad_norm": 11.368103163964738,
      "learning_rate": 3.089005568592707e-08,
      "loss": 0.4425602853298187,
      "step": 3678
    },
    {
      "epoch": 2.906003159557662,
      "grad_norm": 15.676906486716955,
      "learning_rate": 3.038210166344924e-08,
      "loss": 0.9129424691200256,
      "step": 3679
    },
    {
      "epoch": 2.9067930489731437,
      "grad_norm": 7.319860581451804,
      "learning_rate": 2.9878345990536626e-08,
      "loss": 0.5854052305221558,
      "step": 3680
    },
    {
      "epoch": 2.9075829383886256,
      "grad_norm": 9.087306671915462,
      "learning_rate": 2.9378789092763816e-08,
      "loss": 0.2461828887462616,
      "step": 3681
    },
    {
      "epoch": 2.9083728278041074,
      "grad_norm": 9.89538463931709,
      "learning_rate": 2.8883431392158768e-08,
      "loss": 0.6880824565887451,
      "step": 3682
    },
    {
      "epoch": 2.9091627172195893,
      "grad_norm": 12.748919800585687,
      "learning_rate": 2.8392273307200068e-08,
      "loss": 0.9422566294670105,
      "step": 3683
    },
    {
      "epoch": 2.909952606635071,
      "grad_norm": 8.421188816814379,
      "learning_rate": 2.7905315252820808e-08,
      "loss": 0.4385561943054199,
      "step": 3684
    },
    {
      "epoch": 2.910742496050553,
      "grad_norm": 14.078145544877923,
      "learning_rate": 2.7422557640404135e-08,
      "loss": 0.5254925489425659,
      "step": 3685
    },
    {
      "epoch": 2.911532385466035,
      "grad_norm": 9.082888238767863,
      "learning_rate": 2.6944000877784925e-08,
      "loss": 0.5949431657791138,
      "step": 3686
    },
    {
      "epoch": 2.9123222748815167,
      "grad_norm": 13.607392988749478,
      "learning_rate": 2.646964536925034e-08,
      "loss": 0.691329300403595,
      "step": 3687
    },
    {
      "epoch": 2.913112164296998,
      "grad_norm": 10.530634100048369,
      "learning_rate": 2.59994915155376e-08,
      "loss": 0.4718400239944458,
      "step": 3688
    },
    {
      "epoch": 2.91390205371248,
      "grad_norm": 10.59452097306126,
      "learning_rate": 2.5533539713832878e-08,
      "loss": 0.33786359429359436,
      "step": 3689
    },
    {
      "epoch": 2.914691943127962,
      "grad_norm": 11.039511779455802,
      "learning_rate": 2.5071790357775183e-08,
      "loss": 0.8592206239700317,
      "step": 3690
    },
    {
      "epoch": 2.9154818325434437,
      "grad_norm": 11.096868713143294,
      "learning_rate": 2.461424383745137e-08,
      "loss": 0.5247258543968201,
      "step": 3691
    },
    {
      "epoch": 2.9162717219589256,
      "grad_norm": 9.7943810808923,
      "learning_rate": 2.4160900539397792e-08,
      "loss": 0.4834524393081665,
      "step": 3692
    },
    {
      "epoch": 2.9170616113744074,
      "grad_norm": 14.220945945722814,
      "learning_rate": 2.3711760846601427e-08,
      "loss": 0.49882930517196655,
      "step": 3693
    },
    {
      "epoch": 2.9178515007898893,
      "grad_norm": 9.444978699118497,
      "learning_rate": 2.326682513849654e-08,
      "loss": 0.4514763653278351,
      "step": 3694
    },
    {
      "epoch": 2.918641390205371,
      "grad_norm": 16.735071211993855,
      "learning_rate": 2.282609379096523e-08,
      "loss": 0.413520872592926,
      "step": 3695
    },
    {
      "epoch": 2.919431279620853,
      "grad_norm": 10.18649650301645,
      "learning_rate": 2.238956717634022e-08,
      "loss": 0.3149523138999939,
      "step": 3696
    },
    {
      "epoch": 2.920221169036335,
      "grad_norm": 7.242383152098311,
      "learning_rate": 2.1957245663399852e-08,
      "loss": 0.42863062024116516,
      "step": 3697
    },
    {
      "epoch": 2.9210110584518167,
      "grad_norm": 12.249847711419223,
      "learning_rate": 2.1529129617371414e-08,
      "loss": 0.3425447344779968,
      "step": 3698
    },
    {
      "epoch": 2.9218009478672986,
      "grad_norm": 21.564346952177747,
      "learning_rate": 2.1105219399927823e-08,
      "loss": 0.6913363933563232,
      "step": 3699
    },
    {
      "epoch": 2.9225908372827805,
      "grad_norm": 14.247309861203046,
      "learning_rate": 2.068551536919039e-08,
      "loss": 0.5754636526107788,
      "step": 3700
    },
    {
      "epoch": 2.9233807266982623,
      "grad_norm": 14.729656431110298,
      "learning_rate": 2.02700178797266e-08,
      "loss": 0.1754104197025299,
      "step": 3701
    },
    {
      "epoch": 2.924170616113744,
      "grad_norm": 10.378294101333756,
      "learning_rate": 1.9858727282549563e-08,
      "loss": 0.5599175095558167,
      "step": 3702
    },
    {
      "epoch": 2.924960505529226,
      "grad_norm": 7.713686434655625,
      "learning_rate": 1.9451643925119667e-08,
      "loss": 0.429149329662323,
      "step": 3703
    },
    {
      "epoch": 2.925750394944708,
      "grad_norm": 18.397053591718155,
      "learning_rate": 1.9048768151341825e-08,
      "loss": 1.2451764345169067,
      "step": 3704
    },
    {
      "epoch": 2.9265402843601898,
      "grad_norm": 11.278121609485279,
      "learning_rate": 1.8650100301566553e-08,
      "loss": 0.2728927433490753,
      "step": 3705
    },
    {
      "epoch": 2.9273301737756716,
      "grad_norm": 12.412783868000135,
      "learning_rate": 1.8255640712590004e-08,
      "loss": 0.39738449454307556,
      "step": 3706
    },
    {
      "epoch": 2.9281200631911535,
      "grad_norm": 14.573969152453861,
      "learning_rate": 1.7865389717653392e-08,
      "loss": 0.4282408356666565,
      "step": 3707
    },
    {
      "epoch": 2.9289099526066353,
      "grad_norm": 12.27556396872362,
      "learning_rate": 1.7479347646441323e-08,
      "loss": 0.573512852191925,
      "step": 3708
    },
    {
      "epoch": 2.9296998420221168,
      "grad_norm": 13.78449081143613,
      "learning_rate": 1.709751482508404e-08,
      "loss": 0.4519804120063782,
      "step": 3709
    },
    {
      "epoch": 2.9304897314375986,
      "grad_norm": 8.515540650947582,
      "learning_rate": 1.6719891576154612e-08,
      "loss": 0.6025781035423279,
      "step": 3710
    },
    {
      "epoch": 2.9312796208530805,
      "grad_norm": 9.597411743199306,
      "learning_rate": 1.634647821867119e-08,
      "loss": 0.15691038966178894,
      "step": 3711
    },
    {
      "epoch": 2.9320695102685623,
      "grad_norm": 10.486595198764013,
      "learning_rate": 1.5977275068093657e-08,
      "loss": 0.22191372513771057,
      "step": 3712
    },
    {
      "epoch": 2.932859399684044,
      "grad_norm": 11.847663895398458,
      "learning_rate": 1.56122824363264e-08,
      "loss": 0.5614909529685974,
      "step": 3713
    },
    {
      "epoch": 2.933649289099526,
      "grad_norm": 13.508354632592122,
      "learning_rate": 1.52515006317161e-08,
      "loss": 0.42124590277671814,
      "step": 3714
    },
    {
      "epoch": 2.934439178515008,
      "grad_norm": 12.085187248497775,
      "learning_rate": 1.4894929959053395e-08,
      "loss": 0.39891332387924194,
      "step": 3715
    },
    {
      "epoch": 2.9352290679304898,
      "grad_norm": 11.772324248757105,
      "learning_rate": 1.4542570719569549e-08,
      "loss": 0.743476152420044,
      "step": 3716
    },
    {
      "epoch": 2.9360189573459716,
      "grad_norm": 13.824383157557017,
      "learning_rate": 1.4194423210939223e-08,
      "loss": 0.7813572883605957,
      "step": 3717
    },
    {
      "epoch": 2.9368088467614535,
      "grad_norm": 12.023055278816088,
      "learning_rate": 1.3850487727278261e-08,
      "loss": 0.18068841099739075,
      "step": 3718
    },
    {
      "epoch": 2.9375987361769353,
      "grad_norm": 9.004344869826209,
      "learning_rate": 1.351076455914424e-08,
      "loss": 0.35265034437179565,
      "step": 3719
    },
    {
      "epoch": 2.938388625592417,
      "grad_norm": 6.411410825512506,
      "learning_rate": 1.3175253993537584e-08,
      "loss": 0.1986955851316452,
      "step": 3720
    },
    {
      "epoch": 2.9391785150078986,
      "grad_norm": 15.542536754284288,
      "learning_rate": 1.284395631389712e-08,
      "loss": 1.2308954000473022,
      "step": 3721
    },
    {
      "epoch": 2.9399684044233805,
      "grad_norm": 11.589586159826656,
      "learning_rate": 1.251687180010508e-08,
      "loss": 0.3059263229370117,
      "step": 3722
    },
    {
      "epoch": 2.9407582938388623,
      "grad_norm": 11.848666416715453,
      "learning_rate": 1.2194000728483758e-08,
      "loss": 0.381493479013443,
      "step": 3723
    },
    {
      "epoch": 2.941548183254344,
      "grad_norm": 9.762685828075522,
      "learning_rate": 1.1875343371795522e-08,
      "loss": 0.3416656255722046,
      "step": 3724
    },
    {
      "epoch": 2.942338072669826,
      "grad_norm": 13.146351843743089,
      "learning_rate": 1.1560899999242813e-08,
      "loss": 0.24065348505973816,
      "step": 3725
    },
    {
      "epoch": 2.943127962085308,
      "grad_norm": 8.753449681389933,
      "learning_rate": 1.1250670876468694e-08,
      "loss": 0.5435752868652344,
      "step": 3726
    },
    {
      "epoch": 2.9439178515007898,
      "grad_norm": 9.571620847460116,
      "learning_rate": 1.0944656265555186e-08,
      "loss": 0.5217230319976807,
      "step": 3727
    },
    {
      "epoch": 2.9447077409162716,
      "grad_norm": 13.736435986610593,
      "learning_rate": 1.0642856425025494e-08,
      "loss": 0.30995261669158936,
      "step": 3728
    },
    {
      "epoch": 2.9454976303317535,
      "grad_norm": 11.819774523695497,
      "learning_rate": 1.0345271609840668e-08,
      "loss": 0.27620571851730347,
      "step": 3729
    },
    {
      "epoch": 2.9462875197472354,
      "grad_norm": 15.156276372780589,
      "learning_rate": 1.0051902071401276e-08,
      "loss": 0.6031606793403625,
      "step": 3730
    },
    {
      "epoch": 2.947077409162717,
      "grad_norm": 12.80904870667671,
      "learning_rate": 9.762748057546845e-09,
      "loss": 0.4767545461654663,
      "step": 3731
    },
    {
      "epoch": 2.947867298578199,
      "grad_norm": 9.951774811263586,
      "learning_rate": 9.477809812555306e-09,
      "loss": 0.6249617338180542,
      "step": 3732
    },
    {
      "epoch": 2.948657187993681,
      "grad_norm": 8.066686549216158,
      "learning_rate": 9.197087577144104e-09,
      "loss": 0.2534811198711395,
      "step": 3733
    },
    {
      "epoch": 2.949447077409163,
      "grad_norm": 14.503347622672706,
      "learning_rate": 8.92058158846798e-09,
      "loss": 0.259134441614151,
      "step": 3734
    },
    {
      "epoch": 2.9502369668246446,
      "grad_norm": 12.45013867650853,
      "learning_rate": 8.648292080119524e-09,
      "loss": 0.7195329070091248,
      "step": 3735
    },
    {
      "epoch": 2.9510268562401265,
      "grad_norm": 14.526124154717072,
      "learning_rate": 8.38021928213084e-09,
      "loss": 0.5841654539108276,
      "step": 3736
    },
    {
      "epoch": 2.9518167456556084,
      "grad_norm": 9.850968529140541,
      "learning_rate": 8.11636342097022e-09,
      "loss": 0.36491304636001587,
      "step": 3737
    },
    {
      "epoch": 2.9526066350710902,
      "grad_norm": 10.07563348526924,
      "learning_rate": 7.856724719544351e-09,
      "loss": 0.4188252091407776,
      "step": 3738
    },
    {
      "epoch": 2.953396524486572,
      "grad_norm": 22.212639293967655,
      "learning_rate": 7.601303397196113e-09,
      "loss": 0.3442292809486389,
      "step": 3739
    },
    {
      "epoch": 2.954186413902054,
      "grad_norm": 14.029241381991797,
      "learning_rate": 7.350099669706235e-09,
      "loss": 0.21167263388633728,
      "step": 3740
    },
    {
      "epoch": 2.954976303317536,
      "grad_norm": 12.136596175552866,
      "learning_rate": 7.103113749293844e-09,
      "loss": 0.6398590207099915,
      "step": 3741
    },
    {
      "epoch": 2.955766192733017,
      "grad_norm": 9.741023971909382,
      "learning_rate": 6.860345844612038e-09,
      "loss": 0.5108597278594971,
      "step": 3742
    },
    {
      "epoch": 2.956556082148499,
      "grad_norm": 7.655686126361488,
      "learning_rate": 6.621796160752314e-09,
      "loss": 0.36386775970458984,
      "step": 3743
    },
    {
      "epoch": 2.957345971563981,
      "grad_norm": 10.018875222198334,
      "learning_rate": 6.387464899242357e-09,
      "loss": 0.30166739225387573,
      "step": 3744
    },
    {
      "epoch": 2.958135860979463,
      "grad_norm": 9.095182318951428,
      "learning_rate": 6.1573522580460346e-09,
      "loss": 0.45996037125587463,
      "step": 3745
    },
    {
      "epoch": 2.9589257503949447,
      "grad_norm": 10.26232984567016,
      "learning_rate": 5.931458431562842e-09,
      "loss": 0.5725584626197815,
      "step": 3746
    },
    {
      "epoch": 2.9597156398104265,
      "grad_norm": 8.756926533099401,
      "learning_rate": 5.709783610629571e-09,
      "loss": 0.29128578305244446,
      "step": 3747
    },
    {
      "epoch": 2.9605055292259084,
      "grad_norm": 11.68804184137246,
      "learning_rate": 5.492327982516976e-09,
      "loss": 0.5195301175117493,
      "step": 3748
    },
    {
      "epoch": 2.9612954186413902,
      "grad_norm": 10.545825985768683,
      "learning_rate": 5.2790917309325505e-09,
      "loss": 0.5382466912269592,
      "step": 3749
    },
    {
      "epoch": 2.962085308056872,
      "grad_norm": 11.350603837614045,
      "learning_rate": 5.070075036018863e-09,
      "loss": 0.43567579984664917,
      "step": 3750
    },
    {
      "epoch": 2.962875197472354,
      "grad_norm": 13.409834921169738,
      "learning_rate": 4.865278074354107e-09,
      "loss": 0.4768742322921753,
      "step": 3751
    },
    {
      "epoch": 2.963665086887836,
      "grad_norm": 14.469370990431916,
      "learning_rate": 4.664701018951001e-09,
      "loss": 0.48725759983062744,
      "step": 3752
    },
    {
      "epoch": 2.9644549763033177,
      "grad_norm": 10.832912501886673,
      "learning_rate": 4.468344039258443e-09,
      "loss": 0.2995648980140686,
      "step": 3753
    },
    {
      "epoch": 2.9652448657187995,
      "grad_norm": 17.54919278880869,
      "learning_rate": 4.2762073011592966e-09,
      "loss": 0.6087955832481384,
      "step": 3754
    },
    {
      "epoch": 2.966034755134281,
      "grad_norm": 10.31999554844462,
      "learning_rate": 4.088290966970943e-09,
      "loss": 0.8731129169464111,
      "step": 3755
    },
    {
      "epoch": 2.966824644549763,
      "grad_norm": 10.898544647919328,
      "learning_rate": 3.904595195445837e-09,
      "loss": 0.7485541105270386,
      "step": 3756
    },
    {
      "epoch": 2.9676145339652447,
      "grad_norm": 13.203065381428397,
      "learning_rate": 3.7251201417703996e-09,
      "loss": 0.3719606399536133,
      "step": 3757
    },
    {
      "epoch": 2.9684044233807265,
      "grad_norm": 8.052328293519382,
      "learning_rate": 3.5498659575666783e-09,
      "loss": 0.2648746371269226,
      "step": 3758
    },
    {
      "epoch": 2.9691943127962084,
      "grad_norm": 12.815000426949975,
      "learning_rate": 3.3788327908890196e-09,
      "loss": 0.3461490869522095,
      "step": 3759
    },
    {
      "epoch": 2.9699842022116902,
      "grad_norm": 7.421454114527485,
      "learning_rate": 3.212020786227399e-09,
      "loss": 0.3270190358161926,
      "step": 3760
    },
    {
      "epoch": 2.970774091627172,
      "grad_norm": 10.632206347492682,
      "learning_rate": 3.0494300845052006e-09,
      "loss": 0.48663121461868286,
      "step": 3761
    },
    {
      "epoch": 2.971563981042654,
      "grad_norm": 9.052624248155158,
      "learning_rate": 2.8910608230786617e-09,
      "loss": 0.3653707206249237,
      "step": 3762
    },
    {
      "epoch": 2.972353870458136,
      "grad_norm": 21.507879887876314,
      "learning_rate": 2.7369131357402045e-09,
      "loss": 0.6693405508995056,
      "step": 3763
    },
    {
      "epoch": 2.9731437598736177,
      "grad_norm": 11.871106908449853,
      "learning_rate": 2.586987152712883e-09,
      "loss": 0.3541383445262909,
      "step": 3764
    },
    {
      "epoch": 2.9739336492890995,
      "grad_norm": 9.347340281030029,
      "learning_rate": 2.441283000655381e-09,
      "loss": 0.2928946614265442,
      "step": 3765
    },
    {
      "epoch": 2.9747235387045814,
      "grad_norm": 14.20794932298081,
      "learning_rate": 2.2998008026592356e-09,
      "loss": 0.8095124363899231,
      "step": 3766
    },
    {
      "epoch": 2.9755134281200633,
      "grad_norm": 19.781359673109964,
      "learning_rate": 2.1625406782482817e-09,
      "loss": 0.9541232585906982,
      "step": 3767
    },
    {
      "epoch": 2.976303317535545,
      "grad_norm": 12.307332036811797,
      "learning_rate": 2.0295027433808736e-09,
      "loss": 1.270265817642212,
      "step": 3768
    },
    {
      "epoch": 2.977093206951027,
      "grad_norm": 9.268483761306474,
      "learning_rate": 1.9006871104482183e-09,
      "loss": 0.3142393231391907,
      "step": 3769
    },
    {
      "epoch": 2.977883096366509,
      "grad_norm": 12.935271250550574,
      "learning_rate": 1.776093888273267e-09,
      "loss": 2.3558170795440674,
      "step": 3770
    },
    {
      "epoch": 2.9786729857819907,
      "grad_norm": 14.14628000993969,
      "learning_rate": 1.655723182113489e-09,
      "loss": 0.3965626657009125,
      "step": 3771
    },
    {
      "epoch": 2.9794628751974725,
      "grad_norm": 10.694643173228462,
      "learning_rate": 1.5395750936580967e-09,
      "loss": 0.3927484452724457,
      "step": 3772
    },
    {
      "epoch": 2.9802527646129544,
      "grad_norm": 15.868269189008021,
      "learning_rate": 1.4276497210297114e-09,
      "loss": 0.2537468671798706,
      "step": 3773
    },
    {
      "epoch": 2.9810426540284363,
      "grad_norm": 13.865339454139647,
      "learning_rate": 1.3199471587832524e-09,
      "loss": 0.46213477849960327,
      "step": 3774
    },
    {
      "epoch": 2.981832543443918,
      "grad_norm": 12.317620064332576,
      "learning_rate": 1.2164674979059378e-09,
      "loss": 0.6187537312507629,
      "step": 3775
    },
    {
      "epoch": 2.9826224328593995,
      "grad_norm": 12.70811742821486,
      "learning_rate": 1.1172108258178382e-09,
      "loss": 0.9330609440803528,
      "step": 3776
    },
    {
      "epoch": 2.9834123222748814,
      "grad_norm": 12.521602937647542,
      "learning_rate": 1.0221772263707686e-09,
      "loss": 0.48245567083358765,
      "step": 3777
    },
    {
      "epoch": 2.9842022116903633,
      "grad_norm": 13.143173870731685,
      "learning_rate": 9.313667798505065e-10,
      "loss": 0.4393605887889862,
      "step": 3778
    },
    {
      "epoch": 2.984992101105845,
      "grad_norm": 7.047235748717833,
      "learning_rate": 8.447795629734634e-10,
      "loss": 0.32978931069374084,
      "step": 3779
    },
    {
      "epoch": 2.985781990521327,
      "grad_norm": 8.024033854946516,
      "learning_rate": 7.624156488883483e-10,
      "loss": 0.41405847668647766,
      "step": 3780
    },
    {
      "epoch": 2.986571879936809,
      "grad_norm": 22.169614392633356,
      "learning_rate": 6.842751071772791e-10,
      "loss": 0.6391655206680298,
      "step": 3781
    },
    {
      "epoch": 2.9873617693522907,
      "grad_norm": 12.141908595978165,
      "learning_rate": 6.103580038530066e-10,
      "loss": 0.5002020001411438,
      "step": 3782
    },
    {
      "epoch": 2.9881516587677726,
      "grad_norm": 11.596188780257132,
      "learning_rate": 5.406644013616902e-10,
      "loss": 0.745347261428833,
      "step": 3783
    },
    {
      "epoch": 2.9889415481832544,
      "grad_norm": 16.660637478949464,
      "learning_rate": 4.751943585801222e-10,
      "loss": 0.6414915919303894,
      "step": 3784
    },
    {
      "epoch": 2.9897314375987363,
      "grad_norm": 10.793044072751652,
      "learning_rate": 4.1394793081739324e-10,
      "loss": 0.5143662691116333,
      "step": 3785
    },
    {
      "epoch": 2.990521327014218,
      "grad_norm": 10.877705766316154,
      "learning_rate": 3.5692516981544744e-10,
      "loss": 0.35477590560913086,
      "step": 3786
    },
    {
      "epoch": 2.9913112164297,
      "grad_norm": 10.463002285868905,
      "learning_rate": 3.041261237463067e-10,
      "loss": 0.5146927833557129,
      "step": 3787
    },
    {
      "epoch": 2.9921011058451814,
      "grad_norm": 12.683003185650735,
      "learning_rate": 2.5555083721595654e-10,
      "loss": 0.7653088569641113,
      "step": 3788
    },
    {
      "epoch": 2.9928909952606633,
      "grad_norm": 11.791360620237418,
      "learning_rate": 2.1119935125990532e-10,
      "loss": 0.3955351710319519,
      "step": 3789
    },
    {
      "epoch": 2.993680884676145,
      "grad_norm": 18.909176585649405,
      "learning_rate": 1.710717033470699e-10,
      "loss": 0.6973004341125488,
      "step": 3790
    },
    {
      "epoch": 2.994470774091627,
      "grad_norm": 10.889260209523764,
      "learning_rate": 1.3516792737700014e-10,
      "loss": 0.3600936233997345,
      "step": 3791
    },
    {
      "epoch": 2.995260663507109,
      "grad_norm": 8.488379905621288,
      "learning_rate": 1.0348805368209924e-10,
      "loss": 0.3776974678039551,
      "step": 3792
    },
    {
      "epoch": 2.9960505529225907,
      "grad_norm": 7.3688565178666146,
      "learning_rate": 7.603210902484837e-11,
      "loss": 0.2519044876098633,
      "step": 3793
    },
    {
      "epoch": 2.9968404423380726,
      "grad_norm": 9.212568490304637,
      "learning_rate": 5.280011660002693e-11,
      "loss": 0.3771563470363617,
      "step": 3794
    },
    {
      "epoch": 2.9976303317535544,
      "grad_norm": 10.294914027544094,
      "learning_rate": 3.3792096034712675e-11,
      "loss": 0.35696250200271606,
      "step": 3795
    },
    {
      "epoch": 2.9984202211690363,
      "grad_norm": 6.820884906019979,
      "learning_rate": 1.9008063386616315e-11,
      "loss": 0.24771413207054138,
      "step": 3796
    },
    {
      "epoch": 2.999210110584518,
      "grad_norm": 11.182619500206162,
      "learning_rate": 8.448031145746883e-12,
      "loss": 0.6118890047073364,
      "step": 3797
    },
    {
      "epoch": 3.0,
      "grad_norm": 11.835933309637019,
      "learning_rate": 2.1120082327463766e-12,
      "loss": 0.2823507785797119,
      "step": 3798
    },
    {
      "epoch": 3.0,
      "step": 3798,
      "total_flos": 10133269585920.0,
      "train_loss": 1.6510612177336417,
      "train_runtime": 4328.0202,
      "train_samples_per_second": 3.51,
      "train_steps_per_second": 0.878
    }
  ],
  "logging_steps": 1,
  "max_steps": 3798,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 10133269585920.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}