17096 lines
441 KiB
JSON
17096 lines
441 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 2436,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0012315270935960591,
|
|
"grad_norm": 36.7600685768779,
|
|
"learning_rate": 0.0,
|
|
"loss": 4.157936096191406,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0024630541871921183,
|
|
"grad_norm": 37.664654386111934,
|
|
"learning_rate": 4.098360655737705e-08,
|
|
"loss": 3.8494455814361572,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.003694581280788177,
|
|
"grad_norm": 38.23654519991739,
|
|
"learning_rate": 8.19672131147541e-08,
|
|
"loss": 3.7497382164001465,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0049261083743842365,
|
|
"grad_norm": 49.1212230676838,
|
|
"learning_rate": 1.2295081967213116e-07,
|
|
"loss": 4.874395847320557,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.006157635467980296,
|
|
"grad_norm": 51.23013396325368,
|
|
"learning_rate": 1.639344262295082e-07,
|
|
"loss": 5.729328155517578,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.007389162561576354,
|
|
"grad_norm": 33.06662236870545,
|
|
"learning_rate": 2.0491803278688524e-07,
|
|
"loss": 3.968146324157715,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.008620689655172414,
|
|
"grad_norm": 33.94224964860029,
|
|
"learning_rate": 2.459016393442623e-07,
|
|
"loss": 4.092198848724365,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.009852216748768473,
|
|
"grad_norm": 28.585037517248036,
|
|
"learning_rate": 2.8688524590163937e-07,
|
|
"loss": 3.4101109504699707,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.011083743842364532,
|
|
"grad_norm": 39.512646004891735,
|
|
"learning_rate": 3.278688524590164e-07,
|
|
"loss": 4.387180805206299,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.012315270935960592,
|
|
"grad_norm": 29.487139965581328,
|
|
"learning_rate": 3.6885245901639347e-07,
|
|
"loss": 3.4985814094543457,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.013546798029556651,
|
|
"grad_norm": 35.1254398727907,
|
|
"learning_rate": 4.0983606557377047e-07,
|
|
"loss": 5.157108306884766,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.014778325123152709,
|
|
"grad_norm": 33.7037580376338,
|
|
"learning_rate": 4.508196721311476e-07,
|
|
"loss": 4.057161808013916,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.01600985221674877,
|
|
"grad_norm": 35.136997816960864,
|
|
"learning_rate": 4.918032786885246e-07,
|
|
"loss": 4.237695693969727,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.017241379310344827,
|
|
"grad_norm": 39.34259468640213,
|
|
"learning_rate": 5.327868852459017e-07,
|
|
"loss": 4.635364532470703,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.01847290640394089,
|
|
"grad_norm": 33.5811322334086,
|
|
"learning_rate": 5.737704918032787e-07,
|
|
"loss": 3.3291709423065186,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.019704433497536946,
|
|
"grad_norm": 33.93459885987163,
|
|
"learning_rate": 6.147540983606558e-07,
|
|
"loss": 3.8693442344665527,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.020935960591133004,
|
|
"grad_norm": 25.605142057165235,
|
|
"learning_rate": 6.557377049180328e-07,
|
|
"loss": 3.4419002532958984,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.022167487684729065,
|
|
"grad_norm": 33.566059151369195,
|
|
"learning_rate": 6.967213114754098e-07,
|
|
"loss": 3.8446784019470215,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.023399014778325122,
|
|
"grad_norm": 29.72848721122937,
|
|
"learning_rate": 7.377049180327869e-07,
|
|
"loss": 3.5930001735687256,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.024630541871921183,
|
|
"grad_norm": 26.393927957123275,
|
|
"learning_rate": 7.78688524590164e-07,
|
|
"loss": 3.638699531555176,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.02586206896551724,
|
|
"grad_norm": 26.06446386508918,
|
|
"learning_rate": 8.196721311475409e-07,
|
|
"loss": 3.6789143085479736,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.027093596059113302,
|
|
"grad_norm": 35.2733178056508,
|
|
"learning_rate": 8.606557377049181e-07,
|
|
"loss": 3.959703207015991,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.02832512315270936,
|
|
"grad_norm": 33.03896583989334,
|
|
"learning_rate": 9.016393442622952e-07,
|
|
"loss": 3.8822054862976074,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.029556650246305417,
|
|
"grad_norm": 33.57337166473473,
|
|
"learning_rate": 9.426229508196721e-07,
|
|
"loss": 3.8448376655578613,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03078817733990148,
|
|
"grad_norm": 20.141759958099808,
|
|
"learning_rate": 9.836065573770493e-07,
|
|
"loss": 3.372765064239502,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.03201970443349754,
|
|
"grad_norm": 23.420906015149534,
|
|
"learning_rate": 1.0245901639344263e-06,
|
|
"loss": 3.4989559650421143,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.0332512315270936,
|
|
"grad_norm": 33.133583346249836,
|
|
"learning_rate": 1.0655737704918034e-06,
|
|
"loss": 3.6318516731262207,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.034482758620689655,
|
|
"grad_norm": 18.99907077955952,
|
|
"learning_rate": 1.1065573770491804e-06,
|
|
"loss": 3.351621627807617,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.03571428571428571,
|
|
"grad_norm": 18.353082575411992,
|
|
"learning_rate": 1.1475409836065575e-06,
|
|
"loss": 3.1978442668914795,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.03694581280788178,
|
|
"grad_norm": 26.628518248775677,
|
|
"learning_rate": 1.1885245901639345e-06,
|
|
"loss": 4.033670902252197,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.038177339901477834,
|
|
"grad_norm": 16.452853960671934,
|
|
"learning_rate": 1.2295081967213116e-06,
|
|
"loss": 3.626315116882324,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.03940886699507389,
|
|
"grad_norm": 16.372280561150735,
|
|
"learning_rate": 1.2704918032786886e-06,
|
|
"loss": 3.385767936706543,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.04064039408866995,
|
|
"grad_norm": 23.073122100098054,
|
|
"learning_rate": 1.3114754098360657e-06,
|
|
"loss": 3.946913719177246,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.04187192118226601,
|
|
"grad_norm": 11.580002792760054,
|
|
"learning_rate": 1.352459016393443e-06,
|
|
"loss": 3.3034565448760986,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.04310344827586207,
|
|
"grad_norm": 17.961230909917667,
|
|
"learning_rate": 1.3934426229508196e-06,
|
|
"loss": 3.2368359565734863,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.04433497536945813,
|
|
"grad_norm": 11.543206406321579,
|
|
"learning_rate": 1.4344262295081968e-06,
|
|
"loss": 3.728569984436035,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.04556650246305419,
|
|
"grad_norm": 14.762221765187595,
|
|
"learning_rate": 1.4754098360655739e-06,
|
|
"loss": 3.3756117820739746,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.046798029556650245,
|
|
"grad_norm": 13.981113216433073,
|
|
"learning_rate": 1.516393442622951e-06,
|
|
"loss": 3.399596691131592,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.0480295566502463,
|
|
"grad_norm": 24.184372796013783,
|
|
"learning_rate": 1.557377049180328e-06,
|
|
"loss": 4.209182262420654,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.04926108374384237,
|
|
"grad_norm": 11.628888477605962,
|
|
"learning_rate": 1.5983606557377053e-06,
|
|
"loss": 2.797691822052002,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.050492610837438424,
|
|
"grad_norm": 16.948512477650098,
|
|
"learning_rate": 1.6393442622950819e-06,
|
|
"loss": 3.630617141723633,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.05172413793103448,
|
|
"grad_norm": 14.186312302659116,
|
|
"learning_rate": 1.6803278688524592e-06,
|
|
"loss": 3.182535171508789,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.05295566502463054,
|
|
"grad_norm": 13.666441097834594,
|
|
"learning_rate": 1.7213114754098362e-06,
|
|
"loss": 3.554767370223999,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.054187192118226604,
|
|
"grad_norm": 16.91458664100256,
|
|
"learning_rate": 1.7622950819672133e-06,
|
|
"loss": 3.675961494445801,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.05541871921182266,
|
|
"grad_norm": 16.161861225550066,
|
|
"learning_rate": 1.8032786885245903e-06,
|
|
"loss": 3.346269369125366,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.05665024630541872,
|
|
"grad_norm": 14.040742605132769,
|
|
"learning_rate": 1.8442622950819674e-06,
|
|
"loss": 3.4892683029174805,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.05788177339901478,
|
|
"grad_norm": 14.981644166015332,
|
|
"learning_rate": 1.8852459016393442e-06,
|
|
"loss": 3.3602352142333984,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.059113300492610835,
|
|
"grad_norm": 9.346123052417639,
|
|
"learning_rate": 1.9262295081967215e-06,
|
|
"loss": 3.301713228225708,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.0603448275862069,
|
|
"grad_norm": 22.6894652203607,
|
|
"learning_rate": 1.9672131147540985e-06,
|
|
"loss": 3.7745046615600586,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.06157635467980296,
|
|
"grad_norm": 8.465817304604528,
|
|
"learning_rate": 2.0081967213114756e-06,
|
|
"loss": 3.0452070236206055,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.06280788177339902,
|
|
"grad_norm": 20.560185363485036,
|
|
"learning_rate": 2.0491803278688526e-06,
|
|
"loss": 3.7955079078674316,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.06403940886699508,
|
|
"grad_norm": 8.75621229547506,
|
|
"learning_rate": 2.0901639344262297e-06,
|
|
"loss": 3.1644039154052734,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.06527093596059114,
|
|
"grad_norm": 13.679443353464602,
|
|
"learning_rate": 2.1311475409836067e-06,
|
|
"loss": 3.2459874153137207,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.0665024630541872,
|
|
"grad_norm": 12.278683741598382,
|
|
"learning_rate": 2.1721311475409838e-06,
|
|
"loss": 3.61742901802063,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.06773399014778325,
|
|
"grad_norm": 12.717536959646948,
|
|
"learning_rate": 2.213114754098361e-06,
|
|
"loss": 3.3136467933654785,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.06896551724137931,
|
|
"grad_norm": 15.543240982145285,
|
|
"learning_rate": 2.254098360655738e-06,
|
|
"loss": 3.272696018218994,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.07019704433497537,
|
|
"grad_norm": 13.101250342680272,
|
|
"learning_rate": 2.295081967213115e-06,
|
|
"loss": 3.041365385055542,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.07142857142857142,
|
|
"grad_norm": 11.7077150462335,
|
|
"learning_rate": 2.336065573770492e-06,
|
|
"loss": 3.309293746948242,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.07266009852216748,
|
|
"grad_norm": 26.32874973946408,
|
|
"learning_rate": 2.377049180327869e-06,
|
|
"loss": 3.4676990509033203,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.07389162561576355,
|
|
"grad_norm": 16.588748060840203,
|
|
"learning_rate": 2.418032786885246e-06,
|
|
"loss": 2.8236446380615234,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.07512315270935961,
|
|
"grad_norm": 8.18040938852151,
|
|
"learning_rate": 2.459016393442623e-06,
|
|
"loss": 2.716705083847046,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.07635467980295567,
|
|
"grad_norm": 20.07190390154421,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 2.5590922832489014,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.07758620689655173,
|
|
"grad_norm": 11.418876796774995,
|
|
"learning_rate": 2.5409836065573773e-06,
|
|
"loss": 2.6987993717193604,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.07881773399014778,
|
|
"grad_norm": 13.315536498724418,
|
|
"learning_rate": 2.5819672131147543e-06,
|
|
"loss": 4.340274810791016,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.08004926108374384,
|
|
"grad_norm": 17.075484530853824,
|
|
"learning_rate": 2.6229508196721314e-06,
|
|
"loss": 4.166017532348633,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.0812807881773399,
|
|
"grad_norm": 9.586520693266204,
|
|
"learning_rate": 2.6639344262295084e-06,
|
|
"loss": 2.664743185043335,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.08251231527093596,
|
|
"grad_norm": 11.154276667212649,
|
|
"learning_rate": 2.704918032786886e-06,
|
|
"loss": 3.4285409450531006,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.08374384236453201,
|
|
"grad_norm": 23.203683210215114,
|
|
"learning_rate": 2.745901639344263e-06,
|
|
"loss": 2.613044023513794,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.08497536945812807,
|
|
"grad_norm": 13.748249566024421,
|
|
"learning_rate": 2.786885245901639e-06,
|
|
"loss": 3.1923232078552246,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.08620689655172414,
|
|
"grad_norm": 23.6456335605133,
|
|
"learning_rate": 2.8278688524590166e-06,
|
|
"loss": 3.881509780883789,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.0874384236453202,
|
|
"grad_norm": 12.242314523228817,
|
|
"learning_rate": 2.8688524590163937e-06,
|
|
"loss": 3.3872318267822266,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.08866995073891626,
|
|
"grad_norm": 10.174962303917177,
|
|
"learning_rate": 2.9098360655737707e-06,
|
|
"loss": 3.1114461421966553,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.08990147783251232,
|
|
"grad_norm": 9.979115596445391,
|
|
"learning_rate": 2.9508196721311478e-06,
|
|
"loss": 3.182547092437744,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.09113300492610837,
|
|
"grad_norm": 10.437140873327547,
|
|
"learning_rate": 2.991803278688525e-06,
|
|
"loss": 3.488222599029541,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.09236453201970443,
|
|
"grad_norm": 9.422729886318432,
|
|
"learning_rate": 3.032786885245902e-06,
|
|
"loss": 3.0836119651794434,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.09359605911330049,
|
|
"grad_norm": 9.576987414129725,
|
|
"learning_rate": 3.073770491803279e-06,
|
|
"loss": 2.965284824371338,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.09482758620689655,
|
|
"grad_norm": 9.051063368959207,
|
|
"learning_rate": 3.114754098360656e-06,
|
|
"loss": 3.0366950035095215,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.0960591133004926,
|
|
"grad_norm": 19.769081445901076,
|
|
"learning_rate": 3.155737704918033e-06,
|
|
"loss": 3.7336153984069824,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.09729064039408868,
|
|
"grad_norm": 17.150697728192082,
|
|
"learning_rate": 3.1967213114754105e-06,
|
|
"loss": 3.3801069259643555,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.09852216748768473,
|
|
"grad_norm": 11.029522805215215,
|
|
"learning_rate": 3.2377049180327876e-06,
|
|
"loss": 3.1140761375427246,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.09975369458128079,
|
|
"grad_norm": 9.099280236883942,
|
|
"learning_rate": 3.2786885245901638e-06,
|
|
"loss": 3.1199679374694824,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.10098522167487685,
|
|
"grad_norm": 10.894555994753386,
|
|
"learning_rate": 3.3196721311475413e-06,
|
|
"loss": 2.919370651245117,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.1022167487684729,
|
|
"grad_norm": 10.246835888516838,
|
|
"learning_rate": 3.3606557377049183e-06,
|
|
"loss": 3.0058987140655518,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.10344827586206896,
|
|
"grad_norm": 8.315907792605513,
|
|
"learning_rate": 3.4016393442622954e-06,
|
|
"loss": 3.201812744140625,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.10467980295566502,
|
|
"grad_norm": 10.55746200109404,
|
|
"learning_rate": 3.4426229508196724e-06,
|
|
"loss": 2.8387913703918457,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.10591133004926108,
|
|
"grad_norm": 23.69077930997652,
|
|
"learning_rate": 3.4836065573770495e-06,
|
|
"loss": 3.565217971801758,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.10714285714285714,
|
|
"grad_norm": 17.752023971892026,
|
|
"learning_rate": 3.5245901639344265e-06,
|
|
"loss": 3.563566207885742,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.10837438423645321,
|
|
"grad_norm": 7.328374103560201,
|
|
"learning_rate": 3.5655737704918036e-06,
|
|
"loss": 3.3282840251922607,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.10960591133004927,
|
|
"grad_norm": 9.307632619059875,
|
|
"learning_rate": 3.6065573770491806e-06,
|
|
"loss": 2.693999767303467,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.11083743842364532,
|
|
"grad_norm": 9.537047052971076,
|
|
"learning_rate": 3.6475409836065577e-06,
|
|
"loss": 3.0820372104644775,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.11206896551724138,
|
|
"grad_norm": 11.895652602739977,
|
|
"learning_rate": 3.6885245901639347e-06,
|
|
"loss": 2.5853302478790283,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.11330049261083744,
|
|
"grad_norm": 19.909007675751152,
|
|
"learning_rate": 3.729508196721312e-06,
|
|
"loss": 3.622239589691162,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.1145320197044335,
|
|
"grad_norm": 9.562243449141407,
|
|
"learning_rate": 3.7704918032786884e-06,
|
|
"loss": 3.269063949584961,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.11576354679802955,
|
|
"grad_norm": 10.402493100303827,
|
|
"learning_rate": 3.811475409836066e-06,
|
|
"loss": 2.932877540588379,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.11699507389162561,
|
|
"grad_norm": 7.9937288583052,
|
|
"learning_rate": 3.852459016393443e-06,
|
|
"loss": 2.8118062019348145,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.11822660098522167,
|
|
"grad_norm": 12.161021036700474,
|
|
"learning_rate": 3.8934426229508196e-06,
|
|
"loss": 2.977217674255371,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.11945812807881774,
|
|
"grad_norm": 9.48055025878799,
|
|
"learning_rate": 3.934426229508197e-06,
|
|
"loss": 2.534318685531616,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.1206896551724138,
|
|
"grad_norm": 8.971246829575332,
|
|
"learning_rate": 3.975409836065574e-06,
|
|
"loss": 2.888187885284424,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.12192118226600986,
|
|
"grad_norm": 9.005963079459367,
|
|
"learning_rate": 4.016393442622951e-06,
|
|
"loss": 2.6558847427368164,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.12315270935960591,
|
|
"grad_norm": 9.651575487247985,
|
|
"learning_rate": 4.057377049180329e-06,
|
|
"loss": 2.707779884338379,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.12438423645320197,
|
|
"grad_norm": 8.8113086796363,
|
|
"learning_rate": 4.098360655737705e-06,
|
|
"loss": 3.2292768955230713,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.12561576354679804,
|
|
"grad_norm": 13.438004585842267,
|
|
"learning_rate": 4.139344262295083e-06,
|
|
"loss": 2.9476242065429688,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.1268472906403941,
|
|
"grad_norm": 9.014089316100105,
|
|
"learning_rate": 4.180327868852459e-06,
|
|
"loss": 2.9598989486694336,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.12807881773399016,
|
|
"grad_norm": 8.84790292690003,
|
|
"learning_rate": 4.221311475409837e-06,
|
|
"loss": 2.593669891357422,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.12931034482758622,
|
|
"grad_norm": 9.732549020932908,
|
|
"learning_rate": 4.2622950819672135e-06,
|
|
"loss": 2.884164810180664,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.13054187192118227,
|
|
"grad_norm": 16.843882776588455,
|
|
"learning_rate": 4.30327868852459e-06,
|
|
"loss": 3.091454267501831,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.13177339901477833,
|
|
"grad_norm": 11.588593389024608,
|
|
"learning_rate": 4.3442622950819676e-06,
|
|
"loss": 2.913923740386963,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.1330049261083744,
|
|
"grad_norm": 18.29569166468431,
|
|
"learning_rate": 4.385245901639344e-06,
|
|
"loss": 2.779545307159424,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.13423645320197045,
|
|
"grad_norm": 9.202902461418143,
|
|
"learning_rate": 4.426229508196722e-06,
|
|
"loss": 1.8711936473846436,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.1354679802955665,
|
|
"grad_norm": 13.481452134492262,
|
|
"learning_rate": 4.467213114754098e-06,
|
|
"loss": 2.892902374267578,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.13669950738916256,
|
|
"grad_norm": 12.958399723073786,
|
|
"learning_rate": 4.508196721311476e-06,
|
|
"loss": 3.0064496994018555,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.13793103448275862,
|
|
"grad_norm": 13.016721832572243,
|
|
"learning_rate": 4.549180327868853e-06,
|
|
"loss": 2.8515172004699707,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.13916256157635468,
|
|
"grad_norm": 8.374489861175874,
|
|
"learning_rate": 4.59016393442623e-06,
|
|
"loss": 3.2504403591156006,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.14039408866995073,
|
|
"grad_norm": 7.893218569270328,
|
|
"learning_rate": 4.631147540983607e-06,
|
|
"loss": 2.67405366897583,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.1416256157635468,
|
|
"grad_norm": 10.146133271952388,
|
|
"learning_rate": 4.672131147540984e-06,
|
|
"loss": 3.079516887664795,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.14285714285714285,
|
|
"grad_norm": 19.354096600007853,
|
|
"learning_rate": 4.7131147540983615e-06,
|
|
"loss": 2.8897287845611572,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.1440886699507389,
|
|
"grad_norm": 13.276953948761626,
|
|
"learning_rate": 4.754098360655738e-06,
|
|
"loss": 2.7275729179382324,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.14532019704433496,
|
|
"grad_norm": 9.682874064462416,
|
|
"learning_rate": 4.795081967213115e-06,
|
|
"loss": 2.9996538162231445,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.14655172413793102,
|
|
"grad_norm": 7.397102570298892,
|
|
"learning_rate": 4.836065573770492e-06,
|
|
"loss": 3.307245969772339,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.1477832512315271,
|
|
"grad_norm": 12.665703486872426,
|
|
"learning_rate": 4.877049180327869e-06,
|
|
"loss": 3.475133180618286,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.14901477832512317,
|
|
"grad_norm": 11.317195785901513,
|
|
"learning_rate": 4.918032786885246e-06,
|
|
"loss": 3.0947790145874023,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.15024630541871922,
|
|
"grad_norm": 7.236267930218516,
|
|
"learning_rate": 4.959016393442623e-06,
|
|
"loss": 2.9675135612487793,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.15147783251231528,
|
|
"grad_norm": 8.759893869589918,
|
|
"learning_rate": 5e-06,
|
|
"loss": 2.7873148918151855,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.15270935960591134,
|
|
"grad_norm": 10.395692764487977,
|
|
"learning_rate": 5.040983606557377e-06,
|
|
"loss": 3.10044264793396,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.1539408866995074,
|
|
"grad_norm": 10.40007835832301,
|
|
"learning_rate": 5.0819672131147545e-06,
|
|
"loss": 3.755798101425171,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.15517241379310345,
|
|
"grad_norm": 13.715148535872732,
|
|
"learning_rate": 5.122950819672131e-06,
|
|
"loss": 3.0117135047912598,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.1564039408866995,
|
|
"grad_norm": 12.668410235183005,
|
|
"learning_rate": 5.163934426229509e-06,
|
|
"loss": 2.944417953491211,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.15763546798029557,
|
|
"grad_norm": 14.317219715469237,
|
|
"learning_rate": 5.204918032786885e-06,
|
|
"loss": 2.672874927520752,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.15886699507389163,
|
|
"grad_norm": 16.489459603874575,
|
|
"learning_rate": 5.245901639344263e-06,
|
|
"loss": 2.7205734252929688,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.16009852216748768,
|
|
"grad_norm": 16.41932178225047,
|
|
"learning_rate": 5.286885245901639e-06,
|
|
"loss": 2.883897304534912,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.16133004926108374,
|
|
"grad_norm": 15.043569897203326,
|
|
"learning_rate": 5.327868852459017e-06,
|
|
"loss": 2.782104253768921,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.1625615763546798,
|
|
"grad_norm": 8.98371180872493,
|
|
"learning_rate": 5.3688524590163935e-06,
|
|
"loss": 2.6445870399475098,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.16379310344827586,
|
|
"grad_norm": 11.815392040561601,
|
|
"learning_rate": 5.409836065573772e-06,
|
|
"loss": 2.9319727420806885,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.16502463054187191,
|
|
"grad_norm": 10.152797634103624,
|
|
"learning_rate": 5.4508196721311476e-06,
|
|
"loss": 3.169668674468994,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.16625615763546797,
|
|
"grad_norm": 14.778160076043047,
|
|
"learning_rate": 5.491803278688526e-06,
|
|
"loss": 2.8588128089904785,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.16748768472906403,
|
|
"grad_norm": 10.175583728158522,
|
|
"learning_rate": 5.5327868852459025e-06,
|
|
"loss": 2.9894580841064453,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.1687192118226601,
|
|
"grad_norm": 9.056737222762985,
|
|
"learning_rate": 5.573770491803278e-06,
|
|
"loss": 2.5721185207366943,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.16995073891625614,
|
|
"grad_norm": 13.273464461148466,
|
|
"learning_rate": 5.614754098360657e-06,
|
|
"loss": 2.927572727203369,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.17118226600985223,
|
|
"grad_norm": 6.55893818610158,
|
|
"learning_rate": 5.655737704918033e-06,
|
|
"loss": 2.1956796646118164,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.1724137931034483,
|
|
"grad_norm": 29.225445444647217,
|
|
"learning_rate": 5.696721311475411e-06,
|
|
"loss": 2.9739363193511963,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.17364532019704434,
|
|
"grad_norm": 11.15274917433196,
|
|
"learning_rate": 5.737704918032787e-06,
|
|
"loss": 2.9413986206054688,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.1748768472906404,
|
|
"grad_norm": 10.26279112360335,
|
|
"learning_rate": 5.778688524590165e-06,
|
|
"loss": 3.267493724822998,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.17610837438423646,
|
|
"grad_norm": 10.574770426769376,
|
|
"learning_rate": 5.8196721311475415e-06,
|
|
"loss": 3.355569362640381,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.17733990147783252,
|
|
"grad_norm": 30.57215689151005,
|
|
"learning_rate": 5.860655737704919e-06,
|
|
"loss": 1.9742871522903442,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.17857142857142858,
|
|
"grad_norm": 12.842491765573998,
|
|
"learning_rate": 5.9016393442622956e-06,
|
|
"loss": 3.571032762527466,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.17980295566502463,
|
|
"grad_norm": 12.726974439363154,
|
|
"learning_rate": 5.942622950819673e-06,
|
|
"loss": 3.3115599155426025,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.1810344827586207,
|
|
"grad_norm": 17.55458268041124,
|
|
"learning_rate": 5.98360655737705e-06,
|
|
"loss": 2.781893730163574,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.18226600985221675,
|
|
"grad_norm": 21.115989900825127,
|
|
"learning_rate": 6.024590163934426e-06,
|
|
"loss": 3.5053911209106445,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.1834975369458128,
|
|
"grad_norm": 14.601719954400593,
|
|
"learning_rate": 6.065573770491804e-06,
|
|
"loss": 2.797297477722168,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.18472906403940886,
|
|
"grad_norm": 11.706500964440364,
|
|
"learning_rate": 6.10655737704918e-06,
|
|
"loss": 2.995811939239502,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.18596059113300492,
|
|
"grad_norm": 15.414506649569596,
|
|
"learning_rate": 6.147540983606558e-06,
|
|
"loss": 3.028142213821411,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.18719211822660098,
|
|
"grad_norm": 16.893206406115734,
|
|
"learning_rate": 6.1885245901639345e-06,
|
|
"loss": 3.092806816101074,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.18842364532019704,
|
|
"grad_norm": 15.790657692703299,
|
|
"learning_rate": 6.229508196721312e-06,
|
|
"loss": 3.4657726287841797,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.1896551724137931,
|
|
"grad_norm": 14.336314687505745,
|
|
"learning_rate": 6.270491803278689e-06,
|
|
"loss": 2.888990879058838,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.19088669950738915,
|
|
"grad_norm": 8.384597105554349,
|
|
"learning_rate": 6.311475409836066e-06,
|
|
"loss": 2.21640682220459,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.1921182266009852,
|
|
"grad_norm": 15.11144998304732,
|
|
"learning_rate": 6.352459016393443e-06,
|
|
"loss": 3.1153030395507812,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.1933497536945813,
|
|
"grad_norm": 10.552333909396582,
|
|
"learning_rate": 6.393442622950821e-06,
|
|
"loss": 3.5814146995544434,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.19458128078817735,
|
|
"grad_norm": 16.968338748229492,
|
|
"learning_rate": 6.434426229508197e-06,
|
|
"loss": 3.3865175247192383,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.1958128078817734,
|
|
"grad_norm": 18.57431273466726,
|
|
"learning_rate": 6.475409836065575e-06,
|
|
"loss": 3.2125191688537598,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.19704433497536947,
|
|
"grad_norm": 6.884951933192958,
|
|
"learning_rate": 6.516393442622952e-06,
|
|
"loss": 3.137500286102295,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.19827586206896552,
|
|
"grad_norm": 14.232532156130397,
|
|
"learning_rate": 6.5573770491803276e-06,
|
|
"loss": 2.63275408744812,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.19950738916256158,
|
|
"grad_norm": 8.457248873163048,
|
|
"learning_rate": 6.598360655737706e-06,
|
|
"loss": 3.1714844703674316,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.20073891625615764,
|
|
"grad_norm": 8.202663921028103,
|
|
"learning_rate": 6.6393442622950825e-06,
|
|
"loss": 2.2414371967315674,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.2019704433497537,
|
|
"grad_norm": 21.716160496341246,
|
|
"learning_rate": 6.68032786885246e-06,
|
|
"loss": 2.4281110763549805,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.20320197044334976,
|
|
"grad_norm": 14.06837422573523,
|
|
"learning_rate": 6.721311475409837e-06,
|
|
"loss": 2.6953632831573486,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.2044334975369458,
|
|
"grad_norm": 12.440616463990054,
|
|
"learning_rate": 6.762295081967214e-06,
|
|
"loss": 2.7645516395568848,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.20566502463054187,
|
|
"grad_norm": 9.155924284482328,
|
|
"learning_rate": 6.803278688524591e-06,
|
|
"loss": 2.676801919937134,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.20689655172413793,
|
|
"grad_norm": 18.399209140322007,
|
|
"learning_rate": 6.844262295081968e-06,
|
|
"loss": 3.2417163848876953,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.20812807881773399,
|
|
"grad_norm": 10.633235724872472,
|
|
"learning_rate": 6.885245901639345e-06,
|
|
"loss": 3.1967976093292236,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.20935960591133004,
|
|
"grad_norm": 9.001521768789516,
|
|
"learning_rate": 6.926229508196722e-06,
|
|
"loss": 3.4212145805358887,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2105911330049261,
|
|
"grad_norm": 19.131341549460146,
|
|
"learning_rate": 6.967213114754099e-06,
|
|
"loss": 3.0731911659240723,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.21182266009852216,
|
|
"grad_norm": 24.78027708091891,
|
|
"learning_rate": 7.0081967213114756e-06,
|
|
"loss": 3.8659727573394775,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.21305418719211822,
|
|
"grad_norm": 7.256951095872975,
|
|
"learning_rate": 7.049180327868853e-06,
|
|
"loss": 3.036478042602539,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.21428571428571427,
|
|
"grad_norm": 13.753177425595323,
|
|
"learning_rate": 7.09016393442623e-06,
|
|
"loss": 2.489211082458496,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.21551724137931033,
|
|
"grad_norm": 15.568690129763258,
|
|
"learning_rate": 7.131147540983607e-06,
|
|
"loss": 3.8306775093078613,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.21674876847290642,
|
|
"grad_norm": 14.053955715138319,
|
|
"learning_rate": 7.172131147540984e-06,
|
|
"loss": 3.0287742614746094,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.21798029556650247,
|
|
"grad_norm": 7.402046078874498,
|
|
"learning_rate": 7.213114754098361e-06,
|
|
"loss": 2.767753839492798,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.21921182266009853,
|
|
"grad_norm": 7.607064770644376,
|
|
"learning_rate": 7.254098360655738e-06,
|
|
"loss": 2.8400726318359375,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.2204433497536946,
|
|
"grad_norm": 9.218463959135196,
|
|
"learning_rate": 7.295081967213115e-06,
|
|
"loss": 2.9013113975524902,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.22167487684729065,
|
|
"grad_norm": 14.207394035741054,
|
|
"learning_rate": 7.336065573770492e-06,
|
|
"loss": 3.1111714839935303,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2229064039408867,
|
|
"grad_norm": 22.91981906121516,
|
|
"learning_rate": 7.3770491803278695e-06,
|
|
"loss": 2.968287229537964,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.22413793103448276,
|
|
"grad_norm": 25.20920899192849,
|
|
"learning_rate": 7.418032786885246e-06,
|
|
"loss": 3.2560596466064453,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.22536945812807882,
|
|
"grad_norm": 11.263908332317076,
|
|
"learning_rate": 7.459016393442624e-06,
|
|
"loss": 2.6196365356445312,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.22660098522167488,
|
|
"grad_norm": 9.253114778490854,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 2.48789644241333,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.22783251231527094,
|
|
"grad_norm": 10.894130133931592,
|
|
"learning_rate": 7.540983606557377e-06,
|
|
"loss": 3.492011308670044,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.229064039408867,
|
|
"grad_norm": 10.265317756792616,
|
|
"learning_rate": 7.581967213114755e-06,
|
|
"loss": 2.643688917160034,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.23029556650246305,
|
|
"grad_norm": 18.91537781193984,
|
|
"learning_rate": 7.622950819672132e-06,
|
|
"loss": 3.291731834411621,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.2315270935960591,
|
|
"grad_norm": 8.094549723224802,
|
|
"learning_rate": 7.66393442622951e-06,
|
|
"loss": 2.9554359912872314,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.23275862068965517,
|
|
"grad_norm": 8.032083532292669,
|
|
"learning_rate": 7.704918032786886e-06,
|
|
"loss": 2.634860038757324,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.23399014778325122,
|
|
"grad_norm": 12.421064936443088,
|
|
"learning_rate": 7.745901639344263e-06,
|
|
"loss": 3.505284309387207,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.23522167487684728,
|
|
"grad_norm": 9.73160074977933,
|
|
"learning_rate": 7.786885245901639e-06,
|
|
"loss": 2.8865461349487305,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.23645320197044334,
|
|
"grad_norm": 9.154882618515046,
|
|
"learning_rate": 7.827868852459017e-06,
|
|
"loss": 2.804072618484497,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.2376847290640394,
|
|
"grad_norm": 19.13061642741136,
|
|
"learning_rate": 7.868852459016394e-06,
|
|
"loss": 2.830981969833374,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.23891625615763548,
|
|
"grad_norm": 15.563283146640595,
|
|
"learning_rate": 7.909836065573772e-06,
|
|
"loss": 2.2295336723327637,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.24014778325123154,
|
|
"grad_norm": 12.225259694302743,
|
|
"learning_rate": 7.950819672131147e-06,
|
|
"loss": 2.338548183441162,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2413793103448276,
|
|
"grad_norm": 9.892040827483035,
|
|
"learning_rate": 7.991803278688526e-06,
|
|
"loss": 3.0856008529663086,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.24261083743842365,
|
|
"grad_norm": 7.694617498251832,
|
|
"learning_rate": 8.032786885245902e-06,
|
|
"loss": 2.8032941818237305,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.2438423645320197,
|
|
"grad_norm": 14.517107480578428,
|
|
"learning_rate": 8.073770491803279e-06,
|
|
"loss": 2.793623924255371,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.24507389162561577,
|
|
"grad_norm": 14.257539519236145,
|
|
"learning_rate": 8.114754098360657e-06,
|
|
"loss": 3.316802740097046,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.24630541871921183,
|
|
"grad_norm": 9.345732169704513,
|
|
"learning_rate": 8.155737704918034e-06,
|
|
"loss": 2.7230677604675293,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.24753694581280788,
|
|
"grad_norm": 15.629904452590212,
|
|
"learning_rate": 8.19672131147541e-06,
|
|
"loss": 3.3343541622161865,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.24876847290640394,
|
|
"grad_norm": 15.523761255621764,
|
|
"learning_rate": 8.237704918032787e-06,
|
|
"loss": 2.6796741485595703,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 19.56220339462512,
|
|
"learning_rate": 8.278688524590165e-06,
|
|
"loss": 3.5974526405334473,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.2512315270935961,
|
|
"grad_norm": 13.897070581153926,
|
|
"learning_rate": 8.319672131147542e-06,
|
|
"loss": 2.2697930335998535,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.2524630541871921,
|
|
"grad_norm": 58.73834156491825,
|
|
"learning_rate": 8.360655737704919e-06,
|
|
"loss": 3.692251682281494,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.2536945812807882,
|
|
"grad_norm": 7.38409958845656,
|
|
"learning_rate": 8.401639344262295e-06,
|
|
"loss": 1.9303261041641235,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.25492610837438423,
|
|
"grad_norm": 9.965151267955871,
|
|
"learning_rate": 8.442622950819674e-06,
|
|
"loss": 2.538956880569458,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.2561576354679803,
|
|
"grad_norm": 9.12744959101674,
|
|
"learning_rate": 8.48360655737705e-06,
|
|
"loss": 2.777608633041382,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.25738916256157635,
|
|
"grad_norm": 7.651759491423955,
|
|
"learning_rate": 8.524590163934427e-06,
|
|
"loss": 2.5776896476745605,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.25862068965517243,
|
|
"grad_norm": 7.384463920815584,
|
|
"learning_rate": 8.565573770491804e-06,
|
|
"loss": 2.9199795722961426,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.25985221674876846,
|
|
"grad_norm": 20.103355409171535,
|
|
"learning_rate": 8.60655737704918e-06,
|
|
"loss": 3.515129566192627,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.26108374384236455,
|
|
"grad_norm": 11.426838299111452,
|
|
"learning_rate": 8.647540983606559e-06,
|
|
"loss": 2.5549678802490234,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.2623152709359606,
|
|
"grad_norm": 9.257633699344172,
|
|
"learning_rate": 8.688524590163935e-06,
|
|
"loss": 2.769425630569458,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.26354679802955666,
|
|
"grad_norm": 10.532098802898833,
|
|
"learning_rate": 8.729508196721312e-06,
|
|
"loss": 3.369231700897217,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.2647783251231527,
|
|
"grad_norm": 9.351621764685488,
|
|
"learning_rate": 8.770491803278688e-06,
|
|
"loss": 2.942309856414795,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.2660098522167488,
|
|
"grad_norm": 13.925057065300786,
|
|
"learning_rate": 8.811475409836067e-06,
|
|
"loss": 2.7516608238220215,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.2672413793103448,
|
|
"grad_norm": 36.50661601809998,
|
|
"learning_rate": 8.852459016393443e-06,
|
|
"loss": 2.8445613384246826,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.2684729064039409,
|
|
"grad_norm": 22.25960453914331,
|
|
"learning_rate": 8.893442622950822e-06,
|
|
"loss": 2.987518787384033,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.2697044334975369,
|
|
"grad_norm": 16.564591915051718,
|
|
"learning_rate": 8.934426229508197e-06,
|
|
"loss": 3.2499587535858154,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.270935960591133,
|
|
"grad_norm": 17.28227853231096,
|
|
"learning_rate": 8.975409836065575e-06,
|
|
"loss": 2.926447868347168,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.27216748768472904,
|
|
"grad_norm": 11.211927116407436,
|
|
"learning_rate": 9.016393442622952e-06,
|
|
"loss": 2.8910017013549805,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.2733990147783251,
|
|
"grad_norm": 8.72596083956733,
|
|
"learning_rate": 9.057377049180328e-06,
|
|
"loss": 3.0613536834716797,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.2746305418719212,
|
|
"grad_norm": 9.803135692376356,
|
|
"learning_rate": 9.098360655737707e-06,
|
|
"loss": 2.829414129257202,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.27586206896551724,
|
|
"grad_norm": 12.92734853493422,
|
|
"learning_rate": 9.139344262295083e-06,
|
|
"loss": 2.7085399627685547,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.2770935960591133,
|
|
"grad_norm": 9.4118708856159,
|
|
"learning_rate": 9.18032786885246e-06,
|
|
"loss": 2.6637799739837646,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.27832512315270935,
|
|
"grad_norm": 18.83957093140758,
|
|
"learning_rate": 9.221311475409836e-06,
|
|
"loss": 2.845503807067871,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.27955665024630544,
|
|
"grad_norm": 13.475569415500434,
|
|
"learning_rate": 9.262295081967215e-06,
|
|
"loss": 2.954394817352295,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.28078817733990147,
|
|
"grad_norm": 8.290170639522628,
|
|
"learning_rate": 9.303278688524591e-06,
|
|
"loss": 2.640540838241577,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.28201970443349755,
|
|
"grad_norm": 11.224559700746246,
|
|
"learning_rate": 9.344262295081968e-06,
|
|
"loss": 2.806300163269043,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.2832512315270936,
|
|
"grad_norm": 7.885675569548075,
|
|
"learning_rate": 9.385245901639345e-06,
|
|
"loss": 2.6030101776123047,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.28448275862068967,
|
|
"grad_norm": 24.236973973758758,
|
|
"learning_rate": 9.426229508196723e-06,
|
|
"loss": 2.7991466522216797,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 8.845347044883379,
|
|
"learning_rate": 9.4672131147541e-06,
|
|
"loss": 3.106261968612671,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.2869458128078818,
|
|
"grad_norm": 51.821805980416265,
|
|
"learning_rate": 9.508196721311476e-06,
|
|
"loss": 3.2630815505981445,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.2881773399014778,
|
|
"grad_norm": 16.78742746550897,
|
|
"learning_rate": 9.549180327868853e-06,
|
|
"loss": 3.1156482696533203,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.2894088669950739,
|
|
"grad_norm": 13.713777073631656,
|
|
"learning_rate": 9.59016393442623e-06,
|
|
"loss": 3.1271071434020996,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.29064039408866993,
|
|
"grad_norm": 13.698738323083157,
|
|
"learning_rate": 9.631147540983608e-06,
|
|
"loss": 2.536348342895508,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.291871921182266,
|
|
"grad_norm": 15.926322663194057,
|
|
"learning_rate": 9.672131147540984e-06,
|
|
"loss": 2.8055825233459473,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.29310344827586204,
|
|
"grad_norm": 10.519363729962654,
|
|
"learning_rate": 9.713114754098361e-06,
|
|
"loss": 2.9949395656585693,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.29433497536945813,
|
|
"grad_norm": 12.579584872972768,
|
|
"learning_rate": 9.754098360655738e-06,
|
|
"loss": 2.125136137008667,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.2955665024630542,
|
|
"grad_norm": 11.391036061101172,
|
|
"learning_rate": 9.795081967213116e-06,
|
|
"loss": 2.830984592437744,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.29679802955665024,
|
|
"grad_norm": 14.46789942529014,
|
|
"learning_rate": 9.836065573770493e-06,
|
|
"loss": 3.2255706787109375,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.29802955665024633,
|
|
"grad_norm": 8.899469108078774,
|
|
"learning_rate": 9.87704918032787e-06,
|
|
"loss": 2.686436653137207,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.29926108374384236,
|
|
"grad_norm": 10.094433891654246,
|
|
"learning_rate": 9.918032786885246e-06,
|
|
"loss": 2.497978687286377,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.30049261083743845,
|
|
"grad_norm": 8.691385167763809,
|
|
"learning_rate": 9.959016393442624e-06,
|
|
"loss": 3.308448076248169,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.3017241379310345,
|
|
"grad_norm": 15.757524580227669,
|
|
"learning_rate": 1e-05,
|
|
"loss": 3.2378220558166504,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.30295566502463056,
|
|
"grad_norm": 8.671108255060687,
|
|
"learning_rate": 9.999994864785605e-06,
|
|
"loss": 2.4129133224487305,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.3041871921182266,
|
|
"grad_norm": 13.501190126023713,
|
|
"learning_rate": 9.99997945915297e-06,
|
|
"loss": 2.938180923461914,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3054187192118227,
|
|
"grad_norm": 11.217667256673044,
|
|
"learning_rate": 9.999953783133733e-06,
|
|
"loss": 2.5165305137634277,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3066502463054187,
|
|
"grad_norm": 7.520771962392289,
|
|
"learning_rate": 9.999917836780642e-06,
|
|
"loss": 3.425577163696289,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3078817733990148,
|
|
"grad_norm": 13.889092280188136,
|
|
"learning_rate": 9.999871620167532e-06,
|
|
"loss": 2.876093626022339,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3091133004926108,
|
|
"grad_norm": 7.799661481860974,
|
|
"learning_rate": 9.999815133389334e-06,
|
|
"loss": 2.9071428775787354,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.3103448275862069,
|
|
"grad_norm": 18.185225557276123,
|
|
"learning_rate": 9.999748376562078e-06,
|
|
"loss": 2.998086452484131,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.31157635467980294,
|
|
"grad_norm": 27.086825836566575,
|
|
"learning_rate": 9.999671349822887e-06,
|
|
"loss": 2.1193456649780273,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.312807881773399,
|
|
"grad_norm": 13.320934166458603,
|
|
"learning_rate": 9.999584053329983e-06,
|
|
"loss": 2.753380298614502,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.31403940886699505,
|
|
"grad_norm": 14.498031739385082,
|
|
"learning_rate": 9.999486487262677e-06,
|
|
"loss": 2.876704216003418,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.31527093596059114,
|
|
"grad_norm": 13.532410059083729,
|
|
"learning_rate": 9.999378651821381e-06,
|
|
"loss": 3.0882208347320557,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.31650246305418717,
|
|
"grad_norm": 13.700484400761207,
|
|
"learning_rate": 9.999260547227599e-06,
|
|
"loss": 3.155285120010376,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.31773399014778325,
|
|
"grad_norm": 12.6000984521867,
|
|
"learning_rate": 9.999132173723923e-06,
|
|
"loss": 2.7646055221557617,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.31896551724137934,
|
|
"grad_norm": 15.115470197004113,
|
|
"learning_rate": 9.998993531574048e-06,
|
|
"loss": 2.7237563133239746,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.32019704433497537,
|
|
"grad_norm": 20.594748113733633,
|
|
"learning_rate": 9.998844621062755e-06,
|
|
"loss": 3.3845739364624023,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.32142857142857145,
|
|
"grad_norm": 10.767576295669059,
|
|
"learning_rate": 9.998685442495921e-06,
|
|
"loss": 3.8065264225006104,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.3226600985221675,
|
|
"grad_norm": 20.754860824013544,
|
|
"learning_rate": 9.998515996200508e-06,
|
|
"loss": 2.8899989128112793,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.32389162561576357,
|
|
"grad_norm": 15.819137797930164,
|
|
"learning_rate": 9.998336282524579e-06,
|
|
"loss": 3.253079414367676,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.3251231527093596,
|
|
"grad_norm": 18.790797790728803,
|
|
"learning_rate": 9.998146301837274e-06,
|
|
"loss": 3.346510648727417,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.3263546798029557,
|
|
"grad_norm": 23.146345527241454,
|
|
"learning_rate": 9.997946054528837e-06,
|
|
"loss": 3.4698657989501953,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.3275862068965517,
|
|
"grad_norm": 14.512612088330997,
|
|
"learning_rate": 9.99773554101059e-06,
|
|
"loss": 3.174567699432373,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.3288177339901478,
|
|
"grad_norm": 12.860516080892424,
|
|
"learning_rate": 9.997514761714946e-06,
|
|
"loss": 2.5275719165802,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.33004926108374383,
|
|
"grad_norm": 9.43003857415246,
|
|
"learning_rate": 9.997283717095403e-06,
|
|
"loss": 2.9102673530578613,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.3312807881773399,
|
|
"grad_norm": 11.178249951549107,
|
|
"learning_rate": 9.99704240762655e-06,
|
|
"loss": 2.865558624267578,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.33251231527093594,
|
|
"grad_norm": 24.802063921828417,
|
|
"learning_rate": 9.996790833804053e-06,
|
|
"loss": 2.749305248260498,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.33374384236453203,
|
|
"grad_norm": 24.70724769915988,
|
|
"learning_rate": 9.996528996144668e-06,
|
|
"loss": 2.0590691566467285,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.33497536945812806,
|
|
"grad_norm": 14.115920333851845,
|
|
"learning_rate": 9.996256895186234e-06,
|
|
"loss": 3.0421628952026367,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.33620689655172414,
|
|
"grad_norm": 12.058059347872495,
|
|
"learning_rate": 9.995974531487668e-06,
|
|
"loss": 2.8302841186523438,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.3374384236453202,
|
|
"grad_norm": 12.632643288786921,
|
|
"learning_rate": 9.995681905628968e-06,
|
|
"loss": 2.7192673683166504,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.33866995073891626,
|
|
"grad_norm": 15.484122360072316,
|
|
"learning_rate": 9.995379018211215e-06,
|
|
"loss": 2.3330166339874268,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.3399014778325123,
|
|
"grad_norm": 13.2967377526589,
|
|
"learning_rate": 9.995065869856566e-06,
|
|
"loss": 2.5359480381011963,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.3411330049261084,
|
|
"grad_norm": 15.221286627267526,
|
|
"learning_rate": 9.994742461208251e-06,
|
|
"loss": 3.049252986907959,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.34236453201970446,
|
|
"grad_norm": 15.24270242699156,
|
|
"learning_rate": 9.994408792930584e-06,
|
|
"loss": 3.3440940380096436,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.3435960591133005,
|
|
"grad_norm": 14.053973379642196,
|
|
"learning_rate": 9.994064865708944e-06,
|
|
"loss": 3.038376808166504,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.3448275862068966,
|
|
"grad_norm": 22.631635572415856,
|
|
"learning_rate": 9.993710680249788e-06,
|
|
"loss": 3.6074423789978027,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.3460591133004926,
|
|
"grad_norm": 20.559687915989883,
|
|
"learning_rate": 9.993346237280646e-06,
|
|
"loss": 2.686741352081299,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.3472906403940887,
|
|
"grad_norm": 12.521946549290966,
|
|
"learning_rate": 9.992971537550112e-06,
|
|
"loss": 2.4198198318481445,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.3485221674876847,
|
|
"grad_norm": 6.138840145200369,
|
|
"learning_rate": 9.992586581827853e-06,
|
|
"loss": 2.8091788291931152,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.3497536945812808,
|
|
"grad_norm": 9.177811201919399,
|
|
"learning_rate": 9.992191370904599e-06,
|
|
"loss": 3.0199592113494873,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.35098522167487683,
|
|
"grad_norm": 11.072879739046153,
|
|
"learning_rate": 9.991785905592149e-06,
|
|
"loss": 2.6372945308685303,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.3522167487684729,
|
|
"grad_norm": 12.835701532770578,
|
|
"learning_rate": 9.991370186723363e-06,
|
|
"loss": 2.9127607345581055,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.35344827586206895,
|
|
"grad_norm": 16.621843867679726,
|
|
"learning_rate": 9.990944215152166e-06,
|
|
"loss": 2.464376926422119,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.35467980295566504,
|
|
"grad_norm": 9.777456171349527,
|
|
"learning_rate": 9.990507991753535e-06,
|
|
"loss": 2.8306374549865723,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.35591133004926107,
|
|
"grad_norm": 11.701262899932036,
|
|
"learning_rate": 9.990061517423513e-06,
|
|
"loss": 2.9181313514709473,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.35714285714285715,
|
|
"grad_norm": 12.914380903938605,
|
|
"learning_rate": 9.989604793079198e-06,
|
|
"loss": 3.1937739849090576,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.3583743842364532,
|
|
"grad_norm": 25.41280169964493,
|
|
"learning_rate": 9.989137819658738e-06,
|
|
"loss": 4.190927028656006,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.35960591133004927,
|
|
"grad_norm": 12.268585179317036,
|
|
"learning_rate": 9.988660598121337e-06,
|
|
"loss": 2.8343558311462402,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.3608374384236453,
|
|
"grad_norm": 14.508602864953724,
|
|
"learning_rate": 9.988173129447251e-06,
|
|
"loss": 3.741821050643921,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.3620689655172414,
|
|
"grad_norm": 8.935077328629724,
|
|
"learning_rate": 9.98767541463778e-06,
|
|
"loss": 2.484419345855713,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.3633004926108374,
|
|
"grad_norm": 8.195009351092525,
|
|
"learning_rate": 9.987167454715277e-06,
|
|
"loss": 2.671337127685547,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.3645320197044335,
|
|
"grad_norm": 11.197259917333458,
|
|
"learning_rate": 9.986649250723129e-06,
|
|
"loss": 3.118803024291992,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.3657635467980296,
|
|
"grad_norm": 15.270785643435941,
|
|
"learning_rate": 9.986120803725776e-06,
|
|
"loss": 3.10141658782959,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.3669950738916256,
|
|
"grad_norm": 11.19651727126236,
|
|
"learning_rate": 9.985582114808693e-06,
|
|
"loss": 2.7978734970092773,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.3682266009852217,
|
|
"grad_norm": 14.058148431334251,
|
|
"learning_rate": 9.985033185078392e-06,
|
|
"loss": 2.5770411491394043,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.3694581280788177,
|
|
"grad_norm": 9.544840021071943,
|
|
"learning_rate": 9.984474015662421e-06,
|
|
"loss": 3.0273873805999756,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.3706896551724138,
|
|
"grad_norm": 8.198220678999139,
|
|
"learning_rate": 9.983904607709365e-06,
|
|
"loss": 2.9202780723571777,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.37192118226600984,
|
|
"grad_norm": 12.107800006970532,
|
|
"learning_rate": 9.983324962388835e-06,
|
|
"loss": 2.9816439151763916,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.3731527093596059,
|
|
"grad_norm": 7.601271321831279,
|
|
"learning_rate": 9.982735080891471e-06,
|
|
"loss": 2.5605852603912354,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.37438423645320196,
|
|
"grad_norm": 13.035543237033318,
|
|
"learning_rate": 9.982134964428942e-06,
|
|
"loss": 2.9378490447998047,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.37561576354679804,
|
|
"grad_norm": 7.731680542963359,
|
|
"learning_rate": 9.981524614233938e-06,
|
|
"loss": 2.410521984100342,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.3768472906403941,
|
|
"grad_norm": 13.52353943681927,
|
|
"learning_rate": 9.98090403156017e-06,
|
|
"loss": 2.381927013397217,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.37807881773399016,
|
|
"grad_norm": 17.35628297309107,
|
|
"learning_rate": 9.98027321768237e-06,
|
|
"loss": 3.1156816482543945,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.3793103448275862,
|
|
"grad_norm": 8.977028820084396,
|
|
"learning_rate": 9.97963217389628e-06,
|
|
"loss": 3.2660152912139893,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.3805418719211823,
|
|
"grad_norm": 14.66965301106164,
|
|
"learning_rate": 9.978980901518663e-06,
|
|
"loss": 3.1832613945007324,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.3817733990147783,
|
|
"grad_norm": 27.78972817701185,
|
|
"learning_rate": 9.978319401887287e-06,
|
|
"loss": 2.719600200653076,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.3830049261083744,
|
|
"grad_norm": 10.666579101176065,
|
|
"learning_rate": 9.977647676360927e-06,
|
|
"loss": 2.652092456817627,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.3842364532019704,
|
|
"grad_norm": 8.005520537074315,
|
|
"learning_rate": 9.976965726319369e-06,
|
|
"loss": 2.5932788848876953,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.3854679802955665,
|
|
"grad_norm": 15.690472287679249,
|
|
"learning_rate": 9.976273553163393e-06,
|
|
"loss": 2.558863401412964,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.3866995073891626,
|
|
"grad_norm": 11.958180437694066,
|
|
"learning_rate": 9.975571158314783e-06,
|
|
"loss": 3.1973023414611816,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.3879310344827586,
|
|
"grad_norm": 12.749275597057334,
|
|
"learning_rate": 9.974858543216319e-06,
|
|
"loss": 3.286236524581909,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.3891625615763547,
|
|
"grad_norm": 16.985399241319477,
|
|
"learning_rate": 9.974135709331774e-06,
|
|
"loss": 3.5159969329833984,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.39039408866995073,
|
|
"grad_norm": 10.457440991240187,
|
|
"learning_rate": 9.973402658145908e-06,
|
|
"loss": 2.647761821746826,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.3916256157635468,
|
|
"grad_norm": 9.450705495020088,
|
|
"learning_rate": 9.972659391164473e-06,
|
|
"loss": 2.8499808311462402,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.39285714285714285,
|
|
"grad_norm": 10.546244474419336,
|
|
"learning_rate": 9.971905909914206e-06,
|
|
"loss": 2.332852840423584,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.39408866995073893,
|
|
"grad_norm": 10.2366500934473,
|
|
"learning_rate": 9.971142215942817e-06,
|
|
"loss": 2.627098560333252,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.39532019704433496,
|
|
"grad_norm": 6.472838949640434,
|
|
"learning_rate": 9.970368310819e-06,
|
|
"loss": 2.302323341369629,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.39655172413793105,
|
|
"grad_norm": 6.421471401290025,
|
|
"learning_rate": 9.969584196132427e-06,
|
|
"loss": 2.6783509254455566,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.3977832512315271,
|
|
"grad_norm": 12.353934861805914,
|
|
"learning_rate": 9.96878987349373e-06,
|
|
"loss": 2.9487061500549316,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.39901477832512317,
|
|
"grad_norm": 13.993445702154649,
|
|
"learning_rate": 9.967985344534521e-06,
|
|
"loss": 2.5883233547210693,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.4002463054187192,
|
|
"grad_norm": 20.380213804590188,
|
|
"learning_rate": 9.96717061090737e-06,
|
|
"loss": 3.125821590423584,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4014778325123153,
|
|
"grad_norm": 6.812077926758059,
|
|
"learning_rate": 9.966345674285808e-06,
|
|
"loss": 2.829881191253662,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4027093596059113,
|
|
"grad_norm": 16.808551579421827,
|
|
"learning_rate": 9.965510536364329e-06,
|
|
"loss": 2.5988128185272217,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4039408866995074,
|
|
"grad_norm": 7.777965739175337,
|
|
"learning_rate": 9.964665198858375e-06,
|
|
"loss": 2.158940315246582,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4051724137931034,
|
|
"grad_norm": 10.632017505369658,
|
|
"learning_rate": 9.96380966350434e-06,
|
|
"loss": 2.716994285583496,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4064039408866995,
|
|
"grad_norm": 12.778378390552197,
|
|
"learning_rate": 9.962943932059573e-06,
|
|
"loss": 3.1283516883850098,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.40763546798029554,
|
|
"grad_norm": 12.686658918372668,
|
|
"learning_rate": 9.962068006302357e-06,
|
|
"loss": 3.0957908630371094,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.4088669950738916,
|
|
"grad_norm": 24.890731349370103,
|
|
"learning_rate": 9.961181888031917e-06,
|
|
"loss": 2.3027350902557373,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.4100985221674877,
|
|
"grad_norm": 10.45514873243925,
|
|
"learning_rate": 9.960285579068419e-06,
|
|
"loss": 2.956791877746582,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.41133004926108374,
|
|
"grad_norm": 28.23036034704062,
|
|
"learning_rate": 9.959379081252958e-06,
|
|
"loss": 2.5689826011657715,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.4125615763546798,
|
|
"grad_norm": 8.031700376672275,
|
|
"learning_rate": 9.958462396447556e-06,
|
|
"loss": 3.1086199283599854,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.41379310344827586,
|
|
"grad_norm": 15.790958589129726,
|
|
"learning_rate": 9.957535526535165e-06,
|
|
"loss": 3.134901285171509,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.41502463054187194,
|
|
"grad_norm": 12.433447054233632,
|
|
"learning_rate": 9.956598473419652e-06,
|
|
"loss": 2.642225742340088,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.41625615763546797,
|
|
"grad_norm": 9.36121478561991,
|
|
"learning_rate": 9.95565123902581e-06,
|
|
"loss": 2.828200340270996,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.41748768472906406,
|
|
"grad_norm": 14.194698913635616,
|
|
"learning_rate": 9.954693825299333e-06,
|
|
"loss": 2.751354217529297,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.4187192118226601,
|
|
"grad_norm": 13.475276856352862,
|
|
"learning_rate": 9.953726234206835e-06,
|
|
"loss": 2.818434715270996,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.41995073891625617,
|
|
"grad_norm": 14.017642174434487,
|
|
"learning_rate": 9.95274846773583e-06,
|
|
"loss": 2.8631365299224854,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.4211822660098522,
|
|
"grad_norm": 37.92442284518435,
|
|
"learning_rate": 9.951760527894733e-06,
|
|
"loss": 2.387998580932617,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.4224137931034483,
|
|
"grad_norm": 8.636388354492292,
|
|
"learning_rate": 9.950762416712862e-06,
|
|
"loss": 2.366614580154419,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.4236453201970443,
|
|
"grad_norm": 10.06521281831273,
|
|
"learning_rate": 9.949754136240416e-06,
|
|
"loss": 2.4502060413360596,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.4248768472906404,
|
|
"grad_norm": 12.481723752818217,
|
|
"learning_rate": 9.948735688548496e-06,
|
|
"loss": 2.47091007232666,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.42610837438423643,
|
|
"grad_norm": 8.973793469902368,
|
|
"learning_rate": 9.947707075729076e-06,
|
|
"loss": 3.0400021076202393,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.4273399014778325,
|
|
"grad_norm": 10.331950331735893,
|
|
"learning_rate": 9.946668299895017e-06,
|
|
"loss": 2.622288227081299,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.42857142857142855,
|
|
"grad_norm": 22.195871941281137,
|
|
"learning_rate": 9.945619363180054e-06,
|
|
"loss": 3.3773419857025146,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.42980295566502463,
|
|
"grad_norm": 19.575310687428036,
|
|
"learning_rate": 9.944560267738792e-06,
|
|
"loss": 3.279005527496338,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.43103448275862066,
|
|
"grad_norm": 11.204766296525598,
|
|
"learning_rate": 9.943491015746704e-06,
|
|
"loss": 2.8206255435943604,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.43226600985221675,
|
|
"grad_norm": 19.31443626404287,
|
|
"learning_rate": 9.942411609400127e-06,
|
|
"loss": 3.312700033187866,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.43349753694581283,
|
|
"grad_norm": 12.40959825169754,
|
|
"learning_rate": 9.941322050916251e-06,
|
|
"loss": 2.580315113067627,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.43472906403940886,
|
|
"grad_norm": 18.26867922192619,
|
|
"learning_rate": 9.940222342533126e-06,
|
|
"loss": 2.8339614868164062,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.43596059113300495,
|
|
"grad_norm": 15.240586085653998,
|
|
"learning_rate": 9.939112486509644e-06,
|
|
"loss": 2.582752227783203,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.437192118226601,
|
|
"grad_norm": 14.054810279727889,
|
|
"learning_rate": 9.937992485125547e-06,
|
|
"loss": 2.9355309009552,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.43842364532019706,
|
|
"grad_norm": 7.204056413186231,
|
|
"learning_rate": 9.936862340681412e-06,
|
|
"loss": 2.796612024307251,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.4396551724137931,
|
|
"grad_norm": 5.797127744814052,
|
|
"learning_rate": 9.935722055498655e-06,
|
|
"loss": 2.6307716369628906,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.4408866995073892,
|
|
"grad_norm": 8.742348132173227,
|
|
"learning_rate": 9.934571631919518e-06,
|
|
"loss": 2.8603620529174805,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.4421182266009852,
|
|
"grad_norm": 12.186262361276388,
|
|
"learning_rate": 9.933411072307071e-06,
|
|
"loss": 3.1397266387939453,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.4433497536945813,
|
|
"grad_norm": 8.973047578523662,
|
|
"learning_rate": 9.9322403790452e-06,
|
|
"loss": 2.5362772941589355,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.4445812807881773,
|
|
"grad_norm": 17.982816499460725,
|
|
"learning_rate": 9.931059554538613e-06,
|
|
"loss": 2.7547712326049805,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.4458128078817734,
|
|
"grad_norm": 15.389405107024809,
|
|
"learning_rate": 9.929868601212822e-06,
|
|
"loss": 3.144801139831543,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.44704433497536944,
|
|
"grad_norm": 16.343273720769005,
|
|
"learning_rate": 9.928667521514149e-06,
|
|
"loss": 2.600550889968872,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.4482758620689655,
|
|
"grad_norm": 11.532249256759682,
|
|
"learning_rate": 9.927456317909711e-06,
|
|
"loss": 2.176116704940796,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.44950738916256155,
|
|
"grad_norm": 25.088404612293182,
|
|
"learning_rate": 9.92623499288743e-06,
|
|
"loss": 3.1918365955352783,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.45073891625615764,
|
|
"grad_norm": 12.864077493891681,
|
|
"learning_rate": 9.92500354895601e-06,
|
|
"loss": 2.6937577724456787,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.45197044334975367,
|
|
"grad_norm": 29.27990733585633,
|
|
"learning_rate": 9.92376198864494e-06,
|
|
"loss": 3.6490774154663086,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.45320197044334976,
|
|
"grad_norm": 7.620954232577737,
|
|
"learning_rate": 9.922510314504493e-06,
|
|
"loss": 3.0342392921447754,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.4544334975369458,
|
|
"grad_norm": 14.562498240608573,
|
|
"learning_rate": 9.921248529105716e-06,
|
|
"loss": 3.175008773803711,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.45566502463054187,
|
|
"grad_norm": 9.096092875139751,
|
|
"learning_rate": 9.919976635040425e-06,
|
|
"loss": 1.9000710248947144,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.45689655172413796,
|
|
"grad_norm": 19.30965262540543,
|
|
"learning_rate": 9.918694634921195e-06,
|
|
"loss": 3.5248589515686035,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.458128078817734,
|
|
"grad_norm": 10.529945298812061,
|
|
"learning_rate": 9.91740253138137e-06,
|
|
"loss": 2.869842529296875,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.45935960591133007,
|
|
"grad_norm": 10.698638706211932,
|
|
"learning_rate": 9.916100327075038e-06,
|
|
"loss": 1.9380724430084229,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.4605911330049261,
|
|
"grad_norm": 17.707591147238283,
|
|
"learning_rate": 9.914788024677039e-06,
|
|
"loss": 2.2112460136413574,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.4618226600985222,
|
|
"grad_norm": 10.065846050311237,
|
|
"learning_rate": 9.913465626882954e-06,
|
|
"loss": 3.1283068656921387,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.4630541871921182,
|
|
"grad_norm": 25.33369677490011,
|
|
"learning_rate": 9.912133136409103e-06,
|
|
"loss": 2.692117929458618,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.4642857142857143,
|
|
"grad_norm": 57.3231139544447,
|
|
"learning_rate": 9.910790555992536e-06,
|
|
"loss": 3.047241687774658,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.46551724137931033,
|
|
"grad_norm": 11.840834448379393,
|
|
"learning_rate": 9.909437888391025e-06,
|
|
"loss": 3.0103232860565186,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.4667487684729064,
|
|
"grad_norm": 15.056907160003684,
|
|
"learning_rate": 9.908075136383068e-06,
|
|
"loss": 2.8296966552734375,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.46798029556650245,
|
|
"grad_norm": 8.534626696858023,
|
|
"learning_rate": 9.906702302767876e-06,
|
|
"loss": 2.818819999694824,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.46921182266009853,
|
|
"grad_norm": 29.849300222390532,
|
|
"learning_rate": 9.905319390365364e-06,
|
|
"loss": 3.6281867027282715,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.47044334975369456,
|
|
"grad_norm": 17.161390821083423,
|
|
"learning_rate": 9.903926402016153e-06,
|
|
"loss": 2.7123236656188965,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.47167487684729065,
|
|
"grad_norm": 13.097065098778378,
|
|
"learning_rate": 9.902523340581562e-06,
|
|
"loss": 2.69736909866333,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.4729064039408867,
|
|
"grad_norm": 11.269340257234004,
|
|
"learning_rate": 9.901110208943599e-06,
|
|
"loss": 3.088184118270874,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.47413793103448276,
|
|
"grad_norm": 6.6950707947616745,
|
|
"learning_rate": 9.899687010004956e-06,
|
|
"loss": 2.606736183166504,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.4753694581280788,
|
|
"grad_norm": 10.297903581299613,
|
|
"learning_rate": 9.898253746689007e-06,
|
|
"loss": 2.684105157852173,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.4766009852216749,
|
|
"grad_norm": 15.82478266058562,
|
|
"learning_rate": 9.896810421939797e-06,
|
|
"loss": 2.8739280700683594,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.47783251231527096,
|
|
"grad_norm": 8.284309924074774,
|
|
"learning_rate": 9.895357038722043e-06,
|
|
"loss": 2.835542917251587,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.479064039408867,
|
|
"grad_norm": 15.854123121769446,
|
|
"learning_rate": 9.893893600021112e-06,
|
|
"loss": 2.855287551879883,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.4802955665024631,
|
|
"grad_norm": 7.88725535997062,
|
|
"learning_rate": 9.892420108843038e-06,
|
|
"loss": 2.8026838302612305,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.4815270935960591,
|
|
"grad_norm": 11.000709518913423,
|
|
"learning_rate": 9.890936568214493e-06,
|
|
"loss": 3.1150124073028564,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.4827586206896552,
|
|
"grad_norm": 13.588584372243895,
|
|
"learning_rate": 9.889442981182802e-06,
|
|
"loss": 2.578108072280884,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.4839901477832512,
|
|
"grad_norm": 16.34748858179715,
|
|
"learning_rate": 9.88793935081592e-06,
|
|
"loss": 2.7470006942749023,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.4852216748768473,
|
|
"grad_norm": 10.809579161505546,
|
|
"learning_rate": 9.88642568020243e-06,
|
|
"loss": 2.9015283584594727,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.48645320197044334,
|
|
"grad_norm": 13.55439142286002,
|
|
"learning_rate": 9.884901972451542e-06,
|
|
"loss": 3.79250431060791,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.4876847290640394,
|
|
"grad_norm": 8.909988613184693,
|
|
"learning_rate": 9.883368230693082e-06,
|
|
"loss": 3.0748767852783203,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.48891625615763545,
|
|
"grad_norm": 13.412610776910293,
|
|
"learning_rate": 9.881824458077491e-06,
|
|
"loss": 2.822726011276245,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.49014778325123154,
|
|
"grad_norm": 11.426335338698937,
|
|
"learning_rate": 9.880270657775806e-06,
|
|
"loss": 2.7966151237487793,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.49137931034482757,
|
|
"grad_norm": 10.55324948832395,
|
|
"learning_rate": 9.878706832979668e-06,
|
|
"loss": 2.8517651557922363,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.49261083743842365,
|
|
"grad_norm": 11.070058186972197,
|
|
"learning_rate": 9.877132986901306e-06,
|
|
"loss": 2.7754080295562744,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.4938423645320197,
|
|
"grad_norm": 8.886322673700336,
|
|
"learning_rate": 9.875549122773536e-06,
|
|
"loss": 2.9478702545166016,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.49507389162561577,
|
|
"grad_norm": 9.759021404672636,
|
|
"learning_rate": 9.87395524384975e-06,
|
|
"loss": 2.9535412788391113,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.4963054187192118,
|
|
"grad_norm": 22.265516010081125,
|
|
"learning_rate": 9.872351353403912e-06,
|
|
"loss": 3.415161609649658,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.4975369458128079,
|
|
"grad_norm": 10.3371436402533,
|
|
"learning_rate": 9.870737454730552e-06,
|
|
"loss": 2.573082447052002,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.4987684729064039,
|
|
"grad_norm": 14.615736501967937,
|
|
"learning_rate": 9.869113551144754e-06,
|
|
"loss": 2.4743850231170654,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 10.275697391044838,
|
|
"learning_rate": 9.867479645982158e-06,
|
|
"loss": 2.6644279956817627,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.5012315270935961,
|
|
"grad_norm": 7.731558128938727,
|
|
"learning_rate": 9.865835742598942e-06,
|
|
"loss": 2.7798032760620117,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.5024630541871922,
|
|
"grad_norm": 28.59542346400597,
|
|
"learning_rate": 9.864181844371828e-06,
|
|
"loss": 3.939884662628174,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.5036945812807881,
|
|
"grad_norm": 21.07739414791098,
|
|
"learning_rate": 9.86251795469806e-06,
|
|
"loss": 2.8093104362487793,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.5049261083743842,
|
|
"grad_norm": 8.961555424981583,
|
|
"learning_rate": 9.860844076995416e-06,
|
|
"loss": 2.1494715213775635,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.5061576354679803,
|
|
"grad_norm": 21.200756727942377,
|
|
"learning_rate": 9.85916021470218e-06,
|
|
"loss": 2.964136838912964,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.5073891625615764,
|
|
"grad_norm": 11.020672835034468,
|
|
"learning_rate": 9.857466371277152e-06,
|
|
"loss": 2.641287088394165,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.5086206896551724,
|
|
"grad_norm": 9.8391871787113,
|
|
"learning_rate": 9.85576255019963e-06,
|
|
"loss": 2.454512357711792,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.5098522167487685,
|
|
"grad_norm": 9.302782088404763,
|
|
"learning_rate": 9.85404875496941e-06,
|
|
"loss": 2.4566071033477783,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.5110837438423645,
|
|
"grad_norm": 12.209048739605382,
|
|
"learning_rate": 9.852324989106772e-06,
|
|
"loss": 2.7254204750061035,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.5123152709359606,
|
|
"grad_norm": 17.193015982984093,
|
|
"learning_rate": 9.850591256152483e-06,
|
|
"loss": 2.743382215499878,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.5135467980295566,
|
|
"grad_norm": 31.54989094640885,
|
|
"learning_rate": 9.848847559667774e-06,
|
|
"loss": 3.376046657562256,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.5147783251231527,
|
|
"grad_norm": 11.734812553622533,
|
|
"learning_rate": 9.847093903234351e-06,
|
|
"loss": 2.73980975151062,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.5160098522167488,
|
|
"grad_norm": 8.164256099521083,
|
|
"learning_rate": 9.845330290454373e-06,
|
|
"loss": 2.7565903663635254,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.5172413793103449,
|
|
"grad_norm": 9.178438912949575,
|
|
"learning_rate": 9.843556724950454e-06,
|
|
"loss": 2.9061315059661865,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.5184729064039408,
|
|
"grad_norm": 18.23493245534027,
|
|
"learning_rate": 9.841773210365646e-06,
|
|
"loss": 3.1584839820861816,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.5197044334975369,
|
|
"grad_norm": 13.406138718704618,
|
|
"learning_rate": 9.839979750363443e-06,
|
|
"loss": 3.300762176513672,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.520935960591133,
|
|
"grad_norm": 16.907140017416133,
|
|
"learning_rate": 9.838176348627768e-06,
|
|
"loss": 2.5202269554138184,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.5221674876847291,
|
|
"grad_norm": 14.800436222535966,
|
|
"learning_rate": 9.83636300886296e-06,
|
|
"loss": 3.9240634441375732,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.5233990147783252,
|
|
"grad_norm": 13.058319822050642,
|
|
"learning_rate": 9.834539734793774e-06,
|
|
"loss": 3.1783556938171387,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.5246305418719212,
|
|
"grad_norm": 9.577210971277129,
|
|
"learning_rate": 9.832706530165372e-06,
|
|
"loss": 2.787106513977051,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.5258620689655172,
|
|
"grad_norm": 17.432663310497652,
|
|
"learning_rate": 9.830863398743313e-06,
|
|
"loss": 3.270280599594116,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.5270935960591133,
|
|
"grad_norm": 13.065514198679326,
|
|
"learning_rate": 9.829010344313548e-06,
|
|
"loss": 3.0135059356689453,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.5283251231527094,
|
|
"grad_norm": 12.9248393025633,
|
|
"learning_rate": 9.82714737068241e-06,
|
|
"loss": 2.989795207977295,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.5295566502463054,
|
|
"grad_norm": 15.64315185844485,
|
|
"learning_rate": 9.825274481676605e-06,
|
|
"loss": 2.5208187103271484,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.5307881773399015,
|
|
"grad_norm": 11.452591471364267,
|
|
"learning_rate": 9.82339168114321e-06,
|
|
"loss": 3.1890928745269775,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.5320197044334976,
|
|
"grad_norm": 11.650610381993676,
|
|
"learning_rate": 9.821498972949657e-06,
|
|
"loss": 3.0655789375305176,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.5332512315270936,
|
|
"grad_norm": 7.7840344730355335,
|
|
"learning_rate": 9.81959636098373e-06,
|
|
"loss": 2.611284017562866,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.5344827586206896,
|
|
"grad_norm": 8.93478095027874,
|
|
"learning_rate": 9.817683849153561e-06,
|
|
"loss": 2.863576889038086,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.5357142857142857,
|
|
"grad_norm": 10.52062689285789,
|
|
"learning_rate": 9.815761441387609e-06,
|
|
"loss": 2.6186623573303223,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.5369458128078818,
|
|
"grad_norm": 6.68274047677578,
|
|
"learning_rate": 9.813829141634666e-06,
|
|
"loss": 1.3848458528518677,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.5381773399014779,
|
|
"grad_norm": 9.593848866659638,
|
|
"learning_rate": 9.811886953863841e-06,
|
|
"loss": 3.00791597366333,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.5394088669950738,
|
|
"grad_norm": 7.8032629730941565,
|
|
"learning_rate": 9.809934882064555e-06,
|
|
"loss": 2.8431854248046875,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.5406403940886699,
|
|
"grad_norm": 10.324361743530943,
|
|
"learning_rate": 9.807972930246531e-06,
|
|
"loss": 2.3595449924468994,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.541871921182266,
|
|
"grad_norm": 15.306323140698186,
|
|
"learning_rate": 9.806001102439789e-06,
|
|
"loss": 2.55434250831604,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.5431034482758621,
|
|
"grad_norm": 23.37582741202724,
|
|
"learning_rate": 9.804019402694627e-06,
|
|
"loss": 2.4509990215301514,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.5443349753694581,
|
|
"grad_norm": 9.38267743442567,
|
|
"learning_rate": 9.802027835081628e-06,
|
|
"loss": 2.825401782989502,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.5455665024630542,
|
|
"grad_norm": 10.449224530160473,
|
|
"learning_rate": 9.800026403691643e-06,
|
|
"loss": 2.7315573692321777,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.5467980295566502,
|
|
"grad_norm": 22.900410887080454,
|
|
"learning_rate": 9.798015112635786e-06,
|
|
"loss": 3.1359333992004395,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.5480295566502463,
|
|
"grad_norm": 9.839888483337905,
|
|
"learning_rate": 9.795993966045418e-06,
|
|
"loss": 3.2884740829467773,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.5492610837438424,
|
|
"grad_norm": 9.35231433219537,
|
|
"learning_rate": 9.793962968072149e-06,
|
|
"loss": 2.8281359672546387,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.5504926108374384,
|
|
"grad_norm": 6.698793862232108,
|
|
"learning_rate": 9.791922122887823e-06,
|
|
"loss": 2.633974313735962,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.5517241379310345,
|
|
"grad_norm": 8.317360049933578,
|
|
"learning_rate": 9.78987143468451e-06,
|
|
"loss": 2.1651690006256104,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.5529556650246306,
|
|
"grad_norm": 11.511312923842238,
|
|
"learning_rate": 9.7878109076745e-06,
|
|
"loss": 3.011908531188965,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.5541871921182266,
|
|
"grad_norm": 15.627130212627556,
|
|
"learning_rate": 9.785740546090293e-06,
|
|
"loss": 3.121683359146118,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.5554187192118226,
|
|
"grad_norm": 14.263261857694998,
|
|
"learning_rate": 9.783660354184589e-06,
|
|
"loss": 2.9901375770568848,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.5566502463054187,
|
|
"grad_norm": 15.230602091833177,
|
|
"learning_rate": 9.78157033623028e-06,
|
|
"loss": 3.1121528148651123,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.5578817733990148,
|
|
"grad_norm": 22.32110731618789,
|
|
"learning_rate": 9.779470496520442e-06,
|
|
"loss": 2.9811508655548096,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.5591133004926109,
|
|
"grad_norm": 11.801131103021726,
|
|
"learning_rate": 9.777360839368327e-06,
|
|
"loss": 2.8219947814941406,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.5603448275862069,
|
|
"grad_norm": 10.166506753796495,
|
|
"learning_rate": 9.77524136910735e-06,
|
|
"loss": 2.870987892150879,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.5615763546798029,
|
|
"grad_norm": 9.413959781223877,
|
|
"learning_rate": 9.773112090091084e-06,
|
|
"loss": 3.1902365684509277,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.562807881773399,
|
|
"grad_norm": 12.723571043561764,
|
|
"learning_rate": 9.770973006693256e-06,
|
|
"loss": 3.3052220344543457,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.5640394088669951,
|
|
"grad_norm": 14.337077670753716,
|
|
"learning_rate": 9.76882412330772e-06,
|
|
"loss": 2.3376049995422363,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.5652709359605911,
|
|
"grad_norm": 10.245935627064924,
|
|
"learning_rate": 9.766665444348472e-06,
|
|
"loss": 2.8364970684051514,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.5665024630541872,
|
|
"grad_norm": 18.308636912090915,
|
|
"learning_rate": 9.76449697424962e-06,
|
|
"loss": 2.582505702972412,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.5677339901477833,
|
|
"grad_norm": 8.927255205757533,
|
|
"learning_rate": 9.76231871746539e-06,
|
|
"loss": 2.485147476196289,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.5689655172413793,
|
|
"grad_norm": 11.356171958036413,
|
|
"learning_rate": 9.760130678470106e-06,
|
|
"loss": 3.0910027027130127,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.5701970443349754,
|
|
"grad_norm": 10.937354765360512,
|
|
"learning_rate": 9.757932861758188e-06,
|
|
"loss": 3.3621506690979004,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 11.222097055926637,
|
|
"learning_rate": 9.755725271844142e-06,
|
|
"loss": 2.8310019969940186,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.5726600985221675,
|
|
"grad_norm": 30.527175863167063,
|
|
"learning_rate": 9.753507913262548e-06,
|
|
"loss": 2.797703742980957,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.5738916256157636,
|
|
"grad_norm": 15.045285480872131,
|
|
"learning_rate": 9.751280790568047e-06,
|
|
"loss": 2.6609878540039062,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.5751231527093597,
|
|
"grad_norm": 13.871081363987201,
|
|
"learning_rate": 9.749043908335343e-06,
|
|
"loss": 2.778043508529663,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.5763546798029556,
|
|
"grad_norm": 13.771545893500338,
|
|
"learning_rate": 9.74679727115918e-06,
|
|
"loss": 2.8315014839172363,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.5775862068965517,
|
|
"grad_norm": 19.916341772532764,
|
|
"learning_rate": 9.744540883654348e-06,
|
|
"loss": 3.3902840614318848,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.5788177339901478,
|
|
"grad_norm": 22.648986055714484,
|
|
"learning_rate": 9.742274750455659e-06,
|
|
"loss": 3.53080153465271,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.5800492610837439,
|
|
"grad_norm": 23.493391135041467,
|
|
"learning_rate": 9.739998876217943e-06,
|
|
"loss": 2.270110845565796,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.5812807881773399,
|
|
"grad_norm": 12.049204240060057,
|
|
"learning_rate": 9.737713265616043e-06,
|
|
"loss": 2.7059872150421143,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.5825123152709359,
|
|
"grad_norm": 20.2953123538445,
|
|
"learning_rate": 9.735417923344798e-06,
|
|
"loss": 4.328514575958252,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.583743842364532,
|
|
"grad_norm": 14.790979425207205,
|
|
"learning_rate": 9.73311285411904e-06,
|
|
"loss": 3.2155938148498535,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.5849753694581281,
|
|
"grad_norm": 35.79655633932577,
|
|
"learning_rate": 9.730798062673575e-06,
|
|
"loss": 2.277022361755371,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.5862068965517241,
|
|
"grad_norm": 10.760493401180613,
|
|
"learning_rate": 9.728473553763186e-06,
|
|
"loss": 2.794111490249634,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.5874384236453202,
|
|
"grad_norm": 7.877057642797786,
|
|
"learning_rate": 9.726139332162613e-06,
|
|
"loss": 3.00388765335083,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.5886699507389163,
|
|
"grad_norm": 10.974644270731439,
|
|
"learning_rate": 9.723795402666549e-06,
|
|
"loss": 2.5355563163757324,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.5899014778325123,
|
|
"grad_norm": 22.285874447386394,
|
|
"learning_rate": 9.721441770089621e-06,
|
|
"loss": 3.2441415786743164,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.5911330049261084,
|
|
"grad_norm": 13.333764613863938,
|
|
"learning_rate": 9.719078439266399e-06,
|
|
"loss": 2.826803207397461,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.5923645320197044,
|
|
"grad_norm": 6.843940415955184,
|
|
"learning_rate": 9.716705415051362e-06,
|
|
"loss": 2.5396804809570312,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.5935960591133005,
|
|
"grad_norm": 23.860174795633608,
|
|
"learning_rate": 9.714322702318908e-06,
|
|
"loss": 2.85546875,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.5948275862068966,
|
|
"grad_norm": 12.255473790019064,
|
|
"learning_rate": 9.711930305963333e-06,
|
|
"loss": 3.217014789581299,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.5960591133004927,
|
|
"grad_norm": 8.15967079186392,
|
|
"learning_rate": 9.70952823089882e-06,
|
|
"loss": 2.781094551086426,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.5972906403940886,
|
|
"grad_norm": 11.942750739396006,
|
|
"learning_rate": 9.707116482059447e-06,
|
|
"loss": 2.617154121398926,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.5985221674876847,
|
|
"grad_norm": 15.243819163950327,
|
|
"learning_rate": 9.704695064399143e-06,
|
|
"loss": 2.601886510848999,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.5997536945812808,
|
|
"grad_norm": 27.321867153996244,
|
|
"learning_rate": 9.702263982891712e-06,
|
|
"loss": 2.9616146087646484,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.6009852216748769,
|
|
"grad_norm": 9.511966390540264,
|
|
"learning_rate": 9.699823242530803e-06,
|
|
"loss": 2.8881943225860596,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.6022167487684729,
|
|
"grad_norm": 9.673073669047454,
|
|
"learning_rate": 9.697372848329905e-06,
|
|
"loss": 2.6718311309814453,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.603448275862069,
|
|
"grad_norm": 12.946431548834504,
|
|
"learning_rate": 9.69491280532234e-06,
|
|
"loss": 2.959104537963867,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.604679802955665,
|
|
"grad_norm": 13.919071872066077,
|
|
"learning_rate": 9.692443118561248e-06,
|
|
"loss": 2.085991621017456,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.6059113300492611,
|
|
"grad_norm": 168.7126461149896,
|
|
"learning_rate": 9.689963793119574e-06,
|
|
"loss": 4.498569488525391,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.6071428571428571,
|
|
"grad_norm": 12.118400731206464,
|
|
"learning_rate": 9.68747483409007e-06,
|
|
"loss": 2.7837424278259277,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.6083743842364532,
|
|
"grad_norm": 14.436749099341482,
|
|
"learning_rate": 9.684976246585264e-06,
|
|
"loss": 2.637524366378784,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.6096059113300493,
|
|
"grad_norm": 12.923969042105849,
|
|
"learning_rate": 9.682468035737475e-06,
|
|
"loss": 2.765727996826172,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.6108374384236454,
|
|
"grad_norm": 12.957696638033102,
|
|
"learning_rate": 9.679950206698782e-06,
|
|
"loss": 2.825129270553589,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.6120689655172413,
|
|
"grad_norm": 12.328586386653942,
|
|
"learning_rate": 9.677422764641021e-06,
|
|
"loss": 2.733224630355835,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.6133004926108374,
|
|
"grad_norm": 10.367355913707218,
|
|
"learning_rate": 9.674885714755773e-06,
|
|
"loss": 3.6287670135498047,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.6145320197044335,
|
|
"grad_norm": 8.212604152981882,
|
|
"learning_rate": 9.672339062254359e-06,
|
|
"loss": 2.38788104057312,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.6157635467980296,
|
|
"grad_norm": 13.545719741820621,
|
|
"learning_rate": 9.66978281236782e-06,
|
|
"loss": 2.942269802093506,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.6169950738916257,
|
|
"grad_norm": 12.748449735511594,
|
|
"learning_rate": 9.667216970346916e-06,
|
|
"loss": 2.4100990295410156,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.6182266009852216,
|
|
"grad_norm": 15.669540249604715,
|
|
"learning_rate": 9.6646415414621e-06,
|
|
"loss": 2.3959155082702637,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.6194581280788177,
|
|
"grad_norm": 7.949797631449559,
|
|
"learning_rate": 9.662056531003528e-06,
|
|
"loss": 2.93027925491333,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.6206896551724138,
|
|
"grad_norm": 10.116460165226645,
|
|
"learning_rate": 9.659461944281035e-06,
|
|
"loss": 3.164715528488159,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.6219211822660099,
|
|
"grad_norm": 16.218136964088803,
|
|
"learning_rate": 9.656857786624119e-06,
|
|
"loss": 2.634587287902832,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.6231527093596059,
|
|
"grad_norm": 10.922060482445831,
|
|
"learning_rate": 9.654244063381948e-06,
|
|
"loss": 3.5667788982391357,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.624384236453202,
|
|
"grad_norm": 8.542161812174806,
|
|
"learning_rate": 9.651620779923332e-06,
|
|
"loss": 2.9383740425109863,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.625615763546798,
|
|
"grad_norm": 10.347829866523263,
|
|
"learning_rate": 9.648987941636719e-06,
|
|
"loss": 2.7658987045288086,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.6268472906403941,
|
|
"grad_norm": 8.548905747003822,
|
|
"learning_rate": 9.646345553930187e-06,
|
|
"loss": 3.3089890480041504,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.6280788177339901,
|
|
"grad_norm": 6.487031716645425,
|
|
"learning_rate": 9.643693622231426e-06,
|
|
"loss": 2.6208066940307617,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.6293103448275862,
|
|
"grad_norm": 8.110412464341984,
|
|
"learning_rate": 9.64103215198773e-06,
|
|
"loss": 2.7099995613098145,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.6305418719211823,
|
|
"grad_norm": 14.245396567085763,
|
|
"learning_rate": 9.638361148665989e-06,
|
|
"loss": 2.894531488418579,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.6317733990147784,
|
|
"grad_norm": 11.657856176430656,
|
|
"learning_rate": 9.63568061775267e-06,
|
|
"loss": 3.1289191246032715,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.6330049261083743,
|
|
"grad_norm": 14.82098703249081,
|
|
"learning_rate": 9.632990564753817e-06,
|
|
"loss": 2.954707145690918,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.6342364532019704,
|
|
"grad_norm": 6.808305322372754,
|
|
"learning_rate": 9.630290995195028e-06,
|
|
"loss": 2.93411922454834,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.6354679802955665,
|
|
"grad_norm": 7.276364027378903,
|
|
"learning_rate": 9.62758191462145e-06,
|
|
"loss": 2.637021541595459,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.6366995073891626,
|
|
"grad_norm": 13.898029887698447,
|
|
"learning_rate": 9.624863328597767e-06,
|
|
"loss": 3.020066261291504,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.6379310344827587,
|
|
"grad_norm": 24.08793299798331,
|
|
"learning_rate": 9.622135242708188e-06,
|
|
"loss": 2.5983335971832275,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.6391625615763546,
|
|
"grad_norm": 13.609628946959008,
|
|
"learning_rate": 9.619397662556434e-06,
|
|
"loss": 2.714207410812378,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.6403940886699507,
|
|
"grad_norm": 8.67874834351866,
|
|
"learning_rate": 9.616650593765733e-06,
|
|
"loss": 2.8505520820617676,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.6416256157635468,
|
|
"grad_norm": 8.300798802306481,
|
|
"learning_rate": 9.613894041978795e-06,
|
|
"loss": 2.8081271648406982,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.6428571428571429,
|
|
"grad_norm": 10.020203888067801,
|
|
"learning_rate": 9.611128012857818e-06,
|
|
"loss": 3.106411933898926,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.6440886699507389,
|
|
"grad_norm": 9.32846194404547,
|
|
"learning_rate": 9.60835251208446e-06,
|
|
"loss": 3.087594985961914,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.645320197044335,
|
|
"grad_norm": 15.30312860694116,
|
|
"learning_rate": 9.60556754535984e-06,
|
|
"loss": 2.7104361057281494,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.646551724137931,
|
|
"grad_norm": 14.847900307580543,
|
|
"learning_rate": 9.602773118404518e-06,
|
|
"loss": 2.8562324047088623,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.6477832512315271,
|
|
"grad_norm": 8.874728218475076,
|
|
"learning_rate": 9.599969236958485e-06,
|
|
"loss": 3.282554864883423,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.6490147783251231,
|
|
"grad_norm": 8.797844640723032,
|
|
"learning_rate": 9.597155906781154e-06,
|
|
"loss": 2.623101234436035,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.6502463054187192,
|
|
"grad_norm": 9.863712955626877,
|
|
"learning_rate": 9.59433313365135e-06,
|
|
"loss": 2.889674663543701,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.6514778325123153,
|
|
"grad_norm": 10.895399946836921,
|
|
"learning_rate": 9.591500923367287e-06,
|
|
"loss": 2.787289619445801,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.6527093596059114,
|
|
"grad_norm": 10.227588231836696,
|
|
"learning_rate": 9.58865928174657e-06,
|
|
"loss": 2.879824161529541,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.6539408866995073,
|
|
"grad_norm": 8.869590002729453,
|
|
"learning_rate": 9.585808214626173e-06,
|
|
"loss": 2.967193126678467,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.6551724137931034,
|
|
"grad_norm": 8.822784237769133,
|
|
"learning_rate": 9.582947727862433e-06,
|
|
"loss": 3.1004772186279297,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.6564039408866995,
|
|
"grad_norm": 13.346747444504954,
|
|
"learning_rate": 9.580077827331038e-06,
|
|
"loss": 2.69935941696167,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.6576354679802956,
|
|
"grad_norm": 13.781647523739567,
|
|
"learning_rate": 9.577198518927005e-06,
|
|
"loss": 3.2806637287139893,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.6588669950738916,
|
|
"grad_norm": 17.336818625260154,
|
|
"learning_rate": 9.574309808564682e-06,
|
|
"loss": 3.050356149673462,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.6600985221674877,
|
|
"grad_norm": 9.311777076008125,
|
|
"learning_rate": 9.57141170217773e-06,
|
|
"loss": 2.8415322303771973,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.6613300492610837,
|
|
"grad_norm": 12.410317292425518,
|
|
"learning_rate": 9.568504205719106e-06,
|
|
"loss": 2.5309085845947266,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.6625615763546798,
|
|
"grad_norm": 15.225443304522335,
|
|
"learning_rate": 9.565587325161056e-06,
|
|
"loss": 3.5695877075195312,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.6637931034482759,
|
|
"grad_norm": 9.562550097283651,
|
|
"learning_rate": 9.562661066495108e-06,
|
|
"loss": 2.7938594818115234,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.6650246305418719,
|
|
"grad_norm": 8.825138850911314,
|
|
"learning_rate": 9.559725435732042e-06,
|
|
"loss": 2.8548948764801025,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.666256157635468,
|
|
"grad_norm": 10.262300101456184,
|
|
"learning_rate": 9.556780438901899e-06,
|
|
"loss": 3.054051399230957,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.6674876847290641,
|
|
"grad_norm": 26.545357662435233,
|
|
"learning_rate": 9.553826082053951e-06,
|
|
"loss": 3.566359281539917,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.6687192118226601,
|
|
"grad_norm": 12.751257760928588,
|
|
"learning_rate": 9.550862371256705e-06,
|
|
"loss": 2.8619909286499023,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.6699507389162561,
|
|
"grad_norm": 14.522375958962538,
|
|
"learning_rate": 9.547889312597877e-06,
|
|
"loss": 3.0177836418151855,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.6711822660098522,
|
|
"grad_norm": 21.356139863129055,
|
|
"learning_rate": 9.544906912184383e-06,
|
|
"loss": 1.9943304061889648,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.6724137931034483,
|
|
"grad_norm": 5.562548029921876,
|
|
"learning_rate": 9.541915176142326e-06,
|
|
"loss": 2.650038957595825,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.6736453201970444,
|
|
"grad_norm": 12.716408540810125,
|
|
"learning_rate": 9.538914110616995e-06,
|
|
"loss": 2.826953411102295,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.6748768472906403,
|
|
"grad_norm": 9.963475586190201,
|
|
"learning_rate": 9.53590372177283e-06,
|
|
"loss": 2.770202159881592,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.6761083743842364,
|
|
"grad_norm": 32.875675817649174,
|
|
"learning_rate": 9.532884015793432e-06,
|
|
"loss": 2.0859670639038086,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.6773399014778325,
|
|
"grad_norm": 11.983581363761447,
|
|
"learning_rate": 9.529854998881534e-06,
|
|
"loss": 2.7557499408721924,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.6785714285714286,
|
|
"grad_norm": 13.15410482971192,
|
|
"learning_rate": 9.526816677258995e-06,
|
|
"loss": 2.710692882537842,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.6798029556650246,
|
|
"grad_norm": 9.416519545873685,
|
|
"learning_rate": 9.523769057166791e-06,
|
|
"loss": 3.055102825164795,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.6810344827586207,
|
|
"grad_norm": 11.60625904359093,
|
|
"learning_rate": 9.520712144864997e-06,
|
|
"loss": 2.606031894683838,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.6822660098522167,
|
|
"grad_norm": 12.067258837088112,
|
|
"learning_rate": 9.517645946632766e-06,
|
|
"loss": 2.9099555015563965,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.6834975369458128,
|
|
"grad_norm": 10.888483887311708,
|
|
"learning_rate": 9.514570468768338e-06,
|
|
"loss": 2.7148189544677734,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.6847290640394089,
|
|
"grad_norm": 15.652077873544759,
|
|
"learning_rate": 9.511485717589006e-06,
|
|
"loss": 2.528857707977295,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.6859605911330049,
|
|
"grad_norm": 12.750166049911234,
|
|
"learning_rate": 9.508391699431114e-06,
|
|
"loss": 2.814006805419922,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.687192118226601,
|
|
"grad_norm": 12.187355034460829,
|
|
"learning_rate": 9.50528842065004e-06,
|
|
"loss": 3.3046352863311768,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.6884236453201971,
|
|
"grad_norm": 12.182964964248615,
|
|
"learning_rate": 9.502175887620188e-06,
|
|
"loss": 3.1519320011138916,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.6896551724137931,
|
|
"grad_norm": 26.00958255437091,
|
|
"learning_rate": 9.499054106734963e-06,
|
|
"loss": 2.2819509506225586,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.6908866995073891,
|
|
"grad_norm": 10.437408285902773,
|
|
"learning_rate": 9.495923084406773e-06,
|
|
"loss": 2.7894287109375,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.6921182266009852,
|
|
"grad_norm": 27.469926449959043,
|
|
"learning_rate": 9.492782827067006e-06,
|
|
"loss": 3.233968734741211,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.6933497536945813,
|
|
"grad_norm": 19.246363086379436,
|
|
"learning_rate": 9.48963334116602e-06,
|
|
"loss": 2.594421863555908,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.6945812807881774,
|
|
"grad_norm": 11.788384104886402,
|
|
"learning_rate": 9.486474633173129e-06,
|
|
"loss": 3.181318759918213,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.6958128078817734,
|
|
"grad_norm": 10.754721829366346,
|
|
"learning_rate": 9.48330670957659e-06,
|
|
"loss": 3.2115392684936523,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.6970443349753694,
|
|
"grad_norm": 12.089226690676854,
|
|
"learning_rate": 9.480129576883592e-06,
|
|
"loss": 2.408634901046753,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.6982758620689655,
|
|
"grad_norm": 13.370163003636199,
|
|
"learning_rate": 9.476943241620233e-06,
|
|
"loss": 2.9304041862487793,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.6995073891625616,
|
|
"grad_norm": 23.52604617683973,
|
|
"learning_rate": 9.473747710331524e-06,
|
|
"loss": 2.75127911567688,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.7007389162561576,
|
|
"grad_norm": 33.407245089515435,
|
|
"learning_rate": 9.470542989581357e-06,
|
|
"loss": 3.3793530464172363,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.7019704433497537,
|
|
"grad_norm": 8.494714152681327,
|
|
"learning_rate": 9.467329085952505e-06,
|
|
"loss": 3.001579999923706,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.7032019704433498,
|
|
"grad_norm": 12.457476112208125,
|
|
"learning_rate": 9.464106006046602e-06,
|
|
"loss": 2.063443422317505,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.7044334975369458,
|
|
"grad_norm": 11.893453239405563,
|
|
"learning_rate": 9.460873756484128e-06,
|
|
"loss": 3.079399585723877,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.7056650246305419,
|
|
"grad_norm": 17.600286095390665,
|
|
"learning_rate": 9.457632343904404e-06,
|
|
"loss": 2.6499621868133545,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.7068965517241379,
|
|
"grad_norm": 11.052824766544509,
|
|
"learning_rate": 9.454381774965567e-06,
|
|
"loss": 2.848517656326294,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.708128078817734,
|
|
"grad_norm": 11.779141171142625,
|
|
"learning_rate": 9.451122056344564e-06,
|
|
"loss": 2.936286687850952,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.7093596059113301,
|
|
"grad_norm": 12.447965784800195,
|
|
"learning_rate": 9.44785319473714e-06,
|
|
"loss": 2.315443515777588,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.7105911330049262,
|
|
"grad_norm": 13.488894073216153,
|
|
"learning_rate": 9.444575196857814e-06,
|
|
"loss": 3.121138334274292,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.7118226600985221,
|
|
"grad_norm": 15.155327825693226,
|
|
"learning_rate": 9.441288069439876e-06,
|
|
"loss": 3.326282501220703,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.7130541871921182,
|
|
"grad_norm": 12.463167654535278,
|
|
"learning_rate": 9.437991819235366e-06,
|
|
"loss": 2.8816466331481934,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 14.769356931380226,
|
|
"learning_rate": 9.434686453015067e-06,
|
|
"loss": 3.6819610595703125,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.7155172413793104,
|
|
"grad_norm": 33.4724384154282,
|
|
"learning_rate": 9.431371977568483e-06,
|
|
"loss": 2.904045343399048,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.7167487684729064,
|
|
"grad_norm": 8.623967512206425,
|
|
"learning_rate": 9.428048399703831e-06,
|
|
"loss": 3.5356435775756836,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.7179802955665024,
|
|
"grad_norm": 11.543651581364673,
|
|
"learning_rate": 9.424715726248027e-06,
|
|
"loss": 2.4456870555877686,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.7192118226600985,
|
|
"grad_norm": 6.392692599853808,
|
|
"learning_rate": 9.421373964046665e-06,
|
|
"loss": 2.5000674724578857,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.7204433497536946,
|
|
"grad_norm": 14.327212598984625,
|
|
"learning_rate": 9.418023119964012e-06,
|
|
"loss": 2.856738567352295,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.7216748768472906,
|
|
"grad_norm": 6.593431351524387,
|
|
"learning_rate": 9.414663200882991e-06,
|
|
"loss": 2.623438835144043,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.7229064039408867,
|
|
"grad_norm": 21.188129548487396,
|
|
"learning_rate": 9.411294213705162e-06,
|
|
"loss": 2.987426996231079,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.7241379310344828,
|
|
"grad_norm": 16.308054128010806,
|
|
"learning_rate": 9.407916165350713e-06,
|
|
"loss": 2.8868589401245117,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.7253694581280788,
|
|
"grad_norm": 5.6345787753710965,
|
|
"learning_rate": 9.404529062758447e-06,
|
|
"loss": 2.878659725189209,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.7266009852216748,
|
|
"grad_norm": 21.624096395043555,
|
|
"learning_rate": 9.401132912885764e-06,
|
|
"loss": 3.197636127471924,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.7278325123152709,
|
|
"grad_norm": 28.674970274616843,
|
|
"learning_rate": 9.397727722708643e-06,
|
|
"loss": 2.8974030017852783,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.729064039408867,
|
|
"grad_norm": 14.603582651571138,
|
|
"learning_rate": 9.39431349922164e-06,
|
|
"loss": 2.558945894241333,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.7302955665024631,
|
|
"grad_norm": 6.004290408591086,
|
|
"learning_rate": 9.390890249437863e-06,
|
|
"loss": 1.0518803596496582,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.7315270935960592,
|
|
"grad_norm": 16.62422153547852,
|
|
"learning_rate": 9.38745798038896e-06,
|
|
"loss": 3.5599231719970703,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.7327586206896551,
|
|
"grad_norm": 9.731487783525235,
|
|
"learning_rate": 9.384016699125102e-06,
|
|
"loss": 3.1517539024353027,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.7339901477832512,
|
|
"grad_norm": 10.319265754066222,
|
|
"learning_rate": 9.380566412714982e-06,
|
|
"loss": 2.809019088745117,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.7352216748768473,
|
|
"grad_norm": 14.675772943073882,
|
|
"learning_rate": 9.377107128245782e-06,
|
|
"loss": 3.2317776679992676,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.7364532019704434,
|
|
"grad_norm": 15.494293767128655,
|
|
"learning_rate": 9.373638852823166e-06,
|
|
"loss": 2.7792513370513916,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.7376847290640394,
|
|
"grad_norm": 17.02704136876628,
|
|
"learning_rate": 9.370161593571274e-06,
|
|
"loss": 2.75253963470459,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.7389162561576355,
|
|
"grad_norm": 14.987899586174,
|
|
"learning_rate": 9.36667535763269e-06,
|
|
"loss": 3.381519317626953,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.7401477832512315,
|
|
"grad_norm": 19.24830788986111,
|
|
"learning_rate": 9.363180152168448e-06,
|
|
"loss": 2.62427020072937,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.7413793103448276,
|
|
"grad_norm": 29.185871046378647,
|
|
"learning_rate": 9.359675984357992e-06,
|
|
"loss": 2.4824719429016113,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.7426108374384236,
|
|
"grad_norm": 8.673285241589555,
|
|
"learning_rate": 9.356162861399188e-06,
|
|
"loss": 2.8167097568511963,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.7438423645320197,
|
|
"grad_norm": 15.318689439779794,
|
|
"learning_rate": 9.352640790508291e-06,
|
|
"loss": 2.9545063972473145,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.7450738916256158,
|
|
"grad_norm": 16.1719679891284,
|
|
"learning_rate": 9.349109778919938e-06,
|
|
"loss": 2.833635091781616,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.7463054187192119,
|
|
"grad_norm": 9.791828516981264,
|
|
"learning_rate": 9.345569833887124e-06,
|
|
"loss": 2.775730609893799,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.7475369458128078,
|
|
"grad_norm": 28.327643593931583,
|
|
"learning_rate": 9.342020962681206e-06,
|
|
"loss": 2.652602195739746,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.7487684729064039,
|
|
"grad_norm": 10.194351110042778,
|
|
"learning_rate": 9.338463172591868e-06,
|
|
"loss": 2.7008144855499268,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 9.445868833849106,
|
|
"learning_rate": 9.334896470927115e-06,
|
|
"loss": 2.7525248527526855,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.7512315270935961,
|
|
"grad_norm": 26.640278263158898,
|
|
"learning_rate": 9.331320865013257e-06,
|
|
"loss": 3.446526527404785,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.7524630541871922,
|
|
"grad_norm": 14.322498892724218,
|
|
"learning_rate": 9.327736362194899e-06,
|
|
"loss": 3.0489022731781006,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.7536945812807881,
|
|
"grad_norm": 9.879694468014232,
|
|
"learning_rate": 9.324142969834916e-06,
|
|
"loss": 2.840083360671997,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.7549261083743842,
|
|
"grad_norm": 8.637072486896487,
|
|
"learning_rate": 9.32054069531444e-06,
|
|
"loss": 2.878903388977051,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.7561576354679803,
|
|
"grad_norm": 10.815449949874669,
|
|
"learning_rate": 9.316929546032855e-06,
|
|
"loss": 2.568045139312744,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.7573891625615764,
|
|
"grad_norm": 18.206411357576574,
|
|
"learning_rate": 9.313309529407773e-06,
|
|
"loss": 2.8981618881225586,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.7586206896551724,
|
|
"grad_norm": 14.515670827099761,
|
|
"learning_rate": 9.309680652875015e-06,
|
|
"loss": 3.3486928939819336,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.7598522167487685,
|
|
"grad_norm": 10.208627841304171,
|
|
"learning_rate": 9.306042923888607e-06,
|
|
"loss": 3.1101677417755127,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.7610837438423645,
|
|
"grad_norm": 9.545526159427496,
|
|
"learning_rate": 9.302396349920756e-06,
|
|
"loss": 2.5806779861450195,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.7623152709359606,
|
|
"grad_norm": 14.260459979245976,
|
|
"learning_rate": 9.298740938461835e-06,
|
|
"loss": 2.678412437438965,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.7635467980295566,
|
|
"grad_norm": 10.808443055524243,
|
|
"learning_rate": 9.295076697020378e-06,
|
|
"loss": 2.62287974357605,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.7647783251231527,
|
|
"grad_norm": 7.635004154714619,
|
|
"learning_rate": 9.291403633123046e-06,
|
|
"loss": 3.0267720222473145,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.7660098522167488,
|
|
"grad_norm": 15.707612902426492,
|
|
"learning_rate": 9.287721754314629e-06,
|
|
"loss": 3.147644281387329,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.7672413793103449,
|
|
"grad_norm": 14.526297785533162,
|
|
"learning_rate": 9.284031068158023e-06,
|
|
"loss": 3.159574031829834,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.7684729064039408,
|
|
"grad_norm": 13.384426615670701,
|
|
"learning_rate": 9.280331582234212e-06,
|
|
"loss": 2.6432247161865234,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.7697044334975369,
|
|
"grad_norm": 14.835270706650137,
|
|
"learning_rate": 9.27662330414226e-06,
|
|
"loss": 3.2058279514312744,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.770935960591133,
|
|
"grad_norm": 10.18160016154191,
|
|
"learning_rate": 9.272906241499285e-06,
|
|
"loss": 2.787260055541992,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.7721674876847291,
|
|
"grad_norm": 13.10691777443293,
|
|
"learning_rate": 9.269180401940455e-06,
|
|
"loss": 2.5751729011535645,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.7733990147783252,
|
|
"grad_norm": 31.695378978025254,
|
|
"learning_rate": 9.265445793118962e-06,
|
|
"loss": 2.7433929443359375,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.7746305418719212,
|
|
"grad_norm": 14.739647225699887,
|
|
"learning_rate": 9.261702422706014e-06,
|
|
"loss": 2.771510124206543,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.7758620689655172,
|
|
"grad_norm": 10.064291707891675,
|
|
"learning_rate": 9.257950298390815e-06,
|
|
"loss": 2.873830795288086,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.7770935960591133,
|
|
"grad_norm": 11.389694880244464,
|
|
"learning_rate": 9.254189427880548e-06,
|
|
"loss": 2.7849340438842773,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.7783251231527094,
|
|
"grad_norm": 9.049096315314397,
|
|
"learning_rate": 9.250419818900366e-06,
|
|
"loss": 3.1721668243408203,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.7795566502463054,
|
|
"grad_norm": 10.167539529464127,
|
|
"learning_rate": 9.24664147919337e-06,
|
|
"loss": 2.7493605613708496,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.7807881773399015,
|
|
"grad_norm": 16.15312048584227,
|
|
"learning_rate": 9.242854416520591e-06,
|
|
"loss": 2.470233917236328,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.7820197044334976,
|
|
"grad_norm": 11.446898989077285,
|
|
"learning_rate": 9.239058638660983e-06,
|
|
"loss": 2.7109014987945557,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.7832512315270936,
|
|
"grad_norm": 15.265461277758774,
|
|
"learning_rate": 9.235254153411394e-06,
|
|
"loss": 3.0344791412353516,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.7844827586206896,
|
|
"grad_norm": 12.820354961892846,
|
|
"learning_rate": 9.231440968586572e-06,
|
|
"loss": 2.381561279296875,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.7857142857142857,
|
|
"grad_norm": 11.033746075983524,
|
|
"learning_rate": 9.227619092019116e-06,
|
|
"loss": 1.716524362564087,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.7869458128078818,
|
|
"grad_norm": 36.36927433118522,
|
|
"learning_rate": 9.223788531559495e-06,
|
|
"loss": 2.591820240020752,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.7881773399014779,
|
|
"grad_norm": 22.998289773218893,
|
|
"learning_rate": 9.219949295076006e-06,
|
|
"loss": 3.0194711685180664,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.7894088669950738,
|
|
"grad_norm": 9.82623401522864,
|
|
"learning_rate": 9.216101390454771e-06,
|
|
"loss": 2.852489471435547,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.7906403940886699,
|
|
"grad_norm": 16.052245879830704,
|
|
"learning_rate": 9.212244825599714e-06,
|
|
"loss": 3.1419005393981934,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.791871921182266,
|
|
"grad_norm": 7.825862600095094,
|
|
"learning_rate": 9.208379608432552e-06,
|
|
"loss": 2.8307576179504395,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.7931034482758621,
|
|
"grad_norm": 8.143984458879574,
|
|
"learning_rate": 9.204505746892772e-06,
|
|
"loss": 2.581083297729492,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.7943349753694581,
|
|
"grad_norm": 18.48744043986469,
|
|
"learning_rate": 9.200623248937619e-06,
|
|
"loss": 2.868973731994629,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.7955665024630542,
|
|
"grad_norm": 8.257209013058233,
|
|
"learning_rate": 9.196732122542073e-06,
|
|
"loss": 2.8063859939575195,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.7967980295566502,
|
|
"grad_norm": 12.8457758247775,
|
|
"learning_rate": 9.192832375698845e-06,
|
|
"loss": 2.990504264831543,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.7980295566502463,
|
|
"grad_norm": 15.29216631759892,
|
|
"learning_rate": 9.18892401641835e-06,
|
|
"loss": 2.390320301055908,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.7992610837438424,
|
|
"grad_norm": 10.724837816433517,
|
|
"learning_rate": 9.185007052728689e-06,
|
|
"loss": 2.671368360519409,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.8004926108374384,
|
|
"grad_norm": 34.65249876179552,
|
|
"learning_rate": 9.181081492675645e-06,
|
|
"loss": 3.259225845336914,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.8017241379310345,
|
|
"grad_norm": 15.454469742488547,
|
|
"learning_rate": 9.177147344322651e-06,
|
|
"loss": 2.6810710430145264,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.8029556650246306,
|
|
"grad_norm": 11.530365704888945,
|
|
"learning_rate": 9.173204615750792e-06,
|
|
"loss": 2.833371162414551,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.8041871921182266,
|
|
"grad_norm": 16.732932575361076,
|
|
"learning_rate": 9.169253315058764e-06,
|
|
"loss": 2.3488945960998535,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.8054187192118226,
|
|
"grad_norm": 9.726564803680413,
|
|
"learning_rate": 9.165293450362882e-06,
|
|
"loss": 2.609282970428467,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.8066502463054187,
|
|
"grad_norm": 7.091881545178562,
|
|
"learning_rate": 9.161325029797044e-06,
|
|
"loss": 2.536142587661743,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.8078817733990148,
|
|
"grad_norm": 9.986592341017682,
|
|
"learning_rate": 9.157348061512728e-06,
|
|
"loss": 2.7175073623657227,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.8091133004926109,
|
|
"grad_norm": 8.682128121343633,
|
|
"learning_rate": 9.153362553678967e-06,
|
|
"loss": 2.99211049079895,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.8103448275862069,
|
|
"grad_norm": 9.322932294885456,
|
|
"learning_rate": 9.149368514482337e-06,
|
|
"loss": 2.9390807151794434,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.8115763546798029,
|
|
"grad_norm": 18.322306761451276,
|
|
"learning_rate": 9.145365952126937e-06,
|
|
"loss": 3.0422894954681396,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.812807881773399,
|
|
"grad_norm": 13.085537087984829,
|
|
"learning_rate": 9.141354874834372e-06,
|
|
"loss": 3.0573301315307617,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.8140394088669951,
|
|
"grad_norm": 11.125925990068074,
|
|
"learning_rate": 9.13733529084374e-06,
|
|
"loss": 2.5086781978607178,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.8152709359605911,
|
|
"grad_norm": 12.865460326379043,
|
|
"learning_rate": 9.13330720841161e-06,
|
|
"loss": 2.858813762664795,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.8165024630541872,
|
|
"grad_norm": 16.68197454357427,
|
|
"learning_rate": 9.129270635812013e-06,
|
|
"loss": 2.6715052127838135,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.8177339901477833,
|
|
"grad_norm": 8.328828299636488,
|
|
"learning_rate": 9.125225581336408e-06,
|
|
"loss": 3.18508243560791,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.8189655172413793,
|
|
"grad_norm": 12.129831350250795,
|
|
"learning_rate": 9.12117205329369e-06,
|
|
"loss": 3.0426509380340576,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.8201970443349754,
|
|
"grad_norm": 10.31532455027376,
|
|
"learning_rate": 9.11711006001015e-06,
|
|
"loss": 2.8654000759124756,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.8214285714285714,
|
|
"grad_norm": 22.312769944556898,
|
|
"learning_rate": 9.113039609829472e-06,
|
|
"loss": 3.141207695007324,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.8226600985221675,
|
|
"grad_norm": 9.864189257198062,
|
|
"learning_rate": 9.108960711112709e-06,
|
|
"loss": 2.3188462257385254,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.8238916256157636,
|
|
"grad_norm": 7.227847497482275,
|
|
"learning_rate": 9.104873372238269e-06,
|
|
"loss": 2.785968542098999,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.8251231527093597,
|
|
"grad_norm": 11.651688072805056,
|
|
"learning_rate": 9.100777601601896e-06,
|
|
"loss": 3.0693092346191406,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.8263546798029556,
|
|
"grad_norm": 14.359029220301974,
|
|
"learning_rate": 9.096673407616656e-06,
|
|
"loss": 3.038943290710449,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.8275862068965517,
|
|
"grad_norm": 11.367718044029667,
|
|
"learning_rate": 9.092560798712913e-06,
|
|
"loss": 3.259847640991211,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.8288177339901478,
|
|
"grad_norm": 7.44988788267686,
|
|
"learning_rate": 9.08843978333832e-06,
|
|
"loss": 2.8227295875549316,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.8300492610837439,
|
|
"grad_norm": 11.316814915640423,
|
|
"learning_rate": 9.084310369957795e-06,
|
|
"loss": 3.373309850692749,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.8312807881773399,
|
|
"grad_norm": 8.828902957926932,
|
|
"learning_rate": 9.08017256705351e-06,
|
|
"loss": 3.2833662033081055,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.8325123152709359,
|
|
"grad_norm": 26.42438693311499,
|
|
"learning_rate": 9.076026383124863e-06,
|
|
"loss": 2.7175965309143066,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.833743842364532,
|
|
"grad_norm": 15.34429558424053,
|
|
"learning_rate": 9.071871826688472e-06,
|
|
"loss": 2.594611167907715,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.8349753694581281,
|
|
"grad_norm": 23.79233069504134,
|
|
"learning_rate": 9.067708906278155e-06,
|
|
"loss": 2.8605175018310547,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.8362068965517241,
|
|
"grad_norm": 16.81935056764866,
|
|
"learning_rate": 9.063537630444903e-06,
|
|
"loss": 2.1438748836517334,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.8374384236453202,
|
|
"grad_norm": 10.888612008792562,
|
|
"learning_rate": 9.05935800775688e-06,
|
|
"loss": 2.8170299530029297,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.8386699507389163,
|
|
"grad_norm": 14.167748893628115,
|
|
"learning_rate": 9.055170046799386e-06,
|
|
"loss": 1.7328954935073853,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.8399014778325123,
|
|
"grad_norm": 9.011227940975711,
|
|
"learning_rate": 9.050973756174852e-06,
|
|
"loss": 2.8324766159057617,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.8411330049261084,
|
|
"grad_norm": 10.1469630150836,
|
|
"learning_rate": 9.046769144502818e-06,
|
|
"loss": 2.805690288543701,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.8423645320197044,
|
|
"grad_norm": 18.955236663194235,
|
|
"learning_rate": 9.04255622041992e-06,
|
|
"loss": 2.1270194053649902,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.8435960591133005,
|
|
"grad_norm": 15.32094380068091,
|
|
"learning_rate": 9.038334992579863e-06,
|
|
"loss": 2.8757829666137695,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.8448275862068966,
|
|
"grad_norm": 11.38695715200097,
|
|
"learning_rate": 9.034105469653412e-06,
|
|
"loss": 2.84549617767334,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.8460591133004927,
|
|
"grad_norm": 9.897557814234148,
|
|
"learning_rate": 9.029867660328369e-06,
|
|
"loss": 2.4058642387390137,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.8472906403940886,
|
|
"grad_norm": 11.793589267069729,
|
|
"learning_rate": 9.025621573309559e-06,
|
|
"loss": 3.2583184242248535,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.8485221674876847,
|
|
"grad_norm": 16.425935376287054,
|
|
"learning_rate": 9.021367217318808e-06,
|
|
"loss": 2.951143264770508,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.8497536945812808,
|
|
"grad_norm": 23.876213749579968,
|
|
"learning_rate": 9.017104601094927e-06,
|
|
"loss": 3.0142836570739746,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.8509852216748769,
|
|
"grad_norm": 6.8041557155789345,
|
|
"learning_rate": 9.012833733393697e-06,
|
|
"loss": 2.7629013061523438,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.8522167487684729,
|
|
"grad_norm": 12.775266706976657,
|
|
"learning_rate": 9.008554622987845e-06,
|
|
"loss": 2.6153712272644043,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.853448275862069,
|
|
"grad_norm": 10.104362674966435,
|
|
"learning_rate": 9.004267278667032e-06,
|
|
"loss": 2.7227087020874023,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.854679802955665,
|
|
"grad_norm": 10.955806195385584,
|
|
"learning_rate": 8.999971709237832e-06,
|
|
"loss": 2.7320899963378906,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.8559113300492611,
|
|
"grad_norm": 9.04416662510961,
|
|
"learning_rate": 8.99566792352371e-06,
|
|
"loss": 2.4416356086730957,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 23.838296750423428,
|
|
"learning_rate": 8.991355930365013e-06,
|
|
"loss": 3.251642942428589,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.8583743842364532,
|
|
"grad_norm": 46.67562045008053,
|
|
"learning_rate": 8.987035738618943e-06,
|
|
"loss": 2.9292666912078857,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.8596059113300493,
|
|
"grad_norm": 16.120654552226135,
|
|
"learning_rate": 8.982707357159549e-06,
|
|
"loss": 2.804452896118164,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.8608374384236454,
|
|
"grad_norm": 9.903594099304835,
|
|
"learning_rate": 8.978370794877691e-06,
|
|
"loss": 2.4997687339782715,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.8620689655172413,
|
|
"grad_norm": 43.24532276513338,
|
|
"learning_rate": 8.974026060681044e-06,
|
|
"loss": 2.459716558456421,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.8633004926108374,
|
|
"grad_norm": 6.407514764745252,
|
|
"learning_rate": 8.969673163494063e-06,
|
|
"loss": 2.57291316986084,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.8645320197044335,
|
|
"grad_norm": 9.925965111489338,
|
|
"learning_rate": 8.965312112257973e-06,
|
|
"loss": 2.6452269554138184,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.8657635467980296,
|
|
"grad_norm": 15.666974346483006,
|
|
"learning_rate": 8.960942915930749e-06,
|
|
"loss": 2.4361040592193604,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.8669950738916257,
|
|
"grad_norm": 12.205200732214369,
|
|
"learning_rate": 8.956565583487092e-06,
|
|
"loss": 2.819046974182129,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.8682266009852216,
|
|
"grad_norm": 23.813445037945687,
|
|
"learning_rate": 8.952180123918419e-06,
|
|
"loss": 3.536510944366455,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.8694581280788177,
|
|
"grad_norm": 19.455220333084014,
|
|
"learning_rate": 8.94778654623284e-06,
|
|
"loss": 3.340855121612549,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.8706896551724138,
|
|
"grad_norm": 15.988003472296347,
|
|
"learning_rate": 8.94338485945514e-06,
|
|
"loss": 2.7881288528442383,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.8719211822660099,
|
|
"grad_norm": 18.44911045759373,
|
|
"learning_rate": 8.938975072626762e-06,
|
|
"loss": 3.119422197341919,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.8731527093596059,
|
|
"grad_norm": 18.233236078041163,
|
|
"learning_rate": 8.934557194805787e-06,
|
|
"loss": 2.694553852081299,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.874384236453202,
|
|
"grad_norm": 13.897466836595251,
|
|
"learning_rate": 8.930131235066914e-06,
|
|
"loss": 2.7162301540374756,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.875615763546798,
|
|
"grad_norm": 9.86969530883223,
|
|
"learning_rate": 8.925697202501442e-06,
|
|
"loss": 2.4017574787139893,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.8768472906403941,
|
|
"grad_norm": 22.07024366462836,
|
|
"learning_rate": 8.92125510621726e-06,
|
|
"loss": 2.491663932800293,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.8780788177339901,
|
|
"grad_norm": 9.704458797982127,
|
|
"learning_rate": 8.916804955338807e-06,
|
|
"loss": 3.09323787689209,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.8793103448275862,
|
|
"grad_norm": 14.245234888372442,
|
|
"learning_rate": 8.91234675900708e-06,
|
|
"loss": 3.0273964405059814,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.8805418719211823,
|
|
"grad_norm": 10.033605733175728,
|
|
"learning_rate": 8.907880526379594e-06,
|
|
"loss": 2.5009701251983643,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.8817733990147784,
|
|
"grad_norm": 14.04261929200788,
|
|
"learning_rate": 8.903406266630374e-06,
|
|
"loss": 2.7629752159118652,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.8830049261083743,
|
|
"grad_norm": 19.00265649950274,
|
|
"learning_rate": 8.898923988949936e-06,
|
|
"loss": 2.5285563468933105,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.8842364532019704,
|
|
"grad_norm": 11.293266358312355,
|
|
"learning_rate": 8.89443370254526e-06,
|
|
"loss": 2.6903738975524902,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.8854679802955665,
|
|
"grad_norm": 4.918527502448237,
|
|
"learning_rate": 8.88993541663978e-06,
|
|
"loss": 2.8083925247192383,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.8866995073891626,
|
|
"grad_norm": 14.900444889845339,
|
|
"learning_rate": 8.885429140473361e-06,
|
|
"loss": 3.0920486450195312,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.8879310344827587,
|
|
"grad_norm": 15.55585461742265,
|
|
"learning_rate": 8.880914883302278e-06,
|
|
"loss": 2.7464776039123535,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.8891625615763546,
|
|
"grad_norm": 28.218307852720514,
|
|
"learning_rate": 8.876392654399208e-06,
|
|
"loss": 2.7022242546081543,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.8903940886699507,
|
|
"grad_norm": 7.9907639594026385,
|
|
"learning_rate": 8.871862463053193e-06,
|
|
"loss": 3.202090263366699,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.8916256157635468,
|
|
"grad_norm": 12.370662746549176,
|
|
"learning_rate": 8.867324318569637e-06,
|
|
"loss": 2.792590856552124,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.8928571428571429,
|
|
"grad_norm": 12.485149742498526,
|
|
"learning_rate": 8.862778230270276e-06,
|
|
"loss": 2.8918404579162598,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.8940886699507389,
|
|
"grad_norm": 17.523163987955954,
|
|
"learning_rate": 8.858224207493165e-06,
|
|
"loss": 2.881380081176758,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.895320197044335,
|
|
"grad_norm": 10.929446497515306,
|
|
"learning_rate": 8.85366225959266e-06,
|
|
"loss": 2.7197518348693848,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.896551724137931,
|
|
"grad_norm": 14.58273441890301,
|
|
"learning_rate": 8.849092395939388e-06,
|
|
"loss": 2.8458380699157715,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.8977832512315271,
|
|
"grad_norm": 9.240130544994555,
|
|
"learning_rate": 8.844514625920246e-06,
|
|
"loss": 2.5815629959106445,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.8990147783251231,
|
|
"grad_norm": 12.536324929930204,
|
|
"learning_rate": 8.839928958938364e-06,
|
|
"loss": 2.388244867324829,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.9002463054187192,
|
|
"grad_norm": 9.268565736662921,
|
|
"learning_rate": 8.835335404413096e-06,
|
|
"loss": 2.678809404373169,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.9014778325123153,
|
|
"grad_norm": 13.664345931125762,
|
|
"learning_rate": 8.830733971779996e-06,
|
|
"loss": 3.4926984310150146,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.9027093596059114,
|
|
"grad_norm": 8.38741339708261,
|
|
"learning_rate": 8.826124670490804e-06,
|
|
"loss": 3.143955707550049,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.9039408866995073,
|
|
"grad_norm": 8.285169477267281,
|
|
"learning_rate": 8.821507510013416e-06,
|
|
"loss": 2.30763840675354,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.9051724137931034,
|
|
"grad_norm": 11.658087999854533,
|
|
"learning_rate": 8.816882499831877e-06,
|
|
"loss": 3.2019965648651123,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.9064039408866995,
|
|
"grad_norm": 11.03286006250671,
|
|
"learning_rate": 8.812249649446357e-06,
|
|
"loss": 2.5554118156433105,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.9076354679802956,
|
|
"grad_norm": 10.468019775536181,
|
|
"learning_rate": 8.807608968373123e-06,
|
|
"loss": 2.6560721397399902,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.9088669950738916,
|
|
"grad_norm": 21.753543318554573,
|
|
"learning_rate": 8.802960466144537e-06,
|
|
"loss": 3.2792091369628906,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.9100985221674877,
|
|
"grad_norm": 8.801113008077715,
|
|
"learning_rate": 8.798304152309019e-06,
|
|
"loss": 2.4306914806365967,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.9113300492610837,
|
|
"grad_norm": 11.427047186823343,
|
|
"learning_rate": 8.793640036431036e-06,
|
|
"loss": 2.791334867477417,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.9125615763546798,
|
|
"grad_norm": 11.78168946860072,
|
|
"learning_rate": 8.788968128091084e-06,
|
|
"loss": 2.8516879081726074,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.9137931034482759,
|
|
"grad_norm": 18.40294226204317,
|
|
"learning_rate": 8.784288436885663e-06,
|
|
"loss": 2.783674716949463,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.9150246305418719,
|
|
"grad_norm": 9.042045966372719,
|
|
"learning_rate": 8.779600972427257e-06,
|
|
"loss": 2.538564443588257,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.916256157635468,
|
|
"grad_norm": 21.11608056647587,
|
|
"learning_rate": 8.774905744344326e-06,
|
|
"loss": 2.603914260864258,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.9174876847290641,
|
|
"grad_norm": 18.991966127623154,
|
|
"learning_rate": 8.770202762281267e-06,
|
|
"loss": 2.6232197284698486,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.9187192118226601,
|
|
"grad_norm": 9.533961363388334,
|
|
"learning_rate": 8.765492035898406e-06,
|
|
"loss": 2.586906671524048,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.9199507389162561,
|
|
"grad_norm": 11.702571386481814,
|
|
"learning_rate": 8.760773574871985e-06,
|
|
"loss": 3.019075870513916,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.9211822660098522,
|
|
"grad_norm": 13.549959986762131,
|
|
"learning_rate": 8.756047388894123e-06,
|
|
"loss": 2.6554617881774902,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.9224137931034483,
|
|
"grad_norm": 10.617389263376301,
|
|
"learning_rate": 8.751313487672815e-06,
|
|
"loss": 3.3622567653656006,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.9236453201970444,
|
|
"grad_norm": 15.62971817318244,
|
|
"learning_rate": 8.746571880931896e-06,
|
|
"loss": 2.748253345489502,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.9248768472906403,
|
|
"grad_norm": 10.680533586135248,
|
|
"learning_rate": 8.741822578411036e-06,
|
|
"loss": 3.358571767807007,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.9261083743842364,
|
|
"grad_norm": 8.513871800316197,
|
|
"learning_rate": 8.737065589865709e-06,
|
|
"loss": 2.707146167755127,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.9273399014778325,
|
|
"grad_norm": 15.06206429941032,
|
|
"learning_rate": 8.732300925067177e-06,
|
|
"loss": 2.782027006149292,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.9285714285714286,
|
|
"grad_norm": 13.377969237833796,
|
|
"learning_rate": 8.727528593802469e-06,
|
|
"loss": 2.758582830429077,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.9298029556650246,
|
|
"grad_norm": 12.5189792863405,
|
|
"learning_rate": 8.722748605874365e-06,
|
|
"loss": 2.798398971557617,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.9310344827586207,
|
|
"grad_norm": 7.0237993457565056,
|
|
"learning_rate": 8.717960971101367e-06,
|
|
"loss": 2.8893141746520996,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.9322660098522167,
|
|
"grad_norm": 13.108491345078546,
|
|
"learning_rate": 8.71316569931769e-06,
|
|
"loss": 2.8260703086853027,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.9334975369458128,
|
|
"grad_norm": 13.669452983841648,
|
|
"learning_rate": 8.708362800373235e-06,
|
|
"loss": 2.8373727798461914,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.9347290640394089,
|
|
"grad_norm": 9.979755254671996,
|
|
"learning_rate": 8.703552284133565e-06,
|
|
"loss": 2.7638840675354004,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.9359605911330049,
|
|
"grad_norm": 12.948663627163679,
|
|
"learning_rate": 8.698734160479892e-06,
|
|
"loss": 3.436288833618164,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.937192118226601,
|
|
"grad_norm": 11.570964225425659,
|
|
"learning_rate": 8.69390843930906e-06,
|
|
"loss": 2.9463398456573486,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.9384236453201971,
|
|
"grad_norm": 7.2963116550893945,
|
|
"learning_rate": 8.68907513053351e-06,
|
|
"loss": 2.8301844596862793,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.9396551724137931,
|
|
"grad_norm": 22.281531901716622,
|
|
"learning_rate": 8.684234244081274e-06,
|
|
"loss": 2.329922676086426,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.9408866995073891,
|
|
"grad_norm": 7.190935942786577,
|
|
"learning_rate": 8.67938578989595e-06,
|
|
"loss": 2.2752580642700195,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.9421182266009852,
|
|
"grad_norm": 15.09705330042877,
|
|
"learning_rate": 8.674529777936674e-06,
|
|
"loss": 2.549682378768921,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.9433497536945813,
|
|
"grad_norm": 12.2992067648861,
|
|
"learning_rate": 8.669666218178114e-06,
|
|
"loss": 2.177875518798828,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.9445812807881774,
|
|
"grad_norm": 17.93631082058447,
|
|
"learning_rate": 8.66479512061044e-06,
|
|
"loss": 3.4030704498291016,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.9458128078817734,
|
|
"grad_norm": 12.986753736790972,
|
|
"learning_rate": 8.659916495239302e-06,
|
|
"loss": 2.8890881538391113,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.9470443349753694,
|
|
"grad_norm": 7.80817017570662,
|
|
"learning_rate": 8.655030352085816e-06,
|
|
"loss": 2.6665287017822266,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.9482758620689655,
|
|
"grad_norm": 8.892699708308717,
|
|
"learning_rate": 8.650136701186537e-06,
|
|
"loss": 2.8044798374176025,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.9495073891625616,
|
|
"grad_norm": 12.053681412169821,
|
|
"learning_rate": 8.645235552593447e-06,
|
|
"loss": 2.809295654296875,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.9507389162561576,
|
|
"grad_norm": 9.563242350440067,
|
|
"learning_rate": 8.640326916373923e-06,
|
|
"loss": 2.66239070892334,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.9519704433497537,
|
|
"grad_norm": 11.397593157331492,
|
|
"learning_rate": 8.635410802610724e-06,
|
|
"loss": 3.0714645385742188,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.9532019704433498,
|
|
"grad_norm": 11.141014900339497,
|
|
"learning_rate": 8.630487221401974e-06,
|
|
"loss": 2.5254178047180176,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.9544334975369458,
|
|
"grad_norm": 61.411465635020065,
|
|
"learning_rate": 8.625556182861126e-06,
|
|
"loss": 2.4160585403442383,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.9556650246305419,
|
|
"grad_norm": 15.426050261321397,
|
|
"learning_rate": 8.620617697116957e-06,
|
|
"loss": 2.972367763519287,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.9568965517241379,
|
|
"grad_norm": 11.628713988566439,
|
|
"learning_rate": 8.615671774313543e-06,
|
|
"loss": 2.9206340312957764,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.958128078817734,
|
|
"grad_norm": 9.967877704713992,
|
|
"learning_rate": 8.61071842461023e-06,
|
|
"loss": 3.192002296447754,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.9593596059113301,
|
|
"grad_norm": 8.547648553030225,
|
|
"learning_rate": 8.605757658181626e-06,
|
|
"loss": 3.0840883255004883,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.9605911330049262,
|
|
"grad_norm": 16.72939304902535,
|
|
"learning_rate": 8.60078948521757e-06,
|
|
"loss": 3.344426155090332,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.9618226600985221,
|
|
"grad_norm": 14.860196885671575,
|
|
"learning_rate": 8.595813915923113e-06,
|
|
"loss": 2.887132406234741,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.9630541871921182,
|
|
"grad_norm": 16.504287008501006,
|
|
"learning_rate": 8.590830960518502e-06,
|
|
"loss": 2.354299306869507,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.9642857142857143,
|
|
"grad_norm": 14.601237072457945,
|
|
"learning_rate": 8.585840629239158e-06,
|
|
"loss": 2.574817657470703,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.9655172413793104,
|
|
"grad_norm": 13.581762855163804,
|
|
"learning_rate": 8.580842932335644e-06,
|
|
"loss": 2.3363120555877686,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.9667487684729064,
|
|
"grad_norm": 8.025263413179824,
|
|
"learning_rate": 8.575837880073663e-06,
|
|
"loss": 2.452828884124756,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.9679802955665024,
|
|
"grad_norm": 13.65572211743131,
|
|
"learning_rate": 8.57082548273402e-06,
|
|
"loss": 2.8182177543640137,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.9692118226600985,
|
|
"grad_norm": 22.799475456448384,
|
|
"learning_rate": 8.565805750612607e-06,
|
|
"loss": 3.2871310710906982,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.9704433497536946,
|
|
"grad_norm": 18.807286124868686,
|
|
"learning_rate": 8.560778694020387e-06,
|
|
"loss": 2.959153175354004,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.9716748768472906,
|
|
"grad_norm": 10.644957881123116,
|
|
"learning_rate": 8.555744323283364e-06,
|
|
"loss": 2.859107732772827,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.9729064039408867,
|
|
"grad_norm": 9.606245608690044,
|
|
"learning_rate": 8.550702648742566e-06,
|
|
"loss": 2.8537421226501465,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.9741379310344828,
|
|
"grad_norm": 11.364684038946328,
|
|
"learning_rate": 8.545653680754029e-06,
|
|
"loss": 2.77693772315979,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.9753694581280788,
|
|
"grad_norm": 14.67534992412754,
|
|
"learning_rate": 8.540597429688761e-06,
|
|
"loss": 2.6960999965667725,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.9766009852216748,
|
|
"grad_norm": 14.854511519014162,
|
|
"learning_rate": 8.535533905932739e-06,
|
|
"loss": 3.3942298889160156,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.9778325123152709,
|
|
"grad_norm": 14.090660071520212,
|
|
"learning_rate": 8.530463119886871e-06,
|
|
"loss": 2.8664398193359375,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.979064039408867,
|
|
"grad_norm": 15.427403822127253,
|
|
"learning_rate": 8.525385081966992e-06,
|
|
"loss": 3.023148536682129,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.9802955665024631,
|
|
"grad_norm": 27.257958140053717,
|
|
"learning_rate": 8.520299802603826e-06,
|
|
"loss": 2.7858657836914062,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.9815270935960592,
|
|
"grad_norm": 9.983005237782791,
|
|
"learning_rate": 8.515207292242969e-06,
|
|
"loss": 2.4665451049804688,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.9827586206896551,
|
|
"grad_norm": 11.230050254551738,
|
|
"learning_rate": 8.510107561344876e-06,
|
|
"loss": 2.412269115447998,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.9839901477832512,
|
|
"grad_norm": 18.314579409480903,
|
|
"learning_rate": 8.505000620384834e-06,
|
|
"loss": 3.08200740814209,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.9852216748768473,
|
|
"grad_norm": 12.337382000838234,
|
|
"learning_rate": 8.499886479852935e-06,
|
|
"loss": 2.851126194000244,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.9864532019704434,
|
|
"grad_norm": 16.588814488060716,
|
|
"learning_rate": 8.494765150254063e-06,
|
|
"loss": 2.7692008018493652,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.9876847290640394,
|
|
"grad_norm": 10.778667289136193,
|
|
"learning_rate": 8.489636642107867e-06,
|
|
"loss": 2.045649290084839,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.9889162561576355,
|
|
"grad_norm": 16.235817598925898,
|
|
"learning_rate": 8.484500965948746e-06,
|
|
"loss": 3.0901870727539062,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.9901477832512315,
|
|
"grad_norm": 12.772148604340376,
|
|
"learning_rate": 8.479358132325815e-06,
|
|
"loss": 4.652253150939941,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.9913793103448276,
|
|
"grad_norm": 30.743685192648066,
|
|
"learning_rate": 8.474208151802898e-06,
|
|
"loss": 3.992189884185791,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.9926108374384236,
|
|
"grad_norm": 8.73281768145785,
|
|
"learning_rate": 8.469051034958496e-06,
|
|
"loss": 2.7150464057922363,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.9938423645320197,
|
|
"grad_norm": 9.053303002827397,
|
|
"learning_rate": 8.46388679238577e-06,
|
|
"loss": 2.807770013809204,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.9950738916256158,
|
|
"grad_norm": 10.322870900342917,
|
|
"learning_rate": 8.458715434692515e-06,
|
|
"loss": 2.386625289916992,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.9963054187192119,
|
|
"grad_norm": 11.08968761753187,
|
|
"learning_rate": 8.453536972501146e-06,
|
|
"loss": 2.585855484008789,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.9975369458128078,
|
|
"grad_norm": 17.867602225530977,
|
|
"learning_rate": 8.448351416448664e-06,
|
|
"loss": 1.9756630659103394,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.9987684729064039,
|
|
"grad_norm": 10.119397987976452,
|
|
"learning_rate": 8.443158777186652e-06,
|
|
"loss": 2.844794511795044,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 7.980679156666685,
|
|
"learning_rate": 8.437959065381232e-06,
|
|
"loss": 2.8835721015930176,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 1.001231527093596,
|
|
"grad_norm": 7.910274895398585,
|
|
"learning_rate": 8.432752291713058e-06,
|
|
"loss": 1.4173179864883423,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 1.0024630541871922,
|
|
"grad_norm": 11.748384071481883,
|
|
"learning_rate": 8.427538466877294e-06,
|
|
"loss": 1.3743655681610107,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 1.0036945812807883,
|
|
"grad_norm": 15.520903995356328,
|
|
"learning_rate": 8.422317601583576e-06,
|
|
"loss": 1.448968768119812,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 1.0049261083743843,
|
|
"grad_norm": 10.900297712673185,
|
|
"learning_rate": 8.417089706556015e-06,
|
|
"loss": 1.4555410146713257,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 1.0061576354679802,
|
|
"grad_norm": 14.944365989075473,
|
|
"learning_rate": 8.411854792533154e-06,
|
|
"loss": 1.3096075057983398,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 1.0073891625615763,
|
|
"grad_norm": 28.47454569698464,
|
|
"learning_rate": 8.406612870267957e-06,
|
|
"loss": 1.8452348709106445,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 1.0086206896551724,
|
|
"grad_norm": 15.756002610301957,
|
|
"learning_rate": 8.401363950527777e-06,
|
|
"loss": 1.6339285373687744,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 1.0098522167487685,
|
|
"grad_norm": 6.289340790151406,
|
|
"learning_rate": 8.39610804409435e-06,
|
|
"loss": 1.714133381843567,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 1.0110837438423645,
|
|
"grad_norm": 11.713574774158978,
|
|
"learning_rate": 8.390845161763756e-06,
|
|
"loss": 1.7810550928115845,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 1.0123152709359606,
|
|
"grad_norm": 13.688437053039554,
|
|
"learning_rate": 8.385575314346408e-06,
|
|
"loss": 1.2523250579833984,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 1.0135467980295567,
|
|
"grad_norm": 9.835238587520983,
|
|
"learning_rate": 8.380298512667023e-06,
|
|
"loss": 1.4618515968322754,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 1.0147783251231528,
|
|
"grad_norm": 12.580368500055666,
|
|
"learning_rate": 8.375014767564606e-06,
|
|
"loss": 1.5188508033752441,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 1.0160098522167487,
|
|
"grad_norm": 13.76649655840591,
|
|
"learning_rate": 8.369724089892423e-06,
|
|
"loss": 1.3847301006317139,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 1.0172413793103448,
|
|
"grad_norm": 10.435853268719002,
|
|
"learning_rate": 8.364426490517978e-06,
|
|
"loss": 1.2926149368286133,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 1.0184729064039408,
|
|
"grad_norm": 16.445003227804108,
|
|
"learning_rate": 8.359121980322992e-06,
|
|
"loss": 2.3063907623291016,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 1.019704433497537,
|
|
"grad_norm": 11.557235656795728,
|
|
"learning_rate": 8.353810570203392e-06,
|
|
"loss": 1.8268505334854126,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 1.020935960591133,
|
|
"grad_norm": 14.632274264873946,
|
|
"learning_rate": 8.34849227106926e-06,
|
|
"loss": 1.7018903493881226,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 1.022167487684729,
|
|
"grad_norm": 11.600489411721503,
|
|
"learning_rate": 8.343167093844847e-06,
|
|
"loss": 1.228044867515564,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 1.0233990147783252,
|
|
"grad_norm": 16.088239405853525,
|
|
"learning_rate": 8.337835049468517e-06,
|
|
"loss": 1.8953372240066528,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 1.0246305418719213,
|
|
"grad_norm": 18.96191614490354,
|
|
"learning_rate": 8.332496148892748e-06,
|
|
"loss": 2.2595765590667725,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 1.0258620689655173,
|
|
"grad_norm": 15.40920733163635,
|
|
"learning_rate": 8.327150403084105e-06,
|
|
"loss": 1.9772108793258667,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 1.0270935960591132,
|
|
"grad_norm": 13.682030994380478,
|
|
"learning_rate": 8.321797823023201e-06,
|
|
"loss": 1.6397690773010254,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 1.0283251231527093,
|
|
"grad_norm": 15.155038881668695,
|
|
"learning_rate": 8.3164384197047e-06,
|
|
"loss": 1.8092628717422485,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 1.0295566502463054,
|
|
"grad_norm": 11.138568264810678,
|
|
"learning_rate": 8.311072204137272e-06,
|
|
"loss": 1.4974594116210938,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 1.0307881773399015,
|
|
"grad_norm": 12.21109867389211,
|
|
"learning_rate": 8.305699187343586e-06,
|
|
"loss": 1.6198664903640747,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 1.0320197044334976,
|
|
"grad_norm": 15.324750685835358,
|
|
"learning_rate": 8.300319380360278e-06,
|
|
"loss": 1.3746960163116455,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 1.0332512315270936,
|
|
"grad_norm": 7.824249576144248,
|
|
"learning_rate": 8.294932794237936e-06,
|
|
"loss": 1.6171293258666992,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 1.0344827586206897,
|
|
"grad_norm": 8.892333167572344,
|
|
"learning_rate": 8.289539440041066e-06,
|
|
"loss": 1.569738507270813,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 1.0357142857142858,
|
|
"grad_norm": 11.852198048161208,
|
|
"learning_rate": 8.284139328848083e-06,
|
|
"loss": 1.2823517322540283,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 1.0369458128078817,
|
|
"grad_norm": 8.261136034676777,
|
|
"learning_rate": 8.278732471751275e-06,
|
|
"loss": 1.646303415298462,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 1.0381773399014778,
|
|
"grad_norm": 10.756475200770923,
|
|
"learning_rate": 8.273318879856794e-06,
|
|
"loss": 1.1557375192642212,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 1.0394088669950738,
|
|
"grad_norm": 11.706598803766697,
|
|
"learning_rate": 8.26789856428462e-06,
|
|
"loss": 1.8793773651123047,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 1.04064039408867,
|
|
"grad_norm": 12.96726521358098,
|
|
"learning_rate": 8.262471536168547e-06,
|
|
"loss": 1.8577170372009277,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 1.041871921182266,
|
|
"grad_norm": 9.437922676603566,
|
|
"learning_rate": 8.257037806656156e-06,
|
|
"loss": 1.6104650497436523,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 1.043103448275862,
|
|
"grad_norm": 9.578661144979,
|
|
"learning_rate": 8.251597386908791e-06,
|
|
"loss": 1.5425922870635986,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 1.0443349753694582,
|
|
"grad_norm": 20.263987667471525,
|
|
"learning_rate": 8.246150288101544e-06,
|
|
"loss": 1.681383728981018,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 1.0455665024630543,
|
|
"grad_norm": 13.601576634163374,
|
|
"learning_rate": 8.240696521423221e-06,
|
|
"loss": 1.7646219730377197,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 1.0467980295566504,
|
|
"grad_norm": 7.679649660703675,
|
|
"learning_rate": 8.23523609807633e-06,
|
|
"loss": 1.445223331451416,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 1.0480295566502462,
|
|
"grad_norm": 14.66829985016366,
|
|
"learning_rate": 8.229769029277044e-06,
|
|
"loss": 0.9492518901824951,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 1.0492610837438423,
|
|
"grad_norm": 10.487758371701569,
|
|
"learning_rate": 8.224295326255194e-06,
|
|
"loss": 1.33433997631073,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 1.0504926108374384,
|
|
"grad_norm": 10.533804685248148,
|
|
"learning_rate": 8.218815000254233e-06,
|
|
"loss": 1.712221384048462,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 1.0517241379310345,
|
|
"grad_norm": 9.208819021387981,
|
|
"learning_rate": 8.213328062531223e-06,
|
|
"loss": 2.256254196166992,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 1.0529556650246306,
|
|
"grad_norm": 20.4330836347585,
|
|
"learning_rate": 8.207834524356804e-06,
|
|
"loss": 1.1827871799468994,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 1.0541871921182266,
|
|
"grad_norm": 16.459676535775454,
|
|
"learning_rate": 8.202334397015173e-06,
|
|
"loss": 1.831944465637207,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 1.0554187192118227,
|
|
"grad_norm": 9.540607740889314,
|
|
"learning_rate": 8.196827691804066e-06,
|
|
"loss": 1.4239716529846191,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 1.0566502463054188,
|
|
"grad_norm": 8.826612392912715,
|
|
"learning_rate": 8.191314420034728e-06,
|
|
"loss": 1.4468379020690918,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 1.0578817733990147,
|
|
"grad_norm": 11.710928299860754,
|
|
"learning_rate": 8.185794593031889e-06,
|
|
"loss": 1.5082018375396729,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 1.0591133004926108,
|
|
"grad_norm": 11.098469341339896,
|
|
"learning_rate": 8.180268222133748e-06,
|
|
"loss": 1.7838118076324463,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 1.0603448275862069,
|
|
"grad_norm": 14.517325254327519,
|
|
"learning_rate": 8.174735318691946e-06,
|
|
"loss": 2.0072226524353027,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 1.061576354679803,
|
|
"grad_norm": 15.816554295123568,
|
|
"learning_rate": 8.16919589407154e-06,
|
|
"loss": 1.521295189857483,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 1.062807881773399,
|
|
"grad_norm": 10.07588615463877,
|
|
"learning_rate": 8.163649959650983e-06,
|
|
"loss": 1.790357232093811,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 1.064039408866995,
|
|
"grad_norm": 12.92318973646725,
|
|
"learning_rate": 8.1580975268221e-06,
|
|
"loss": 1.602294683456421,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 1.0652709359605912,
|
|
"grad_norm": 16.86268483373184,
|
|
"learning_rate": 8.152538606990065e-06,
|
|
"loss": 1.4220796823501587,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 1.0665024630541873,
|
|
"grad_norm": 8.194415784575718,
|
|
"learning_rate": 8.146973211573378e-06,
|
|
"loss": 1.5728261470794678,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 1.0677339901477834,
|
|
"grad_norm": 9.338981810977407,
|
|
"learning_rate": 8.141401352003834e-06,
|
|
"loss": 1.4759845733642578,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 1.0689655172413792,
|
|
"grad_norm": 13.09579029321424,
|
|
"learning_rate": 8.135823039726513e-06,
|
|
"loss": 1.0524405241012573,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 1.0701970443349753,
|
|
"grad_norm": 11.844876838448121,
|
|
"learning_rate": 8.130238286199747e-06,
|
|
"loss": 1.538460373878479,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 1.0714285714285714,
|
|
"grad_norm": 14.772231246122598,
|
|
"learning_rate": 8.124647102895098e-06,
|
|
"loss": 1.1455146074295044,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 1.0726600985221675,
|
|
"grad_norm": 6.428068633502984,
|
|
"learning_rate": 8.119049501297336e-06,
|
|
"loss": 1.5209722518920898,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 1.0738916256157636,
|
|
"grad_norm": 8.28556104097166,
|
|
"learning_rate": 8.113445492904416e-06,
|
|
"loss": 1.359959602355957,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 1.0751231527093597,
|
|
"grad_norm": 17.73488508571987,
|
|
"learning_rate": 8.107835089227446e-06,
|
|
"loss": 0.7508935928344727,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 1.0763546798029557,
|
|
"grad_norm": 11.851747710913228,
|
|
"learning_rate": 8.102218301790686e-06,
|
|
"loss": 1.1200660467147827,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 1.0775862068965518,
|
|
"grad_norm": 19.474238137735632,
|
|
"learning_rate": 8.096595142131491e-06,
|
|
"loss": 1.4502555131912231,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 1.0788177339901477,
|
|
"grad_norm": 15.231876740076657,
|
|
"learning_rate": 8.090965621800317e-06,
|
|
"loss": 1.4533472061157227,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 1.0800492610837438,
|
|
"grad_norm": 11.532100577512736,
|
|
"learning_rate": 8.085329752360683e-06,
|
|
"loss": 1.3467981815338135,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 1.0812807881773399,
|
|
"grad_norm": 13.292362259628844,
|
|
"learning_rate": 8.079687545389144e-06,
|
|
"loss": 1.5720915794372559,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 1.082512315270936,
|
|
"grad_norm": 9.912980730028881,
|
|
"learning_rate": 8.074039012475277e-06,
|
|
"loss": 0.9794504642486572,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 1.083743842364532,
|
|
"grad_norm": 13.363222552608596,
|
|
"learning_rate": 8.068384165221657e-06,
|
|
"loss": 1.8581080436706543,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 1.0849753694581281,
|
|
"grad_norm": 11.004102766432679,
|
|
"learning_rate": 8.062723015243821e-06,
|
|
"loss": 1.5307658910751343,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 1.0862068965517242,
|
|
"grad_norm": 18.014628524050508,
|
|
"learning_rate": 8.05705557417026e-06,
|
|
"loss": 2.7890782356262207,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 1.0874384236453203,
|
|
"grad_norm": 14.288061386453462,
|
|
"learning_rate": 8.051381853642385e-06,
|
|
"loss": 1.7938904762268066,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 1.0886699507389164,
|
|
"grad_norm": 10.969422494881371,
|
|
"learning_rate": 8.0457018653145e-06,
|
|
"loss": 1.7228388786315918,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 1.0899014778325122,
|
|
"grad_norm": 12.323796763628843,
|
|
"learning_rate": 8.04001562085379e-06,
|
|
"loss": 1.2761911153793335,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 1.0911330049261083,
|
|
"grad_norm": 14.027385869484647,
|
|
"learning_rate": 8.034323131940288e-06,
|
|
"loss": 1.2001762390136719,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 1.0923645320197044,
|
|
"grad_norm": 14.618738176876956,
|
|
"learning_rate": 8.028624410266856e-06,
|
|
"loss": 1.0602792501449585,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 1.0935960591133005,
|
|
"grad_norm": 11.93157233511751,
|
|
"learning_rate": 8.022919467539157e-06,
|
|
"loss": 1.6093053817749023,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 1.0948275862068966,
|
|
"grad_norm": 10.808992515441345,
|
|
"learning_rate": 8.017208315475633e-06,
|
|
"loss": 1.3845837116241455,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 1.0960591133004927,
|
|
"grad_norm": 12.467752533525676,
|
|
"learning_rate": 8.011490965807479e-06,
|
|
"loss": 1.170523762702942,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 1.0972906403940887,
|
|
"grad_norm": 17.336013797078692,
|
|
"learning_rate": 8.005767430278619e-06,
|
|
"loss": 2.2524640560150146,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 1.0985221674876848,
|
|
"grad_norm": 15.86628802074285,
|
|
"learning_rate": 8.00003772064569e-06,
|
|
"loss": 1.900492787361145,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 1.0997536945812807,
|
|
"grad_norm": 19.413325130840665,
|
|
"learning_rate": 7.994301848678006e-06,
|
|
"loss": 1.9371180534362793,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 1.1009852216748768,
|
|
"grad_norm": 4.577148785717797,
|
|
"learning_rate": 7.98855982615754e-06,
|
|
"loss": 0.5737314224243164,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 1.1022167487684729,
|
|
"grad_norm": 10.864604119199031,
|
|
"learning_rate": 7.982811664878897e-06,
|
|
"loss": 1.9806501865386963,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 1.103448275862069,
|
|
"grad_norm": 8.224536911257772,
|
|
"learning_rate": 7.977057376649295e-06,
|
|
"loss": 1.0362755060195923,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 1.104679802955665,
|
|
"grad_norm": 13.847190655637428,
|
|
"learning_rate": 7.971296973288534e-06,
|
|
"loss": 1.70633864402771,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 1.1059113300492611,
|
|
"grad_norm": 11.90483842365472,
|
|
"learning_rate": 7.965530466628977e-06,
|
|
"loss": 1.787100911140442,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 1.1071428571428572,
|
|
"grad_norm": 7.493522717607931,
|
|
"learning_rate": 7.959757868515526e-06,
|
|
"loss": 1.725630283355713,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 1.1083743842364533,
|
|
"grad_norm": 12.386314393672189,
|
|
"learning_rate": 7.953979190805587e-06,
|
|
"loss": 1.216347575187683,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 1.1096059113300494,
|
|
"grad_norm": 13.629660364524488,
|
|
"learning_rate": 7.948194445369065e-06,
|
|
"loss": 1.4683033227920532,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 1.1108374384236452,
|
|
"grad_norm": 9.487923792239608,
|
|
"learning_rate": 7.942403644088319e-06,
|
|
"loss": 1.1516010761260986,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 1.1120689655172413,
|
|
"grad_norm": 10.340810165841779,
|
|
"learning_rate": 7.936606798858154e-06,
|
|
"loss": 1.9040346145629883,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 1.1133004926108374,
|
|
"grad_norm": 10.742162155829218,
|
|
"learning_rate": 7.930803921585787e-06,
|
|
"loss": 1.3092480897903442,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 1.1145320197044335,
|
|
"grad_norm": 16.471340717748625,
|
|
"learning_rate": 7.924995024190825e-06,
|
|
"loss": 1.5384130477905273,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 1.1157635467980296,
|
|
"grad_norm": 11.414793353837775,
|
|
"learning_rate": 7.91918011860524e-06,
|
|
"loss": 1.537634015083313,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 1.1169950738916257,
|
|
"grad_norm": 12.176064899819426,
|
|
"learning_rate": 7.91335921677335e-06,
|
|
"loss": 1.7487473487854004,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 1.1182266009852218,
|
|
"grad_norm": 12.781345279460623,
|
|
"learning_rate": 7.907532330651784e-06,
|
|
"loss": 2.079786539077759,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 1.1194581280788178,
|
|
"grad_norm": 10.30058954805613,
|
|
"learning_rate": 7.901699472209467e-06,
|
|
"loss": 1.8143104314804077,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 1.1206896551724137,
|
|
"grad_norm": 15.820572235657158,
|
|
"learning_rate": 7.89586065342759e-06,
|
|
"loss": 1.532914161682129,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 1.1219211822660098,
|
|
"grad_norm": 26.078680608781927,
|
|
"learning_rate": 7.890015886299587e-06,
|
|
"loss": 1.2643623352050781,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 1.1231527093596059,
|
|
"grad_norm": 15.92927259283418,
|
|
"learning_rate": 7.884165182831112e-06,
|
|
"loss": 1.9245643615722656,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 1.124384236453202,
|
|
"grad_norm": 8.730585299979154,
|
|
"learning_rate": 7.878308555040012e-06,
|
|
"loss": 1.7177766561508179,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 1.125615763546798,
|
|
"grad_norm": 13.722962990198047,
|
|
"learning_rate": 7.872446014956302e-06,
|
|
"loss": 1.8152745962142944,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 1.1268472906403941,
|
|
"grad_norm": 12.040054937289696,
|
|
"learning_rate": 7.86657757462214e-06,
|
|
"loss": 1.1599400043487549,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 1.1280788177339902,
|
|
"grad_norm": 17.03991328119548,
|
|
"learning_rate": 7.860703246091808e-06,
|
|
"loss": 2.191415786743164,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 1.1293103448275863,
|
|
"grad_norm": 8.884816055359531,
|
|
"learning_rate": 7.85482304143168e-06,
|
|
"loss": 1.395401120185852,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 1.1305418719211824,
|
|
"grad_norm": 10.016142876641439,
|
|
"learning_rate": 7.848936972720203e-06,
|
|
"loss": 1.3161064386367798,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 1.1317733990147782,
|
|
"grad_norm": 10.950651931490869,
|
|
"learning_rate": 7.843045052047863e-06,
|
|
"loss": 1.1442368030548096,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 1.1330049261083743,
|
|
"grad_norm": 11.684566217639523,
|
|
"learning_rate": 7.837147291517172e-06,
|
|
"loss": 1.7718126773834229,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 1.1342364532019704,
|
|
"grad_norm": 38.19632435773612,
|
|
"learning_rate": 7.831243703242636e-06,
|
|
"loss": 0.8722761869430542,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 1.1354679802955665,
|
|
"grad_norm": 13.481663274756508,
|
|
"learning_rate": 7.825334299350733e-06,
|
|
"loss": 1.5427806377410889,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 1.1366995073891626,
|
|
"grad_norm": 12.916623808621747,
|
|
"learning_rate": 7.819419091979884e-06,
|
|
"loss": 1.1668936014175415,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 1.1379310344827587,
|
|
"grad_norm": 33.988394562573184,
|
|
"learning_rate": 7.813498093280432e-06,
|
|
"loss": 1.1266424655914307,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 1.1391625615763548,
|
|
"grad_norm": 12.20456485780647,
|
|
"learning_rate": 7.807571315414616e-06,
|
|
"loss": 1.493699550628662,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 1.1403940886699506,
|
|
"grad_norm": 11.501099824006364,
|
|
"learning_rate": 7.801638770556547e-06,
|
|
"loss": 1.6297705173492432,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 1.1416256157635467,
|
|
"grad_norm": 15.624448888450939,
|
|
"learning_rate": 7.795700470892177e-06,
|
|
"loss": 2.0215024948120117,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 1.1428571428571428,
|
|
"grad_norm": 16.250949070025708,
|
|
"learning_rate": 7.78975642861929e-06,
|
|
"loss": 1.6887433528900146,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 1.1440886699507389,
|
|
"grad_norm": 11.317008900299918,
|
|
"learning_rate": 7.783806655947454e-06,
|
|
"loss": 1.3021103143692017,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 1.145320197044335,
|
|
"grad_norm": 18.00432398689311,
|
|
"learning_rate": 7.777851165098012e-06,
|
|
"loss": 1.2565847635269165,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 1.146551724137931,
|
|
"grad_norm": 12.425268826770786,
|
|
"learning_rate": 7.771889968304054e-06,
|
|
"loss": 2.616732358932495,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 1.1477832512315271,
|
|
"grad_norm": 8.224670550968264,
|
|
"learning_rate": 7.765923077810389e-06,
|
|
"loss": 1.4130675792694092,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 1.1490147783251232,
|
|
"grad_norm": 10.969684493935905,
|
|
"learning_rate": 7.759950505873523e-06,
|
|
"loss": 1.4476386308670044,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 1.1502463054187193,
|
|
"grad_norm": 11.651048950094761,
|
|
"learning_rate": 7.753972264761629e-06,
|
|
"loss": 2.25156307220459,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 1.1514778325123154,
|
|
"grad_norm": 8.613574530576384,
|
|
"learning_rate": 7.747988366754529e-06,
|
|
"loss": 1.5051602125167847,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 1.1527093596059113,
|
|
"grad_norm": 7.732488282674765,
|
|
"learning_rate": 7.74199882414366e-06,
|
|
"loss": 1.6275739669799805,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 1.1539408866995073,
|
|
"grad_norm": 8.808852629450387,
|
|
"learning_rate": 7.736003649232058e-06,
|
|
"loss": 1.595947504043579,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 1.1551724137931034,
|
|
"grad_norm": 9.458208308368622,
|
|
"learning_rate": 7.730002854334328e-06,
|
|
"loss": 1.4467124938964844,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 1.1564039408866995,
|
|
"grad_norm": 9.214195809195965,
|
|
"learning_rate": 7.723996451776615e-06,
|
|
"loss": 1.2888911962509155,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 1.1576354679802956,
|
|
"grad_norm": 9.788392349003187,
|
|
"learning_rate": 7.717984453896585e-06,
|
|
"loss": 1.2005081176757812,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 1.1588669950738917,
|
|
"grad_norm": 13.47176609715776,
|
|
"learning_rate": 7.711966873043396e-06,
|
|
"loss": 1.5737872123718262,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 1.1600985221674878,
|
|
"grad_norm": 14.995704151739991,
|
|
"learning_rate": 7.705943721577679e-06,
|
|
"loss": 1.929309368133545,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 1.1613300492610836,
|
|
"grad_norm": 17.48600802078703,
|
|
"learning_rate": 7.699915011871502e-06,
|
|
"loss": 1.2395710945129395,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 1.1625615763546797,
|
|
"grad_norm": 17.02963003158409,
|
|
"learning_rate": 7.693880756308349e-06,
|
|
"loss": 1.5058845281600952,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 1.1637931034482758,
|
|
"grad_norm": 9.980347268918823,
|
|
"learning_rate": 7.687840967283102e-06,
|
|
"loss": 1.1811325550079346,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 1.1650246305418719,
|
|
"grad_norm": 10.638678008803145,
|
|
"learning_rate": 7.681795657202004e-06,
|
|
"loss": 1.0631262063980103,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 1.166256157635468,
|
|
"grad_norm": 13.280226823401785,
|
|
"learning_rate": 7.675744838482641e-06,
|
|
"loss": 1.8445112705230713,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 1.167487684729064,
|
|
"grad_norm": 14.581956189852988,
|
|
"learning_rate": 7.669688523553913e-06,
|
|
"loss": 0.4735199511051178,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 1.1687192118226601,
|
|
"grad_norm": 17.412681962110952,
|
|
"learning_rate": 7.66362672485601e-06,
|
|
"loss": 2.7862026691436768,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 1.1699507389162562,
|
|
"grad_norm": 15.996981867868751,
|
|
"learning_rate": 7.657559454840386e-06,
|
|
"loss": 2.1690142154693604,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 1.1711822660098523,
|
|
"grad_norm": 13.46492564795987,
|
|
"learning_rate": 7.651486725969736e-06,
|
|
"loss": 1.7143161296844482,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 1.1724137931034484,
|
|
"grad_norm": 60.546763405202356,
|
|
"learning_rate": 7.645408550717966e-06,
|
|
"loss": 1.5288606882095337,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 1.1736453201970443,
|
|
"grad_norm": 20.830833617022666,
|
|
"learning_rate": 7.639324941570165e-06,
|
|
"loss": 1.8929002285003662,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 1.1748768472906403,
|
|
"grad_norm": 11.758979912185547,
|
|
"learning_rate": 7.633235911022592e-06,
|
|
"loss": 1.5853391885757446,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 1.1761083743842364,
|
|
"grad_norm": 9.321138258104417,
|
|
"learning_rate": 7.627141471582635e-06,
|
|
"loss": 1.1136324405670166,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 1.1773399014778325,
|
|
"grad_norm": 12.598497007373025,
|
|
"learning_rate": 7.6210416357687975e-06,
|
|
"loss": 1.868667721748352,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 1.1785714285714286,
|
|
"grad_norm": 18.119098704002848,
|
|
"learning_rate": 7.614936416110668e-06,
|
|
"loss": 1.5594688653945923,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 1.1798029556650247,
|
|
"grad_norm": 12.510268205050629,
|
|
"learning_rate": 7.6088258251488845e-06,
|
|
"loss": 2.3145830631256104,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 1.1810344827586208,
|
|
"grad_norm": 21.45877658729593,
|
|
"learning_rate": 7.6027098754351306e-06,
|
|
"loss": 1.1473604440689087,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 1.1822660098522166,
|
|
"grad_norm": 14.411977842812997,
|
|
"learning_rate": 7.596588579532087e-06,
|
|
"loss": 2.2835638523101807,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 1.1834975369458127,
|
|
"grad_norm": 10.612962818159787,
|
|
"learning_rate": 7.590461950013424e-06,
|
|
"loss": 1.8787577152252197,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 1.1847290640394088,
|
|
"grad_norm": 14.448843378652771,
|
|
"learning_rate": 7.584329999463763e-06,
|
|
"loss": 2.114804983139038,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 1.185960591133005,
|
|
"grad_norm": 18.66312529631292,
|
|
"learning_rate": 7.578192740478656e-06,
|
|
"loss": 1.288927435874939,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 1.187192118226601,
|
|
"grad_norm": 13.413800953526167,
|
|
"learning_rate": 7.572050185664558e-06,
|
|
"loss": 1.929607629776001,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 1.188423645320197,
|
|
"grad_norm": 33.30553598268168,
|
|
"learning_rate": 7.565902347638806e-06,
|
|
"loss": 0.5397343039512634,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 1.1896551724137931,
|
|
"grad_norm": 22.357001178408265,
|
|
"learning_rate": 7.559749239029584e-06,
|
|
"loss": 1.1908174753189087,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 1.1908866995073892,
|
|
"grad_norm": 12.645033432851402,
|
|
"learning_rate": 7.553590872475909e-06,
|
|
"loss": 1.624518632888794,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 1.1921182266009853,
|
|
"grad_norm": 7.88579724345472,
|
|
"learning_rate": 7.547427260627586e-06,
|
|
"loss": 1.3011376857757568,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 1.1933497536945814,
|
|
"grad_norm": 12.668296763355277,
|
|
"learning_rate": 7.541258416145212e-06,
|
|
"loss": 1.2930490970611572,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 1.1945812807881773,
|
|
"grad_norm": 13.656364437533624,
|
|
"learning_rate": 7.535084351700117e-06,
|
|
"loss": 1.34272038936615,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 1.1958128078817734,
|
|
"grad_norm": 7.953764967047039,
|
|
"learning_rate": 7.528905079974358e-06,
|
|
"loss": 1.2804269790649414,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 1.1970443349753694,
|
|
"grad_norm": 30.30009152991955,
|
|
"learning_rate": 7.522720613660691e-06,
|
|
"loss": 1.7138396501541138,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 1.1982758620689655,
|
|
"grad_norm": 11.304720421109014,
|
|
"learning_rate": 7.5165309654625405e-06,
|
|
"loss": 1.7358574867248535,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 1.1995073891625616,
|
|
"grad_norm": 12.764936977199811,
|
|
"learning_rate": 7.510336148093975e-06,
|
|
"loss": 1.0514552593231201,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 1.2007389162561577,
|
|
"grad_norm": 13.712017805285841,
|
|
"learning_rate": 7.504136174279679e-06,
|
|
"loss": 1.7314313650131226,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 1.2019704433497538,
|
|
"grad_norm": 10.549295388514395,
|
|
"learning_rate": 7.4979310567549315e-06,
|
|
"loss": 1.0069202184677124,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 1.2032019704433496,
|
|
"grad_norm": 11.995004609846932,
|
|
"learning_rate": 7.491720808265576e-06,
|
|
"loss": 1.1851680278778076,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 1.2044334975369457,
|
|
"grad_norm": 9.145447142909285,
|
|
"learning_rate": 7.485505441567995e-06,
|
|
"loss": 1.355776309967041,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 1.2056650246305418,
|
|
"grad_norm": 12.426586307445273,
|
|
"learning_rate": 7.4792849694290846e-06,
|
|
"loss": 1.5034677982330322,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 1.206896551724138,
|
|
"grad_norm": 10.349726791509415,
|
|
"learning_rate": 7.473059404626229e-06,
|
|
"loss": 1.9321900606155396,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 1.208128078817734,
|
|
"grad_norm": 15.998756607416226,
|
|
"learning_rate": 7.466828759947271e-06,
|
|
"loss": 1.4899095296859741,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 1.20935960591133,
|
|
"grad_norm": 9.148483453369403,
|
|
"learning_rate": 7.46059304819049e-06,
|
|
"loss": 1.9984737634658813,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 1.2105911330049262,
|
|
"grad_norm": 14.110455851158502,
|
|
"learning_rate": 7.454352282164572e-06,
|
|
"loss": 1.7756625413894653,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 1.2118226600985222,
|
|
"grad_norm": 14.856359846911952,
|
|
"learning_rate": 7.448106474688588e-06,
|
|
"loss": 1.47117018699646,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 1.2130541871921183,
|
|
"grad_norm": 11.010014718420686,
|
|
"learning_rate": 7.441855638591958e-06,
|
|
"loss": 1.3485603332519531,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 1.2142857142857142,
|
|
"grad_norm": 9.111669104291623,
|
|
"learning_rate": 7.435599786714438e-06,
|
|
"loss": 1.3982055187225342,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 1.2155172413793103,
|
|
"grad_norm": 8.494506145789243,
|
|
"learning_rate": 7.429338931906085e-06,
|
|
"loss": 1.4942795038223267,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 1.2167487684729064,
|
|
"grad_norm": 10.475857134873458,
|
|
"learning_rate": 7.423073087027228e-06,
|
|
"loss": 2.227587938308716,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 1.2179802955665024,
|
|
"grad_norm": 14.131512244457296,
|
|
"learning_rate": 7.416802264948455e-06,
|
|
"loss": 1.523234486579895,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 1.2192118226600985,
|
|
"grad_norm": 26.011485441346537,
|
|
"learning_rate": 7.410526478550568e-06,
|
|
"loss": 3.9873814582824707,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 1.2204433497536946,
|
|
"grad_norm": 8.306933788704631,
|
|
"learning_rate": 7.404245740724573e-06,
|
|
"loss": 1.279615044593811,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 1.2216748768472907,
|
|
"grad_norm": 9.109406755351628,
|
|
"learning_rate": 7.3979600643716435e-06,
|
|
"loss": 0.9347010850906372,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 1.2229064039408868,
|
|
"grad_norm": 8.57513677802596,
|
|
"learning_rate": 7.391669462403096e-06,
|
|
"loss": 1.9017002582550049,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 1.2241379310344827,
|
|
"grad_norm": 10.325069084719962,
|
|
"learning_rate": 7.385373947740369e-06,
|
|
"loss": 1.7247897386550903,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 1.2253694581280787,
|
|
"grad_norm": 13.648497855444653,
|
|
"learning_rate": 7.379073533314988e-06,
|
|
"loss": 0.7111251950263977,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 1.2266009852216748,
|
|
"grad_norm": 10.812707758109589,
|
|
"learning_rate": 7.372768232068544e-06,
|
|
"loss": 0.9086591601371765,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 1.227832512315271,
|
|
"grad_norm": 11.1413160950967,
|
|
"learning_rate": 7.366458056952668e-06,
|
|
"loss": 1.6426423788070679,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 1.229064039408867,
|
|
"grad_norm": 19.358982299314505,
|
|
"learning_rate": 7.360143020929e-06,
|
|
"loss": 1.2501566410064697,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 1.230295566502463,
|
|
"grad_norm": 15.35154457763416,
|
|
"learning_rate": 7.353823136969167e-06,
|
|
"loss": 2.263824939727783,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 1.2315270935960592,
|
|
"grad_norm": 15.502037939673096,
|
|
"learning_rate": 7.34749841805475e-06,
|
|
"loss": 1.3503868579864502,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 1.2327586206896552,
|
|
"grad_norm": 12.387685564521446,
|
|
"learning_rate": 7.341168877177267e-06,
|
|
"loss": 1.2844277620315552,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 1.2339901477832513,
|
|
"grad_norm": 21.028406448646585,
|
|
"learning_rate": 7.3348345273381365e-06,
|
|
"loss": 1.823725700378418,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 1.2352216748768472,
|
|
"grad_norm": 12.53431965462443,
|
|
"learning_rate": 7.328495381548655e-06,
|
|
"loss": 1.8349339962005615,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 1.2364532019704433,
|
|
"grad_norm": 11.75012181314542,
|
|
"learning_rate": 7.322151452829972e-06,
|
|
"loss": 1.431024432182312,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 1.2376847290640394,
|
|
"grad_norm": 7.268447687614364,
|
|
"learning_rate": 7.315802754213062e-06,
|
|
"loss": 0.8406596183776855,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 1.2389162561576355,
|
|
"grad_norm": 16.476664169610704,
|
|
"learning_rate": 7.309449298738696e-06,
|
|
"loss": 1.7037804126739502,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 1.2401477832512315,
|
|
"grad_norm": 10.719400575974607,
|
|
"learning_rate": 7.303091099457418e-06,
|
|
"loss": 1.4264461994171143,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 1.2413793103448276,
|
|
"grad_norm": 11.634717084876037,
|
|
"learning_rate": 7.296728169429511e-06,
|
|
"loss": 2.502678632736206,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 1.2426108374384237,
|
|
"grad_norm": 9.436373278027489,
|
|
"learning_rate": 7.290360521724984e-06,
|
|
"loss": 1.5582114458084106,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 1.2438423645320198,
|
|
"grad_norm": 10.373164591549747,
|
|
"learning_rate": 7.283988169423526e-06,
|
|
"loss": 1.494875192642212,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 1.2450738916256157,
|
|
"grad_norm": 13.031187040858585,
|
|
"learning_rate": 7.277611125614499e-06,
|
|
"loss": 1.886913776397705,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 1.2463054187192117,
|
|
"grad_norm": 19.92471933345498,
|
|
"learning_rate": 7.271229403396896e-06,
|
|
"loss": 1.8913657665252686,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 1.2475369458128078,
|
|
"grad_norm": 21.8856932814209,
|
|
"learning_rate": 7.264843015879321e-06,
|
|
"loss": 1.1614234447479248,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 1.248768472906404,
|
|
"grad_norm": 11.581317439717322,
|
|
"learning_rate": 7.258451976179967e-06,
|
|
"loss": 1.6838147640228271,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 14.274704649607155,
|
|
"learning_rate": 7.25205629742657e-06,
|
|
"loss": 1.1039239168167114,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 1.251231527093596,
|
|
"grad_norm": 10.222730157124893,
|
|
"learning_rate": 7.245655992756406e-06,
|
|
"loss": 1.519346833229065,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 1.2524630541871922,
|
|
"grad_norm": 8.325249693832719,
|
|
"learning_rate": 7.2392510753162516e-06,
|
|
"loss": 1.0175197124481201,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 1.2536945812807883,
|
|
"grad_norm": 12.766382857494223,
|
|
"learning_rate": 7.232841558262354e-06,
|
|
"loss": 0.9778202772140503,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 1.2549261083743843,
|
|
"grad_norm": 17.499343558391605,
|
|
"learning_rate": 7.226427454760412e-06,
|
|
"loss": 1.8379024267196655,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 1.2561576354679804,
|
|
"grad_norm": 11.150234617545141,
|
|
"learning_rate": 7.2200087779855435e-06,
|
|
"loss": 1.8412721157073975,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 1.2573891625615763,
|
|
"grad_norm": 8.992400726896724,
|
|
"learning_rate": 7.213585541122261e-06,
|
|
"loss": 1.8508501052856445,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 1.2586206896551724,
|
|
"grad_norm": 12.44309006439825,
|
|
"learning_rate": 7.207157757364445e-06,
|
|
"loss": 1.3070871829986572,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 1.2598522167487685,
|
|
"grad_norm": 12.840031276685824,
|
|
"learning_rate": 7.200725439915314e-06,
|
|
"loss": 2.1278223991394043,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 1.2610837438423645,
|
|
"grad_norm": 8.633495704921142,
|
|
"learning_rate": 7.194288601987398e-06,
|
|
"loss": 1.0636892318725586,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 1.2623152709359606,
|
|
"grad_norm": 10.874767223460788,
|
|
"learning_rate": 7.187847256802518e-06,
|
|
"loss": 1.7365200519561768,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 1.2635467980295567,
|
|
"grad_norm": 12.21472476387578,
|
|
"learning_rate": 7.181401417591746e-06,
|
|
"loss": 1.792116403579712,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 1.2647783251231526,
|
|
"grad_norm": 8.787411821208611,
|
|
"learning_rate": 7.174951097595389e-06,
|
|
"loss": 1.3348667621612549,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 1.2660098522167487,
|
|
"grad_norm": 17.72872801553084,
|
|
"learning_rate": 7.168496310062959e-06,
|
|
"loss": 1.677919626235962,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 1.2672413793103448,
|
|
"grad_norm": 13.283913596324016,
|
|
"learning_rate": 7.162037068253141e-06,
|
|
"loss": 1.1518199443817139,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 1.2684729064039408,
|
|
"grad_norm": 7.98681967422814,
|
|
"learning_rate": 7.155573385433772e-06,
|
|
"loss": 2.1126716136932373,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 1.269704433497537,
|
|
"grad_norm": 11.20695829302969,
|
|
"learning_rate": 7.149105274881815e-06,
|
|
"loss": 1.3222094774246216,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 1.270935960591133,
|
|
"grad_norm": 9.408024877970139,
|
|
"learning_rate": 7.1426327498833174e-06,
|
|
"loss": 0.8843763470649719,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 1.272167487684729,
|
|
"grad_norm": 18.111033872908873,
|
|
"learning_rate": 7.136155823733405e-06,
|
|
"loss": 1.3091545104980469,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 1.2733990147783252,
|
|
"grad_norm": 11.598349915801498,
|
|
"learning_rate": 7.129674509736237e-06,
|
|
"loss": 1.4408364295959473,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 1.2746305418719213,
|
|
"grad_norm": 17.074081488403696,
|
|
"learning_rate": 7.12318882120499e-06,
|
|
"loss": 1.330906867980957,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 1.2758620689655173,
|
|
"grad_norm": 11.931439673872655,
|
|
"learning_rate": 7.116698771461825e-06,
|
|
"loss": 1.9561724662780762,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 1.2770935960591134,
|
|
"grad_norm": 14.506364150634404,
|
|
"learning_rate": 7.110204373837857e-06,
|
|
"loss": 2.185842275619507,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 1.2783251231527093,
|
|
"grad_norm": 8.783423067272876,
|
|
"learning_rate": 7.1037056416731395e-06,
|
|
"loss": 1.724360466003418,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 1.2795566502463054,
|
|
"grad_norm": 10.548795738669158,
|
|
"learning_rate": 7.097202588316625e-06,
|
|
"loss": 1.179841160774231,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 1.2807881773399015,
|
|
"grad_norm": 14.968187776502731,
|
|
"learning_rate": 7.090695227126141e-06,
|
|
"loss": 1.6783604621887207,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 1.2820197044334976,
|
|
"grad_norm": 10.70366989067169,
|
|
"learning_rate": 7.084183571468368e-06,
|
|
"loss": 1.761925220489502,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 1.2832512315270936,
|
|
"grad_norm": 12.9020971876039,
|
|
"learning_rate": 7.077667634718801e-06,
|
|
"loss": 0.9297729134559631,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 1.2844827586206897,
|
|
"grad_norm": 12.446847341840494,
|
|
"learning_rate": 7.071147430261738e-06,
|
|
"loss": 1.6091060638427734,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 8.238449521430923,
|
|
"learning_rate": 7.064622971490234e-06,
|
|
"loss": 1.280853509902954,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 1.2869458128078817,
|
|
"grad_norm": 10.190528956891907,
|
|
"learning_rate": 7.058094271806091e-06,
|
|
"loss": 2.4095635414123535,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 1.2881773399014778,
|
|
"grad_norm": 12.210698142217534,
|
|
"learning_rate": 7.051561344619814e-06,
|
|
"loss": 1.7969441413879395,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 1.2894088669950738,
|
|
"grad_norm": 8.968258930303262,
|
|
"learning_rate": 7.045024203350598e-06,
|
|
"loss": 2.4331698417663574,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 1.29064039408867,
|
|
"grad_norm": 9.034111830970843,
|
|
"learning_rate": 7.0384828614262905e-06,
|
|
"loss": 1.336733341217041,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 1.291871921182266,
|
|
"grad_norm": 9.358643506315515,
|
|
"learning_rate": 7.031937332283367e-06,
|
|
"loss": 1.2959213256835938,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 1.293103448275862,
|
|
"grad_norm": 15.177096960870495,
|
|
"learning_rate": 7.025387629366912e-06,
|
|
"loss": 1.0095289945602417,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 1.2943349753694582,
|
|
"grad_norm": 8.708668143059782,
|
|
"learning_rate": 7.018833766130571e-06,
|
|
"loss": 1.8314733505249023,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 1.2955665024630543,
|
|
"grad_norm": 12.10925693324793,
|
|
"learning_rate": 7.012275756036544e-06,
|
|
"loss": 1.121436595916748,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 1.2967980295566504,
|
|
"grad_norm": 20.569530418297486,
|
|
"learning_rate": 7.0057136125555456e-06,
|
|
"loss": 1.5652289390563965,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 1.2980295566502464,
|
|
"grad_norm": 14.018717429311812,
|
|
"learning_rate": 6.999147349166779e-06,
|
|
"loss": 1.1146215200424194,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 1.2992610837438423,
|
|
"grad_norm": 17.232932273490494,
|
|
"learning_rate": 6.9925769793579165e-06,
|
|
"loss": 2.400024175643921,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 1.3004926108374384,
|
|
"grad_norm": 11.12761938883381,
|
|
"learning_rate": 6.986002516625058e-06,
|
|
"loss": 1.7114648818969727,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 1.3017241379310345,
|
|
"grad_norm": 10.072038004871871,
|
|
"learning_rate": 6.979423974472714e-06,
|
|
"loss": 1.5338797569274902,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 1.3029556650246306,
|
|
"grad_norm": 8.812025010262357,
|
|
"learning_rate": 6.972841366413777e-06,
|
|
"loss": 1.078460931777954,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 1.3041871921182266,
|
|
"grad_norm": 11.356722343645167,
|
|
"learning_rate": 6.966254705969484e-06,
|
|
"loss": 1.5467915534973145,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 1.3054187192118227,
|
|
"grad_norm": 14.67705148794403,
|
|
"learning_rate": 6.959664006669404e-06,
|
|
"loss": 1.2715568542480469,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 1.3066502463054186,
|
|
"grad_norm": 8.890913561904203,
|
|
"learning_rate": 6.953069282051397e-06,
|
|
"loss": 1.887066125869751,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 1.3078817733990147,
|
|
"grad_norm": 10.182269397064065,
|
|
"learning_rate": 6.946470545661593e-06,
|
|
"loss": 1.419116497039795,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 1.3091133004926108,
|
|
"grad_norm": 8.361662711059678,
|
|
"learning_rate": 6.939867811054365e-06,
|
|
"loss": 1.3843079805374146,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 1.3103448275862069,
|
|
"grad_norm": 27.704350160970165,
|
|
"learning_rate": 6.9332610917922915e-06,
|
|
"loss": 2.5894885063171387,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 1.311576354679803,
|
|
"grad_norm": 16.17688431061018,
|
|
"learning_rate": 6.9266504014461425e-06,
|
|
"loss": 1.6600944995880127,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 1.312807881773399,
|
|
"grad_norm": 18.474330510936614,
|
|
"learning_rate": 6.920035753594845e-06,
|
|
"loss": 1.7698057889938354,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 1.314039408866995,
|
|
"grad_norm": 9.914676123570585,
|
|
"learning_rate": 6.913417161825449e-06,
|
|
"loss": 1.5610848665237427,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 1.3152709359605912,
|
|
"grad_norm": 8.489359998020161,
|
|
"learning_rate": 6.906794639733114e-06,
|
|
"loss": 1.6380643844604492,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 1.3165024630541873,
|
|
"grad_norm": 8.9532327938231,
|
|
"learning_rate": 6.900168200921065e-06,
|
|
"loss": 1.390014410018921,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 1.3177339901477834,
|
|
"grad_norm": 10.45013795003969,
|
|
"learning_rate": 6.893537859000576e-06,
|
|
"loss": 1.6589158773422241,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 1.3189655172413794,
|
|
"grad_norm": 12.436644147912617,
|
|
"learning_rate": 6.886903627590938e-06,
|
|
"loss": 1.5524673461914062,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 1.3201970443349753,
|
|
"grad_norm": 12.240484798983633,
|
|
"learning_rate": 6.880265520319434e-06,
|
|
"loss": 2.0204474925994873,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 1.3214285714285714,
|
|
"grad_norm": 10.928634620934101,
|
|
"learning_rate": 6.8736235508213024e-06,
|
|
"loss": 1.7947957515716553,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 1.3226600985221675,
|
|
"grad_norm": 12.192004015491179,
|
|
"learning_rate": 6.866977732739719e-06,
|
|
"loss": 1.6154756546020508,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 1.3238916256157636,
|
|
"grad_norm": 10.239608872921218,
|
|
"learning_rate": 6.860328079725764e-06,
|
|
"loss": 1.419677734375,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 1.3251231527093597,
|
|
"grad_norm": 11.490298083513249,
|
|
"learning_rate": 6.853674605438395e-06,
|
|
"loss": 2.2221052646636963,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 1.3263546798029557,
|
|
"grad_norm": 10.796599749157496,
|
|
"learning_rate": 6.84701732354442e-06,
|
|
"loss": 1.6474840641021729,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 1.3275862068965516,
|
|
"grad_norm": 16.05723789346112,
|
|
"learning_rate": 6.840356247718466e-06,
|
|
"loss": 2.035231828689575,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 1.3288177339901477,
|
|
"grad_norm": 12.127949373836048,
|
|
"learning_rate": 6.8336913916429515e-06,
|
|
"loss": 1.5675947666168213,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 1.3300492610837438,
|
|
"grad_norm": 12.561351822867852,
|
|
"learning_rate": 6.827022769008068e-06,
|
|
"loss": 1.2241394519805908,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 1.3312807881773399,
|
|
"grad_norm": 10.606640209072971,
|
|
"learning_rate": 6.820350393511732e-06,
|
|
"loss": 1.3507403135299683,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 1.332512315270936,
|
|
"grad_norm": 23.44696719245062,
|
|
"learning_rate": 6.81367427885958e-06,
|
|
"loss": 2.256551504135132,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 1.333743842364532,
|
|
"grad_norm": 17.90054749002111,
|
|
"learning_rate": 6.806994438764922e-06,
|
|
"loss": 1.6412163972854614,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 1.3349753694581281,
|
|
"grad_norm": 10.747816339677435,
|
|
"learning_rate": 6.8003108869487225e-06,
|
|
"loss": 1.500988483428955,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 1.3362068965517242,
|
|
"grad_norm": 8.86240548184895,
|
|
"learning_rate": 6.79362363713957e-06,
|
|
"loss": 1.4661070108413696,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 1.3374384236453203,
|
|
"grad_norm": 9.325455271074935,
|
|
"learning_rate": 6.786932703073648e-06,
|
|
"loss": 1.42755126953125,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 1.3386699507389164,
|
|
"grad_norm": 14.863538954404982,
|
|
"learning_rate": 6.780238098494711e-06,
|
|
"loss": 1.165806531906128,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 1.3399014778325122,
|
|
"grad_norm": 21.9332846077213,
|
|
"learning_rate": 6.773539837154051e-06,
|
|
"loss": 1.3795387744903564,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 1.3411330049261083,
|
|
"grad_norm": 15.064922882268542,
|
|
"learning_rate": 6.766837932810468e-06,
|
|
"loss": 1.3203850984573364,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 1.3423645320197044,
|
|
"grad_norm": 12.791071147567429,
|
|
"learning_rate": 6.7601323992302525e-06,
|
|
"loss": 1.645883321762085,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 1.3435960591133005,
|
|
"grad_norm": 8.072143933965927,
|
|
"learning_rate": 6.7534232501871425e-06,
|
|
"loss": 1.6904821395874023,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 1.3448275862068966,
|
|
"grad_norm": 8.711589751937055,
|
|
"learning_rate": 6.7467104994623066e-06,
|
|
"loss": 1.332162618637085,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 1.3460591133004927,
|
|
"grad_norm": 9.451447429997234,
|
|
"learning_rate": 6.7399941608443096e-06,
|
|
"loss": 1.4389145374298096,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 1.3472906403940887,
|
|
"grad_norm": 7.323937666452591,
|
|
"learning_rate": 6.733274248129089e-06,
|
|
"loss": 1.6597908735275269,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 1.3485221674876846,
|
|
"grad_norm": 17.883843051775344,
|
|
"learning_rate": 6.72655077511992e-06,
|
|
"loss": 0.9520257711410522,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 1.3497536945812807,
|
|
"grad_norm": 11.223594087909252,
|
|
"learning_rate": 6.719823755627393e-06,
|
|
"loss": 1.4488117694854736,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 1.3509852216748768,
|
|
"grad_norm": 7.977177991617555,
|
|
"learning_rate": 6.713093203469384e-06,
|
|
"loss": 1.5133984088897705,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 1.3522167487684729,
|
|
"grad_norm": 8.682066451366055,
|
|
"learning_rate": 6.7063591324710234e-06,
|
|
"loss": 1.846522569656372,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 1.353448275862069,
|
|
"grad_norm": 12.792486675857687,
|
|
"learning_rate": 6.6996215564646705e-06,
|
|
"loss": 0.9724826812744141,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 1.354679802955665,
|
|
"grad_norm": 11.989074062954435,
|
|
"learning_rate": 6.692880489289885e-06,
|
|
"loss": 1.24728262424469,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 1.3559113300492611,
|
|
"grad_norm": 22.734635359059652,
|
|
"learning_rate": 6.686135944793395e-06,
|
|
"loss": 1.5332872867584229,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 1.3571428571428572,
|
|
"grad_norm": 11.645074036110657,
|
|
"learning_rate": 6.679387936829076e-06,
|
|
"loss": 1.5978163480758667,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 1.3583743842364533,
|
|
"grad_norm": 9.223736434919791,
|
|
"learning_rate": 6.672636479257912e-06,
|
|
"loss": 2.05710506439209,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 1.3596059113300494,
|
|
"grad_norm": 11.48041589458668,
|
|
"learning_rate": 6.665881585947981e-06,
|
|
"loss": 1.667812466621399,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 1.3608374384236452,
|
|
"grad_norm": 18.141176793209265,
|
|
"learning_rate": 6.659123270774406e-06,
|
|
"loss": 1.3053381443023682,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 1.3620689655172413,
|
|
"grad_norm": 11.11014263526773,
|
|
"learning_rate": 6.652361547619352e-06,
|
|
"loss": 1.5228716135025024,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 1.3633004926108374,
|
|
"grad_norm": 11.869708221541034,
|
|
"learning_rate": 6.645596430371976e-06,
|
|
"loss": 1.3818378448486328,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 1.3645320197044335,
|
|
"grad_norm": 11.298030039811758,
|
|
"learning_rate": 6.6388279329284065e-06,
|
|
"loss": 1.217841386795044,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 1.3657635467980296,
|
|
"grad_norm": 21.11595250544298,
|
|
"learning_rate": 6.632056069191723e-06,
|
|
"loss": 1.4309210777282715,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 1.3669950738916257,
|
|
"grad_norm": 13.7021684816084,
|
|
"learning_rate": 6.6252808530719095e-06,
|
|
"loss": 1.3015059232711792,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 1.3682266009852218,
|
|
"grad_norm": 11.973457349226296,
|
|
"learning_rate": 6.618502298485844e-06,
|
|
"loss": 1.2734256982803345,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 1.3694581280788176,
|
|
"grad_norm": 15.830227785424638,
|
|
"learning_rate": 6.611720419357257e-06,
|
|
"loss": 1.907172441482544,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 1.3706896551724137,
|
|
"grad_norm": 10.756653422484252,
|
|
"learning_rate": 6.604935229616711e-06,
|
|
"loss": 1.1207606792449951,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 1.3719211822660098,
|
|
"grad_norm": 12.736281126843005,
|
|
"learning_rate": 6.598146743201568e-06,
|
|
"loss": 2.3231239318847656,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 1.3731527093596059,
|
|
"grad_norm": 11.597483205953116,
|
|
"learning_rate": 6.5913549740559606e-06,
|
|
"loss": 1.1395865678787231,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 1.374384236453202,
|
|
"grad_norm": 14.754486017260728,
|
|
"learning_rate": 6.584559936130763e-06,
|
|
"loss": 3.1981747150421143,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 1.375615763546798,
|
|
"grad_norm": 12.874438415282308,
|
|
"learning_rate": 6.57776164338357e-06,
|
|
"loss": 1.7495319843292236,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 1.3768472906403941,
|
|
"grad_norm": 12.611228408009778,
|
|
"learning_rate": 6.570960109778655e-06,
|
|
"loss": 1.3304778337478638,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 1.3780788177339902,
|
|
"grad_norm": 11.84441441686591,
|
|
"learning_rate": 6.564155349286952e-06,
|
|
"loss": 1.6510775089263916,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 1.3793103448275863,
|
|
"grad_norm": 13.996316648052032,
|
|
"learning_rate": 6.557347375886022e-06,
|
|
"loss": 1.3382967710494995,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 1.3805418719211824,
|
|
"grad_norm": 11.351524045305764,
|
|
"learning_rate": 6.550536203560028e-06,
|
|
"loss": 1.418992042541504,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 1.3817733990147782,
|
|
"grad_norm": 16.848897992260934,
|
|
"learning_rate": 6.543721846299701e-06,
|
|
"loss": 1.4815843105316162,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 1.3830049261083743,
|
|
"grad_norm": 13.42654012333122,
|
|
"learning_rate": 6.536904318102314e-06,
|
|
"loss": 0.9823303818702698,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 1.3842364532019704,
|
|
"grad_norm": 11.039715301984293,
|
|
"learning_rate": 6.530083632971658e-06,
|
|
"loss": 1.4959704875946045,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 1.3854679802955665,
|
|
"grad_norm": 13.499332863560449,
|
|
"learning_rate": 6.523259804918001e-06,
|
|
"loss": 1.3141142129898071,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 1.3866995073891626,
|
|
"grad_norm": 18.762617405218773,
|
|
"learning_rate": 6.516432847958074e-06,
|
|
"loss": 1.60225248336792,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 1.3879310344827587,
|
|
"grad_norm": 12.76800599324204,
|
|
"learning_rate": 6.509602776115029e-06,
|
|
"loss": 1.7774362564086914,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 1.3891625615763548,
|
|
"grad_norm": 14.80003777651342,
|
|
"learning_rate": 6.502769603418423e-06,
|
|
"loss": 1.3750693798065186,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 1.3903940886699506,
|
|
"grad_norm": 12.846839874270263,
|
|
"learning_rate": 6.4959333439041775e-06,
|
|
"loss": 1.0850452184677124,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 1.3916256157635467,
|
|
"grad_norm": 17.175837709461415,
|
|
"learning_rate": 6.489094011614553e-06,
|
|
"loss": 1.7440909147262573,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 1.3928571428571428,
|
|
"grad_norm": 8.34120026588026,
|
|
"learning_rate": 6.482251620598129e-06,
|
|
"loss": 1.5904752016067505,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 1.3940886699507389,
|
|
"grad_norm": 10.398946422121055,
|
|
"learning_rate": 6.47540618490976e-06,
|
|
"loss": 1.4864649772644043,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 1.395320197044335,
|
|
"grad_norm": 16.449380414530893,
|
|
"learning_rate": 6.4685577186105595e-06,
|
|
"loss": 1.3869491815567017,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 1.396551724137931,
|
|
"grad_norm": 11.708541771363075,
|
|
"learning_rate": 6.461706235767866e-06,
|
|
"loss": 1.1635327339172363,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 1.3977832512315271,
|
|
"grad_norm": 6.616557203492817,
|
|
"learning_rate": 6.45485175045521e-06,
|
|
"loss": 1.4063032865524292,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 1.3990147783251232,
|
|
"grad_norm": 26.794737362449215,
|
|
"learning_rate": 6.447994276752293e-06,
|
|
"loss": 2.2259998321533203,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 1.4002463054187193,
|
|
"grad_norm": 10.511853223185177,
|
|
"learning_rate": 6.441133828744954e-06,
|
|
"loss": 1.2302110195159912,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 1.4014778325123154,
|
|
"grad_norm": 10.658533095355526,
|
|
"learning_rate": 6.434270420525144e-06,
|
|
"loss": 1.2579622268676758,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 1.4027093596059113,
|
|
"grad_norm": 18.972607390940905,
|
|
"learning_rate": 6.427404066190889e-06,
|
|
"loss": 1.6761397123336792,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 1.4039408866995073,
|
|
"grad_norm": 12.172946298049014,
|
|
"learning_rate": 6.4205347798462704e-06,
|
|
"loss": 1.3933346271514893,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 1.4051724137931034,
|
|
"grad_norm": 13.681043588339055,
|
|
"learning_rate": 6.413662575601391e-06,
|
|
"loss": 1.9914003610610962,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 1.4064039408866995,
|
|
"grad_norm": 16.934291210588032,
|
|
"learning_rate": 6.406787467572348e-06,
|
|
"loss": 1.9921746253967285,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 1.4076354679802956,
|
|
"grad_norm": 18.5006822922468,
|
|
"learning_rate": 6.3999094698812055e-06,
|
|
"loss": 1.6050479412078857,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 1.4088669950738917,
|
|
"grad_norm": 12.333046745730567,
|
|
"learning_rate": 6.393028596655958e-06,
|
|
"loss": 1.7796251773834229,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 1.4100985221674878,
|
|
"grad_norm": 18.731485023409682,
|
|
"learning_rate": 6.386144862030508e-06,
|
|
"loss": 1.7936886548995972,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 1.4113300492610836,
|
|
"grad_norm": 18.37593149730845,
|
|
"learning_rate": 6.37925828014464e-06,
|
|
"loss": 1.9030745029449463,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 1.4125615763546797,
|
|
"grad_norm": 11.93678536094984,
|
|
"learning_rate": 6.3723688651439806e-06,
|
|
"loss": 1.4446496963500977,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 1.4137931034482758,
|
|
"grad_norm": 13.469356839829612,
|
|
"learning_rate": 6.365476631179982e-06,
|
|
"loss": 1.5683763027191162,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 1.4150246305418719,
|
|
"grad_norm": 8.488203520402504,
|
|
"learning_rate": 6.358581592409881e-06,
|
|
"loss": 1.4594917297363281,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 1.416256157635468,
|
|
"grad_norm": 25.588676453436552,
|
|
"learning_rate": 6.351683762996681e-06,
|
|
"loss": 2.1706323623657227,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 1.417487684729064,
|
|
"grad_norm": 11.810343655960159,
|
|
"learning_rate": 6.344783157109114e-06,
|
|
"loss": 1.835425853729248,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 1.4187192118226601,
|
|
"grad_norm": 10.711102782202751,
|
|
"learning_rate": 6.337879788921615e-06,
|
|
"loss": 1.1789867877960205,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 1.4199507389162562,
|
|
"grad_norm": 28.404082710690172,
|
|
"learning_rate": 6.3309736726142965e-06,
|
|
"loss": 1.9750418663024902,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 1.4211822660098523,
|
|
"grad_norm": 14.02852797567233,
|
|
"learning_rate": 6.324064822372913e-06,
|
|
"loss": 1.4960027933120728,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 1.4224137931034484,
|
|
"grad_norm": 20.199397968799044,
|
|
"learning_rate": 6.317153252388834e-06,
|
|
"loss": 1.12904691696167,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 1.4236453201970443,
|
|
"grad_norm": 10.534543863605384,
|
|
"learning_rate": 6.31023897685902e-06,
|
|
"loss": 1.30333411693573,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 1.4248768472906403,
|
|
"grad_norm": 15.66714236524435,
|
|
"learning_rate": 6.303322009985984e-06,
|
|
"loss": 2.5257434844970703,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 1.4261083743842364,
|
|
"grad_norm": 18.065303617570866,
|
|
"learning_rate": 6.296402365977767e-06,
|
|
"loss": 0.9684423208236694,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 1.4273399014778325,
|
|
"grad_norm": 12.376925974972115,
|
|
"learning_rate": 6.289480059047915e-06,
|
|
"loss": 1.457876443862915,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 9.05985921030025,
|
|
"learning_rate": 6.282555103415438e-06,
|
|
"loss": 1.5206713676452637,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 1.4298029556650247,
|
|
"grad_norm": 14.712390356925216,
|
|
"learning_rate": 6.27562751330479e-06,
|
|
"loss": 1.680644154548645,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 1.4310344827586206,
|
|
"grad_norm": 9.786932196785434,
|
|
"learning_rate": 6.268697302945835e-06,
|
|
"loss": 1.3704997301101685,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 1.4322660098522166,
|
|
"grad_norm": 9.786888328650228,
|
|
"learning_rate": 6.261764486573816e-06,
|
|
"loss": 1.3250343799591064,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 1.4334975369458127,
|
|
"grad_norm": 15.544106160026582,
|
|
"learning_rate": 6.254829078429336e-06,
|
|
"loss": 1.8659427165985107,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 1.4347290640394088,
|
|
"grad_norm": 21.077430430000046,
|
|
"learning_rate": 6.247891092758319e-06,
|
|
"loss": 2.043597936630249,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 1.435960591133005,
|
|
"grad_norm": 12.476492579798414,
|
|
"learning_rate": 6.24095054381198e-06,
|
|
"loss": 1.5634403228759766,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 1.437192118226601,
|
|
"grad_norm": 11.790373846414154,
|
|
"learning_rate": 6.2340074458468014e-06,
|
|
"loss": 1.1179373264312744,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 1.438423645320197,
|
|
"grad_norm": 13.094422813370427,
|
|
"learning_rate": 6.227061813124504e-06,
|
|
"loss": 0.8013179302215576,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 1.4396551724137931,
|
|
"grad_norm": 9.010286032120458,
|
|
"learning_rate": 6.220113659912012e-06,
|
|
"loss": 1.3435392379760742,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 1.4408866995073892,
|
|
"grad_norm": 8.308881028265468,
|
|
"learning_rate": 6.213163000481428e-06,
|
|
"loss": 1.39387845993042,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 1.4421182266009853,
|
|
"grad_norm": 8.499060752632088,
|
|
"learning_rate": 6.206209849110001e-06,
|
|
"loss": 1.760462760925293,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 1.4433497536945814,
|
|
"grad_norm": 13.348998095152654,
|
|
"learning_rate": 6.1992542200801035e-06,
|
|
"loss": 1.0812432765960693,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 1.4445812807881773,
|
|
"grad_norm": 9.263056193047571,
|
|
"learning_rate": 6.1922961276791925e-06,
|
|
"loss": 1.7997616529464722,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 1.4458128078817734,
|
|
"grad_norm": 11.646405372699148,
|
|
"learning_rate": 6.1853355861997854e-06,
|
|
"loss": 1.773369550704956,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 1.4470443349753694,
|
|
"grad_norm": 8.442523087287304,
|
|
"learning_rate": 6.1783726099394324e-06,
|
|
"loss": 1.9488962888717651,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 1.4482758620689655,
|
|
"grad_norm": 13.332895782423902,
|
|
"learning_rate": 6.171407213200683e-06,
|
|
"loss": 1.6990149021148682,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 1.4495073891625616,
|
|
"grad_norm": 12.609637801512664,
|
|
"learning_rate": 6.164439410291061e-06,
|
|
"loss": 1.4307571649551392,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 1.4507389162561577,
|
|
"grad_norm": 8.885074358137231,
|
|
"learning_rate": 6.157469215523031e-06,
|
|
"loss": 1.3966443538665771,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 1.4519704433497536,
|
|
"grad_norm": 16.606696238854166,
|
|
"learning_rate": 6.150496643213969e-06,
|
|
"loss": 1.2959253787994385,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 1.4532019704433496,
|
|
"grad_norm": 16.898895754976742,
|
|
"learning_rate": 6.143521707686137e-06,
|
|
"loss": 1.4992142915725708,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 1.4544334975369457,
|
|
"grad_norm": 16.69245348652636,
|
|
"learning_rate": 6.136544423266651e-06,
|
|
"loss": 1.8196167945861816,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 1.4556650246305418,
|
|
"grad_norm": 16.12465629803321,
|
|
"learning_rate": 6.129564804287454e-06,
|
|
"loss": 1.4129021167755127,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 1.456896551724138,
|
|
"grad_norm": 15.4451290282442,
|
|
"learning_rate": 6.122582865085278e-06,
|
|
"loss": 1.2009403705596924,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 1.458128078817734,
|
|
"grad_norm": 12.682560791700617,
|
|
"learning_rate": 6.115598620001627e-06,
|
|
"loss": 1.698556661605835,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 1.45935960591133,
|
|
"grad_norm": 21.414952415899087,
|
|
"learning_rate": 6.108612083382739e-06,
|
|
"loss": 1.5819299221038818,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 1.4605911330049262,
|
|
"grad_norm": 10.708464197323055,
|
|
"learning_rate": 6.101623269579558e-06,
|
|
"loss": 1.374379277229309,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 1.4618226600985222,
|
|
"grad_norm": 10.541290993965774,
|
|
"learning_rate": 6.094632192947711e-06,
|
|
"loss": 1.2765707969665527,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 1.4630541871921183,
|
|
"grad_norm": 14.098976562454558,
|
|
"learning_rate": 6.087638867847465e-06,
|
|
"loss": 1.2740705013275146,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 1.4642857142857144,
|
|
"grad_norm": 11.154362665776958,
|
|
"learning_rate": 6.08064330864371e-06,
|
|
"loss": 1.6713453531265259,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 1.4655172413793103,
|
|
"grad_norm": 9.205967970627526,
|
|
"learning_rate": 6.073645529705926e-06,
|
|
"loss": 1.6606531143188477,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 1.4667487684729064,
|
|
"grad_norm": 12.43504089477338,
|
|
"learning_rate": 6.066645545408149e-06,
|
|
"loss": 1.6029870510101318,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 1.4679802955665024,
|
|
"grad_norm": 9.416406443647212,
|
|
"learning_rate": 6.0596433701289506e-06,
|
|
"loss": 1.5884819030761719,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 1.4692118226600985,
|
|
"grad_norm": 17.434043985101933,
|
|
"learning_rate": 6.052639018251394e-06,
|
|
"loss": 1.060668706893921,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 1.4704433497536946,
|
|
"grad_norm": 13.053843358479307,
|
|
"learning_rate": 6.045632504163024e-06,
|
|
"loss": 1.6251329183578491,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 1.4716748768472907,
|
|
"grad_norm": 10.200397873502725,
|
|
"learning_rate": 6.03862384225582e-06,
|
|
"loss": 1.2369989156723022,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 1.4729064039408866,
|
|
"grad_norm": 28.146477262288624,
|
|
"learning_rate": 6.0316130469261705e-06,
|
|
"loss": 1.7742527723312378,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 1.4741379310344827,
|
|
"grad_norm": 6.380213600146285,
|
|
"learning_rate": 6.024600132574855e-06,
|
|
"loss": 2.166492223739624,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 1.4753694581280787,
|
|
"grad_norm": 15.296147923549848,
|
|
"learning_rate": 6.017585113606999e-06,
|
|
"loss": 1.8031083345413208,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 1.4766009852216748,
|
|
"grad_norm": 7.580109898357858,
|
|
"learning_rate": 6.010568004432055e-06,
|
|
"loss": 1.9966365098953247,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 1.477832512315271,
|
|
"grad_norm": 13.138438168026589,
|
|
"learning_rate": 6.0035488194637645e-06,
|
|
"loss": 1.0125515460968018,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 1.479064039408867,
|
|
"grad_norm": 16.24938270382903,
|
|
"learning_rate": 5.9965275731201364e-06,
|
|
"loss": 1.1396842002868652,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 1.480295566502463,
|
|
"grad_norm": 6.579201955073294,
|
|
"learning_rate": 5.9895042798234125e-06,
|
|
"loss": 1.8030388355255127,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 1.4815270935960592,
|
|
"grad_norm": 12.865016417179568,
|
|
"learning_rate": 5.982478954000042e-06,
|
|
"loss": 1.4132026433944702,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 1.4827586206896552,
|
|
"grad_norm": 11.295614659779242,
|
|
"learning_rate": 5.975451610080643e-06,
|
|
"loss": 1.3726825714111328,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 1.4839901477832513,
|
|
"grad_norm": 10.812781562044428,
|
|
"learning_rate": 5.968422262499983e-06,
|
|
"loss": 2.3436193466186523,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 1.4852216748768474,
|
|
"grad_norm": 11.93980767439267,
|
|
"learning_rate": 5.961390925696947e-06,
|
|
"loss": 1.4617420434951782,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 1.4864532019704433,
|
|
"grad_norm": 8.752972802049372,
|
|
"learning_rate": 5.9543576141145035e-06,
|
|
"loss": 1.8050814867019653,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 1.4876847290640394,
|
|
"grad_norm": 11.595272230479853,
|
|
"learning_rate": 5.947322342199674e-06,
|
|
"loss": 1.3426543474197388,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 1.4889162561576355,
|
|
"grad_norm": 13.910327681643947,
|
|
"learning_rate": 5.940285124403517e-06,
|
|
"loss": 1.6211771965026855,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 1.4901477832512315,
|
|
"grad_norm": 10.490417163522949,
|
|
"learning_rate": 5.933245975181074e-06,
|
|
"loss": 2.695863723754883,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 1.4913793103448276,
|
|
"grad_norm": 9.128292414129945,
|
|
"learning_rate": 5.926204908991366e-06,
|
|
"loss": 1.2743788957595825,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 1.4926108374384237,
|
|
"grad_norm": 11.2632445422812,
|
|
"learning_rate": 5.919161940297346e-06,
|
|
"loss": 1.652765154838562,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 1.4938423645320196,
|
|
"grad_norm": 7.537950882850561,
|
|
"learning_rate": 5.912117083565874e-06,
|
|
"loss": 1.3720670938491821,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 1.4950738916256157,
|
|
"grad_norm": 14.216763115794095,
|
|
"learning_rate": 5.905070353267692e-06,
|
|
"loss": 1.222616195678711,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 1.4963054187192117,
|
|
"grad_norm": 7.742622309976788,
|
|
"learning_rate": 5.898021763877388e-06,
|
|
"loss": 1.4626069068908691,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 1.4975369458128078,
|
|
"grad_norm": 10.044815043339705,
|
|
"learning_rate": 5.890971329873366e-06,
|
|
"loss": 1.7813634872436523,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 1.498768472906404,
|
|
"grad_norm": 14.537107209189347,
|
|
"learning_rate": 5.883919065737827e-06,
|
|
"loss": 0.5114675760269165,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 18.934697309871,
|
|
"learning_rate": 5.876864985956722e-06,
|
|
"loss": 1.6000962257385254,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 1.501231527093596,
|
|
"grad_norm": 33.040397060632486,
|
|
"learning_rate": 5.869809105019738e-06,
|
|
"loss": 1.5674512386322021,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 1.5024630541871922,
|
|
"grad_norm": 9.76563438047523,
|
|
"learning_rate": 5.8627514374202596e-06,
|
|
"loss": 1.7963311672210693,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 1.5036945812807883,
|
|
"grad_norm": 10.95067481959561,
|
|
"learning_rate": 5.85569199765534e-06,
|
|
"loss": 1.1649596691131592,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 1.5049261083743843,
|
|
"grad_norm": 9.927773449159055,
|
|
"learning_rate": 5.848630800225678e-06,
|
|
"loss": 1.140197992324829,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 1.5061576354679804,
|
|
"grad_norm": 8.586607717080767,
|
|
"learning_rate": 5.841567859635572e-06,
|
|
"loss": 1.865435242652893,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 1.5073891625615765,
|
|
"grad_norm": 11.43552738813054,
|
|
"learning_rate": 5.834503190392912e-06,
|
|
"loss": 1.457642912864685,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 1.5086206896551724,
|
|
"grad_norm": 9.978595721772624,
|
|
"learning_rate": 5.827436807009133e-06,
|
|
"loss": 1.3783336877822876,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 1.5098522167487685,
|
|
"grad_norm": 10.75044326200818,
|
|
"learning_rate": 5.8203687239991935e-06,
|
|
"loss": 1.939549207687378,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 1.5110837438423645,
|
|
"grad_norm": 14.588582695069839,
|
|
"learning_rate": 5.813298955881542e-06,
|
|
"loss": 1.3607597351074219,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 1.5123152709359606,
|
|
"grad_norm": 9.739548479278437,
|
|
"learning_rate": 5.806227517178089e-06,
|
|
"loss": 0.81966233253479,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 1.5135467980295565,
|
|
"grad_norm": 7.228017183846092,
|
|
"learning_rate": 5.799154422414174e-06,
|
|
"loss": 0.9481602311134338,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 1.5147783251231526,
|
|
"grad_norm": 16.162733557662186,
|
|
"learning_rate": 5.79207968611854e-06,
|
|
"loss": 1.3550889492034912,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 1.5160098522167487,
|
|
"grad_norm": 10.696500057601996,
|
|
"learning_rate": 5.785003322823307e-06,
|
|
"loss": 2.022425889968872,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 1.5172413793103448,
|
|
"grad_norm": 8.501680697642309,
|
|
"learning_rate": 5.777925347063927e-06,
|
|
"loss": 1.5649950504302979,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 1.5184729064039408,
|
|
"grad_norm": 12.185227926920462,
|
|
"learning_rate": 5.7708457733791715e-06,
|
|
"loss": 1.9720977544784546,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 1.519704433497537,
|
|
"grad_norm": 12.902985615374178,
|
|
"learning_rate": 5.763764616311089e-06,
|
|
"loss": 1.0029213428497314,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 1.520935960591133,
|
|
"grad_norm": 13.23751211435566,
|
|
"learning_rate": 5.756681890404987e-06,
|
|
"loss": 1.8926727771759033,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 1.522167487684729,
|
|
"grad_norm": 8.93687413398984,
|
|
"learning_rate": 5.749597610209392e-06,
|
|
"loss": 1.462761402130127,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 1.5233990147783252,
|
|
"grad_norm": 10.137890971821589,
|
|
"learning_rate": 5.7425117902760195e-06,
|
|
"loss": 2.1467416286468506,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 1.5246305418719213,
|
|
"grad_norm": 12.30865285718221,
|
|
"learning_rate": 5.7354244451597545e-06,
|
|
"loss": 1.191473364830017,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 1.5258620689655173,
|
|
"grad_norm": 11.884477014639941,
|
|
"learning_rate": 5.72833558941861e-06,
|
|
"loss": 0.896723210811615,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 1.5270935960591134,
|
|
"grad_norm": 12.439035862181441,
|
|
"learning_rate": 5.721245237613704e-06,
|
|
"loss": 0.8741526007652283,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 1.5283251231527095,
|
|
"grad_norm": 11.437489612490284,
|
|
"learning_rate": 5.714153404309228e-06,
|
|
"loss": 1.6330994367599487,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 1.5295566502463054,
|
|
"grad_norm": 8.493940846915361,
|
|
"learning_rate": 5.707060104072415e-06,
|
|
"loss": 2.2386982440948486,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 1.5307881773399015,
|
|
"grad_norm": 15.002139823216499,
|
|
"learning_rate": 5.6999653514735124e-06,
|
|
"loss": 1.5266145467758179,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 1.5320197044334976,
|
|
"grad_norm": 10.763593391596421,
|
|
"learning_rate": 5.6928691610857515e-06,
|
|
"loss": 1.4918262958526611,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 1.5332512315270936,
|
|
"grad_norm": 13.978563202935332,
|
|
"learning_rate": 5.685771547485312e-06,
|
|
"loss": 1.241945743560791,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 1.5344827586206895,
|
|
"grad_norm": 13.403953021065679,
|
|
"learning_rate": 5.678672525251304e-06,
|
|
"loss": 1.1569273471832275,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 1.5357142857142856,
|
|
"grad_norm": 11.182023407334606,
|
|
"learning_rate": 5.671572108965729e-06,
|
|
"loss": 1.946014404296875,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 1.5369458128078817,
|
|
"grad_norm": 11.304302205859694,
|
|
"learning_rate": 5.664470313213448e-06,
|
|
"loss": 1.8601741790771484,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 1.5381773399014778,
|
|
"grad_norm": 16.894321658591,
|
|
"learning_rate": 5.65736715258216e-06,
|
|
"loss": 1.7164549827575684,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 1.5394088669950738,
|
|
"grad_norm": 10.02548837159482,
|
|
"learning_rate": 5.650262641662367e-06,
|
|
"loss": 2.0459697246551514,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 1.54064039408867,
|
|
"grad_norm": 9.37570660013781,
|
|
"learning_rate": 5.643156795047343e-06,
|
|
"loss": 1.4485859870910645,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 1.541871921182266,
|
|
"grad_norm": 7.685396722064439,
|
|
"learning_rate": 5.6360496273331055e-06,
|
|
"loss": 1.8672525882720947,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 1.543103448275862,
|
|
"grad_norm": 10.04870984968868,
|
|
"learning_rate": 5.628941153118388e-06,
|
|
"loss": 1.4309324026107788,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 1.5443349753694582,
|
|
"grad_norm": 8.68197237847592,
|
|
"learning_rate": 5.621831387004603e-06,
|
|
"loss": 1.8784745931625366,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 1.5455665024630543,
|
|
"grad_norm": 13.277977807429252,
|
|
"learning_rate": 5.6147203435958246e-06,
|
|
"loss": 2.109992027282715,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 1.5467980295566504,
|
|
"grad_norm": 12.972460738003901,
|
|
"learning_rate": 5.607608037498742e-06,
|
|
"loss": 1.5892071723937988,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 1.5480295566502464,
|
|
"grad_norm": 13.365650986627243,
|
|
"learning_rate": 5.600494483322643e-06,
|
|
"loss": 1.3583379983901978,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 1.5492610837438425,
|
|
"grad_norm": 20.27099102357665,
|
|
"learning_rate": 5.593379695679378e-06,
|
|
"loss": 2.126896381378174,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 1.5504926108374384,
|
|
"grad_norm": 17.176572909103676,
|
|
"learning_rate": 5.586263689183332e-06,
|
|
"loss": 1.7454299926757812,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 1.5517241379310345,
|
|
"grad_norm": 13.916773869762237,
|
|
"learning_rate": 5.5791464784513905e-06,
|
|
"loss": 1.1533763408660889,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 1.5529556650246306,
|
|
"grad_norm": 7.929553367189426,
|
|
"learning_rate": 5.572028078102917e-06,
|
|
"loss": 1.4818049669265747,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 1.5541871921182266,
|
|
"grad_norm": 10.401505556673449,
|
|
"learning_rate": 5.564908502759714e-06,
|
|
"loss": 1.7103283405303955,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 1.5554187192118225,
|
|
"grad_norm": 9.47500952850124,
|
|
"learning_rate": 5.557787767046001e-06,
|
|
"loss": 2.1653401851654053,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 1.5566502463054186,
|
|
"grad_norm": 11.53902942298552,
|
|
"learning_rate": 5.55066588558838e-06,
|
|
"loss": 1.3127275705337524,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 1.5578817733990147,
|
|
"grad_norm": 16.55540616140196,
|
|
"learning_rate": 5.543542873015806e-06,
|
|
"loss": 1.0865871906280518,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 1.5591133004926108,
|
|
"grad_norm": 11.513704169835737,
|
|
"learning_rate": 5.536418743959559e-06,
|
|
"loss": 1.341281533241272,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 1.5603448275862069,
|
|
"grad_norm": 13.363897307451165,
|
|
"learning_rate": 5.529293513053207e-06,
|
|
"loss": 1.1612720489501953,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 1.561576354679803,
|
|
"grad_norm": 8.231595025537441,
|
|
"learning_rate": 5.522167194932588e-06,
|
|
"loss": 1.7491642236709595,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 1.562807881773399,
|
|
"grad_norm": 14.714195860173573,
|
|
"learning_rate": 5.515039804235772e-06,
|
|
"loss": 1.8244414329528809,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 1.564039408866995,
|
|
"grad_norm": 14.369418745397832,
|
|
"learning_rate": 5.50791135560303e-06,
|
|
"loss": 1.6449997425079346,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 1.5652709359605912,
|
|
"grad_norm": 10.791840038500066,
|
|
"learning_rate": 5.5007818636768055e-06,
|
|
"loss": 1.258559226989746,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 1.5665024630541873,
|
|
"grad_norm": 12.265469895779276,
|
|
"learning_rate": 5.493651343101686e-06,
|
|
"loss": 2.075775146484375,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 1.5677339901477834,
|
|
"grad_norm": 33.663491606092755,
|
|
"learning_rate": 5.486519808524374e-06,
|
|
"loss": 1.8196138143539429,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 1.5689655172413794,
|
|
"grad_norm": 10.504622195873791,
|
|
"learning_rate": 5.479387274593653e-06,
|
|
"loss": 1.129037618637085,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 1.5701970443349755,
|
|
"grad_norm": 10.887519946570082,
|
|
"learning_rate": 5.472253755960358e-06,
|
|
"loss": 1.7367748022079468,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 9.127598313619417,
|
|
"learning_rate": 5.4651192672773475e-06,
|
|
"loss": 1.9274532794952393,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 1.5726600985221675,
|
|
"grad_norm": 17.490821839529264,
|
|
"learning_rate": 5.457983823199475e-06,
|
|
"loss": 1.4018654823303223,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 1.5738916256157636,
|
|
"grad_norm": 17.899672160499332,
|
|
"learning_rate": 5.450847438383555e-06,
|
|
"loss": 1.383131504058838,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 1.5751231527093597,
|
|
"grad_norm": 6.595048027752494,
|
|
"learning_rate": 5.443710127488331e-06,
|
|
"loss": 1.277740716934204,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 1.5763546798029555,
|
|
"grad_norm": 9.304406142462632,
|
|
"learning_rate": 5.4365719051744556e-06,
|
|
"loss": 1.507627010345459,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 1.5775862068965516,
|
|
"grad_norm": 13.383687869982538,
|
|
"learning_rate": 5.429432786104446e-06,
|
|
"loss": 1.609743595123291,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 1.5788177339901477,
|
|
"grad_norm": 14.966009265010456,
|
|
"learning_rate": 5.422292784942666e-06,
|
|
"loss": 3.7705276012420654,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 1.5800492610837438,
|
|
"grad_norm": 8.997880163576188,
|
|
"learning_rate": 5.415151916355292e-06,
|
|
"loss": 1.5003160238265991,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 1.5812807881773399,
|
|
"grad_norm": 9.476478190888859,
|
|
"learning_rate": 5.408010195010278e-06,
|
|
"loss": 2.2466366291046143,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 1.582512315270936,
|
|
"grad_norm": 7.465134227448914,
|
|
"learning_rate": 5.400867635577335e-06,
|
|
"loss": 1.0722277164459229,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 1.583743842364532,
|
|
"grad_norm": 13.942249242079209,
|
|
"learning_rate": 5.3937242527278885e-06,
|
|
"loss": 1.3113644123077393,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 1.5849753694581281,
|
|
"grad_norm": 14.224147707467683,
|
|
"learning_rate": 5.3865800611350634e-06,
|
|
"loss": 1.4688694477081299,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 1.5862068965517242,
|
|
"grad_norm": 9.648975936769988,
|
|
"learning_rate": 5.379435075473641e-06,
|
|
"loss": 1.3646764755249023,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 1.5874384236453203,
|
|
"grad_norm": 8.753285038565833,
|
|
"learning_rate": 5.372289310420032e-06,
|
|
"loss": 1.6248177289962769,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 1.5886699507389164,
|
|
"grad_norm": 9.773114583134893,
|
|
"learning_rate": 5.365142780652255e-06,
|
|
"loss": 1.5507471561431885,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 1.5899014778325125,
|
|
"grad_norm": 8.752822975110762,
|
|
"learning_rate": 5.35799550084989e-06,
|
|
"loss": 1.2866086959838867,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 1.5911330049261085,
|
|
"grad_norm": 10.021050170312028,
|
|
"learning_rate": 5.350847485694067e-06,
|
|
"loss": 2.336108684539795,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 1.5923645320197044,
|
|
"grad_norm": 11.648640054355637,
|
|
"learning_rate": 5.343698749867421e-06,
|
|
"loss": 1.6604368686676025,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 1.5935960591133005,
|
|
"grad_norm": 16.28378480699955,
|
|
"learning_rate": 5.336549308054066e-06,
|
|
"loss": 1.2169203758239746,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 1.5948275862068966,
|
|
"grad_norm": 14.069009000417143,
|
|
"learning_rate": 5.329399174939572e-06,
|
|
"loss": 1.546027421951294,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 1.5960591133004927,
|
|
"grad_norm": 9.646944240372145,
|
|
"learning_rate": 5.3222483652109235e-06,
|
|
"loss": 1.1372979879379272,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 1.5972906403940885,
|
|
"grad_norm": 10.548510904543294,
|
|
"learning_rate": 5.315096893556497e-06,
|
|
"loss": 1.3435921669006348,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 1.5985221674876846,
|
|
"grad_norm": 14.79008878560828,
|
|
"learning_rate": 5.307944774666029e-06,
|
|
"loss": 1.522647500038147,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 1.5997536945812807,
|
|
"grad_norm": 17.912683434114346,
|
|
"learning_rate": 5.300792023230587e-06,
|
|
"loss": 2.0829434394836426,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 1.6009852216748768,
|
|
"grad_norm": 8.420566897576393,
|
|
"learning_rate": 5.2936386539425325e-06,
|
|
"loss": 1.761828064918518,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 1.6022167487684729,
|
|
"grad_norm": 14.83308627903251,
|
|
"learning_rate": 5.2864846814955e-06,
|
|
"loss": 2.4108588695526123,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 1.603448275862069,
|
|
"grad_norm": 7.959651684795871,
|
|
"learning_rate": 5.279330120584365e-06,
|
|
"loss": 1.626701831817627,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 1.604679802955665,
|
|
"grad_norm": 15.705970904875606,
|
|
"learning_rate": 5.272174985905207e-06,
|
|
"loss": 1.2424887418746948,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 1.6059113300492611,
|
|
"grad_norm": 12.239359710615943,
|
|
"learning_rate": 5.2650192921552845e-06,
|
|
"loss": 2.149031639099121,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 1.6071428571428572,
|
|
"grad_norm": 10.231856507403213,
|
|
"learning_rate": 5.257863054033012e-06,
|
|
"loss": 2.6947379112243652,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 1.6083743842364533,
|
|
"grad_norm": 18.838018326977505,
|
|
"learning_rate": 5.25070628623791e-06,
|
|
"loss": 1.665069818496704,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 1.6096059113300494,
|
|
"grad_norm": 14.325294673284358,
|
|
"learning_rate": 5.243549003470599e-06,
|
|
"loss": 1.3887734413146973,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 1.6108374384236455,
|
|
"grad_norm": 11.840772011671689,
|
|
"learning_rate": 5.236391220432745e-06,
|
|
"loss": 1.340559720993042,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 1.6120689655172413,
|
|
"grad_norm": 10.400173398296557,
|
|
"learning_rate": 5.229232951827054e-06,
|
|
"loss": 1.1291146278381348,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 1.6133004926108374,
|
|
"grad_norm": 11.008129364503455,
|
|
"learning_rate": 5.222074212357221e-06,
|
|
"loss": 1.8375647068023682,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 1.6145320197044335,
|
|
"grad_norm": 26.174008264121436,
|
|
"learning_rate": 5.2149150167279106e-06,
|
|
"loss": 1.3299870491027832,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 1.6157635467980296,
|
|
"grad_norm": 9.874671943961642,
|
|
"learning_rate": 5.2077553796447254e-06,
|
|
"loss": 1.1574440002441406,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 1.6169950738916257,
|
|
"grad_norm": 9.304756709434216,
|
|
"learning_rate": 5.200595315814174e-06,
|
|
"loss": 1.8118785619735718,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 1.6182266009852215,
|
|
"grad_norm": 10.54430610217864,
|
|
"learning_rate": 5.19343483994364e-06,
|
|
"loss": 1.333923101425171,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 1.6194581280788176,
|
|
"grad_norm": 8.365290613104223,
|
|
"learning_rate": 5.18627396674136e-06,
|
|
"loss": 1.2107478380203247,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 1.6206896551724137,
|
|
"grad_norm": 11.934365489822259,
|
|
"learning_rate": 5.1791127109163734e-06,
|
|
"loss": 1.662817120552063,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 1.6219211822660098,
|
|
"grad_norm": 11.66068657995672,
|
|
"learning_rate": 5.17195108717852e-06,
|
|
"loss": 1.7790195941925049,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 1.6231527093596059,
|
|
"grad_norm": 15.883414066148024,
|
|
"learning_rate": 5.164789110238387e-06,
|
|
"loss": 1.5893058776855469,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 1.624384236453202,
|
|
"grad_norm": 9.631844787083402,
|
|
"learning_rate": 5.15762679480729e-06,
|
|
"loss": 1.256395936012268,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 1.625615763546798,
|
|
"grad_norm": 18.80096398191795,
|
|
"learning_rate": 5.150464155597239e-06,
|
|
"loss": 1.3061628341674805,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 1.6268472906403941,
|
|
"grad_norm": 8.93680164244121,
|
|
"learning_rate": 5.143301207320909e-06,
|
|
"loss": 1.4399319887161255,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 1.6280788177339902,
|
|
"grad_norm": 13.559338660465917,
|
|
"learning_rate": 5.136137964691609e-06,
|
|
"loss": 1.2071207761764526,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 1.6293103448275863,
|
|
"grad_norm": 15.329093630080337,
|
|
"learning_rate": 5.128974442423254e-06,
|
|
"loss": 2.2784008979797363,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 1.6305418719211824,
|
|
"grad_norm": 10.677223802578135,
|
|
"learning_rate": 5.121810655230336e-06,
|
|
"loss": 1.3703962564468384,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 1.6317733990147785,
|
|
"grad_norm": 7.672085033643185,
|
|
"learning_rate": 5.114646617827884e-06,
|
|
"loss": 0.6955282688140869,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 1.6330049261083743,
|
|
"grad_norm": 9.372418453872616,
|
|
"learning_rate": 5.107482344931448e-06,
|
|
"loss": 1.5774227380752563,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 1.6342364532019704,
|
|
"grad_norm": 7.569882170382433,
|
|
"learning_rate": 5.100317851257057e-06,
|
|
"loss": 1.6811349391937256,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 1.6354679802955665,
|
|
"grad_norm": 13.234466243138659,
|
|
"learning_rate": 5.093153151521196e-06,
|
|
"loss": 1.563596487045288,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 1.6366995073891626,
|
|
"grad_norm": 13.317086470459271,
|
|
"learning_rate": 5.085988260440776e-06,
|
|
"loss": 1.44309401512146,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 1.6379310344827587,
|
|
"grad_norm": 12.614583983426193,
|
|
"learning_rate": 5.0788231927330924e-06,
|
|
"loss": 1.5392205715179443,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 1.6391625615763545,
|
|
"grad_norm": 19.688183928504156,
|
|
"learning_rate": 5.0716579631158124e-06,
|
|
"loss": 0.9557719826698303,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 1.6403940886699506,
|
|
"grad_norm": 12.748000945416605,
|
|
"learning_rate": 5.064492586306931e-06,
|
|
"loss": 1.1032493114471436,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 1.6416256157635467,
|
|
"grad_norm": 14.590229259835747,
|
|
"learning_rate": 5.057327077024745e-06,
|
|
"loss": 1.4907091856002808,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 1.6428571428571428,
|
|
"grad_norm": 13.569513298786392,
|
|
"learning_rate": 5.050161449987828e-06,
|
|
"loss": 1.4919164180755615,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 1.6440886699507389,
|
|
"grad_norm": 17.53788627610522,
|
|
"learning_rate": 5.0429957199149905e-06,
|
|
"loss": 2.177396297454834,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 1.645320197044335,
|
|
"grad_norm": 9.011039030303097,
|
|
"learning_rate": 5.035829901525258e-06,
|
|
"loss": 1.2386332750320435,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 1.646551724137931,
|
|
"grad_norm": 7.326320563707851,
|
|
"learning_rate": 5.028664009537835e-06,
|
|
"loss": 1.2984986305236816,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 1.6477832512315271,
|
|
"grad_norm": 8.373461994458872,
|
|
"learning_rate": 5.021498058672076e-06,
|
|
"loss": 1.1399617195129395,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 1.6490147783251232,
|
|
"grad_norm": 7.295316739226097,
|
|
"learning_rate": 5.014332063647462e-06,
|
|
"loss": 1.9816789627075195,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 1.6502463054187193,
|
|
"grad_norm": 7.86464342129843,
|
|
"learning_rate": 5.007166039183561e-06,
|
|
"loss": 1.4210541248321533,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 1.6514778325123154,
|
|
"grad_norm": 12.713637168049194,
|
|
"learning_rate": 5e-06,
|
|
"loss": 1.5061390399932861,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 1.6527093596059115,
|
|
"grad_norm": 8.899156333262312,
|
|
"learning_rate": 4.99283396081644e-06,
|
|
"loss": 1.4701118469238281,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 1.6539408866995073,
|
|
"grad_norm": 10.54571567541005,
|
|
"learning_rate": 4.985667936352538e-06,
|
|
"loss": 1.4879779815673828,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 1.6551724137931034,
|
|
"grad_norm": 10.432279538827562,
|
|
"learning_rate": 4.978501941327926e-06,
|
|
"loss": 1.51373291015625,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 1.6564039408866995,
|
|
"grad_norm": 7.981064947021898,
|
|
"learning_rate": 4.971335990462168e-06,
|
|
"loss": 1.5439019203186035,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 1.6576354679802956,
|
|
"grad_norm": 14.863181962691362,
|
|
"learning_rate": 4.964170098474744e-06,
|
|
"loss": 1.7145721912384033,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 1.6588669950738915,
|
|
"grad_norm": 7.816226303611453,
|
|
"learning_rate": 4.95700428008501e-06,
|
|
"loss": 1.6367833614349365,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 1.6600985221674875,
|
|
"grad_norm": 12.087333147554537,
|
|
"learning_rate": 4.949838550012172e-06,
|
|
"loss": 1.4300103187561035,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 1.6613300492610836,
|
|
"grad_norm": 6.881924405292677,
|
|
"learning_rate": 4.942672922975255e-06,
|
|
"loss": 2.0569915771484375,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 1.6625615763546797,
|
|
"grad_norm": 15.296469591183284,
|
|
"learning_rate": 4.935507413693071e-06,
|
|
"loss": 1.1028980016708374,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 1.6637931034482758,
|
|
"grad_norm": 9.201861102909985,
|
|
"learning_rate": 4.928342036884189e-06,
|
|
"loss": 1.6323003768920898,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 1.6650246305418719,
|
|
"grad_norm": 10.996157407203105,
|
|
"learning_rate": 4.921176807266909e-06,
|
|
"loss": 1.5050472021102905,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 1.666256157635468,
|
|
"grad_norm": 17.127722044101333,
|
|
"learning_rate": 4.914011739559225e-06,
|
|
"loss": 1.3893849849700928,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 1.667487684729064,
|
|
"grad_norm": 13.548169676262727,
|
|
"learning_rate": 4.906846848478803e-06,
|
|
"loss": 1.1478514671325684,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 1.6687192118226601,
|
|
"grad_norm": 16.337726396970115,
|
|
"learning_rate": 4.899682148742944e-06,
|
|
"loss": 1.2397665977478027,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 1.6699507389162562,
|
|
"grad_norm": 8.122019629920894,
|
|
"learning_rate": 4.892517655068555e-06,
|
|
"loss": 1.1658974885940552,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 1.6711822660098523,
|
|
"grad_norm": 10.105771734426996,
|
|
"learning_rate": 4.8853533821721175e-06,
|
|
"loss": 1.7130283117294312,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 1.6724137931034484,
|
|
"grad_norm": 10.758386009234124,
|
|
"learning_rate": 4.878189344769666e-06,
|
|
"loss": 0.9516315460205078,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 1.6736453201970445,
|
|
"grad_norm": 11.103808898671073,
|
|
"learning_rate": 4.871025557576747e-06,
|
|
"loss": 1.143174171447754,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 1.6748768472906403,
|
|
"grad_norm": 11.525961008953772,
|
|
"learning_rate": 4.863862035308392e-06,
|
|
"loss": 1.7117831707000732,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 1.6761083743842364,
|
|
"grad_norm": 17.64687941795743,
|
|
"learning_rate": 4.8566987926790946e-06,
|
|
"loss": 2.507868528366089,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 1.6773399014778325,
|
|
"grad_norm": 9.376137745201675,
|
|
"learning_rate": 4.849535844402762e-06,
|
|
"loss": 1.476400375366211,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 1.6785714285714286,
|
|
"grad_norm": 8.721089378493017,
|
|
"learning_rate": 4.8423732051927115e-06,
|
|
"loss": 1.3162943124771118,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 1.6798029556650245,
|
|
"grad_norm": 10.422911150427735,
|
|
"learning_rate": 4.835210889761614e-06,
|
|
"loss": 2.2291440963745117,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 1.6810344827586206,
|
|
"grad_norm": 9.602624562609396,
|
|
"learning_rate": 4.82804891282148e-06,
|
|
"loss": 1.2231886386871338,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 1.6822660098522166,
|
|
"grad_norm": 14.076238439157445,
|
|
"learning_rate": 4.820887289083629e-06,
|
|
"loss": 1.3799304962158203,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 1.6834975369458127,
|
|
"grad_norm": 15.54796648321669,
|
|
"learning_rate": 4.813726033258643e-06,
|
|
"loss": 1.856811761856079,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 1.6847290640394088,
|
|
"grad_norm": 9.64062645814171,
|
|
"learning_rate": 4.80656516005636e-06,
|
|
"loss": 1.5948967933654785,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 1.685960591133005,
|
|
"grad_norm": 13.962004352631022,
|
|
"learning_rate": 4.799404684185828e-06,
|
|
"loss": 1.5035887956619263,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 1.687192118226601,
|
|
"grad_norm": 11.27741103317867,
|
|
"learning_rate": 4.792244620355275e-06,
|
|
"loss": 1.4715675115585327,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 1.688423645320197,
|
|
"grad_norm": 15.373869655729267,
|
|
"learning_rate": 4.78508498327209e-06,
|
|
"loss": 1.393894076347351,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 1.6896551724137931,
|
|
"grad_norm": 12.537169523242483,
|
|
"learning_rate": 4.777925787642781e-06,
|
|
"loss": 1.8458061218261719,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 1.6908866995073892,
|
|
"grad_norm": 12.62635000347042,
|
|
"learning_rate": 4.770767048172948e-06,
|
|
"loss": 1.0604429244995117,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 1.6921182266009853,
|
|
"grad_norm": 10.74648464318841,
|
|
"learning_rate": 4.7636087795672565e-06,
|
|
"loss": 1.3261964321136475,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 1.6933497536945814,
|
|
"grad_norm": 9.576848082824501,
|
|
"learning_rate": 4.756450996529403e-06,
|
|
"loss": 1.6243900060653687,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 1.6945812807881775,
|
|
"grad_norm": 13.575969601291865,
|
|
"learning_rate": 4.749293713762091e-06,
|
|
"loss": 1.8087639808654785,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 1.6958128078817734,
|
|
"grad_norm": 8.48685992922433,
|
|
"learning_rate": 4.742136945966991e-06,
|
|
"loss": 1.9180892705917358,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 1.6970443349753694,
|
|
"grad_norm": 12.706829097920151,
|
|
"learning_rate": 4.734980707844716e-06,
|
|
"loss": 1.6797364950180054,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 1.6982758620689655,
|
|
"grad_norm": 10.281614379219002,
|
|
"learning_rate": 4.727825014094795e-06,
|
|
"loss": 0.9649052023887634,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 1.6995073891625616,
|
|
"grad_norm": 7.785652444986331,
|
|
"learning_rate": 4.720669879415637e-06,
|
|
"loss": 1.4185916185379028,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 1.7007389162561575,
|
|
"grad_norm": 10.73836489858494,
|
|
"learning_rate": 4.713515318504501e-06,
|
|
"loss": 1.8681238889694214,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 1.7019704433497536,
|
|
"grad_norm": 9.950804244952993,
|
|
"learning_rate": 4.706361346057468e-06,
|
|
"loss": 1.2830915451049805,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 1.7032019704433496,
|
|
"grad_norm": 18.988866497939586,
|
|
"learning_rate": 4.699207976769416e-06,
|
|
"loss": 1.0888878107070923,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 1.7044334975369457,
|
|
"grad_norm": 12.689992799691533,
|
|
"learning_rate": 4.692055225333972e-06,
|
|
"loss": 1.4439440965652466,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 1.7056650246305418,
|
|
"grad_norm": 7.183191439849756,
|
|
"learning_rate": 4.684903106443504e-06,
|
|
"loss": 1.0282858610153198,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 1.706896551724138,
|
|
"grad_norm": 13.261845343202891,
|
|
"learning_rate": 4.677751634789078e-06,
|
|
"loss": 1.6842533349990845,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 1.708128078817734,
|
|
"grad_norm": 14.612290761713947,
|
|
"learning_rate": 4.670600825060429e-06,
|
|
"loss": 1.5473763942718506,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 1.70935960591133,
|
|
"grad_norm": 19.73106165634469,
|
|
"learning_rate": 4.663450691945936e-06,
|
|
"loss": 1.839112401008606,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 1.7105911330049262,
|
|
"grad_norm": 10.917539579247505,
|
|
"learning_rate": 4.656301250132581e-06,
|
|
"loss": 1.5349544286727905,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 1.7118226600985222,
|
|
"grad_norm": 11.132766984186494,
|
|
"learning_rate": 4.649152514305934e-06,
|
|
"loss": 1.5788905620574951,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 1.7130541871921183,
|
|
"grad_norm": 10.21681078103426,
|
|
"learning_rate": 4.6420044991501104e-06,
|
|
"loss": 1.4541325569152832,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 1.7142857142857144,
|
|
"grad_norm": 9.227689699191664,
|
|
"learning_rate": 4.634857219347746e-06,
|
|
"loss": 1.8231902122497559,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 1.7155172413793105,
|
|
"grad_norm": 10.500866364265818,
|
|
"learning_rate": 4.627710689579968e-06,
|
|
"loss": 1.6302368640899658,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 1.7167487684729064,
|
|
"grad_norm": 17.60594188273056,
|
|
"learning_rate": 4.62056492452636e-06,
|
|
"loss": 1.497374415397644,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 1.7179802955665024,
|
|
"grad_norm": 15.287585545597818,
|
|
"learning_rate": 4.613419938864937e-06,
|
|
"loss": 1.1390448808670044,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 1.7192118226600985,
|
|
"grad_norm": 10.328419466218456,
|
|
"learning_rate": 4.606275747272112e-06,
|
|
"loss": 1.4320652484893799,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 1.7204433497536946,
|
|
"grad_norm": 9.176084187845012,
|
|
"learning_rate": 4.599132364422666e-06,
|
|
"loss": 1.2651784420013428,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 1.7216748768472905,
|
|
"grad_norm": 15.836729193949362,
|
|
"learning_rate": 4.5919898049897225e-06,
|
|
"loss": 1.719766616821289,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 1.7229064039408866,
|
|
"grad_norm": 12.937422715545681,
|
|
"learning_rate": 4.58484808364471e-06,
|
|
"loss": 1.707594394683838,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 1.7241379310344827,
|
|
"grad_norm": 14.730027238842638,
|
|
"learning_rate": 4.5777072150573355e-06,
|
|
"loss": 1.4608323574066162,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 1.7253694581280787,
|
|
"grad_norm": 9.894706364799527,
|
|
"learning_rate": 4.570567213895555e-06,
|
|
"loss": 1.5542428493499756,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 1.7266009852216748,
|
|
"grad_norm": 10.251938635324704,
|
|
"learning_rate": 4.563428094825546e-06,
|
|
"loss": 1.2282288074493408,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 1.727832512315271,
|
|
"grad_norm": 12.91095594163412,
|
|
"learning_rate": 4.556289872511669e-06,
|
|
"loss": 1.1870850324630737,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 1.729064039408867,
|
|
"grad_norm": 19.656749282746095,
|
|
"learning_rate": 4.549152561616445e-06,
|
|
"loss": 1.8125461339950562,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 1.730295566502463,
|
|
"grad_norm": 13.055834351152246,
|
|
"learning_rate": 4.542016176800527e-06,
|
|
"loss": 1.4419995546340942,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 1.7315270935960592,
|
|
"grad_norm": 12.427293973832745,
|
|
"learning_rate": 4.534880732722653e-06,
|
|
"loss": 1.8834543228149414,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 1.7327586206896552,
|
|
"grad_norm": 9.308568400780414,
|
|
"learning_rate": 4.527746244039644e-06,
|
|
"loss": 1.120203971862793,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 1.7339901477832513,
|
|
"grad_norm": 10.965136861668267,
|
|
"learning_rate": 4.5206127254063495e-06,
|
|
"loss": 0.9131630659103394,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 1.7352216748768474,
|
|
"grad_norm": 18.40693337146411,
|
|
"learning_rate": 4.513480191475627e-06,
|
|
"loss": 1.86919367313385,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 1.7364532019704435,
|
|
"grad_norm": 16.72423206220796,
|
|
"learning_rate": 4.506348656898316e-06,
|
|
"loss": 1.6573272943496704,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 1.7376847290640394,
|
|
"grad_norm": 12.29145112798753,
|
|
"learning_rate": 4.499218136323197e-06,
|
|
"loss": 1.2864340543746948,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 1.7389162561576355,
|
|
"grad_norm": 9.205794418080544,
|
|
"learning_rate": 4.492088644396972e-06,
|
|
"loss": 1.5519993305206299,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 1.7401477832512315,
|
|
"grad_norm": 10.304423144578244,
|
|
"learning_rate": 4.4849601957642295e-06,
|
|
"loss": 1.7556722164154053,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 1.7413793103448276,
|
|
"grad_norm": 12.170127229505125,
|
|
"learning_rate": 4.477832805067412e-06,
|
|
"loss": 1.6349589824676514,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 1.7426108374384235,
|
|
"grad_norm": 18.04544459439354,
|
|
"learning_rate": 4.470706486946797e-06,
|
|
"loss": 1.3583035469055176,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 1.7438423645320196,
|
|
"grad_norm": 16.035788014412844,
|
|
"learning_rate": 4.463581256040445e-06,
|
|
"loss": 1.5367932319641113,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 1.7450738916256157,
|
|
"grad_norm": 10.971734568897116,
|
|
"learning_rate": 4.456457126984196e-06,
|
|
"loss": 1.5078128576278687,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 1.7463054187192117,
|
|
"grad_norm": 8.435567334501869,
|
|
"learning_rate": 4.449334114411622e-06,
|
|
"loss": 1.8653573989868164,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 1.7475369458128078,
|
|
"grad_norm": 11.511023238806931,
|
|
"learning_rate": 4.4422122329539996e-06,
|
|
"loss": 1.1381313800811768,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 1.748768472906404,
|
|
"grad_norm": 9.115530827164923,
|
|
"learning_rate": 4.435091497240287e-06,
|
|
"loss": 1.4135184288024902,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"grad_norm": 19.148242044300115,
|
|
"learning_rate": 4.427971921897086e-06,
|
|
"loss": 1.2186479568481445,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 1.751231527093596,
|
|
"grad_norm": 11.735225834432583,
|
|
"learning_rate": 4.420853521548611e-06,
|
|
"loss": 1.3139259815216064,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 1.7524630541871922,
|
|
"grad_norm": 9.908228964820347,
|
|
"learning_rate": 4.413736310816669e-06,
|
|
"loss": 2.0143887996673584,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 1.7536945812807883,
|
|
"grad_norm": 11.72709904223931,
|
|
"learning_rate": 4.4066203043206226e-06,
|
|
"loss": 1.5800344944000244,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 1.7549261083743843,
|
|
"grad_norm": 13.351525970289408,
|
|
"learning_rate": 4.399505516677358e-06,
|
|
"loss": 1.449183702468872,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 1.7561576354679804,
|
|
"grad_norm": 14.449460918267059,
|
|
"learning_rate": 4.3923919625012605e-06,
|
|
"loss": 0.6957097053527832,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 1.7573891625615765,
|
|
"grad_norm": 16.656517142384814,
|
|
"learning_rate": 4.385279656404178e-06,
|
|
"loss": 1.0665647983551025,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 1.7586206896551724,
|
|
"grad_norm": 8.728452405950277,
|
|
"learning_rate": 4.3781686129953975e-06,
|
|
"loss": 1.2771016359329224,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 1.7598522167487685,
|
|
"grad_norm": 9.380843658329356,
|
|
"learning_rate": 4.371058846881614e-06,
|
|
"loss": 1.4222235679626465,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 1.7610837438423645,
|
|
"grad_norm": 18.6167744042239,
|
|
"learning_rate": 4.363950372666896e-06,
|
|
"loss": 2.1237497329711914,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 1.7623152709359606,
|
|
"grad_norm": 15.81534835320748,
|
|
"learning_rate": 4.356843204952657e-06,
|
|
"loss": 1.3875718116760254,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 1.7635467980295565,
|
|
"grad_norm": 11.325736932128727,
|
|
"learning_rate": 4.349737358337635e-06,
|
|
"loss": 1.2585203647613525,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 1.7647783251231526,
|
|
"grad_norm": 10.890833810787267,
|
|
"learning_rate": 4.3426328474178405e-06,
|
|
"loss": 1.3183746337890625,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 1.7660098522167487,
|
|
"grad_norm": 11.455742000334912,
|
|
"learning_rate": 4.335529686786554e-06,
|
|
"loss": 1.7174941301345825,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 1.7672413793103448,
|
|
"grad_norm": 9.946830568051285,
|
|
"learning_rate": 4.328427891034273e-06,
|
|
"loss": 1.9503614902496338,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 1.7684729064039408,
|
|
"grad_norm": 13.787149559571247,
|
|
"learning_rate": 4.321327474748697e-06,
|
|
"loss": 1.3797223567962646,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 1.769704433497537,
|
|
"grad_norm": 14.935693009519694,
|
|
"learning_rate": 4.3142284525146915e-06,
|
|
"loss": 1.4113730192184448,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 1.770935960591133,
|
|
"grad_norm": 11.978351079391912,
|
|
"learning_rate": 4.307130838914252e-06,
|
|
"loss": 2.383976697921753,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 1.772167487684729,
|
|
"grad_norm": 10.033247535379967,
|
|
"learning_rate": 4.300034648526489e-06,
|
|
"loss": 1.7687448263168335,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 1.7733990147783252,
|
|
"grad_norm": 15.25338664216219,
|
|
"learning_rate": 4.292939895927587e-06,
|
|
"loss": 1.5130079984664917,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 1.7746305418719213,
|
|
"grad_norm": 16.671641040457516,
|
|
"learning_rate": 4.2858465956907726e-06,
|
|
"loss": 1.0863475799560547,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 1.7758620689655173,
|
|
"grad_norm": 21.777249707868723,
|
|
"learning_rate": 4.278754762386297e-06,
|
|
"loss": 1.1504137516021729,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 1.7770935960591134,
|
|
"grad_norm": 10.960123964926488,
|
|
"learning_rate": 4.271664410581392e-06,
|
|
"loss": 1.1227596998214722,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 1.7783251231527095,
|
|
"grad_norm": 10.668478758892386,
|
|
"learning_rate": 4.264575554840248e-06,
|
|
"loss": 1.4501817226409912,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 1.7795566502463054,
|
|
"grad_norm": 8.508770946365994,
|
|
"learning_rate": 4.257488209723981e-06,
|
|
"loss": 0.48442721366882324,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 1.7807881773399015,
|
|
"grad_norm": 19.774025943442037,
|
|
"learning_rate": 4.25040238979061e-06,
|
|
"loss": 1.218263864517212,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 1.7820197044334976,
|
|
"grad_norm": 11.107941835251008,
|
|
"learning_rate": 4.243318109595014e-06,
|
|
"loss": 1.1711516380310059,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 1.7832512315270936,
|
|
"grad_norm": 14.393581709964357,
|
|
"learning_rate": 4.2362353836889126e-06,
|
|
"loss": 1.3575153350830078,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 1.7844827586206895,
|
|
"grad_norm": 15.514668018354685,
|
|
"learning_rate": 4.229154226620832e-06,
|
|
"loss": 2.6967573165893555,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 1.7857142857142856,
|
|
"grad_norm": 16.398555290477788,
|
|
"learning_rate": 4.2220746529360745e-06,
|
|
"loss": 2.2812700271606445,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 1.7869458128078817,
|
|
"grad_norm": 7.44372678737394,
|
|
"learning_rate": 4.2149966771766945e-06,
|
|
"loss": 1.2746225595474243,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 1.7881773399014778,
|
|
"grad_norm": 24.76309740203676,
|
|
"learning_rate": 4.207920313881459e-06,
|
|
"loss": 1.4866999387741089,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 1.7894088669950738,
|
|
"grad_norm": 12.129429402231283,
|
|
"learning_rate": 4.200845577585827e-06,
|
|
"loss": 1.4830021858215332,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 1.79064039408867,
|
|
"grad_norm": 14.927464924948287,
|
|
"learning_rate": 4.193772482821914e-06,
|
|
"loss": 2.5529747009277344,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 1.791871921182266,
|
|
"grad_norm": 10.342903175989482,
|
|
"learning_rate": 4.186701044118459e-06,
|
|
"loss": 1.413874626159668,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 1.793103448275862,
|
|
"grad_norm": 25.730295260232445,
|
|
"learning_rate": 4.179631276000807e-06,
|
|
"loss": 2.1567163467407227,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 1.7943349753694582,
|
|
"grad_norm": 30.70195031797357,
|
|
"learning_rate": 4.1725631929908684e-06,
|
|
"loss": 1.851858139038086,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 1.7955665024630543,
|
|
"grad_norm": 15.74317099171368,
|
|
"learning_rate": 4.165496809607089e-06,
|
|
"loss": 1.2765101194381714,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 1.7967980295566504,
|
|
"grad_norm": 10.995413854030392,
|
|
"learning_rate": 4.158432140364431e-06,
|
|
"loss": 1.9869401454925537,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 1.7980295566502464,
|
|
"grad_norm": 14.263851286153963,
|
|
"learning_rate": 4.151369199774325e-06,
|
|
"loss": 1.5319430828094482,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 1.7992610837438425,
|
|
"grad_norm": 10.506976676212952,
|
|
"learning_rate": 4.1443080023446605e-06,
|
|
"loss": 1.487468957901001,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 1.8004926108374384,
|
|
"grad_norm": 23.04137362584248,
|
|
"learning_rate": 4.137248562579742e-06,
|
|
"loss": 1.6152423620224,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 1.8017241379310345,
|
|
"grad_norm": 8.431434363474125,
|
|
"learning_rate": 4.130190894980262e-06,
|
|
"loss": 1.5262070894241333,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 1.8029556650246306,
|
|
"grad_norm": 9.129193697661835,
|
|
"learning_rate": 4.123135014043279e-06,
|
|
"loss": 1.6697289943695068,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 1.8041871921182266,
|
|
"grad_norm": 14.310350877734502,
|
|
"learning_rate": 4.116080934262175e-06,
|
|
"loss": 1.470789909362793,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 1.8054187192118225,
|
|
"grad_norm": 10.462627135626132,
|
|
"learning_rate": 4.109028670126635e-06,
|
|
"loss": 1.62421715259552,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 1.8066502463054186,
|
|
"grad_norm": 9.463272161807932,
|
|
"learning_rate": 4.101978236122613e-06,
|
|
"loss": 2.1249561309814453,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 1.8078817733990147,
|
|
"grad_norm": 10.291280772031216,
|
|
"learning_rate": 4.094929646732309e-06,
|
|
"loss": 1.3368217945098877,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 1.8091133004926108,
|
|
"grad_norm": 13.897028873169491,
|
|
"learning_rate": 4.087882916434126e-06,
|
|
"loss": 0.8684915900230408,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 1.8103448275862069,
|
|
"grad_norm": 9.114980502172534,
|
|
"learning_rate": 4.080838059702656e-06,
|
|
"loss": 1.6997764110565186,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 1.811576354679803,
|
|
"grad_norm": 15.00723435129453,
|
|
"learning_rate": 4.0737950910086354e-06,
|
|
"loss": 0.8933043479919434,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 1.812807881773399,
|
|
"grad_norm": 8.849165431721978,
|
|
"learning_rate": 4.0667540248189265e-06,
|
|
"loss": 1.689558982849121,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 1.814039408866995,
|
|
"grad_norm": 8.28022241305891,
|
|
"learning_rate": 4.059714875596486e-06,
|
|
"loss": 1.797630786895752,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 1.8152709359605912,
|
|
"grad_norm": 8.44088037241126,
|
|
"learning_rate": 4.052677657800327e-06,
|
|
"loss": 2.023120164871216,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 1.8165024630541873,
|
|
"grad_norm": 13.31766346957086,
|
|
"learning_rate": 4.045642385885497e-06,
|
|
"loss": 1.5412349700927734,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 1.8177339901477834,
|
|
"grad_norm": 11.713991741569846,
|
|
"learning_rate": 4.038609074303055e-06,
|
|
"loss": 0.786411464214325,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 1.8189655172413794,
|
|
"grad_norm": 12.300017528117012,
|
|
"learning_rate": 4.0315777375000185e-06,
|
|
"loss": 1.3470659255981445,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 1.8201970443349755,
|
|
"grad_norm": 10.149728213380525,
|
|
"learning_rate": 4.02454838991936e-06,
|
|
"loss": 1.3983774185180664,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 1.8214285714285714,
|
|
"grad_norm": 8.907879387840488,
|
|
"learning_rate": 4.017521045999961e-06,
|
|
"loss": 1.9945271015167236,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 1.8226600985221675,
|
|
"grad_norm": 14.485464092551117,
|
|
"learning_rate": 4.0104957201765874e-06,
|
|
"loss": 1.6103991270065308,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 1.8238916256157636,
|
|
"grad_norm": 10.17521459795804,
|
|
"learning_rate": 4.003472426879866e-06,
|
|
"loss": 1.2794644832611084,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 1.8251231527093597,
|
|
"grad_norm": 12.76602401465421,
|
|
"learning_rate": 3.996451180536237e-06,
|
|
"loss": 1.4485671520233154,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 1.8263546798029555,
|
|
"grad_norm": 10.794290467835673,
|
|
"learning_rate": 3.989431995567947e-06,
|
|
"loss": 1.1264885663986206,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 1.8275862068965516,
|
|
"grad_norm": 9.866085409894106,
|
|
"learning_rate": 3.982414886393002e-06,
|
|
"loss": 1.7849301099777222,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 1.8288177339901477,
|
|
"grad_norm": 12.201702589426084,
|
|
"learning_rate": 3.975399867425146e-06,
|
|
"loss": 2.4955849647521973,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 1.8300492610837438,
|
|
"grad_norm": 9.102568432625791,
|
|
"learning_rate": 3.96838695307383e-06,
|
|
"loss": 1.3440265655517578,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 1.8312807881773399,
|
|
"grad_norm": 8.145548979456889,
|
|
"learning_rate": 3.961376157744183e-06,
|
|
"loss": 1.7565090656280518,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 1.832512315270936,
|
|
"grad_norm": 10.525904376218351,
|
|
"learning_rate": 3.954367495836978e-06,
|
|
"loss": 2.086646318435669,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 1.833743842364532,
|
|
"grad_norm": 11.110223461103494,
|
|
"learning_rate": 3.947360981748607e-06,
|
|
"loss": 2.0356874465942383,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 1.8349753694581281,
|
|
"grad_norm": 18.648426152647907,
|
|
"learning_rate": 3.940356629871051e-06,
|
|
"loss": 1.3129501342773438,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 1.8362068965517242,
|
|
"grad_norm": 9.730568476467749,
|
|
"learning_rate": 3.933354454591851e-06,
|
|
"loss": 1.468184471130371,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 1.8374384236453203,
|
|
"grad_norm": 11.185413004826554,
|
|
"learning_rate": 3.926354470294077e-06,
|
|
"loss": 1.4110320806503296,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 1.8386699507389164,
|
|
"grad_norm": 12.98897769174535,
|
|
"learning_rate": 3.9193566913562915e-06,
|
|
"loss": 1.0595703125,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 1.8399014778325125,
|
|
"grad_norm": 10.530840377449582,
|
|
"learning_rate": 3.912361132152537e-06,
|
|
"loss": 1.628462791442871,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 1.8411330049261085,
|
|
"grad_norm": 14.948049661995398,
|
|
"learning_rate": 3.9053678070522904e-06,
|
|
"loss": 1.3903121948242188,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 1.8423645320197044,
|
|
"grad_norm": 9.309801488918017,
|
|
"learning_rate": 3.898376730420442e-06,
|
|
"loss": 1.6935603618621826,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 1.8435960591133005,
|
|
"grad_norm": 12.543386647265335,
|
|
"learning_rate": 3.891387916617261e-06,
|
|
"loss": 1.2785383462905884,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 1.8448275862068966,
|
|
"grad_norm": 16.302631057977127,
|
|
"learning_rate": 3.884401379998375e-06,
|
|
"loss": 0.9488393068313599,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 1.8460591133004927,
|
|
"grad_norm": 13.324215983939714,
|
|
"learning_rate": 3.877417134914724e-06,
|
|
"loss": 1.7822269201278687,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 1.8472906403940885,
|
|
"grad_norm": 18.86267601616338,
|
|
"learning_rate": 3.870435195712547e-06,
|
|
"loss": 2.0112462043762207,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 1.8485221674876846,
|
|
"grad_norm": 9.69652966834403,
|
|
"learning_rate": 3.863455576733349e-06,
|
|
"loss": 1.3558632135391235,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 1.8497536945812807,
|
|
"grad_norm": 11.295411751598015,
|
|
"learning_rate": 3.856478292313864e-06,
|
|
"loss": 1.34049391746521,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 1.8509852216748768,
|
|
"grad_norm": 14.146066291430358,
|
|
"learning_rate": 3.849503356786034e-06,
|
|
"loss": 1.5048649311065674,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 1.8522167487684729,
|
|
"grad_norm": 15.401780869737596,
|
|
"learning_rate": 3.842530784476971e-06,
|
|
"loss": 1.595820426940918,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 1.853448275862069,
|
|
"grad_norm": 14.910425010360937,
|
|
"learning_rate": 3.83556058970894e-06,
|
|
"loss": 1.4003782272338867,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 1.854679802955665,
|
|
"grad_norm": 7.9611824961674476,
|
|
"learning_rate": 3.828592786799318e-06,
|
|
"loss": 1.6082279682159424,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 1.8559113300492611,
|
|
"grad_norm": 10.255592390028927,
|
|
"learning_rate": 3.821627390060568e-06,
|
|
"loss": 1.7311087846755981,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 12.058780526558753,
|
|
"learning_rate": 3.8146644138002154e-06,
|
|
"loss": 1.2369680404663086,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 1.8583743842364533,
|
|
"grad_norm": 19.050247314658538,
|
|
"learning_rate": 3.807703872320809e-06,
|
|
"loss": 0.8267203569412231,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 1.8596059113300494,
|
|
"grad_norm": 10.351521057178017,
|
|
"learning_rate": 3.8007457799198977e-06,
|
|
"loss": 1.310041904449463,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 1.8608374384236455,
|
|
"grad_norm": 10.657442856658305,
|
|
"learning_rate": 3.79379015089e-06,
|
|
"loss": 1.483811378479004,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 1.8620689655172413,
|
|
"grad_norm": 11.888669790205059,
|
|
"learning_rate": 3.7868369995185734e-06,
|
|
"loss": 1.7339284420013428,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 1.8633004926108374,
|
|
"grad_norm": 10.593168183344854,
|
|
"learning_rate": 3.7798863400879894e-06,
|
|
"loss": 0.8915985822677612,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 1.8645320197044335,
|
|
"grad_norm": 10.734489115549072,
|
|
"learning_rate": 3.7729381868754985e-06,
|
|
"loss": 2.3413619995117188,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 1.8657635467980296,
|
|
"grad_norm": 9.967376867351366,
|
|
"learning_rate": 3.7659925541532006e-06,
|
|
"loss": 1.422214388847351,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 1.8669950738916257,
|
|
"grad_norm": 9.453365529159266,
|
|
"learning_rate": 3.759049456188022e-06,
|
|
"loss": 1.435701847076416,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 1.8682266009852215,
|
|
"grad_norm": 13.939960554468646,
|
|
"learning_rate": 3.752108907241682e-06,
|
|
"loss": 1.0702649354934692,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 1.8694581280788176,
|
|
"grad_norm": 14.375834204057075,
|
|
"learning_rate": 3.7451709215706643e-06,
|
|
"loss": 1.3625175952911377,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 1.8706896551724137,
|
|
"grad_norm": 14.38912976471083,
|
|
"learning_rate": 3.738235513426184e-06,
|
|
"loss": 0.6707335710525513,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 1.8719211822660098,
|
|
"grad_norm": 6.68307140655082,
|
|
"learning_rate": 3.7313026970541687e-06,
|
|
"loss": 0.9573410749435425,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 1.8731527093596059,
|
|
"grad_norm": 8.282620378739653,
|
|
"learning_rate": 3.7243724866952114e-06,
|
|
"loss": 1.625769853591919,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 1.874384236453202,
|
|
"grad_norm": 12.4684771792282,
|
|
"learning_rate": 3.717444896584562e-06,
|
|
"loss": 1.2327096462249756,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 1.875615763546798,
|
|
"grad_norm": 13.733071586817578,
|
|
"learning_rate": 3.710519940952085e-06,
|
|
"loss": 1.9436770677566528,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 1.8768472906403941,
|
|
"grad_norm": 11.428790282383929,
|
|
"learning_rate": 3.703597634022232e-06,
|
|
"loss": 1.260964274406433,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 1.8780788177339902,
|
|
"grad_norm": 10.74418094547702,
|
|
"learning_rate": 3.6966779900140193e-06,
|
|
"loss": 0.9448941946029663,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 1.8793103448275863,
|
|
"grad_norm": 14.784266967626037,
|
|
"learning_rate": 3.689761023140981e-06,
|
|
"loss": 1.0470240116119385,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 1.8805418719211824,
|
|
"grad_norm": 12.626289871406675,
|
|
"learning_rate": 3.6828467476111664e-06,
|
|
"loss": 1.290519118309021,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 1.8817733990147785,
|
|
"grad_norm": 8.368189133022403,
|
|
"learning_rate": 3.675935177627088e-06,
|
|
"loss": 1.6617997884750366,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 1.8830049261083743,
|
|
"grad_norm": 22.331563820583295,
|
|
"learning_rate": 3.6690263273857035e-06,
|
|
"loss": 2.624133825302124,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 1.8842364532019704,
|
|
"grad_norm": 11.125845605261798,
|
|
"learning_rate": 3.662120211078385e-06,
|
|
"loss": 1.189339518547058,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 1.8854679802955665,
|
|
"grad_norm": 11.063623504952298,
|
|
"learning_rate": 3.6552168428908886e-06,
|
|
"loss": 1.2045223712921143,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 1.8866995073891626,
|
|
"grad_norm": 21.05973901513674,
|
|
"learning_rate": 3.648316237003321e-06,
|
|
"loss": 1.4260770082473755,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 1.8879310344827587,
|
|
"grad_norm": 9.70528654459795,
|
|
"learning_rate": 3.6414184075901206e-06,
|
|
"loss": 1.1973135471343994,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 1.8891625615763545,
|
|
"grad_norm": 18.383885319550775,
|
|
"learning_rate": 3.6345233688200195e-06,
|
|
"loss": 1.4474105834960938,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 1.8903940886699506,
|
|
"grad_norm": 9.565993696711384,
|
|
"learning_rate": 3.62763113485602e-06,
|
|
"loss": 1.5732392072677612,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 1.8916256157635467,
|
|
"grad_norm": 18.830417927799424,
|
|
"learning_rate": 3.6207417198553624e-06,
|
|
"loss": 1.992612361907959,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 1.8928571428571428,
|
|
"grad_norm": 8.528733872408509,
|
|
"learning_rate": 3.6138551379694936e-06,
|
|
"loss": 1.8015589714050293,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 1.8940886699507389,
|
|
"grad_norm": 20.045548838222032,
|
|
"learning_rate": 3.606971403344044e-06,
|
|
"loss": 1.1887943744659424,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 1.895320197044335,
|
|
"grad_norm": 8.574686397942823,
|
|
"learning_rate": 3.6000905301187953e-06,
|
|
"loss": 1.035568118095398,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 1.896551724137931,
|
|
"grad_norm": 8.862677959647126,
|
|
"learning_rate": 3.5932125324276524e-06,
|
|
"loss": 1.8441094160079956,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 1.8977832512315271,
|
|
"grad_norm": 21.317551937175974,
|
|
"learning_rate": 3.586337424398609e-06,
|
|
"loss": 2.7305843830108643,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 1.8990147783251232,
|
|
"grad_norm": 12.092619936908829,
|
|
"learning_rate": 3.579465220153733e-06,
|
|
"loss": 2.1233139038085938,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 1.9002463054187193,
|
|
"grad_norm": 11.705206958955536,
|
|
"learning_rate": 3.5725959338091133e-06,
|
|
"loss": 1.232177495956421,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 1.9014778325123154,
|
|
"grad_norm": 7.174113743881224,
|
|
"learning_rate": 3.565729579474858e-06,
|
|
"loss": 1.89857017993927,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 1.9027093596059115,
|
|
"grad_norm": 15.788866110425763,
|
|
"learning_rate": 3.5588661712550464e-06,
|
|
"loss": 1.1281499862670898,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 1.9039408866995073,
|
|
"grad_norm": 10.470956040036935,
|
|
"learning_rate": 3.5520057232477073e-06,
|
|
"loss": 1.2526335716247559,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 1.9051724137931034,
|
|
"grad_norm": 9.301464059536526,
|
|
"learning_rate": 3.545148249544793e-06,
|
|
"loss": 1.8187229633331299,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 1.9064039408866995,
|
|
"grad_norm": 9.75451095353705,
|
|
"learning_rate": 3.5382937642321356e-06,
|
|
"loss": 2.5140726566314697,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 1.9076354679802956,
|
|
"grad_norm": 12.829934813861579,
|
|
"learning_rate": 3.5314422813894413e-06,
|
|
"loss": 1.4403750896453857,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 1.9088669950738915,
|
|
"grad_norm": 16.531679337353626,
|
|
"learning_rate": 3.524593815090241e-06,
|
|
"loss": 2.1372480392456055,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 1.9100985221674875,
|
|
"grad_norm": 15.674375359336546,
|
|
"learning_rate": 3.517748379401872e-06,
|
|
"loss": 1.3283928632736206,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 1.9113300492610836,
|
|
"grad_norm": 18.1169052598084,
|
|
"learning_rate": 3.510905988385449e-06,
|
|
"loss": 0.915777325630188,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 1.9125615763546797,
|
|
"grad_norm": 9.21207861248202,
|
|
"learning_rate": 3.5040666560958246e-06,
|
|
"loss": 1.4235864877700806,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 1.9137931034482758,
|
|
"grad_norm": 10.331880853016509,
|
|
"learning_rate": 3.497230396581579e-06,
|
|
"loss": 1.0727063417434692,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 1.9150246305418719,
|
|
"grad_norm": 6.2183233261424675,
|
|
"learning_rate": 3.4903972238849727e-06,
|
|
"loss": 1.2492493391036987,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 1.916256157635468,
|
|
"grad_norm": 8.689347093090742,
|
|
"learning_rate": 3.483567152041928e-06,
|
|
"loss": 1.855743408203125,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 1.917487684729064,
|
|
"grad_norm": 13.400775432098582,
|
|
"learning_rate": 3.4767401950820003e-06,
|
|
"loss": 1.2882115840911865,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 1.9187192118226601,
|
|
"grad_norm": 17.24953530796186,
|
|
"learning_rate": 3.469916367028345e-06,
|
|
"loss": 1.0586508512496948,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 1.9199507389162562,
|
|
"grad_norm": 7.936641918837841,
|
|
"learning_rate": 3.4630956818976875e-06,
|
|
"loss": 1.6678158044815063,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 1.9211822660098523,
|
|
"grad_norm": 7.533268622313887,
|
|
"learning_rate": 3.4562781537003e-06,
|
|
"loss": 1.242276906967163,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 1.9224137931034484,
|
|
"grad_norm": 11.64160436044446,
|
|
"learning_rate": 3.4494637964399723e-06,
|
|
"loss": 1.1909584999084473,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 1.9236453201970445,
|
|
"grad_norm": 10.255728334199201,
|
|
"learning_rate": 3.4426526241139778e-06,
|
|
"loss": 1.7636524438858032,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 1.9248768472906403,
|
|
"grad_norm": 9.49054957516609,
|
|
"learning_rate": 3.4358446507130503e-06,
|
|
"loss": 1.709825873374939,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 1.9261083743842364,
|
|
"grad_norm": 10.818350574028944,
|
|
"learning_rate": 3.4290398902213473e-06,
|
|
"loss": 1.0826925039291382,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 1.9273399014778325,
|
|
"grad_norm": 8.939498431984473,
|
|
"learning_rate": 3.4222383566164314e-06,
|
|
"loss": 1.2868252992630005,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 1.9285714285714286,
|
|
"grad_norm": 8.295112275795647,
|
|
"learning_rate": 3.4154400638692376e-06,
|
|
"loss": 1.9238274097442627,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 1.9298029556650245,
|
|
"grad_norm": 15.317456416232107,
|
|
"learning_rate": 3.408645025944042e-06,
|
|
"loss": 1.615818977355957,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 1.9310344827586206,
|
|
"grad_norm": 10.763654992556582,
|
|
"learning_rate": 3.4018532567984326e-06,
|
|
"loss": 1.124712586402893,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 1.9322660098522166,
|
|
"grad_norm": 12.365184508586257,
|
|
"learning_rate": 3.3950647703832907e-06,
|
|
"loss": 1.0411077737808228,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 1.9334975369458127,
|
|
"grad_norm": 12.632249203055522,
|
|
"learning_rate": 3.3882795806427437e-06,
|
|
"loss": 1.4247188568115234,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 1.9347290640394088,
|
|
"grad_norm": 9.103913844192295,
|
|
"learning_rate": 3.3814977015141576e-06,
|
|
"loss": 1.9558757543563843,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 1.935960591133005,
|
|
"grad_norm": 13.783502778663575,
|
|
"learning_rate": 3.3747191469280917e-06,
|
|
"loss": 1.4765770435333252,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 1.937192118226601,
|
|
"grad_norm": 12.11586545643866,
|
|
"learning_rate": 3.3679439308082777e-06,
|
|
"loss": 1.2025914192199707,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 1.938423645320197,
|
|
"grad_norm": 8.389746847537833,
|
|
"learning_rate": 3.361172067071595e-06,
|
|
"loss": 1.938293695449829,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 1.9396551724137931,
|
|
"grad_norm": 24.18653835255333,
|
|
"learning_rate": 3.3544035696280264e-06,
|
|
"loss": 1.9626538753509521,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 1.9408866995073892,
|
|
"grad_norm": 16.707227251461827,
|
|
"learning_rate": 3.34763845238065e-06,
|
|
"loss": 2.4771430492401123,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 1.9421182266009853,
|
|
"grad_norm": 9.24643762447737,
|
|
"learning_rate": 3.340876729225595e-06,
|
|
"loss": 1.5694981813430786,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 1.9433497536945814,
|
|
"grad_norm": 12.976086056891674,
|
|
"learning_rate": 3.334118414052021e-06,
|
|
"loss": 1.3358147144317627,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 1.9445812807881775,
|
|
"grad_norm": 10.05009781073385,
|
|
"learning_rate": 3.327363520742087e-06,
|
|
"loss": 1.6929140090942383,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 1.9458128078817734,
|
|
"grad_norm": 14.460477433027636,
|
|
"learning_rate": 3.320612063170926e-06,
|
|
"loss": 1.1454588174819946,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 1.9470443349753694,
|
|
"grad_norm": 15.890241219417488,
|
|
"learning_rate": 3.313864055206607e-06,
|
|
"loss": 1.3037209510803223,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 1.9482758620689655,
|
|
"grad_norm": 18.657112628058126,
|
|
"learning_rate": 3.3071195107101163e-06,
|
|
"loss": 1.2016770839691162,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 1.9495073891625616,
|
|
"grad_norm": 8.600208828774889,
|
|
"learning_rate": 3.3003784435353304e-06,
|
|
"loss": 1.5525718927383423,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 1.9507389162561575,
|
|
"grad_norm": 12.025296512404239,
|
|
"learning_rate": 3.293640867528978e-06,
|
|
"loss": 1.293796420097351,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 1.9519704433497536,
|
|
"grad_norm": 14.973626912716192,
|
|
"learning_rate": 3.2869067965306178e-06,
|
|
"loss": 1.544161081314087,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 1.9532019704433496,
|
|
"grad_norm": 12.518775732631475,
|
|
"learning_rate": 3.2801762443726087e-06,
|
|
"loss": 1.584174633026123,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 1.9544334975369457,
|
|
"grad_norm": 9.595940744200961,
|
|
"learning_rate": 3.273449224880081e-06,
|
|
"loss": 1.4985432624816895,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 1.9556650246305418,
|
|
"grad_norm": 14.194278219604545,
|
|
"learning_rate": 3.2667257518709124e-06,
|
|
"loss": 1.4310071468353271,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 1.956896551724138,
|
|
"grad_norm": 6.232251277924355,
|
|
"learning_rate": 3.260005839155691e-06,
|
|
"loss": 1.2174272537231445,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 1.958128078817734,
|
|
"grad_norm": 8.206207570805137,
|
|
"learning_rate": 3.2532895005376943e-06,
|
|
"loss": 1.4618067741394043,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 1.95935960591133,
|
|
"grad_norm": 9.028580710101858,
|
|
"learning_rate": 3.2465767498128596e-06,
|
|
"loss": 1.2786412239074707,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 1.9605911330049262,
|
|
"grad_norm": 14.53956960212149,
|
|
"learning_rate": 3.2398676007697495e-06,
|
|
"loss": 1.152226209640503,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 1.9618226600985222,
|
|
"grad_norm": 9.573027989064228,
|
|
"learning_rate": 3.233162067189533e-06,
|
|
"loss": 1.8345131874084473,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 1.9630541871921183,
|
|
"grad_norm": 12.386896406400556,
|
|
"learning_rate": 3.2264601628459513e-06,
|
|
"loss": 1.310433030128479,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 1.9642857142857144,
|
|
"grad_norm": 18.010952199354442,
|
|
"learning_rate": 3.2197619015052893e-06,
|
|
"loss": 2.3967676162719727,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 1.9655172413793105,
|
|
"grad_norm": 8.956387198130372,
|
|
"learning_rate": 3.2130672969263543e-06,
|
|
"loss": 1.7937273979187012,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 1.9667487684729064,
|
|
"grad_norm": 8.393117465017726,
|
|
"learning_rate": 3.206376362860432e-06,
|
|
"loss": 2.0265514850616455,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 1.9679802955665024,
|
|
"grad_norm": 21.13089299468655,
|
|
"learning_rate": 3.1996891130512796e-06,
|
|
"loss": 1.9514051675796509,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 1.9692118226600985,
|
|
"grad_norm": 13.738115707885685,
|
|
"learning_rate": 3.1930055612350795e-06,
|
|
"loss": 1.4068338871002197,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 1.9704433497536946,
|
|
"grad_norm": 11.875525005970715,
|
|
"learning_rate": 3.18632572114042e-06,
|
|
"loss": 1.9438577890396118,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 1.9716748768472905,
|
|
"grad_norm": 12.6800038807384,
|
|
"learning_rate": 3.1796496064882677e-06,
|
|
"loss": 1.432902455329895,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 1.9729064039408866,
|
|
"grad_norm": 10.748520734517344,
|
|
"learning_rate": 3.172977230991935e-06,
|
|
"loss": 1.6505646705627441,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 1.9741379310344827,
|
|
"grad_norm": 9.807738223531803,
|
|
"learning_rate": 3.1663086083570493e-06,
|
|
"loss": 2.332062005996704,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 1.9753694581280787,
|
|
"grad_norm": 7.777919459923873,
|
|
"learning_rate": 3.159643752281536e-06,
|
|
"loss": 1.737352967262268,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 1.9766009852216748,
|
|
"grad_norm": 12.828820681008972,
|
|
"learning_rate": 3.152982676455581e-06,
|
|
"loss": 1.5183820724487305,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 1.977832512315271,
|
|
"grad_norm": 12.058545370748947,
|
|
"learning_rate": 3.1463253945616056e-06,
|
|
"loss": 1.5560420751571655,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 1.979064039408867,
|
|
"grad_norm": 12.080370196486308,
|
|
"learning_rate": 3.1396719202742375e-06,
|
|
"loss": 2.2159786224365234,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 1.980295566502463,
|
|
"grad_norm": 11.349700550180101,
|
|
"learning_rate": 3.133022267260283e-06,
|
|
"loss": 3.4431471824645996,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 1.9815270935960592,
|
|
"grad_norm": 15.960971258656029,
|
|
"learning_rate": 3.1263764491786984e-06,
|
|
"loss": 1.0674099922180176,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 1.9827586206896552,
|
|
"grad_norm": 10.915353003367029,
|
|
"learning_rate": 3.1197344796805675e-06,
|
|
"loss": 1.2427492141723633,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 1.9839901477832513,
|
|
"grad_norm": 13.554860694250717,
|
|
"learning_rate": 3.1130963724090626e-06,
|
|
"loss": 1.5895799398422241,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 1.9852216748768474,
|
|
"grad_norm": 8.558375384118374,
|
|
"learning_rate": 3.1064621409994245e-06,
|
|
"loss": 1.3781355619430542,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 1.9864532019704435,
|
|
"grad_norm": 17.36928034840775,
|
|
"learning_rate": 3.0998317990789378e-06,
|
|
"loss": 1.3307732343673706,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 1.9876847290640394,
|
|
"grad_norm": 13.9784605520041,
|
|
"learning_rate": 3.0932053602668876e-06,
|
|
"loss": 1.340241551399231,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 1.9889162561576355,
|
|
"grad_norm": 9.756766918680166,
|
|
"learning_rate": 3.0865828381745515e-06,
|
|
"loss": 1.5866634845733643,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 1.9901477832512315,
|
|
"grad_norm": 14.514845100981475,
|
|
"learning_rate": 3.0799642464051573e-06,
|
|
"loss": 1.363608717918396,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 1.9913793103448276,
|
|
"grad_norm": 13.803723137880525,
|
|
"learning_rate": 3.0733495985538575e-06,
|
|
"loss": 0.8918144106864929,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 1.9926108374384235,
|
|
"grad_norm": 18.044340986569775,
|
|
"learning_rate": 3.0667389082077114e-06,
|
|
"loss": 1.4538538455963135,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 1.9938423645320196,
|
|
"grad_norm": 11.435301654271841,
|
|
"learning_rate": 3.0601321889456378e-06,
|
|
"loss": 1.6913137435913086,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 1.9950738916256157,
|
|
"grad_norm": 9.858778951797417,
|
|
"learning_rate": 3.0535294543384074e-06,
|
|
"loss": 1.4266109466552734,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 1.9963054187192117,
|
|
"grad_norm": 22.051543439765215,
|
|
"learning_rate": 3.046930717948604e-06,
|
|
"loss": 1.2479441165924072,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 1.9975369458128078,
|
|
"grad_norm": 9.286359312990374,
|
|
"learning_rate": 3.0403359933305965e-06,
|
|
"loss": 2.138500213623047,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 1.998768472906404,
|
|
"grad_norm": 7.759425069440999,
|
|
"learning_rate": 3.033745294030517e-06,
|
|
"loss": 1.7762420177459717,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 16.72677410836059,
|
|
"learning_rate": 3.0271586335862258e-06,
|
|
"loss": 0.858219563961029,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 2.001231527093596,
|
|
"grad_norm": 14.643925249137768,
|
|
"learning_rate": 3.0205760255272874e-06,
|
|
"loss": 0.5493918657302856,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 2.002463054187192,
|
|
"grad_norm": 6.249448248328766,
|
|
"learning_rate": 3.013997483374944e-06,
|
|
"loss": 0.25155016779899597,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 2.0036945812807883,
|
|
"grad_norm": 12.443278487913815,
|
|
"learning_rate": 3.007423020642084e-06,
|
|
"loss": 0.7727752923965454,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 2.0049261083743843,
|
|
"grad_norm": 8.331944645794822,
|
|
"learning_rate": 3.0008526508332216e-06,
|
|
"loss": 0.43595510721206665,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 2.0061576354679804,
|
|
"grad_norm": 12.199248861649188,
|
|
"learning_rate": 2.9942863874444565e-06,
|
|
"loss": 0.3856297433376312,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 2.0073891625615765,
|
|
"grad_norm": 10.194964984786639,
|
|
"learning_rate": 2.987724243963458e-06,
|
|
"loss": 0.8458558917045593,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 2.0086206896551726,
|
|
"grad_norm": 10.400619109316716,
|
|
"learning_rate": 2.981166233869429e-06,
|
|
"loss": 0.46873772144317627,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 2.0098522167487687,
|
|
"grad_norm": 7.542731982064387,
|
|
"learning_rate": 2.9746123706330886e-06,
|
|
"loss": 0.42779290676116943,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 2.0110837438423643,
|
|
"grad_norm": 9.375159014521008,
|
|
"learning_rate": 2.9680626677166324e-06,
|
|
"loss": 0.627717912197113,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 2.0123152709359604,
|
|
"grad_norm": 7.3118642493157155,
|
|
"learning_rate": 2.9615171385737107e-06,
|
|
"loss": 1.0879265069961548,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 2.0135467980295565,
|
|
"grad_norm": 10.467281128404773,
|
|
"learning_rate": 2.9549757966494053e-06,
|
|
"loss": 0.6282559037208557,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 2.0147783251231526,
|
|
"grad_norm": 11.126192184454366,
|
|
"learning_rate": 2.9484386553801875e-06,
|
|
"loss": 0.5774171352386475,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 2.0160098522167487,
|
|
"grad_norm": 10.360450434232337,
|
|
"learning_rate": 2.9419057281939106e-06,
|
|
"loss": 0.38788995146751404,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 2.0172413793103448,
|
|
"grad_norm": 13.340772113855921,
|
|
"learning_rate": 2.935377028509766e-06,
|
|
"loss": 1.1726861000061035,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 2.018472906403941,
|
|
"grad_norm": 9.74656398362734,
|
|
"learning_rate": 2.9288525697382623e-06,
|
|
"loss": 0.7854858636856079,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 2.019704433497537,
|
|
"grad_norm": 11.086797967435993,
|
|
"learning_rate": 2.922332365281201e-06,
|
|
"loss": 0.25507253408432007,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 2.020935960591133,
|
|
"grad_norm": 13.738902835067712,
|
|
"learning_rate": 2.9158164285316356e-06,
|
|
"loss": 0.5835862755775452,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 2.022167487684729,
|
|
"grad_norm": 12.908512466729006,
|
|
"learning_rate": 2.9093047728738604e-06,
|
|
"loss": 0.49123138189315796,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 2.023399014778325,
|
|
"grad_norm": 6.708189349635942,
|
|
"learning_rate": 2.9027974116833756e-06,
|
|
"loss": 0.20273317396640778,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 2.0246305418719213,
|
|
"grad_norm": 12.517783768989945,
|
|
"learning_rate": 2.896294358326862e-06,
|
|
"loss": 0.46980565786361694,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 2.0258620689655173,
|
|
"grad_norm": 12.98671748044912,
|
|
"learning_rate": 2.889795626162143e-06,
|
|
"loss": 0.23243547976016998,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 2.0270935960591134,
|
|
"grad_norm": 21.52509717224934,
|
|
"learning_rate": 2.883301228538178e-06,
|
|
"loss": 1.3259830474853516,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 2.0283251231527095,
|
|
"grad_norm": 10.539113199927511,
|
|
"learning_rate": 2.8768111787950105e-06,
|
|
"loss": 0.3021068274974823,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 2.0295566502463056,
|
|
"grad_norm": 9.17401806944997,
|
|
"learning_rate": 2.8703254902637646e-06,
|
|
"loss": 0.3854427933692932,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 2.0307881773399017,
|
|
"grad_norm": 14.201306893364228,
|
|
"learning_rate": 2.8638441762665957e-06,
|
|
"loss": 0.3356427848339081,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 2.0320197044334973,
|
|
"grad_norm": 17.83956908779597,
|
|
"learning_rate": 2.857367250116682e-06,
|
|
"loss": 0.4785861372947693,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 2.0332512315270934,
|
|
"grad_norm": 7.19305688493566,
|
|
"learning_rate": 2.8508947251181885e-06,
|
|
"loss": 0.1944020539522171,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 2.0344827586206895,
|
|
"grad_norm": 10.046970652926046,
|
|
"learning_rate": 2.8444266145662284e-06,
|
|
"loss": 0.29677248001098633,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 2.0357142857142856,
|
|
"grad_norm": 24.647186410998657,
|
|
"learning_rate": 2.8379629317468604e-06,
|
|
"loss": 1.517862319946289,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 2.0369458128078817,
|
|
"grad_norm": 13.23680169167266,
|
|
"learning_rate": 2.8315036899370442e-06,
|
|
"loss": 0.5191118717193604,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 2.0381773399014778,
|
|
"grad_norm": 13.059908687808356,
|
|
"learning_rate": 2.825048902404612e-06,
|
|
"loss": 0.42354950308799744,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 2.039408866995074,
|
|
"grad_norm": 12.282344754345834,
|
|
"learning_rate": 2.818598582408255e-06,
|
|
"loss": 0.6974557638168335,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 2.04064039408867,
|
|
"grad_norm": 11.678426390945974,
|
|
"learning_rate": 2.8121527431974838e-06,
|
|
"loss": 0.8337801694869995,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 2.041871921182266,
|
|
"grad_norm": 11.653625925472546,
|
|
"learning_rate": 2.805711398012604e-06,
|
|
"loss": 0.48300114274024963,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 2.043103448275862,
|
|
"grad_norm": 8.699921165351283,
|
|
"learning_rate": 2.799274560084688e-06,
|
|
"loss": 0.2231900542974472,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 2.044334975369458,
|
|
"grad_norm": 11.080926890704283,
|
|
"learning_rate": 2.7928422426355554e-06,
|
|
"loss": 0.7431713342666626,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 2.0455665024630543,
|
|
"grad_norm": 10.18242138749306,
|
|
"learning_rate": 2.7864144588777403e-06,
|
|
"loss": 0.5905585289001465,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 2.0467980295566504,
|
|
"grad_norm": 12.79007023215843,
|
|
"learning_rate": 2.779991222014459e-06,
|
|
"loss": 0.5379045009613037,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 2.0480295566502464,
|
|
"grad_norm": 10.204627357114346,
|
|
"learning_rate": 2.77357254523959e-06,
|
|
"loss": 0.4073173403739929,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 2.0492610837438425,
|
|
"grad_norm": 16.54029756463169,
|
|
"learning_rate": 2.767158441737646e-06,
|
|
"loss": 0.37792834639549255,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 2.0504926108374386,
|
|
"grad_norm": 12.199606214048373,
|
|
"learning_rate": 2.7607489246837505e-06,
|
|
"loss": 0.5250200629234314,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 2.0517241379310347,
|
|
"grad_norm": 15.23569807667072,
|
|
"learning_rate": 2.754344007243594e-06,
|
|
"loss": 0.7716425061225891,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 2.0529556650246303,
|
|
"grad_norm": 7.925817755895629,
|
|
"learning_rate": 2.74794370257343e-06,
|
|
"loss": 0.6505113244056702,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 2.0541871921182264,
|
|
"grad_norm": 13.232372975936459,
|
|
"learning_rate": 2.741548023820037e-06,
|
|
"loss": 1.237591028213501,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 2.0554187192118225,
|
|
"grad_norm": 7.821194651549222,
|
|
"learning_rate": 2.7351569841206792e-06,
|
|
"loss": 0.33151859045028687,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 2.0566502463054186,
|
|
"grad_norm": 9.91473906287112,
|
|
"learning_rate": 2.728770596603105e-06,
|
|
"loss": 0.42522889375686646,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 2.0578817733990147,
|
|
"grad_norm": 10.678926533172987,
|
|
"learning_rate": 2.722388874385503e-06,
|
|
"loss": 0.3359280824661255,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 2.0591133004926108,
|
|
"grad_norm": 9.193563725792906,
|
|
"learning_rate": 2.716011830576475e-06,
|
|
"loss": 0.23182198405265808,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 2.060344827586207,
|
|
"grad_norm": 13.12855060675622,
|
|
"learning_rate": 2.7096394782750186e-06,
|
|
"loss": 0.30262982845306396,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 2.061576354679803,
|
|
"grad_norm": 7.791350721856929,
|
|
"learning_rate": 2.7032718305704887e-06,
|
|
"loss": 0.23311859369277954,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 2.062807881773399,
|
|
"grad_norm": 12.221292312776084,
|
|
"learning_rate": 2.696908900542584e-06,
|
|
"loss": 0.6328019499778748,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 2.064039408866995,
|
|
"grad_norm": 10.8289045782447,
|
|
"learning_rate": 2.690550701261304e-06,
|
|
"loss": 0.30473750829696655,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 2.065270935960591,
|
|
"grad_norm": 8.921318423622994,
|
|
"learning_rate": 2.684197245786938e-06,
|
|
"loss": 0.2824372947216034,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 2.0665024630541873,
|
|
"grad_norm": 15.101179094698006,
|
|
"learning_rate": 2.677848547170029e-06,
|
|
"loss": 0.3543265163898468,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 2.0677339901477834,
|
|
"grad_norm": 8.79612621311314,
|
|
"learning_rate": 2.671504618451348e-06,
|
|
"loss": 0.6176484823226929,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 2.0689655172413794,
|
|
"grad_norm": 10.985306627235934,
|
|
"learning_rate": 2.665165472661866e-06,
|
|
"loss": 0.5290611386299133,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 2.0701970443349755,
|
|
"grad_norm": 8.398062035832517,
|
|
"learning_rate": 2.658831122822735e-06,
|
|
"loss": 0.5321454405784607,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 2.0714285714285716,
|
|
"grad_norm": 11.540193919775621,
|
|
"learning_rate": 2.6525015819452504e-06,
|
|
"loss": 0.27902156114578247,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 2.0726600985221673,
|
|
"grad_norm": 12.60801369495054,
|
|
"learning_rate": 2.6461768630308326e-06,
|
|
"loss": 0.46582847833633423,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 2.0738916256157633,
|
|
"grad_norm": 15.322116984466021,
|
|
"learning_rate": 2.6398569790710007e-06,
|
|
"loss": 0.651951014995575,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 2.0751231527093594,
|
|
"grad_norm": 9.74038331873093,
|
|
"learning_rate": 2.633541943047334e-06,
|
|
"loss": 0.36612239480018616,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 2.0763546798029555,
|
|
"grad_norm": 7.730903286765135,
|
|
"learning_rate": 2.6272317679314573e-06,
|
|
"loss": 0.22278031706809998,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 2.0775862068965516,
|
|
"grad_norm": 7.781634586207103,
|
|
"learning_rate": 2.620926466685013e-06,
|
|
"loss": 0.33012956380844116,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 2.0788177339901477,
|
|
"grad_norm": 9.397683957095191,
|
|
"learning_rate": 2.6146260522596334e-06,
|
|
"loss": 0.7396690845489502,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 2.0800492610837438,
|
|
"grad_norm": 11.988801603692485,
|
|
"learning_rate": 2.608330537596907e-06,
|
|
"loss": 0.8257578611373901,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 2.08128078817734,
|
|
"grad_norm": 8.855369489146483,
|
|
"learning_rate": 2.6020399356283586e-06,
|
|
"loss": 0.4538348317146301,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 2.082512315270936,
|
|
"grad_norm": 9.991399228257757,
|
|
"learning_rate": 2.595754259275428e-06,
|
|
"loss": 0.992777943611145,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 2.083743842364532,
|
|
"grad_norm": 11.406818947912145,
|
|
"learning_rate": 2.589473521449434e-06,
|
|
"loss": 0.346379816532135,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 2.084975369458128,
|
|
"grad_norm": 18.61665504561422,
|
|
"learning_rate": 2.583197735051546e-06,
|
|
"loss": 0.4523533284664154,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 2.086206896551724,
|
|
"grad_norm": 9.296672908995824,
|
|
"learning_rate": 2.576926912972771e-06,
|
|
"loss": 0.11842907965183258,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 2.0874384236453203,
|
|
"grad_norm": 8.459525770988064,
|
|
"learning_rate": 2.5706610680939186e-06,
|
|
"loss": 0.381897896528244,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 2.0886699507389164,
|
|
"grad_norm": 11.109371262298351,
|
|
"learning_rate": 2.564400213285564e-06,
|
|
"loss": 0.3824227452278137,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 2.0899014778325125,
|
|
"grad_norm": 7.622915250326246,
|
|
"learning_rate": 2.5581443614080433e-06,
|
|
"loss": 0.4153192639350891,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 2.0911330049261085,
|
|
"grad_norm": 12.840140963343943,
|
|
"learning_rate": 2.5518935253114153e-06,
|
|
"loss": 0.3284783959388733,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 2.0923645320197046,
|
|
"grad_norm": 9.586633818986163,
|
|
"learning_rate": 2.545647717835428e-06,
|
|
"loss": 0.7730638980865479,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 2.0935960591133007,
|
|
"grad_norm": 9.329889124511917,
|
|
"learning_rate": 2.539406951809512e-06,
|
|
"loss": 0.31647253036499023,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 2.0948275862068964,
|
|
"grad_norm": 12.004447197114908,
|
|
"learning_rate": 2.53317124005273e-06,
|
|
"loss": 0.5977708101272583,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 2.0960591133004924,
|
|
"grad_norm": 8.69992433934411,
|
|
"learning_rate": 2.5269405953737735e-06,
|
|
"loss": 0.2646758556365967,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 2.0972906403940885,
|
|
"grad_norm": 8.02489022856674,
|
|
"learning_rate": 2.5207150305709167e-06,
|
|
"loss": 0.5242122411727905,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 2.0985221674876846,
|
|
"grad_norm": 13.343080912035035,
|
|
"learning_rate": 2.5144945584320056e-06,
|
|
"loss": 0.43271976709365845,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 2.0997536945812807,
|
|
"grad_norm": 16.386560709178422,
|
|
"learning_rate": 2.5082791917344256e-06,
|
|
"loss": 0.902009904384613,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 2.100985221674877,
|
|
"grad_norm": 8.363747351262921,
|
|
"learning_rate": 2.5020689432450706e-06,
|
|
"loss": 0.5218071937561035,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 2.102216748768473,
|
|
"grad_norm": 13.441523308623053,
|
|
"learning_rate": 2.495863825720322e-06,
|
|
"loss": 0.7475143671035767,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 2.103448275862069,
|
|
"grad_norm": 9.20779623087441,
|
|
"learning_rate": 2.4896638519060257e-06,
|
|
"loss": 0.31655290722846985,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 2.104679802955665,
|
|
"grad_norm": 12.453919142267711,
|
|
"learning_rate": 2.4834690345374608e-06,
|
|
"loss": 0.30808842182159424,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 2.105911330049261,
|
|
"grad_norm": 12.241452294332287,
|
|
"learning_rate": 2.477279386339309e-06,
|
|
"loss": 0.7037611603736877,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 2.107142857142857,
|
|
"grad_norm": 14.091630182879387,
|
|
"learning_rate": 2.471094920025644e-06,
|
|
"loss": 0.4699273407459259,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 2.1083743842364533,
|
|
"grad_norm": 13.920276564221119,
|
|
"learning_rate": 2.4649156482998873e-06,
|
|
"loss": 0.5032830238342285,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 2.1096059113300494,
|
|
"grad_norm": 12.895772980307312,
|
|
"learning_rate": 2.45874158385479e-06,
|
|
"loss": 1.2563080787658691,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 2.1108374384236455,
|
|
"grad_norm": 7.446774906593091,
|
|
"learning_rate": 2.4525727393724136e-06,
|
|
"loss": 0.29728978872299194,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 2.1120689655172415,
|
|
"grad_norm": 9.446867560016528,
|
|
"learning_rate": 2.446409127524094e-06,
|
|
"loss": 0.2391032576560974,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 2.1133004926108376,
|
|
"grad_norm": 13.287475847065688,
|
|
"learning_rate": 2.4402507609704163e-06,
|
|
"loss": 0.4612117409706116,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 2.1145320197044333,
|
|
"grad_norm": 9.000836025460185,
|
|
"learning_rate": 2.4340976523611957e-06,
|
|
"loss": 0.36539849638938904,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 2.1157635467980294,
|
|
"grad_norm": 6.954876550316873,
|
|
"learning_rate": 2.427949814335443e-06,
|
|
"loss": 0.2918080687522888,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 2.1169950738916254,
|
|
"grad_norm": 12.290862216055704,
|
|
"learning_rate": 2.4218072595213467e-06,
|
|
"loss": 0.4508627653121948,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 2.1182266009852215,
|
|
"grad_norm": 10.395578945684981,
|
|
"learning_rate": 2.4156700005362384e-06,
|
|
"loss": 0.43477705121040344,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 2.1194581280788176,
|
|
"grad_norm": 13.97203519429258,
|
|
"learning_rate": 2.409538049986576e-06,
|
|
"loss": 0.36739200353622437,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 2.1206896551724137,
|
|
"grad_norm": 10.000232328294244,
|
|
"learning_rate": 2.403411420467916e-06,
|
|
"loss": 0.722801923751831,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 2.12192118226601,
|
|
"grad_norm": 8.047857628714285,
|
|
"learning_rate": 2.3972901245648724e-06,
|
|
"loss": 0.3729158043861389,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 2.123152709359606,
|
|
"grad_norm": 9.083191980371518,
|
|
"learning_rate": 2.3911741748511163e-06,
|
|
"loss": 0.741644024848938,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 2.124384236453202,
|
|
"grad_norm": 11.04614906019948,
|
|
"learning_rate": 2.385063583889335e-06,
|
|
"loss": 0.21925917267799377,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 2.125615763546798,
|
|
"grad_norm": 8.204563983460345,
|
|
"learning_rate": 2.378958364231202e-06,
|
|
"loss": 0.3161308765411377,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 2.126847290640394,
|
|
"grad_norm": 9.198617981495676,
|
|
"learning_rate": 2.3728585284173646e-06,
|
|
"loss": 0.2520957887172699,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 2.12807881773399,
|
|
"grad_norm": 17.99753939345998,
|
|
"learning_rate": 2.3667640889774096e-06,
|
|
"loss": 0.5538915991783142,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 2.1293103448275863,
|
|
"grad_norm": 15.205601395041407,
|
|
"learning_rate": 2.3606750584298375e-06,
|
|
"loss": 0.5438660979270935,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 2.1305418719211824,
|
|
"grad_norm": 11.445216371439214,
|
|
"learning_rate": 2.3545914492820366e-06,
|
|
"loss": 0.39724698662757874,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 2.1317733990147785,
|
|
"grad_norm": 13.240651517787109,
|
|
"learning_rate": 2.348513274030264e-06,
|
|
"loss": 0.3480866551399231,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 2.1330049261083746,
|
|
"grad_norm": 8.909285636059167,
|
|
"learning_rate": 2.3424405451596143e-06,
|
|
"loss": 0.9076392650604248,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 2.1342364532019706,
|
|
"grad_norm": 10.08773566622176,
|
|
"learning_rate": 2.3363732751439926e-06,
|
|
"loss": 0.19863876700401306,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 2.1354679802955667,
|
|
"grad_norm": 18.974399402946254,
|
|
"learning_rate": 2.3303114764460887e-06,
|
|
"loss": 0.5347404479980469,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 2.1366995073891624,
|
|
"grad_norm": 13.439122993751143,
|
|
"learning_rate": 2.32425516151736e-06,
|
|
"loss": 0.4876821041107178,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 2.1379310344827585,
|
|
"grad_norm": 11.45775521594229,
|
|
"learning_rate": 2.3182043427979973e-06,
|
|
"loss": 0.24914954602718353,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 2.1391625615763545,
|
|
"grad_norm": 8.201340069963411,
|
|
"learning_rate": 2.3121590327168987e-06,
|
|
"loss": 0.5773565769195557,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 2.1403940886699506,
|
|
"grad_norm": 11.57987957433396,
|
|
"learning_rate": 2.30611924369165e-06,
|
|
"loss": 0.7779598832130432,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 2.1416256157635467,
|
|
"grad_norm": 10.793230544693655,
|
|
"learning_rate": 2.3000849881285016e-06,
|
|
"loss": 0.27866464853286743,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 10.857850500188468,
|
|
"learning_rate": 2.2940562784223224e-06,
|
|
"loss": 0.5243108868598938,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 2.144088669950739,
|
|
"grad_norm": 11.19069440448601,
|
|
"learning_rate": 2.2880331269566043e-06,
|
|
"loss": 0.6560786366462708,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 2.145320197044335,
|
|
"grad_norm": 13.01584696243558,
|
|
"learning_rate": 2.282015546103418e-06,
|
|
"loss": 0.6339880228042603,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 2.146551724137931,
|
|
"grad_norm": 9.571310950804556,
|
|
"learning_rate": 2.2760035482233868e-06,
|
|
"loss": 0.2517808973789215,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 2.147783251231527,
|
|
"grad_norm": 20.291798315352697,
|
|
"learning_rate": 2.269997145665674e-06,
|
|
"loss": 0.40347909927368164,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 2.149014778325123,
|
|
"grad_norm": 9.550073631094609,
|
|
"learning_rate": 2.263996350767942e-06,
|
|
"loss": 0.4681488573551178,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 2.1502463054187193,
|
|
"grad_norm": 9.340283980757114,
|
|
"learning_rate": 2.2580011758563418e-06,
|
|
"loss": 0.6371068954467773,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 2.1514778325123154,
|
|
"grad_norm": 21.612590436052542,
|
|
"learning_rate": 2.2520116332454726e-06,
|
|
"loss": 0.4741581678390503,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 2.1527093596059115,
|
|
"grad_norm": 8.523455664504207,
|
|
"learning_rate": 2.2460277352383713e-06,
|
|
"loss": 0.3354438543319702,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 2.1539408866995076,
|
|
"grad_norm": 14.050991791769299,
|
|
"learning_rate": 2.240049494126479e-06,
|
|
"loss": 0.593233585357666,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 2.1551724137931036,
|
|
"grad_norm": 11.626128632656414,
|
|
"learning_rate": 2.234076922189613e-06,
|
|
"loss": 0.32123100757598877,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 2.1564039408866993,
|
|
"grad_norm": 17.381626157091297,
|
|
"learning_rate": 2.2281100316959476e-06,
|
|
"loss": 1.0594584941864014,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 2.1576354679802954,
|
|
"grad_norm": 9.794184199968742,
|
|
"learning_rate": 2.2221488349019903e-06,
|
|
"loss": 0.8586208820343018,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 2.1588669950738915,
|
|
"grad_norm": 10.979739823361593,
|
|
"learning_rate": 2.2161933440525474e-06,
|
|
"loss": 0.38074642419815063,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 2.1600985221674875,
|
|
"grad_norm": 10.732650739543086,
|
|
"learning_rate": 2.21024357138071e-06,
|
|
"loss": 0.28768736124038696,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 2.1613300492610836,
|
|
"grad_norm": 10.263056998284627,
|
|
"learning_rate": 2.2042995291078227e-06,
|
|
"loss": 1.1843211650848389,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 2.1625615763546797,
|
|
"grad_norm": 13.635797719225163,
|
|
"learning_rate": 2.1983612294434563e-06,
|
|
"loss": 0.7616925835609436,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 2.163793103448276,
|
|
"grad_norm": 9.78260695772624,
|
|
"learning_rate": 2.192428684585386e-06,
|
|
"loss": 0.4518227279186249,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 2.165024630541872,
|
|
"grad_norm": 14.669561384919394,
|
|
"learning_rate": 2.1865019067195685e-06,
|
|
"loss": 0.9173997640609741,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 2.166256157635468,
|
|
"grad_norm": 9.861706475635476,
|
|
"learning_rate": 2.180580908020117e-06,
|
|
"loss": 0.4044645428657532,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 2.167487684729064,
|
|
"grad_norm": 11.783858103052328,
|
|
"learning_rate": 2.174665700649267e-06,
|
|
"loss": 0.7771418690681458,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 2.16871921182266,
|
|
"grad_norm": 12.555695641041428,
|
|
"learning_rate": 2.1687562967573645e-06,
|
|
"loss": 0.39461982250213623,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 2.1699507389162562,
|
|
"grad_norm": 8.510682084443147,
|
|
"learning_rate": 2.1628527084828283e-06,
|
|
"loss": 0.2924491763114929,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 2.1711822660098523,
|
|
"grad_norm": 7.789254339344862,
|
|
"learning_rate": 2.156954947952139e-06,
|
|
"loss": 0.2507514953613281,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 2.1724137931034484,
|
|
"grad_norm": 9.474786369957261,
|
|
"learning_rate": 2.151063027279798e-06,
|
|
"loss": 0.44257861375808716,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 2.1736453201970445,
|
|
"grad_norm": 9.165088005805186,
|
|
"learning_rate": 2.1451769585683196e-06,
|
|
"loss": 0.2863251268863678,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 2.1748768472906406,
|
|
"grad_norm": 14.506373027900759,
|
|
"learning_rate": 2.139296753908195e-06,
|
|
"loss": 0.6882431507110596,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 2.1761083743842367,
|
|
"grad_norm": 10.237681928740948,
|
|
"learning_rate": 2.1334224253778628e-06,
|
|
"loss": 0.8318816423416138,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 2.1773399014778327,
|
|
"grad_norm": 8.92298078848023,
|
|
"learning_rate": 2.1275539850437006e-06,
|
|
"loss": 0.3899531364440918,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 2.1785714285714284,
|
|
"grad_norm": 10.24700092560103,
|
|
"learning_rate": 2.1216914449599905e-06,
|
|
"loss": 0.6424532532691956,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 2.1798029556650245,
|
|
"grad_norm": 10.006066437806421,
|
|
"learning_rate": 2.1158348171688888e-06,
|
|
"loss": 0.6676028370857239,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 2.1810344827586206,
|
|
"grad_norm": 11.577953051638056,
|
|
"learning_rate": 2.109984113700413e-06,
|
|
"loss": 0.4219639301300049,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 2.1822660098522166,
|
|
"grad_norm": 6.842671899586793,
|
|
"learning_rate": 2.1041393465724114e-06,
|
|
"loss": 0.32283568382263184,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 2.1834975369458127,
|
|
"grad_norm": 9.373944237506624,
|
|
"learning_rate": 2.0983005277905348e-06,
|
|
"loss": 0.26172614097595215,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 2.184729064039409,
|
|
"grad_norm": 8.04859888971959,
|
|
"learning_rate": 2.092467669348217e-06,
|
|
"loss": 0.585732638835907,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 2.185960591133005,
|
|
"grad_norm": 17.13691371915511,
|
|
"learning_rate": 2.0866407832266506e-06,
|
|
"loss": 0.42734187841415405,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 2.187192118226601,
|
|
"grad_norm": 9.353812644763135,
|
|
"learning_rate": 2.0808198813947606e-06,
|
|
"loss": 0.24151989817619324,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 2.188423645320197,
|
|
"grad_norm": 6.491521280477716,
|
|
"learning_rate": 2.0750049758091778e-06,
|
|
"loss": 0.12940426170825958,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 2.189655172413793,
|
|
"grad_norm": 12.137046868295176,
|
|
"learning_rate": 2.0691960784142143e-06,
|
|
"loss": 0.7501548528671265,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 2.1908866995073892,
|
|
"grad_norm": 8.28614035816523,
|
|
"learning_rate": 2.063393201141846e-06,
|
|
"loss": 0.43730083107948303,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 2.1921182266009853,
|
|
"grad_norm": 7.426728577487124,
|
|
"learning_rate": 2.0575963559116823e-06,
|
|
"loss": 0.3335978388786316,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 2.1933497536945814,
|
|
"grad_norm": 7.727814229698406,
|
|
"learning_rate": 2.0518055546309362e-06,
|
|
"loss": 0.3262137174606323,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 2.1945812807881775,
|
|
"grad_norm": 12.218163734992793,
|
|
"learning_rate": 2.0460208091944122e-06,
|
|
"loss": 0.3336663544178009,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 2.1958128078817736,
|
|
"grad_norm": 12.61978263562606,
|
|
"learning_rate": 2.0402421314844774e-06,
|
|
"loss": 0.6050255298614502,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 2.1970443349753697,
|
|
"grad_norm": 10.058297792191603,
|
|
"learning_rate": 2.0344695333710234e-06,
|
|
"loss": 0.33584898710250854,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 2.1982758620689653,
|
|
"grad_norm": 7.629807101727278,
|
|
"learning_rate": 2.0287030267114665e-06,
|
|
"loss": 0.4711458683013916,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 2.1995073891625614,
|
|
"grad_norm": 7.348268103503395,
|
|
"learning_rate": 2.0229426233507067e-06,
|
|
"loss": 0.6127311587333679,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 2.2007389162561575,
|
|
"grad_norm": 8.230284472347915,
|
|
"learning_rate": 2.0171883351211038e-06,
|
|
"loss": 0.7195362448692322,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 2.2019704433497536,
|
|
"grad_norm": 20.032548588100823,
|
|
"learning_rate": 2.0114401738424618e-06,
|
|
"loss": 1.412251591682434,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 2.2032019704433496,
|
|
"grad_norm": 11.361862300830705,
|
|
"learning_rate": 2.0056981513219944e-06,
|
|
"loss": 0.48954465985298157,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 2.2044334975369457,
|
|
"grad_norm": 10.14335903404985,
|
|
"learning_rate": 1.999962279354311e-06,
|
|
"loss": 0.32414451241493225,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 2.205665024630542,
|
|
"grad_norm": 11.365030809564745,
|
|
"learning_rate": 1.9942325697213817e-06,
|
|
"loss": 0.4072822034358978,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 2.206896551724138,
|
|
"grad_norm": 9.518825727757552,
|
|
"learning_rate": 1.988509034192522e-06,
|
|
"loss": 0.25958192348480225,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 2.208128078817734,
|
|
"grad_norm": 7.689606665993246,
|
|
"learning_rate": 1.9827916845243687e-06,
|
|
"loss": 0.2943662405014038,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 2.20935960591133,
|
|
"grad_norm": 11.749853788306439,
|
|
"learning_rate": 1.9770805324608446e-06,
|
|
"loss": 0.6713488698005676,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 2.210591133004926,
|
|
"grad_norm": 8.987827629233262,
|
|
"learning_rate": 1.971375589733145e-06,
|
|
"loss": 0.5103387236595154,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 2.2118226600985222,
|
|
"grad_norm": 14.84712925009146,
|
|
"learning_rate": 1.965676868059714e-06,
|
|
"loss": 0.4981153905391693,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 2.2130541871921183,
|
|
"grad_norm": 9.829434549611708,
|
|
"learning_rate": 1.9599843791462123e-06,
|
|
"loss": 0.2828434407711029,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 2.2142857142857144,
|
|
"grad_norm": 11.531079285990483,
|
|
"learning_rate": 1.9542981346855015e-06,
|
|
"loss": 0.36899659037590027,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 2.2155172413793105,
|
|
"grad_norm": 10.264635301771921,
|
|
"learning_rate": 1.9486181463576176e-06,
|
|
"loss": 0.46039581298828125,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 2.2167487684729066,
|
|
"grad_norm": 7.994315710714336,
|
|
"learning_rate": 1.942944425829741e-06,
|
|
"loss": 0.611553966999054,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 2.2179802955665027,
|
|
"grad_norm": 10.64295367375575,
|
|
"learning_rate": 1.937276984756179e-06,
|
|
"loss": 0.23928876221179962,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 2.2192118226600988,
|
|
"grad_norm": 11.919180580141987,
|
|
"learning_rate": 1.9316158347783436e-06,
|
|
"loss": 0.3270934820175171,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 2.2204433497536944,
|
|
"grad_norm": 9.438403907761801,
|
|
"learning_rate": 1.925960987524724e-06,
|
|
"loss": 0.30926424264907837,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 2.2216748768472905,
|
|
"grad_norm": 11.903671185207038,
|
|
"learning_rate": 1.9203124546108583e-06,
|
|
"loss": 0.6049486994743347,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 2.2229064039408866,
|
|
"grad_norm": 14.861992075187999,
|
|
"learning_rate": 1.91467024763932e-06,
|
|
"loss": 0.7592355012893677,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 2.2241379310344827,
|
|
"grad_norm": 11.790018718519686,
|
|
"learning_rate": 1.9090343781996828e-06,
|
|
"loss": 0.26057887077331543,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 2.2253694581280787,
|
|
"grad_norm": 17.03673279052151,
|
|
"learning_rate": 1.9034048578685099e-06,
|
|
"loss": 0.4014609754085541,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 2.226600985221675,
|
|
"grad_norm": 10.412774433531801,
|
|
"learning_rate": 1.897781698209315e-06,
|
|
"loss": 0.26397138833999634,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 2.227832512315271,
|
|
"grad_norm": 11.809020308728643,
|
|
"learning_rate": 1.8921649107725525e-06,
|
|
"loss": 0.8727256059646606,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 2.229064039408867,
|
|
"grad_norm": 8.838116472787092,
|
|
"learning_rate": 1.8865545070955882e-06,
|
|
"loss": 0.45729875564575195,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 2.230295566502463,
|
|
"grad_norm": 13.341384604613445,
|
|
"learning_rate": 1.880950498702666e-06,
|
|
"loss": 0.3261849880218506,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 2.231527093596059,
|
|
"grad_norm": 16.210141929264246,
|
|
"learning_rate": 1.875352897104903e-06,
|
|
"loss": 0.682532787322998,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 2.2327586206896552,
|
|
"grad_norm": 16.44333196476405,
|
|
"learning_rate": 1.8697617138002545e-06,
|
|
"loss": 0.4255359470844269,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 2.2339901477832513,
|
|
"grad_norm": 8.460123548003127,
|
|
"learning_rate": 1.8641769602734872e-06,
|
|
"loss": 0.3307432234287262,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 2.2352216748768474,
|
|
"grad_norm": 9.96917434972206,
|
|
"learning_rate": 1.8585986479961653e-06,
|
|
"loss": 0.26837313175201416,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 2.2364532019704435,
|
|
"grad_norm": 12.410587151566334,
|
|
"learning_rate": 1.8530267884266228e-06,
|
|
"loss": 0.5036531686782837,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 2.2376847290640396,
|
|
"grad_norm": 13.229449859916322,
|
|
"learning_rate": 1.8474613930099356e-06,
|
|
"loss": 0.4444383680820465,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 2.2389162561576357,
|
|
"grad_norm": 10.366174513602477,
|
|
"learning_rate": 1.8419024731779e-06,
|
|
"loss": 0.24592629075050354,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 2.2401477832512313,
|
|
"grad_norm": 21.212742320307363,
|
|
"learning_rate": 1.8363500403490175e-06,
|
|
"loss": 0.9310093522071838,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 2.2413793103448274,
|
|
"grad_norm": 10.041916938686702,
|
|
"learning_rate": 1.8308041059284621e-06,
|
|
"loss": 0.3252318799495697,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 2.2426108374384235,
|
|
"grad_norm": 10.169102582875109,
|
|
"learning_rate": 1.8252646813080566e-06,
|
|
"loss": 0.44218361377716064,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 2.2438423645320196,
|
|
"grad_norm": 13.658159402672133,
|
|
"learning_rate": 1.8197317778662533e-06,
|
|
"loss": 0.631632924079895,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 2.2450738916256157,
|
|
"grad_norm": 11.284192076783485,
|
|
"learning_rate": 1.814205406968112e-06,
|
|
"loss": 0.2570488154888153,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 2.2463054187192117,
|
|
"grad_norm": 10.661610786830831,
|
|
"learning_rate": 1.8086855799652737e-06,
|
|
"loss": 0.6113500595092773,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 2.247536945812808,
|
|
"grad_norm": 9.883591422459872,
|
|
"learning_rate": 1.8031723081959334e-06,
|
|
"loss": 0.5997953414916992,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 2.248768472906404,
|
|
"grad_norm": 12.888281661513009,
|
|
"learning_rate": 1.7976656029848271e-06,
|
|
"loss": 0.501262903213501,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"grad_norm": 9.87397702836225,
|
|
"learning_rate": 1.792165475643199e-06,
|
|
"loss": 0.9116629362106323,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 2.251231527093596,
|
|
"grad_norm": 8.421237466791723,
|
|
"learning_rate": 1.786671937468779e-06,
|
|
"loss": 0.3302918076515198,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 2.252463054187192,
|
|
"grad_norm": 9.25026361639238,
|
|
"learning_rate": 1.7811849997457681e-06,
|
|
"loss": 0.26528751850128174,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 2.2536945812807883,
|
|
"grad_norm": 11.490820404812338,
|
|
"learning_rate": 1.775704673744809e-06,
|
|
"loss": 0.25929901003837585,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 2.2549261083743843,
|
|
"grad_norm": 13.127115940994786,
|
|
"learning_rate": 1.7702309707229576e-06,
|
|
"loss": 0.4980836808681488,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 2.2561576354679804,
|
|
"grad_norm": 16.054819413361866,
|
|
"learning_rate": 1.764763901923673e-06,
|
|
"loss": 0.5196325182914734,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 2.2573891625615765,
|
|
"grad_norm": 8.101995143129717,
|
|
"learning_rate": 1.7593034785767788e-06,
|
|
"loss": 0.20513209700584412,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 2.2586206896551726,
|
|
"grad_norm": 11.005823004560217,
|
|
"learning_rate": 1.753849711898457e-06,
|
|
"loss": 0.3052961826324463,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 2.2598522167487687,
|
|
"grad_norm": 14.916636143940408,
|
|
"learning_rate": 1.7484026130912097e-06,
|
|
"loss": 0.32289302349090576,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 2.2610837438423648,
|
|
"grad_norm": 10.783629716557854,
|
|
"learning_rate": 1.742962193343845e-06,
|
|
"loss": 0.5892568826675415,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 2.2623152709359604,
|
|
"grad_norm": 8.680159409558001,
|
|
"learning_rate": 1.737528463831456e-06,
|
|
"loss": 0.24824300408363342,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 2.2635467980295565,
|
|
"grad_norm": 28.059213249121456,
|
|
"learning_rate": 1.7321014357153815e-06,
|
|
"loss": 0.23833397030830383,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 2.2647783251231526,
|
|
"grad_norm": 10.866697094389515,
|
|
"learning_rate": 1.726681120143207e-06,
|
|
"loss": 0.4855925738811493,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 2.2660098522167487,
|
|
"grad_norm": 11.048047137574908,
|
|
"learning_rate": 1.7212675282487269e-06,
|
|
"loss": 0.44992727041244507,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 2.2672413793103448,
|
|
"grad_norm": 19.236329816785574,
|
|
"learning_rate": 1.7158606711519193e-06,
|
|
"loss": 0.41251128911972046,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 2.268472906403941,
|
|
"grad_norm": 8.021805078822515,
|
|
"learning_rate": 1.7104605599589353e-06,
|
|
"loss": 0.4418972134590149,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 2.269704433497537,
|
|
"grad_norm": 14.577958176696848,
|
|
"learning_rate": 1.7050672057620666e-06,
|
|
"loss": 0.4425298571586609,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 2.270935960591133,
|
|
"grad_norm": 13.33684949043127,
|
|
"learning_rate": 1.6996806196397243e-06,
|
|
"loss": 0.3141231834888458,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 2.272167487684729,
|
|
"grad_norm": 14.191190475097011,
|
|
"learning_rate": 1.6943008126564164e-06,
|
|
"loss": 0.2843426764011383,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 2.273399014778325,
|
|
"grad_norm": 8.774563230877245,
|
|
"learning_rate": 1.6889277958627293e-06,
|
|
"loss": 0.36104702949523926,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 2.2746305418719213,
|
|
"grad_norm": 8.915062589804638,
|
|
"learning_rate": 1.6835615802953026e-06,
|
|
"loss": 0.3061131536960602,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 2.2758620689655173,
|
|
"grad_norm": 14.006563372468205,
|
|
"learning_rate": 1.6782021769768015e-06,
|
|
"loss": 0.26009926199913025,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 2.2770935960591134,
|
|
"grad_norm": 8.127500944165664,
|
|
"learning_rate": 1.6728495969158976e-06,
|
|
"loss": 0.33785128593444824,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 2.2783251231527095,
|
|
"grad_norm": 13.84769147602863,
|
|
"learning_rate": 1.6675038511072518e-06,
|
|
"loss": 0.675277829170227,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 2.2795566502463056,
|
|
"grad_norm": 10.2024379894797,
|
|
"learning_rate": 1.6621649505314853e-06,
|
|
"loss": 0.30536460876464844,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 2.2807881773399012,
|
|
"grad_norm": 13.905669065241,
|
|
"learning_rate": 1.6568329061551552e-06,
|
|
"loss": 0.483297735452652,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 2.2820197044334973,
|
|
"grad_norm": 13.831832440802502,
|
|
"learning_rate": 1.6515077289307391e-06,
|
|
"loss": 1.2728561162948608,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 2.2832512315270934,
|
|
"grad_norm": 12.809334971632179,
|
|
"learning_rate": 1.6461894297966113e-06,
|
|
"loss": 1.2634159326553345,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 2.2844827586206895,
|
|
"grad_norm": 7.191323391539922,
|
|
"learning_rate": 1.640878019677008e-06,
|
|
"loss": 0.2823532819747925,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 10.11071089918571,
|
|
"learning_rate": 1.6355735094820236e-06,
|
|
"loss": 0.34143221378326416,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 2.2869458128078817,
|
|
"grad_norm": 21.093284752390208,
|
|
"learning_rate": 1.6302759101075788e-06,
|
|
"loss": 1.6820435523986816,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 2.2881773399014778,
|
|
"grad_norm": 10.354309593440153,
|
|
"learning_rate": 1.6249852324353943e-06,
|
|
"loss": 0.5194296836853027,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 2.289408866995074,
|
|
"grad_norm": 17.44623842314838,
|
|
"learning_rate": 1.619701487332978e-06,
|
|
"loss": 0.5637781023979187,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 2.29064039408867,
|
|
"grad_norm": 25.69777716112705,
|
|
"learning_rate": 1.6144246856535933e-06,
|
|
"loss": 0.34875303506851196,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 2.291871921182266,
|
|
"grad_norm": 12.072258734899453,
|
|
"learning_rate": 1.609154838236246e-06,
|
|
"loss": 1.098509430885315,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 2.293103448275862,
|
|
"grad_norm": 9.38995256932923,
|
|
"learning_rate": 1.603891955905652e-06,
|
|
"loss": 0.28303658962249756,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 2.294334975369458,
|
|
"grad_norm": 8.876257541157115,
|
|
"learning_rate": 1.5986360494722237e-06,
|
|
"loss": 0.2923981547355652,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 2.2955665024630543,
|
|
"grad_norm": 12.816591257478263,
|
|
"learning_rate": 1.5933871297320458e-06,
|
|
"loss": 0.7381842136383057,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 2.2967980295566504,
|
|
"grad_norm": 11.151348038557627,
|
|
"learning_rate": 1.5881452074668474e-06,
|
|
"loss": 0.3092786371707916,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 2.2980295566502464,
|
|
"grad_norm": 7.288277848225151,
|
|
"learning_rate": 1.5829102934439855e-06,
|
|
"loss": 0.23155847191810608,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 2.2992610837438425,
|
|
"grad_norm": 6.9100983038059685,
|
|
"learning_rate": 1.577682398416424e-06,
|
|
"loss": 0.28587496280670166,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 2.3004926108374386,
|
|
"grad_norm": 10.179482607383743,
|
|
"learning_rate": 1.572461533122709e-06,
|
|
"loss": 0.28047090768814087,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 2.3017241379310347,
|
|
"grad_norm": 9.853152635402589,
|
|
"learning_rate": 1.567247708286942e-06,
|
|
"loss": 0.23015758395195007,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 2.302955665024631,
|
|
"grad_norm": 11.277401391934358,
|
|
"learning_rate": 1.5620409346187697e-06,
|
|
"loss": 0.4323405623435974,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 2.3041871921182264,
|
|
"grad_norm": 11.297467766496554,
|
|
"learning_rate": 1.5568412228133506e-06,
|
|
"loss": 0.23572880029678345,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 2.3054187192118225,
|
|
"grad_norm": 13.421885123492197,
|
|
"learning_rate": 1.5516485835513368e-06,
|
|
"loss": 0.3727877140045166,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 2.3066502463054186,
|
|
"grad_norm": 12.62430001790282,
|
|
"learning_rate": 1.5464630274988558e-06,
|
|
"loss": 0.45042985677719116,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 2.3078817733990147,
|
|
"grad_norm": 14.933222032568711,
|
|
"learning_rate": 1.5412845653074871e-06,
|
|
"loss": 0.2898573875427246,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 2.3091133004926108,
|
|
"grad_norm": 13.678732792764093,
|
|
"learning_rate": 1.5361132076142316e-06,
|
|
"loss": 0.5285981893539429,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 2.310344827586207,
|
|
"grad_norm": 11.195106285237618,
|
|
"learning_rate": 1.5309489650415056e-06,
|
|
"loss": 0.32582932710647583,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 2.311576354679803,
|
|
"grad_norm": 10.519489956392377,
|
|
"learning_rate": 1.5257918481971028e-06,
|
|
"loss": 0.2169458121061325,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 2.312807881773399,
|
|
"grad_norm": 13.764556882530254,
|
|
"learning_rate": 1.5206418676741868e-06,
|
|
"loss": 0.618523359298706,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 2.314039408866995,
|
|
"grad_norm": 11.040931356433024,
|
|
"learning_rate": 1.515499034051256e-06,
|
|
"loss": 0.7014099359512329,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 2.315270935960591,
|
|
"grad_norm": 13.213679491063276,
|
|
"learning_rate": 1.510363357892133e-06,
|
|
"loss": 0.44798558950424194,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 2.3165024630541873,
|
|
"grad_norm": 77.68330951092015,
|
|
"learning_rate": 1.50523484974594e-06,
|
|
"loss": 0.4824434220790863,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 2.3177339901477834,
|
|
"grad_norm": 5.871453538227446,
|
|
"learning_rate": 1.5001135201470673e-06,
|
|
"loss": 0.16904819011688232,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 2.3189655172413794,
|
|
"grad_norm": 10.296708154719132,
|
|
"learning_rate": 1.4949993796151675e-06,
|
|
"loss": 0.8792778253555298,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 2.3201970443349755,
|
|
"grad_norm": 12.549086016226653,
|
|
"learning_rate": 1.4898924386551256e-06,
|
|
"loss": 0.6592487096786499,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 2.3214285714285716,
|
|
"grad_norm": 20.275701743724124,
|
|
"learning_rate": 1.4847927077570324e-06,
|
|
"loss": 1.6036354303359985,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 2.3226600985221673,
|
|
"grad_norm": 9.24831145241808,
|
|
"learning_rate": 1.4797001973961755e-06,
|
|
"loss": 0.34490981698036194,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 2.3238916256157633,
|
|
"grad_norm": 8.476000589981345,
|
|
"learning_rate": 1.4746149180330082e-06,
|
|
"loss": 0.3186146914958954,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 2.3251231527093594,
|
|
"grad_norm": 18.44274912327115,
|
|
"learning_rate": 1.4695368801131293e-06,
|
|
"loss": 0.5050108432769775,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 2.3263546798029555,
|
|
"grad_norm": 12.028503330268482,
|
|
"learning_rate": 1.4644660940672628e-06,
|
|
"loss": 0.3541644215583801,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 2.3275862068965516,
|
|
"grad_norm": 6.910684312350736,
|
|
"learning_rate": 1.4594025703112397e-06,
|
|
"loss": 0.3495083749294281,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 2.3288177339901477,
|
|
"grad_norm": 11.582636749838006,
|
|
"learning_rate": 1.4543463192459728e-06,
|
|
"loss": 0.9918674826622009,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 2.3300492610837438,
|
|
"grad_norm": 12.929277927199294,
|
|
"learning_rate": 1.4492973512574348e-06,
|
|
"loss": 0.9601753950119019,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 2.33128078817734,
|
|
"grad_norm": 8.289898772410082,
|
|
"learning_rate": 1.4442556767166371e-06,
|
|
"loss": 0.48341238498687744,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 2.332512315270936,
|
|
"grad_norm": 11.044218498303557,
|
|
"learning_rate": 1.4392213059796133e-06,
|
|
"loss": 0.38372108340263367,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 2.333743842364532,
|
|
"grad_norm": 17.672025418443823,
|
|
"learning_rate": 1.4341942493873934e-06,
|
|
"loss": 0.45662760734558105,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 2.334975369458128,
|
|
"grad_norm": 8.57989944923008,
|
|
"learning_rate": 1.4291745172659804e-06,
|
|
"loss": 0.6601132154464722,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 2.336206896551724,
|
|
"grad_norm": 10.831792328536467,
|
|
"learning_rate": 1.4241621199263362e-06,
|
|
"loss": 0.7569577097892761,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 2.3374384236453203,
|
|
"grad_norm": 14.76295283801852,
|
|
"learning_rate": 1.4191570676643573e-06,
|
|
"loss": 0.7162508964538574,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 2.3386699507389164,
|
|
"grad_norm": 16.808898262444146,
|
|
"learning_rate": 1.4141593707608441e-06,
|
|
"loss": 0.6121374368667603,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 2.3399014778325125,
|
|
"grad_norm": 14.404980275639364,
|
|
"learning_rate": 1.4091690394814989e-06,
|
|
"loss": 0.550343930721283,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 2.3411330049261085,
|
|
"grad_norm": 13.189507504332187,
|
|
"learning_rate": 1.40418608407689e-06,
|
|
"loss": 0.644547700881958,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 2.3423645320197046,
|
|
"grad_norm": 10.144794457121083,
|
|
"learning_rate": 1.3992105147824326e-06,
|
|
"loss": 0.463761568069458,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 2.3435960591133007,
|
|
"grad_norm": 9.21109140090456,
|
|
"learning_rate": 1.3942423418183764e-06,
|
|
"loss": 0.5593357682228088,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 2.344827586206897,
|
|
"grad_norm": 12.967643967580644,
|
|
"learning_rate": 1.3892815753897708e-06,
|
|
"loss": 0.5090635418891907,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 2.3460591133004924,
|
|
"grad_norm": 13.46983908302652,
|
|
"learning_rate": 1.3843282256864599e-06,
|
|
"loss": 0.4595394432544708,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 2.3472906403940885,
|
|
"grad_norm": 11.392389994781835,
|
|
"learning_rate": 1.379382302883044e-06,
|
|
"loss": 0.8381729125976562,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 2.3485221674876846,
|
|
"grad_norm": 8.85214424769499,
|
|
"learning_rate": 1.3744438171388752e-06,
|
|
"loss": 0.37937110662460327,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 2.3497536945812807,
|
|
"grad_norm": 17.78975528440709,
|
|
"learning_rate": 1.3695127785980279e-06,
|
|
"loss": 0.4255325496196747,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 2.350985221674877,
|
|
"grad_norm": 11.69369455239838,
|
|
"learning_rate": 1.3645891973892772e-06,
|
|
"loss": 1.1354942321777344,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 2.352216748768473,
|
|
"grad_norm": 7.241901848192273,
|
|
"learning_rate": 1.359673083626079e-06,
|
|
"loss": 0.30018460750579834,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 2.353448275862069,
|
|
"grad_norm": 10.130306855965305,
|
|
"learning_rate": 1.3547644474065557e-06,
|
|
"loss": 0.22174029052257538,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 2.354679802955665,
|
|
"grad_norm": 10.818242567623516,
|
|
"learning_rate": 1.349863298813464e-06,
|
|
"loss": 0.27310076355934143,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 2.355911330049261,
|
|
"grad_norm": 13.041781733429923,
|
|
"learning_rate": 1.3449696479141855e-06,
|
|
"loss": 0.39454638957977295,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 2.357142857142857,
|
|
"grad_norm": 10.18283763523278,
|
|
"learning_rate": 1.3400835047606997e-06,
|
|
"loss": 0.39921119809150696,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 2.3583743842364533,
|
|
"grad_norm": 10.365856020003331,
|
|
"learning_rate": 1.3352048793895623e-06,
|
|
"loss": 0.45110660791397095,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 2.3596059113300494,
|
|
"grad_norm": 8.256618178243365,
|
|
"learning_rate": 1.330333781821887e-06,
|
|
"loss": 0.5453286170959473,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 2.3608374384236455,
|
|
"grad_norm": 7.676268533106476,
|
|
"learning_rate": 1.325470222063327e-06,
|
|
"loss": 0.21928450465202332,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 2.3620689655172415,
|
|
"grad_norm": 11.703145589738702,
|
|
"learning_rate": 1.3206142101040525e-06,
|
|
"loss": 0.8491370677947998,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 2.363300492610837,
|
|
"grad_norm": 11.375579827407606,
|
|
"learning_rate": 1.3157657559187264e-06,
|
|
"loss": 0.5052551031112671,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 2.3645320197044333,
|
|
"grad_norm": 14.124196950433179,
|
|
"learning_rate": 1.3109248694664917e-06,
|
|
"loss": 1.0034559965133667,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 2.3657635467980294,
|
|
"grad_norm": 16.92878880493155,
|
|
"learning_rate": 1.3060915606909413e-06,
|
|
"loss": 0.3685661554336548,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 2.3669950738916254,
|
|
"grad_norm": 9.744666272771802,
|
|
"learning_rate": 1.301265839520109e-06,
|
|
"loss": 0.33304983377456665,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 2.3682266009852215,
|
|
"grad_norm": 9.861413232471296,
|
|
"learning_rate": 1.2964477158664367e-06,
|
|
"loss": 1.3396000862121582,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 2.3694581280788176,
|
|
"grad_norm": 13.403135613317723,
|
|
"learning_rate": 1.2916371996267656e-06,
|
|
"loss": 0.3852962851524353,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 2.3706896551724137,
|
|
"grad_norm": 12.989833739172669,
|
|
"learning_rate": 1.2868343006823113e-06,
|
|
"loss": 0.5070800185203552,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 2.37192118226601,
|
|
"grad_norm": 10.592089371352348,
|
|
"learning_rate": 1.2820390288986345e-06,
|
|
"loss": 0.1917571723461151,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 2.373152709359606,
|
|
"grad_norm": 6.248268258840329,
|
|
"learning_rate": 1.2772513941256371e-06,
|
|
"loss": 0.19884659349918365,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 2.374384236453202,
|
|
"grad_norm": 13.319990126266617,
|
|
"learning_rate": 1.2724714061975335e-06,
|
|
"loss": 0.27710244059562683,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 2.375615763546798,
|
|
"grad_norm": 12.638294589181001,
|
|
"learning_rate": 1.2676990749328255e-06,
|
|
"loss": 0.7216998338699341,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 2.376847290640394,
|
|
"grad_norm": 7.68797287512978,
|
|
"learning_rate": 1.262934410134292e-06,
|
|
"loss": 0.35512983798980713,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 2.37807881773399,
|
|
"grad_norm": 7.682504760826181,
|
|
"learning_rate": 1.2581774215889653e-06,
|
|
"loss": 0.21548208594322205,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 2.3793103448275863,
|
|
"grad_norm": 10.576319148708158,
|
|
"learning_rate": 1.2534281190681059e-06,
|
|
"loss": 0.7191505432128906,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 2.3805418719211824,
|
|
"grad_norm": 28.03273248427961,
|
|
"learning_rate": 1.2486865123271868e-06,
|
|
"loss": 0.5658040046691895,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 2.3817733990147785,
|
|
"grad_norm": 7.429440108605395,
|
|
"learning_rate": 1.243952611105877e-06,
|
|
"loss": 0.42820805311203003,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 2.3830049261083746,
|
|
"grad_norm": 8.913271204535084,
|
|
"learning_rate": 1.2392264251280167e-06,
|
|
"loss": 0.3223640024662018,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 2.3842364532019706,
|
|
"grad_norm": 16.39061337542185,
|
|
"learning_rate": 1.2345079641015955e-06,
|
|
"loss": 0.5262437462806702,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 2.3854679802955667,
|
|
"grad_norm": 12.040132799234067,
|
|
"learning_rate": 1.2297972377187361e-06,
|
|
"loss": 0.32022416591644287,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 2.386699507389163,
|
|
"grad_norm": 10.197992684406291,
|
|
"learning_rate": 1.2250942556556754e-06,
|
|
"loss": 0.76932692527771,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 2.3879310344827585,
|
|
"grad_norm": 9.459909563147203,
|
|
"learning_rate": 1.2203990275727435e-06,
|
|
"loss": 0.23026564717292786,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 2.3891625615763545,
|
|
"grad_norm": 11.035875303455253,
|
|
"learning_rate": 1.2157115631143384e-06,
|
|
"loss": 0.4533492624759674,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 2.3903940886699506,
|
|
"grad_norm": 10.823301129205994,
|
|
"learning_rate": 1.211031871908916e-06,
|
|
"loss": 0.6235211491584778,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 2.3916256157635467,
|
|
"grad_norm": 9.073613663519735,
|
|
"learning_rate": 1.206359963568966e-06,
|
|
"loss": 0.2519042193889618,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 2.392857142857143,
|
|
"grad_norm": 9.128265200465231,
|
|
"learning_rate": 1.201695847690983e-06,
|
|
"loss": 0.3229137659072876,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 2.394088669950739,
|
|
"grad_norm": 11.336508477709275,
|
|
"learning_rate": 1.1970395338554642e-06,
|
|
"loss": 0.19324302673339844,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 2.395320197044335,
|
|
"grad_norm": 11.07861313896692,
|
|
"learning_rate": 1.1923910316268783e-06,
|
|
"loss": 0.6342459917068481,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 2.396551724137931,
|
|
"grad_norm": 11.018070634448504,
|
|
"learning_rate": 1.1877503505536453e-06,
|
|
"loss": 0.3010944724082947,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 2.397783251231527,
|
|
"grad_norm": 8.241609243061369,
|
|
"learning_rate": 1.183117500168125e-06,
|
|
"loss": 0.40499716997146606,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 2.399014778325123,
|
|
"grad_norm": 18.259844198245478,
|
|
"learning_rate": 1.1784924899865856e-06,
|
|
"loss": 0.9692997336387634,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 2.4002463054187193,
|
|
"grad_norm": 15.459619863404178,
|
|
"learning_rate": 1.1738753295091986e-06,
|
|
"loss": 0.3848229646682739,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 2.4014778325123154,
|
|
"grad_norm": 10.437656103417114,
|
|
"learning_rate": 1.169266028220004e-06,
|
|
"loss": 0.4472384750843048,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 2.4027093596059115,
|
|
"grad_norm": 8.14141154883163,
|
|
"learning_rate": 1.164664595586904e-06,
|
|
"loss": 0.21374854445457458,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 2.4039408866995076,
|
|
"grad_norm": 9.895182845073167,
|
|
"learning_rate": 1.1600710410616367e-06,
|
|
"loss": 0.4789981544017792,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 2.405172413793103,
|
|
"grad_norm": 14.330046153248214,
|
|
"learning_rate": 1.1554853740797556e-06,
|
|
"loss": 0.6235543489456177,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 2.4064039408866993,
|
|
"grad_norm": 11.28922905122106,
|
|
"learning_rate": 1.1509076040606127e-06,
|
|
"loss": 0.42575669288635254,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 2.4076354679802954,
|
|
"grad_norm": 10.213241448714898,
|
|
"learning_rate": 1.1463377404073433e-06,
|
|
"loss": 0.22154280543327332,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 2.4088669950738915,
|
|
"grad_norm": 9.867650979911392,
|
|
"learning_rate": 1.1417757925068362e-06,
|
|
"loss": 0.5722556114196777,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 2.4100985221674875,
|
|
"grad_norm": 7.554394124376038,
|
|
"learning_rate": 1.137221769729725e-06,
|
|
"loss": 0.6502832174301147,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 2.4113300492610836,
|
|
"grad_norm": 13.191804943156788,
|
|
"learning_rate": 1.132675681430364e-06,
|
|
"loss": 0.41717976331710815,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 2.4125615763546797,
|
|
"grad_norm": 12.040721504656855,
|
|
"learning_rate": 1.1281375369468078e-06,
|
|
"loss": 0.3705020248889923,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 2.413793103448276,
|
|
"grad_norm": 19.08924876929562,
|
|
"learning_rate": 1.1236073456007928e-06,
|
|
"loss": 0.8128242492675781,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 2.415024630541872,
|
|
"grad_norm": 16.296662141524465,
|
|
"learning_rate": 1.1190851166977218e-06,
|
|
"loss": 0.7350403070449829,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 2.416256157635468,
|
|
"grad_norm": 7.0582572680809195,
|
|
"learning_rate": 1.1145708595266418e-06,
|
|
"loss": 0.5837904214859009,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 2.417487684729064,
|
|
"grad_norm": 8.875645426047061,
|
|
"learning_rate": 1.1100645833602231e-06,
|
|
"loss": 0.436983585357666,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 2.41871921182266,
|
|
"grad_norm": 9.396076477777111,
|
|
"learning_rate": 1.105566297454742e-06,
|
|
"loss": 0.4708068370819092,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 2.4199507389162562,
|
|
"grad_norm": 12.540961285951255,
|
|
"learning_rate": 1.1010760110500652e-06,
|
|
"loss": 0.37972012162208557,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 2.4211822660098523,
|
|
"grad_norm": 9.511768233063343,
|
|
"learning_rate": 1.0965937333696264e-06,
|
|
"loss": 0.3167269229888916,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 2.4224137931034484,
|
|
"grad_norm": 8.997618711574894,
|
|
"learning_rate": 1.0921194736204066e-06,
|
|
"loss": 0.3407049775123596,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 2.4236453201970445,
|
|
"grad_norm": 26.50748327469745,
|
|
"learning_rate": 1.0876532409929208e-06,
|
|
"loss": 0.7673642635345459,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 2.4248768472906406,
|
|
"grad_norm": 7.428296790887836,
|
|
"learning_rate": 1.083195044661195e-06,
|
|
"loss": 0.3029213845729828,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 2.4261083743842367,
|
|
"grad_norm": 16.297521234369484,
|
|
"learning_rate": 1.0787448937827428e-06,
|
|
"loss": 0.5143488049507141,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 2.4273399014778327,
|
|
"grad_norm": 9.838022492363262,
|
|
"learning_rate": 1.0743027974985576e-06,
|
|
"loss": 0.5086369514465332,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 2.4285714285714284,
|
|
"grad_norm": 11.760234490761677,
|
|
"learning_rate": 1.069868764933088e-06,
|
|
"loss": 0.7999781966209412,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 2.4298029556650245,
|
|
"grad_norm": 8.348930224912683,
|
|
"learning_rate": 1.065442805194214e-06,
|
|
"loss": 0.2686223089694977,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 2.4310344827586206,
|
|
"grad_norm": 10.189321214439989,
|
|
"learning_rate": 1.0610249273732393e-06,
|
|
"loss": 0.2520446181297302,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 2.4322660098522166,
|
|
"grad_norm": 11.006280468973555,
|
|
"learning_rate": 1.056615140544861e-06,
|
|
"loss": 0.28887757658958435,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 2.4334975369458127,
|
|
"grad_norm": 17.908792965669562,
|
|
"learning_rate": 1.0522134537671625e-06,
|
|
"loss": 0.3709273338317871,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 2.434729064039409,
|
|
"grad_norm": 8.261377574040777,
|
|
"learning_rate": 1.0478198760815833e-06,
|
|
"loss": 0.6718100309371948,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 2.435960591133005,
|
|
"grad_norm": 8.787835782948932,
|
|
"learning_rate": 1.0434344165129095e-06,
|
|
"loss": 0.17143529653549194,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 2.437192118226601,
|
|
"grad_norm": 15.115289039167425,
|
|
"learning_rate": 1.0390570840692527e-06,
|
|
"loss": 0.7128796577453613,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 2.438423645320197,
|
|
"grad_norm": 13.46718512167487,
|
|
"learning_rate": 1.034687887742028e-06,
|
|
"loss": 0.24575555324554443,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 2.439655172413793,
|
|
"grad_norm": 15.637303471440513,
|
|
"learning_rate": 1.0303268365059383e-06,
|
|
"loss": 0.5631250739097595,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 2.4408866995073892,
|
|
"grad_norm": 10.921107789227744,
|
|
"learning_rate": 1.0259739393189573e-06,
|
|
"loss": 0.3094029128551483,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 2.4421182266009853,
|
|
"grad_norm": 9.876371637108129,
|
|
"learning_rate": 1.021629205122311e-06,
|
|
"loss": 0.4754146635532379,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 2.4433497536945814,
|
|
"grad_norm": 11.197843935010443,
|
|
"learning_rate": 1.0172926428404527e-06,
|
|
"loss": 0.18599992990493774,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 2.4445812807881775,
|
|
"grad_norm": 11.60242134696919,
|
|
"learning_rate": 1.0129642613810576e-06,
|
|
"loss": 0.3831806480884552,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 2.4458128078817736,
|
|
"grad_norm": 10.915359357263476,
|
|
"learning_rate": 1.008644069634989e-06,
|
|
"loss": 0.7717353105545044,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 2.447044334975369,
|
|
"grad_norm": 16.40151326361354,
|
|
"learning_rate": 1.0043320764762915e-06,
|
|
"loss": 0.3248934745788574,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 2.4482758620689653,
|
|
"grad_norm": 7.869645643343828,
|
|
"learning_rate": 1.0000282907621694e-06,
|
|
"loss": 0.27836111187934875,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 2.4495073891625614,
|
|
"grad_norm": 10.609052698858209,
|
|
"learning_rate": 9.957327213329687e-07,
|
|
"loss": 0.20251630246639252,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 2.4507389162561575,
|
|
"grad_norm": 15.802681481740834,
|
|
"learning_rate": 9.914453770121557e-07,
|
|
"loss": 0.6009274125099182,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 2.4519704433497536,
|
|
"grad_norm": 12.5975867275524,
|
|
"learning_rate": 9.871662666063054e-07,
|
|
"loss": 0.3312684893608093,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 2.4532019704433496,
|
|
"grad_norm": 11.710094793009787,
|
|
"learning_rate": 9.828953989050744e-07,
|
|
"loss": 0.38521629571914673,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 2.4544334975369457,
|
|
"grad_norm": 7.249324950790913,
|
|
"learning_rate": 9.786327826811942e-07,
|
|
"loss": 0.2508774995803833,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 2.455665024630542,
|
|
"grad_norm": 9.220463260574913,
|
|
"learning_rate": 9.743784266904422e-07,
|
|
"loss": 0.36097291111946106,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 2.456896551724138,
|
|
"grad_norm": 22.22398053360695,
|
|
"learning_rate": 9.701323396716312e-07,
|
|
"loss": 0.6703237295150757,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 2.458128078817734,
|
|
"grad_norm": 10.185390156514575,
|
|
"learning_rate": 9.6589453034659e-07,
|
|
"loss": 0.9553302526473999,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 2.45935960591133,
|
|
"grad_norm": 10.103225854124274,
|
|
"learning_rate": 9.616650074201383e-07,
|
|
"loss": 0.3288821578025818,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 2.460591133004926,
|
|
"grad_norm": 9.00369401838797,
|
|
"learning_rate": 9.574437795800806e-07,
|
|
"loss": 0.3195754885673523,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 2.4618226600985222,
|
|
"grad_norm": 15.805795563779297,
|
|
"learning_rate": 9.532308554971831e-07,
|
|
"loss": 0.26505401730537415,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 2.4630541871921183,
|
|
"grad_norm": 11.25947467258853,
|
|
"learning_rate": 9.490262438251496e-07,
|
|
"loss": 0.43558627367019653,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 2.4642857142857144,
|
|
"grad_norm": 10.457734518302678,
|
|
"learning_rate": 9.44829953200615e-07,
|
|
"loss": 0.3582439720630646,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 2.4655172413793105,
|
|
"grad_norm": 12.231152863168465,
|
|
"learning_rate": 9.406419922431214e-07,
|
|
"loss": 0.7142423987388611,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 2.4667487684729066,
|
|
"grad_norm": 12.479544686562418,
|
|
"learning_rate": 9.364623695550979e-07,
|
|
"loss": 0.24947094917297363,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 2.4679802955665027,
|
|
"grad_norm": 16.323337348543824,
|
|
"learning_rate": 9.322910937218471e-07,
|
|
"loss": 1.0376765727996826,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 2.4692118226600988,
|
|
"grad_norm": 12.025786233159009,
|
|
"learning_rate": 9.281281733115288e-07,
|
|
"loss": 0.39291733503341675,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 2.4704433497536944,
|
|
"grad_norm": 15.526509163555014,
|
|
"learning_rate": 9.239736168751395e-07,
|
|
"loss": 1.1038362979888916,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 2.4716748768472905,
|
|
"grad_norm": 10.027251067087649,
|
|
"learning_rate": 9.198274329464929e-07,
|
|
"loss": 0.8542830944061279,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 2.4729064039408866,
|
|
"grad_norm": 20.306111450694207,
|
|
"learning_rate": 9.156896300422053e-07,
|
|
"loss": 0.807994544506073,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 2.4741379310344827,
|
|
"grad_norm": 5.653479787843331,
|
|
"learning_rate": 9.115602166616805e-07,
|
|
"loss": 0.17016081511974335,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 2.4753694581280787,
|
|
"grad_norm": 11.492766886926658,
|
|
"learning_rate": 9.07439201287088e-07,
|
|
"loss": 0.7831156849861145,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 2.476600985221675,
|
|
"grad_norm": 9.3732349373237,
|
|
"learning_rate": 9.033265923833446e-07,
|
|
"loss": 0.5146660804748535,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 2.477832512315271,
|
|
"grad_norm": 13.78559435557381,
|
|
"learning_rate": 8.992223983981035e-07,
|
|
"loss": 0.5641926527023315,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 2.479064039408867,
|
|
"grad_norm": 7.867545716232377,
|
|
"learning_rate": 8.951266277617326e-07,
|
|
"loss": 0.2155514359474182,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 2.480295566502463,
|
|
"grad_norm": 11.172087233714553,
|
|
"learning_rate": 8.91039288887292e-07,
|
|
"loss": 0.28125351667404175,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 2.481527093596059,
|
|
"grad_norm": 10.827596711387834,
|
|
"learning_rate": 8.869603901705287e-07,
|
|
"loss": 0.5349509716033936,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 2.4827586206896552,
|
|
"grad_norm": 10.652684351436065,
|
|
"learning_rate": 8.82889939989851e-07,
|
|
"loss": 0.43747422099113464,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 2.4839901477832513,
|
|
"grad_norm": 8.656359342370678,
|
|
"learning_rate": 8.78827946706311e-07,
|
|
"loss": 0.4629102647304535,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 2.4852216748768474,
|
|
"grad_norm": 9.302169561481923,
|
|
"learning_rate": 8.747744186635932e-07,
|
|
"loss": 0.41271477937698364,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 2.4864532019704435,
|
|
"grad_norm": 7.585718354318216,
|
|
"learning_rate": 8.707293641879888e-07,
|
|
"loss": 0.27247580885887146,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 2.4876847290640396,
|
|
"grad_norm": 11.7662978456361,
|
|
"learning_rate": 8.666927915883905e-07,
|
|
"loss": 1.4255273342132568,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 2.4889162561576352,
|
|
"grad_norm": 12.62783666106837,
|
|
"learning_rate": 8.626647091562612e-07,
|
|
"loss": 0.8762021660804749,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 2.4901477832512313,
|
|
"grad_norm": 7.781392053224673,
|
|
"learning_rate": 8.586451251656286e-07,
|
|
"loss": 0.43475109338760376,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 2.4913793103448274,
|
|
"grad_norm": 8.647004326334777,
|
|
"learning_rate": 8.546340478730647e-07,
|
|
"loss": 0.16091346740722656,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 2.4926108374384235,
|
|
"grad_norm": 10.050856051691818,
|
|
"learning_rate": 8.506314855176651e-07,
|
|
"loss": 0.491144061088562,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 2.4938423645320196,
|
|
"grad_norm": 15.049291696206959,
|
|
"learning_rate": 8.466374463210348e-07,
|
|
"loss": 0.792976438999176,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 2.4950738916256157,
|
|
"grad_norm": 13.192276803646186,
|
|
"learning_rate": 8.426519384872733e-07,
|
|
"loss": 0.8023815155029297,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 2.4963054187192117,
|
|
"grad_norm": 10.183319190154988,
|
|
"learning_rate": 8.386749702029578e-07,
|
|
"loss": 0.7008549571037292,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 2.497536945812808,
|
|
"grad_norm": 9.306826775675583,
|
|
"learning_rate": 8.347065496371193e-07,
|
|
"loss": 0.3158326745033264,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 2.498768472906404,
|
|
"grad_norm": 11.439845656368037,
|
|
"learning_rate": 8.307466849412365e-07,
|
|
"loss": 0.4847475588321686,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 8.392845077442193,
|
|
"learning_rate": 8.2679538424921e-07,
|
|
"loss": 0.42490729689598083,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 2.501231527093596,
|
|
"grad_norm": 8.86668163556195,
|
|
"learning_rate": 8.228526556773486e-07,
|
|
"loss": 0.4303053021430969,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 2.502463054187192,
|
|
"grad_norm": 9.647239720582808,
|
|
"learning_rate": 8.18918507324356e-07,
|
|
"loss": 0.20669305324554443,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 2.5036945812807883,
|
|
"grad_norm": 14.868819185388821,
|
|
"learning_rate": 8.149929472713126e-07,
|
|
"loss": 0.4146193265914917,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 2.5049261083743843,
|
|
"grad_norm": 8.521845217294674,
|
|
"learning_rate": 8.110759835816518e-07,
|
|
"loss": 0.2852465510368347,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 2.5061576354679804,
|
|
"grad_norm": 9.65764576867383,
|
|
"learning_rate": 8.071676243011556e-07,
|
|
"loss": 0.5811144113540649,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 2.5073891625615765,
|
|
"grad_norm": 13.619550034189677,
|
|
"learning_rate": 8.032678774579272e-07,
|
|
"loss": 0.6767745614051819,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 2.5086206896551726,
|
|
"grad_norm": 10.986185907881213,
|
|
"learning_rate": 7.993767510623834e-07,
|
|
"loss": 0.5063849687576294,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 2.5098522167487687,
|
|
"grad_norm": 11.539593137413142,
|
|
"learning_rate": 7.954942531072285e-07,
|
|
"loss": 0.534786581993103,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 2.5110837438423648,
|
|
"grad_norm": 12.505177711554532,
|
|
"learning_rate": 7.91620391567448e-07,
|
|
"loss": 0.45122361183166504,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 2.512315270935961,
|
|
"grad_norm": 8.839741542848381,
|
|
"learning_rate": 7.877551744002881e-07,
|
|
"loss": 0.2832280099391937,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 2.5135467980295565,
|
|
"grad_norm": 11.718433441522615,
|
|
"learning_rate": 7.838986095452311e-07,
|
|
"loss": 0.8926963806152344,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 2.5147783251231526,
|
|
"grad_norm": 9.73145152883671,
|
|
"learning_rate": 7.800507049239947e-07,
|
|
"loss": 0.9263632893562317,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 2.5160098522167487,
|
|
"grad_norm": 16.48224794173804,
|
|
"learning_rate": 7.762114684405064e-07,
|
|
"loss": 0.3994196653366089,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 2.5172413793103448,
|
|
"grad_norm": 10.084446546675132,
|
|
"learning_rate": 7.723809079808842e-07,
|
|
"loss": 0.3273079991340637,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 2.518472906403941,
|
|
"grad_norm": 19.899209678081235,
|
|
"learning_rate": 7.685590314134294e-07,
|
|
"loss": 0.4566258192062378,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 2.519704433497537,
|
|
"grad_norm": 16.13317422246351,
|
|
"learning_rate": 7.647458465886055e-07,
|
|
"loss": 0.4199177026748657,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 2.520935960591133,
|
|
"grad_norm": 7.584665550484686,
|
|
"learning_rate": 7.609413613390199e-07,
|
|
"loss": 0.2789694666862488,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 2.522167487684729,
|
|
"grad_norm": 12.08003380462593,
|
|
"learning_rate": 7.571455834794095e-07,
|
|
"loss": 0.39359426498413086,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 2.523399014778325,
|
|
"grad_norm": 16.766513036441403,
|
|
"learning_rate": 7.533585208066302e-07,
|
|
"loss": 0.38510677218437195,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 2.5246305418719213,
|
|
"grad_norm": 14.332573036568608,
|
|
"learning_rate": 7.495801810996334e-07,
|
|
"loss": 1.0861276388168335,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 2.5258620689655173,
|
|
"grad_norm": 13.180696978229305,
|
|
"learning_rate": 7.458105721194525e-07,
|
|
"loss": 0.35866010189056396,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 2.5270935960591134,
|
|
"grad_norm": 8.80983116890946,
|
|
"learning_rate": 7.420497016091866e-07,
|
|
"loss": 0.3436219394207001,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 2.5283251231527095,
|
|
"grad_norm": 12.383092324048317,
|
|
"learning_rate": 7.382975772939866e-07,
|
|
"loss": 0.3687105178833008,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 2.529556650246305,
|
|
"grad_norm": 8.240739854437226,
|
|
"learning_rate": 7.34554206881039e-07,
|
|
"loss": 0.32671070098876953,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 2.5307881773399012,
|
|
"grad_norm": 11.575392957436732,
|
|
"learning_rate": 7.308195980595462e-07,
|
|
"loss": 0.7302184104919434,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 2.5320197044334973,
|
|
"grad_norm": 13.7288446044892,
|
|
"learning_rate": 7.270937585007149e-07,
|
|
"loss": 0.7430564761161804,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 2.5332512315270934,
|
|
"grad_norm": 8.666358783874388,
|
|
"learning_rate": 7.233766958577421e-07,
|
|
"loss": 0.305151104927063,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 2.5344827586206895,
|
|
"grad_norm": 17.881705697560324,
|
|
"learning_rate": 7.196684177657887e-07,
|
|
"loss": 0.4311235547065735,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 2.5357142857142856,
|
|
"grad_norm": 13.989195036115625,
|
|
"learning_rate": 7.159689318419777e-07,
|
|
"loss": 0.29697108268737793,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 2.5369458128078817,
|
|
"grad_norm": 10.004375359602093,
|
|
"learning_rate": 7.122782456853722e-07,
|
|
"loss": 0.5012999176979065,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 2.5381773399014778,
|
|
"grad_norm": 10.441122865704237,
|
|
"learning_rate": 7.085963668769552e-07,
|
|
"loss": 0.24754227697849274,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 2.539408866995074,
|
|
"grad_norm": 7.415294238465162,
|
|
"learning_rate": 7.049233029796243e-07,
|
|
"loss": 0.1311894953250885,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 2.54064039408867,
|
|
"grad_norm": 11.745936375906483,
|
|
"learning_rate": 7.012590615381654e-07,
|
|
"loss": 0.3458009958267212,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 2.541871921182266,
|
|
"grad_norm": 19.579629082198277,
|
|
"learning_rate": 6.976036500792466e-07,
|
|
"loss": 0.6216360330581665,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 2.543103448275862,
|
|
"grad_norm": 17.511409594621433,
|
|
"learning_rate": 6.939570761113939e-07,
|
|
"loss": 0.41114604473114014,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 2.544334975369458,
|
|
"grad_norm": 12.769592062525021,
|
|
"learning_rate": 6.903193471249853e-07,
|
|
"loss": 0.35362619161605835,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 2.5455665024630543,
|
|
"grad_norm": 15.37068507816602,
|
|
"learning_rate": 6.866904705922284e-07,
|
|
"loss": 1.7280857563018799,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 2.5467980295566504,
|
|
"grad_norm": 12.864848425460373,
|
|
"learning_rate": 6.830704539671462e-07,
|
|
"loss": 1.3645777702331543,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 2.5480295566502464,
|
|
"grad_norm": 8.663375537691056,
|
|
"learning_rate": 6.794593046855613e-07,
|
|
"loss": 0.46488872170448303,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 2.5492610837438425,
|
|
"grad_norm": 11.746641376676559,
|
|
"learning_rate": 6.758570301650869e-07,
|
|
"loss": 0.9913250803947449,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 2.5504926108374386,
|
|
"grad_norm": 14.714182423444447,
|
|
"learning_rate": 6.722636378051011e-07,
|
|
"loss": 0.8180273771286011,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 2.5517241379310347,
|
|
"grad_norm": 7.848050259431333,
|
|
"learning_rate": 6.686791349867422e-07,
|
|
"loss": 0.5234679579734802,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 2.552955665024631,
|
|
"grad_norm": 6.903410737354236,
|
|
"learning_rate": 6.651035290728858e-07,
|
|
"loss": 0.08975313603878021,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 2.554187192118227,
|
|
"grad_norm": 11.27527783341364,
|
|
"learning_rate": 6.615368274081335e-07,
|
|
"loss": 0.35545456409454346,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 2.5554187192118225,
|
|
"grad_norm": 11.726857926860664,
|
|
"learning_rate": 6.579790373187944e-07,
|
|
"loss": 1.192006230354309,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 2.5566502463054186,
|
|
"grad_norm": 18.37387229568444,
|
|
"learning_rate": 6.54430166112876e-07,
|
|
"loss": 0.35069915652275085,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 2.5578817733990147,
|
|
"grad_norm": 9.620718531681447,
|
|
"learning_rate": 6.508902210800649e-07,
|
|
"loss": 0.20691820979118347,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 2.5591133004926108,
|
|
"grad_norm": 16.343394062782135,
|
|
"learning_rate": 6.473592094917092e-07,
|
|
"loss": 0.4561042785644531,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 2.560344827586207,
|
|
"grad_norm": 11.889860706895831,
|
|
"learning_rate": 6.43837138600813e-07,
|
|
"loss": 0.32198822498321533,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 2.561576354679803,
|
|
"grad_norm": 10.519181625251578,
|
|
"learning_rate": 6.403240156420087e-07,
|
|
"loss": 0.35681653022766113,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 2.562807881773399,
|
|
"grad_norm": 9.426944191114051,
|
|
"learning_rate": 6.36819847831554e-07,
|
|
"loss": 0.5826268196105957,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 2.564039408866995,
|
|
"grad_norm": 10.18400417142911,
|
|
"learning_rate": 6.333246423673096e-07,
|
|
"loss": 0.23084279894828796,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 2.565270935960591,
|
|
"grad_norm": 8.146966381833735,
|
|
"learning_rate": 6.298384064287261e-07,
|
|
"loss": 0.5527750253677368,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 2.5665024630541873,
|
|
"grad_norm": 7.581778739386861,
|
|
"learning_rate": 6.263611471768349e-07,
|
|
"loss": 0.4125085175037384,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 2.5677339901477834,
|
|
"grad_norm": 9.31385960486644,
|
|
"learning_rate": 6.228928717542205e-07,
|
|
"loss": 0.37431174516677856,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 2.5689655172413794,
|
|
"grad_norm": 9.72676402112677,
|
|
"learning_rate": 6.194335872850188e-07,
|
|
"loss": 0.17119471728801727,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 2.5701970443349755,
|
|
"grad_norm": 11.790310632986847,
|
|
"learning_rate": 6.159833008748988e-07,
|
|
"loss": 0.9465748071670532,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 25.018614409312026,
|
|
"learning_rate": 6.125420196110426e-07,
|
|
"loss": 0.48980847001075745,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 2.5726600985221673,
|
|
"grad_norm": 8.85280166601153,
|
|
"learning_rate": 6.091097505621374e-07,
|
|
"loss": 0.7195557951927185,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 2.5738916256157633,
|
|
"grad_norm": 12.112085029881426,
|
|
"learning_rate": 6.056865007783602e-07,
|
|
"loss": 1.83125638961792,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 2.5751231527093594,
|
|
"grad_norm": 9.94028667902401,
|
|
"learning_rate": 6.022722772913581e-07,
|
|
"loss": 0.3298517167568207,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 2.5763546798029555,
|
|
"grad_norm": 11.18503180129702,
|
|
"learning_rate": 5.988670871142377e-07,
|
|
"loss": 0.47125905752182007,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 2.5775862068965516,
|
|
"grad_norm": 9.413844300619951,
|
|
"learning_rate": 5.954709372415524e-07,
|
|
"loss": 0.288496196269989,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 2.5788177339901477,
|
|
"grad_norm": 7.1811144983138675,
|
|
"learning_rate": 5.920838346492874e-07,
|
|
"loss": 0.3627285957336426,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 2.5800492610837438,
|
|
"grad_norm": 14.830294096591077,
|
|
"learning_rate": 5.887057862948403e-07,
|
|
"loss": 0.7072806358337402,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 2.58128078817734,
|
|
"grad_norm": 10.644924386002677,
|
|
"learning_rate": 5.853367991170106e-07,
|
|
"loss": 0.3386034071445465,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 2.582512315270936,
|
|
"grad_norm": 14.094564220777247,
|
|
"learning_rate": 5.819768800359882e-07,
|
|
"loss": 0.4901737570762634,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 2.583743842364532,
|
|
"grad_norm": 10.630160715256755,
|
|
"learning_rate": 5.786260359533369e-07,
|
|
"loss": 1.683629035949707,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 2.584975369458128,
|
|
"grad_norm": 8.221455266315619,
|
|
"learning_rate": 5.752842737519743e-07,
|
|
"loss": 0.4275779128074646,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 2.586206896551724,
|
|
"grad_norm": 8.989808316079593,
|
|
"learning_rate": 5.7195160029617e-07,
|
|
"loss": 0.6892256736755371,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 2.5874384236453203,
|
|
"grad_norm": 10.390493407130242,
|
|
"learning_rate": 5.686280224315189e-07,
|
|
"loss": 0.6548988819122314,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 2.5886699507389164,
|
|
"grad_norm": 8.365114703591324,
|
|
"learning_rate": 5.653135469849347e-07,
|
|
"loss": 0.4431142807006836,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 2.5899014778325125,
|
|
"grad_norm": 20.296284889046316,
|
|
"learning_rate": 5.62008180764635e-07,
|
|
"loss": 0.5730191469192505,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 2.5911330049261085,
|
|
"grad_norm": 7.886033521206941,
|
|
"learning_rate": 5.587119305601263e-07,
|
|
"loss": 0.8734421730041504,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 2.5923645320197046,
|
|
"grad_norm": 7.851476190792639,
|
|
"learning_rate": 5.554248031421872e-07,
|
|
"loss": 0.30810514092445374,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 2.5935960591133007,
|
|
"grad_norm": 10.114012805058133,
|
|
"learning_rate": 5.521468052628615e-07,
|
|
"loss": 0.5941227078437805,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 2.594827586206897,
|
|
"grad_norm": 11.5276807645432,
|
|
"learning_rate": 5.488779436554359e-07,
|
|
"loss": 0.32648181915283203,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 2.596059113300493,
|
|
"grad_norm": 12.384461199116616,
|
|
"learning_rate": 5.456182250344349e-07,
|
|
"loss": 0.2934610843658447,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 2.5972906403940885,
|
|
"grad_norm": 9.420595645239136,
|
|
"learning_rate": 5.423676560955976e-07,
|
|
"loss": 0.20387941598892212,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 2.5985221674876846,
|
|
"grad_norm": 10.459297088933635,
|
|
"learning_rate": 5.391262435158722e-07,
|
|
"loss": 0.6115235090255737,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 2.5997536945812807,
|
|
"grad_norm": 13.891885044549888,
|
|
"learning_rate": 5.358939939534002e-07,
|
|
"loss": 0.45280611515045166,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 2.600985221674877,
|
|
"grad_norm": 8.172861215602202,
|
|
"learning_rate": 5.326709140474962e-07,
|
|
"loss": 0.29169538617134094,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 2.602216748768473,
|
|
"grad_norm": 6.844042320685791,
|
|
"learning_rate": 5.294570104186436e-07,
|
|
"loss": 0.4924798011779785,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 2.603448275862069,
|
|
"grad_norm": 12.392169249298135,
|
|
"learning_rate": 5.262522896684774e-07,
|
|
"loss": 0.6751348376274109,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 2.604679802955665,
|
|
"grad_norm": 13.993739996881734,
|
|
"learning_rate": 5.230567583797674e-07,
|
|
"loss": 0.6676002740859985,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 2.605911330049261,
|
|
"grad_norm": 12.746427038097593,
|
|
"learning_rate": 5.198704231164093e-07,
|
|
"loss": 0.3112475275993347,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 2.607142857142857,
|
|
"grad_norm": 9.88854663199865,
|
|
"learning_rate": 5.166932904234101e-07,
|
|
"loss": 0.5024739503860474,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 2.6083743842364533,
|
|
"grad_norm": 18.4856178419616,
|
|
"learning_rate": 5.135253668268724e-07,
|
|
"loss": 2.6769824028015137,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 2.6096059113300494,
|
|
"grad_norm": 12.280278924091732,
|
|
"learning_rate": 5.103666588339812e-07,
|
|
"loss": 0.4120222330093384,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 2.6108374384236455,
|
|
"grad_norm": 8.106704210398478,
|
|
"learning_rate": 5.072171729329944e-07,
|
|
"loss": 0.3238741457462311,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 2.612068965517241,
|
|
"grad_norm": 9.476233543897594,
|
|
"learning_rate": 5.040769155932285e-07,
|
|
"loss": 0.41853106021881104,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 2.613300492610837,
|
|
"grad_norm": 9.382868411266552,
|
|
"learning_rate": 5.00945893265039e-07,
|
|
"loss": 0.5511228442192078,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 2.6145320197044333,
|
|
"grad_norm": 10.011756541997418,
|
|
"learning_rate": 4.978241123798133e-07,
|
|
"loss": 0.6076939105987549,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 2.6157635467980294,
|
|
"grad_norm": 11.969458383094386,
|
|
"learning_rate": 4.94711579349959e-07,
|
|
"loss": 0.32137832045555115,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 2.6169950738916254,
|
|
"grad_norm": 9.120309940189742,
|
|
"learning_rate": 4.916083005688865e-07,
|
|
"loss": 0.2919730246067047,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 2.6182266009852215,
|
|
"grad_norm": 11.012298283555321,
|
|
"learning_rate": 4.885142824109946e-07,
|
|
"loss": 0.3521897792816162,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 2.6194581280788176,
|
|
"grad_norm": 10.719771585992975,
|
|
"learning_rate": 4.85429531231662e-07,
|
|
"loss": 0.5645777583122253,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 2.6206896551724137,
|
|
"grad_norm": 8.564760545887571,
|
|
"learning_rate": 4.823540533672355e-07,
|
|
"loss": 0.21364668011665344,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 2.62192118226601,
|
|
"grad_norm": 10.461100625681352,
|
|
"learning_rate": 4.792878551350055e-07,
|
|
"loss": 0.3472633957862854,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 2.623152709359606,
|
|
"grad_norm": 7.7796379590314295,
|
|
"learning_rate": 4.7623094283320905e-07,
|
|
"loss": 0.2312706857919693,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 2.624384236453202,
|
|
"grad_norm": 10.908716191951015,
|
|
"learning_rate": 4.7318332274100595e-07,
|
|
"loss": 0.4227292835712433,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 2.625615763546798,
|
|
"grad_norm": 11.077941430018797,
|
|
"learning_rate": 4.701450011184677e-07,
|
|
"loss": 0.4835679531097412,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 2.626847290640394,
|
|
"grad_norm": 8.011667181424437,
|
|
"learning_rate": 4.671159842065698e-07,
|
|
"loss": 0.30153489112854004,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 2.62807881773399,
|
|
"grad_norm": 9.961423240887521,
|
|
"learning_rate": 4.640962782271707e-07,
|
|
"loss": 0.19820570945739746,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 2.6293103448275863,
|
|
"grad_norm": 18.168474918209572,
|
|
"learning_rate": 4.6108588938300725e-07,
|
|
"loss": 0.5798308253288269,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 2.6305418719211824,
|
|
"grad_norm": 14.982461578988175,
|
|
"learning_rate": 4.5808482385767407e-07,
|
|
"loss": 0.4840395450592041,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 2.6317733990147785,
|
|
"grad_norm": 12.540506897781501,
|
|
"learning_rate": 4.5509308781561846e-07,
|
|
"loss": 0.33036884665489197,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 2.6330049261083746,
|
|
"grad_norm": 10.69964555424519,
|
|
"learning_rate": 4.521106874021242e-07,
|
|
"loss": 0.4032250642776489,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 2.6342364532019706,
|
|
"grad_norm": 10.190070867602095,
|
|
"learning_rate": 4.4913762874329527e-07,
|
|
"loss": 0.5196541547775269,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 2.6354679802955667,
|
|
"grad_norm": 15.414254295489695,
|
|
"learning_rate": 4.4617391794604946e-07,
|
|
"loss": 0.5049697160720825,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 2.636699507389163,
|
|
"grad_norm": 11.232489708483897,
|
|
"learning_rate": 4.4321956109810327e-07,
|
|
"loss": 0.6910302639007568,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 2.637931034482759,
|
|
"grad_norm": 17.874353794074672,
|
|
"learning_rate": 4.4027456426796014e-07,
|
|
"loss": 0.8860565423965454,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 2.6391625615763545,
|
|
"grad_norm": 8.315561152824909,
|
|
"learning_rate": 4.3733893350489386e-07,
|
|
"loss": 0.3347795307636261,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 2.6403940886699506,
|
|
"grad_norm": 8.406655821874109,
|
|
"learning_rate": 4.344126748389438e-07,
|
|
"loss": 0.5979218482971191,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 2.6416256157635467,
|
|
"grad_norm": 10.633642678256232,
|
|
"learning_rate": 4.314957942808956e-07,
|
|
"loss": 0.6724722385406494,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 2.642857142857143,
|
|
"grad_norm": 11.37770126439957,
|
|
"learning_rate": 4.2858829782227107e-07,
|
|
"loss": 0.23655423521995544,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 2.644088669950739,
|
|
"grad_norm": 13.564798867932334,
|
|
"learning_rate": 4.2569019143531845e-07,
|
|
"loss": 0.7535929679870605,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 2.645320197044335,
|
|
"grad_norm": 7.225057762729149,
|
|
"learning_rate": 4.228014810729963e-07,
|
|
"loss": 0.5065590143203735,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 2.646551724137931,
|
|
"grad_norm": 11.646047154930116,
|
|
"learning_rate": 4.199221726689634e-07,
|
|
"loss": 0.8232078552246094,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 2.647783251231527,
|
|
"grad_norm": 12.627075206048184,
|
|
"learning_rate": 4.170522721375669e-07,
|
|
"loss": 0.3928985595703125,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 2.649014778325123,
|
|
"grad_norm": 11.823044988035218,
|
|
"learning_rate": 4.1419178537382756e-07,
|
|
"loss": 0.6924771070480347,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 2.6502463054187193,
|
|
"grad_norm": 8.99171598727701,
|
|
"learning_rate": 4.1134071825343124e-07,
|
|
"loss": 0.3323458135128021,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 2.6514778325123154,
|
|
"grad_norm": 8.020309669901565,
|
|
"learning_rate": 4.0849907663271346e-07,
|
|
"loss": 0.6068896651268005,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 2.6527093596059115,
|
|
"grad_norm": 9.698785865473045,
|
|
"learning_rate": 4.0566686634865016e-07,
|
|
"loss": 0.2112211287021637,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 2.653940886699507,
|
|
"grad_norm": 8.70939943207942,
|
|
"learning_rate": 4.028440932188465e-07,
|
|
"loss": 0.3340219259262085,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 2.655172413793103,
|
|
"grad_norm": 16.06563756982883,
|
|
"learning_rate": 4.0003076304151624e-07,
|
|
"loss": 0.4172120690345764,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 2.6564039408866993,
|
|
"grad_norm": 10.448504154619048,
|
|
"learning_rate": 3.972268815954833e-07,
|
|
"loss": 0.3891775608062744,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 2.6576354679802954,
|
|
"grad_norm": 14.733135115767965,
|
|
"learning_rate": 3.944324546401607e-07,
|
|
"loss": 0.4906957149505615,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 2.6588669950738915,
|
|
"grad_norm": 9.613272858024363,
|
|
"learning_rate": 3.916474879155402e-07,
|
|
"loss": 0.8216167688369751,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 2.6600985221674875,
|
|
"grad_norm": 10.257611413751764,
|
|
"learning_rate": 3.8887198714218255e-07,
|
|
"loss": 0.2030409872531891,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 2.6613300492610836,
|
|
"grad_norm": 7.648297896745766,
|
|
"learning_rate": 3.8610595802120564e-07,
|
|
"loss": 0.24565047025680542,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 2.6625615763546797,
|
|
"grad_norm": 10.822762486642535,
|
|
"learning_rate": 3.833494062342691e-07,
|
|
"loss": 0.3111516833305359,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 2.663793103448276,
|
|
"grad_norm": 7.318326050197103,
|
|
"learning_rate": 3.8060233744356634e-07,
|
|
"loss": 0.32978883385658264,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 2.665024630541872,
|
|
"grad_norm": 12.599543466460439,
|
|
"learning_rate": 3.7786475729181314e-07,
|
|
"loss": 0.5468876361846924,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 2.666256157635468,
|
|
"grad_norm": 8.338604416987764,
|
|
"learning_rate": 3.751366714022342e-07,
|
|
"loss": 0.25511908531188965,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 2.667487684729064,
|
|
"grad_norm": 10.389301741607085,
|
|
"learning_rate": 3.724180853785514e-07,
|
|
"loss": 0.9938629269599915,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 2.66871921182266,
|
|
"grad_norm": 12.267953130443164,
|
|
"learning_rate": 3.6970900480497287e-07,
|
|
"loss": 0.4233144223690033,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 2.6699507389162562,
|
|
"grad_norm": 11.571711586702998,
|
|
"learning_rate": 3.6700943524618284e-07,
|
|
"loss": 0.39373546838760376,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 2.6711822660098523,
|
|
"grad_norm": 9.063048538209927,
|
|
"learning_rate": 3.643193822473301e-07,
|
|
"loss": 0.40346717834472656,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 2.6724137931034484,
|
|
"grad_norm": 14.384271085159352,
|
|
"learning_rate": 3.616388513340124e-07,
|
|
"loss": 0.35343194007873535,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 2.6736453201970445,
|
|
"grad_norm": 16.277411971018296,
|
|
"learning_rate": 3.5896784801227046e-07,
|
|
"loss": 0.38300061225891113,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 2.6748768472906406,
|
|
"grad_norm": 7.950757575573031,
|
|
"learning_rate": 3.56306377768576e-07,
|
|
"loss": 0.5319961905479431,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 2.6761083743842367,
|
|
"grad_norm": 19.004855778838706,
|
|
"learning_rate": 3.5365444606981434e-07,
|
|
"loss": 0.45474281907081604,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 2.6773399014778327,
|
|
"grad_norm": 13.211081908527799,
|
|
"learning_rate": 3.5101205836328144e-07,
|
|
"loss": 0.41422080993652344,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 2.678571428571429,
|
|
"grad_norm": 12.892521639907137,
|
|
"learning_rate": 3.4837922007667e-07,
|
|
"loss": 0.5486617088317871,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 2.6798029556650245,
|
|
"grad_norm": 10.113357639811962,
|
|
"learning_rate": 3.4575593661805296e-07,
|
|
"loss": 0.27931463718414307,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 2.6810344827586206,
|
|
"grad_norm": 9.357499790574233,
|
|
"learning_rate": 3.4314221337588217e-07,
|
|
"loss": 0.45936134457588196,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 2.6822660098522166,
|
|
"grad_norm": 12.597881278175105,
|
|
"learning_rate": 3.405380557189669e-07,
|
|
"loss": 0.5659298896789551,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 2.6834975369458127,
|
|
"grad_norm": 16.9103130329337,
|
|
"learning_rate": 3.379434689964728e-07,
|
|
"loss": 0.3952332139015198,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 2.684729064039409,
|
|
"grad_norm": 13.280154300410791,
|
|
"learning_rate": 3.3535845853790105e-07,
|
|
"loss": 0.36344432830810547,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 2.685960591133005,
|
|
"grad_norm": 8.267427758719474,
|
|
"learning_rate": 3.3278302965308593e-07,
|
|
"loss": 0.29526573419570923,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 2.687192118226601,
|
|
"grad_norm": 14.172270303989801,
|
|
"learning_rate": 3.3021718763218025e-07,
|
|
"loss": 0.35098952054977417,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 2.688423645320197,
|
|
"grad_norm": 15.442089142249914,
|
|
"learning_rate": 3.276609377456419e-07,
|
|
"loss": 0.9407736659049988,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 2.689655172413793,
|
|
"grad_norm": 10.545470371926038,
|
|
"learning_rate": 3.2511428524422793e-07,
|
|
"loss": 0.29226356744766235,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 2.6908866995073892,
|
|
"grad_norm": 11.590832336497728,
|
|
"learning_rate": 3.2257723535898177e-07,
|
|
"loss": 0.78415846824646,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 2.6921182266009853,
|
|
"grad_norm": 10.523504017171055,
|
|
"learning_rate": 3.200497933012198e-07,
|
|
"loss": 0.22600015997886658,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 2.6933497536945814,
|
|
"grad_norm": 16.18317423891681,
|
|
"learning_rate": 3.1753196426252573e-07,
|
|
"loss": 0.3907809853553772,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 2.6945812807881775,
|
|
"grad_norm": 12.272867485671698,
|
|
"learning_rate": 3.150237534147366e-07,
|
|
"loss": 0.7056915760040283,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 2.695812807881773,
|
|
"grad_norm": 11.590493499262351,
|
|
"learning_rate": 3.125251659099332e-07,
|
|
"loss": 0.35921359062194824,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 2.697044334975369,
|
|
"grad_norm": 7.139507013908415,
|
|
"learning_rate": 3.1003620688042636e-07,
|
|
"loss": 0.17715278267860413,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 2.6982758620689653,
|
|
"grad_norm": 6.945336769527092,
|
|
"learning_rate": 3.0755688143875253e-07,
|
|
"loss": 0.20512376725673676,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 2.6995073891625614,
|
|
"grad_norm": 11.666932414854655,
|
|
"learning_rate": 3.050871946776596e-07,
|
|
"loss": 0.38939356803894043,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 2.7007389162561575,
|
|
"grad_norm": 8.970559885182587,
|
|
"learning_rate": 3.026271516700946e-07,
|
|
"loss": 0.3292514681816101,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 2.7019704433497536,
|
|
"grad_norm": 8.920484564263525,
|
|
"learning_rate": 3.0017675746919883e-07,
|
|
"loss": 0.2732661962509155,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 2.7032019704433496,
|
|
"grad_norm": 14.273169657648177,
|
|
"learning_rate": 2.9773601710828937e-07,
|
|
"loss": 0.3058941960334778,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 2.7044334975369457,
|
|
"grad_norm": 16.20827847981958,
|
|
"learning_rate": 2.953049356008586e-07,
|
|
"loss": 0.7454397082328796,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 2.705665024630542,
|
|
"grad_norm": 17.54054653840535,
|
|
"learning_rate": 2.928835179405548e-07,
|
|
"loss": 0.3679504692554474,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 2.706896551724138,
|
|
"grad_norm": 9.77472352239386,
|
|
"learning_rate": 2.9047176910117824e-07,
|
|
"loss": 0.2241794466972351,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 2.708128078817734,
|
|
"grad_norm": 8.561542797938362,
|
|
"learning_rate": 2.8806969403666897e-07,
|
|
"loss": 0.19927407801151276,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 2.70935960591133,
|
|
"grad_norm": 7.0959519302312195,
|
|
"learning_rate": 2.856772976810929e-07,
|
|
"loss": 0.2808955907821655,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 2.710591133004926,
|
|
"grad_norm": 21.456216648925764,
|
|
"learning_rate": 2.8329458494863846e-07,
|
|
"loss": 0.7279784083366394,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 2.7118226600985222,
|
|
"grad_norm": 8.853404617031957,
|
|
"learning_rate": 2.809215607336024e-07,
|
|
"loss": 0.47690945863723755,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 2.7130541871921183,
|
|
"grad_norm": 9.19562501308832,
|
|
"learning_rate": 2.7855822991037895e-07,
|
|
"loss": 0.1997358649969101,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 2.7142857142857144,
|
|
"grad_norm": 12.418182947084489,
|
|
"learning_rate": 2.762045973334526e-07,
|
|
"loss": 0.3269602954387665,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 2.7155172413793105,
|
|
"grad_norm": 9.253477256115538,
|
|
"learning_rate": 2.738606678373873e-07,
|
|
"loss": 0.5450934767723083,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 2.7167487684729066,
|
|
"grad_norm": 12.029880579085864,
|
|
"learning_rate": 2.7152644623681503e-07,
|
|
"loss": 0.4732050895690918,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 2.7179802955665027,
|
|
"grad_norm": 13.561046323857816,
|
|
"learning_rate": 2.6920193732642594e-07,
|
|
"loss": 0.26588505506515503,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 2.7192118226600988,
|
|
"grad_norm": 4.326966860689474,
|
|
"learning_rate": 2.668871458809613e-07,
|
|
"loss": 0.09280772507190704,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 2.720443349753695,
|
|
"grad_norm": 12.851246166510439,
|
|
"learning_rate": 2.6458207665520266e-07,
|
|
"loss": 0.3763241767883301,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 2.7216748768472905,
|
|
"grad_norm": 11.562947215162826,
|
|
"learning_rate": 2.6228673438395804e-07,
|
|
"loss": 0.46730220317840576,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 2.7229064039408866,
|
|
"grad_norm": 11.5850144160988,
|
|
"learning_rate": 2.600011237820577e-07,
|
|
"loss": 0.42677825689315796,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 2.7241379310344827,
|
|
"grad_norm": 15.077683389725815,
|
|
"learning_rate": 2.577252495443422e-07,
|
|
"loss": 0.4460552930831909,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 2.7253694581280787,
|
|
"grad_norm": 8.23073307445448,
|
|
"learning_rate": 2.5545911634565266e-07,
|
|
"loss": 0.5031150579452515,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 2.726600985221675,
|
|
"grad_norm": 11.590947176695321,
|
|
"learning_rate": 2.5320272884081955e-07,
|
|
"loss": 0.18559831380844116,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 2.727832512315271,
|
|
"grad_norm": 10.364105747898172,
|
|
"learning_rate": 2.5095609166465805e-07,
|
|
"loss": 0.2087395340204239,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 2.729064039408867,
|
|
"grad_norm": 7.72131921454244,
|
|
"learning_rate": 2.4871920943195404e-07,
|
|
"loss": 0.21503375470638275,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 2.730295566502463,
|
|
"grad_norm": 13.07348837914591,
|
|
"learning_rate": 2.4649208673745317e-07,
|
|
"loss": 0.20347240567207336,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 2.731527093596059,
|
|
"grad_norm": 7.396681990877147,
|
|
"learning_rate": 2.442747281558572e-07,
|
|
"loss": 0.20019523799419403,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 2.7327586206896552,
|
|
"grad_norm": 7.384056914568049,
|
|
"learning_rate": 2.420671382418122e-07,
|
|
"loss": 0.6672437191009521,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 2.7339901477832513,
|
|
"grad_norm": 9.4227706186618,
|
|
"learning_rate": 2.398693215298953e-07,
|
|
"loss": 0.28304070234298706,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 2.7352216748768474,
|
|
"grad_norm": 13.10398470275865,
|
|
"learning_rate": 2.3768128253461253e-07,
|
|
"loss": 0.7915571331977844,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 2.7364532019704435,
|
|
"grad_norm": 14.271199864374358,
|
|
"learning_rate": 2.3550302575038154e-07,
|
|
"loss": 0.2920302152633667,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 2.737684729064039,
|
|
"grad_norm": 9.98798818011476,
|
|
"learning_rate": 2.333345556515304e-07,
|
|
"loss": 0.7924119830131531,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 2.7389162561576352,
|
|
"grad_norm": 16.52502448354582,
|
|
"learning_rate": 2.311758766922806e-07,
|
|
"loss": 2.4264345169067383,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 2.7401477832512313,
|
|
"grad_norm": 11.115670896935416,
|
|
"learning_rate": 2.290269933067457e-07,
|
|
"loss": 0.6286523342132568,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 2.7413793103448274,
|
|
"grad_norm": 10.041583417397344,
|
|
"learning_rate": 2.2688790990891606e-07,
|
|
"loss": 0.4733774662017822,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 2.7426108374384235,
|
|
"grad_norm": 9.613596914422414,
|
|
"learning_rate": 2.2475863089265193e-07,
|
|
"loss": 0.41262203454971313,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 2.7438423645320196,
|
|
"grad_norm": 12.211203634057204,
|
|
"learning_rate": 2.2263916063167523e-07,
|
|
"loss": 0.9069987535476685,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 2.7450738916256157,
|
|
"grad_norm": 8.477983222031407,
|
|
"learning_rate": 2.205295034795596e-07,
|
|
"loss": 0.33371949195861816,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 2.7463054187192117,
|
|
"grad_norm": 10.672673009053705,
|
|
"learning_rate": 2.1842966376972142e-07,
|
|
"loss": 0.2515576183795929,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 2.747536945812808,
|
|
"grad_norm": 15.919489094243,
|
|
"learning_rate": 2.1633964581541212e-07,
|
|
"loss": 0.5854448080062866,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 2.748768472906404,
|
|
"grad_norm": 8.34813593109363,
|
|
"learning_rate": 2.1425945390970816e-07,
|
|
"loss": 0.36172378063201904,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"grad_norm": 13.095561050747872,
|
|
"learning_rate": 2.1218909232550156e-07,
|
|
"loss": 0.8217978477478027,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 2.751231527093596,
|
|
"grad_norm": 10.987521536719951,
|
|
"learning_rate": 2.1012856531549163e-07,
|
|
"loss": 0.5560616850852966,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 2.752463054187192,
|
|
"grad_norm": 15.220877022032928,
|
|
"learning_rate": 2.0807787711217887e-07,
|
|
"loss": 0.3503821790218353,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 2.7536945812807883,
|
|
"grad_norm": 17.985871130679012,
|
|
"learning_rate": 2.0603703192785264e-07,
|
|
"loss": 0.6000460982322693,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 2.7549261083743843,
|
|
"grad_norm": 10.345272170286153,
|
|
"learning_rate": 2.0400603395458408e-07,
|
|
"loss": 0.20410886406898499,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 2.7561576354679804,
|
|
"grad_norm": 10.777826560400182,
|
|
"learning_rate": 2.0198488736421607e-07,
|
|
"loss": 0.2497151494026184,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 2.7573891625615765,
|
|
"grad_norm": 9.330808767879285,
|
|
"learning_rate": 1.999735963083571e-07,
|
|
"loss": 0.2881111800670624,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 2.7586206896551726,
|
|
"grad_norm": 19.301319480093145,
|
|
"learning_rate": 1.9797216491837356e-07,
|
|
"loss": 0.38934653997421265,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 2.7598522167487687,
|
|
"grad_norm": 13.511728912052765,
|
|
"learning_rate": 1.9598059730537465e-07,
|
|
"loss": 0.3553803563117981,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 2.7610837438423648,
|
|
"grad_norm": 13.74634988747894,
|
|
"learning_rate": 1.9399889756021196e-07,
|
|
"loss": 0.3653762936592102,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 2.762315270935961,
|
|
"grad_norm": 9.247962499458838,
|
|
"learning_rate": 1.9202706975346875e-07,
|
|
"loss": 0.2600834369659424,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 2.7635467980295565,
|
|
"grad_norm": 11.458094202817868,
|
|
"learning_rate": 1.9006511793544458e-07,
|
|
"loss": 0.4601256847381592,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 2.7647783251231526,
|
|
"grad_norm": 17.193961086363156,
|
|
"learning_rate": 1.881130461361591e-07,
|
|
"loss": 0.33677470684051514,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 2.7660098522167487,
|
|
"grad_norm": 8.524927066266194,
|
|
"learning_rate": 1.8617085836533544e-07,
|
|
"loss": 0.8099600672721863,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 2.7672413793103448,
|
|
"grad_norm": 15.804119634424612,
|
|
"learning_rate": 1.8423855861239238e-07,
|
|
"loss": 0.6992620229721069,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 2.768472906403941,
|
|
"grad_norm": 9.647846553411064,
|
|
"learning_rate": 1.8231615084644105e-07,
|
|
"loss": 0.3640286326408386,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 2.769704433497537,
|
|
"grad_norm": 8.955751617734634,
|
|
"learning_rate": 1.8040363901627001e-07,
|
|
"loss": 0.2996286451816559,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 2.770935960591133,
|
|
"grad_norm": 11.938038283583609,
|
|
"learning_rate": 1.7850102705034455e-07,
|
|
"loss": 0.43687328696250916,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 2.772167487684729,
|
|
"grad_norm": 17.093390601969645,
|
|
"learning_rate": 1.7660831885679074e-07,
|
|
"loss": 0.7942696809768677,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 2.773399014778325,
|
|
"grad_norm": 13.100096515382093,
|
|
"learning_rate": 1.747255183233948e-07,
|
|
"loss": 1.1030818223953247,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 2.7746305418719213,
|
|
"grad_norm": 8.873613224852555,
|
|
"learning_rate": 1.7285262931759084e-07,
|
|
"loss": 0.5030316114425659,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 2.7758620689655173,
|
|
"grad_norm": 12.14741952725113,
|
|
"learning_rate": 1.7098965568645264e-07,
|
|
"loss": 0.6707223653793335,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 2.7770935960591134,
|
|
"grad_norm": 11.75778232712136,
|
|
"learning_rate": 1.6913660125668806e-07,
|
|
"loss": 0.2983396351337433,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 2.7783251231527095,
|
|
"grad_norm": 14.41974913977501,
|
|
"learning_rate": 1.6729346983462957e-07,
|
|
"loss": 0.6233869791030884,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 2.779556650246305,
|
|
"grad_norm": 13.000501735636352,
|
|
"learning_rate": 1.654602652062276e-07,
|
|
"loss": 0.2838573455810547,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 2.7807881773399012,
|
|
"grad_norm": 8.269339223606165,
|
|
"learning_rate": 1.636369911370417e-07,
|
|
"loss": 0.516904354095459,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 2.7820197044334973,
|
|
"grad_norm": 12.228570926666848,
|
|
"learning_rate": 1.6182365137223266e-07,
|
|
"loss": 0.2637355625629425,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 2.7832512315270934,
|
|
"grad_norm": 12.77963989317756,
|
|
"learning_rate": 1.600202496365566e-07,
|
|
"loss": 0.2973381280899048,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 2.7844827586206895,
|
|
"grad_norm": 12.028070410415097,
|
|
"learning_rate": 1.5822678963435479e-07,
|
|
"loss": 0.731842041015625,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 2.7857142857142856,
|
|
"grad_norm": 16.480537506483405,
|
|
"learning_rate": 1.564432750495476e-07,
|
|
"loss": 0.9091979265213013,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 2.7869458128078817,
|
|
"grad_norm": 14.778758482272446,
|
|
"learning_rate": 1.5466970954562786e-07,
|
|
"loss": 0.9223085641860962,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 2.7881773399014778,
|
|
"grad_norm": 12.767601072668027,
|
|
"learning_rate": 1.5290609676564982e-07,
|
|
"loss": 0.35786327719688416,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 2.789408866995074,
|
|
"grad_norm": 10.468097971683415,
|
|
"learning_rate": 1.5115244033222732e-07,
|
|
"loss": 0.7312544584274292,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 2.79064039408867,
|
|
"grad_norm": 9.834986856814911,
|
|
"learning_rate": 1.4940874384751947e-07,
|
|
"loss": 0.8420913219451904,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 2.791871921182266,
|
|
"grad_norm": 16.21429528610728,
|
|
"learning_rate": 1.47675010893229e-07,
|
|
"loss": 0.3239392042160034,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 2.793103448275862,
|
|
"grad_norm": 8.629439268560123,
|
|
"learning_rate": 1.4595124503059165e-07,
|
|
"loss": 0.3498873710632324,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 2.794334975369458,
|
|
"grad_norm": 6.690308017489741,
|
|
"learning_rate": 1.4423744980037068e-07,
|
|
"loss": 0.22733798623085022,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 2.7955665024630543,
|
|
"grad_norm": 8.212515181619986,
|
|
"learning_rate": 1.425336287228496e-07,
|
|
"loss": 0.2721923291683197,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 2.7967980295566504,
|
|
"grad_norm": 9.080877903298425,
|
|
"learning_rate": 1.408397852978205e-07,
|
|
"loss": 0.344375342130661,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 2.7980295566502464,
|
|
"grad_norm": 9.45480785329488,
|
|
"learning_rate": 1.391559230045847e-07,
|
|
"loss": 0.4529953896999359,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 2.7992610837438425,
|
|
"grad_norm": 9.214190080042984,
|
|
"learning_rate": 1.3748204530193987e-07,
|
|
"loss": 0.1639999896287918,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 2.8004926108374386,
|
|
"grad_norm": 13.6280899298915,
|
|
"learning_rate": 1.3581815562817402e-07,
|
|
"loss": 0.23326484858989716,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 2.8017241379310347,
|
|
"grad_norm": 8.920482755226637,
|
|
"learning_rate": 1.341642574010582e-07,
|
|
"loss": 0.22694149613380432,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 2.802955665024631,
|
|
"grad_norm": 8.710884196173295,
|
|
"learning_rate": 1.3252035401784324e-07,
|
|
"loss": 0.3588021993637085,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 2.804187192118227,
|
|
"grad_norm": 11.632314435280234,
|
|
"learning_rate": 1.3088644885524637e-07,
|
|
"loss": 0.4335256516933441,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 2.8054187192118225,
|
|
"grad_norm": 6.272067777885255,
|
|
"learning_rate": 1.2926254526944904e-07,
|
|
"loss": 0.1874769926071167,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 2.8066502463054186,
|
|
"grad_norm": 8.936224496797552,
|
|
"learning_rate": 1.27648646596088e-07,
|
|
"loss": 0.3144474923610687,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 2.8078817733990147,
|
|
"grad_norm": 19.58883398368707,
|
|
"learning_rate": 1.2604475615025092e-07,
|
|
"loss": 0.7241795063018799,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 2.8091133004926108,
|
|
"grad_norm": 16.726363332544537,
|
|
"learning_rate": 1.2445087722646576e-07,
|
|
"loss": 0.5169468522071838,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 2.810344827586207,
|
|
"grad_norm": 30.94634458747577,
|
|
"learning_rate": 1.228670130986953e-07,
|
|
"loss": 1.6869860887527466,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 2.811576354679803,
|
|
"grad_norm": 10.707666993688912,
|
|
"learning_rate": 1.212931670203338e-07,
|
|
"loss": 0.47550255060195923,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 2.812807881773399,
|
|
"grad_norm": 9.540335234729794,
|
|
"learning_rate": 1.197293422241952e-07,
|
|
"loss": 0.2437782883644104,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 2.814039408866995,
|
|
"grad_norm": 6.665490888518648,
|
|
"learning_rate": 1.1817554192251002e-07,
|
|
"loss": 0.37867432832717896,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 2.815270935960591,
|
|
"grad_norm": 9.667222509113516,
|
|
"learning_rate": 1.1663176930691744e-07,
|
|
"loss": 0.8604614734649658,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 2.8165024630541873,
|
|
"grad_norm": 12.759555548828967,
|
|
"learning_rate": 1.1509802754845978e-07,
|
|
"loss": 1.1947153806686401,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 2.8177339901477834,
|
|
"grad_norm": 9.33176290924216,
|
|
"learning_rate": 1.1357431979757194e-07,
|
|
"loss": 0.30131372809410095,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 2.8189655172413794,
|
|
"grad_norm": 10.72676065785706,
|
|
"learning_rate": 1.1206064918408143e-07,
|
|
"loss": 0.47112587094306946,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 2.8201970443349755,
|
|
"grad_norm": 11.488110070600202,
|
|
"learning_rate": 1.1055701881719838e-07,
|
|
"loss": 0.2062550187110901,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 2.821428571428571,
|
|
"grad_norm": 8.859910558029405,
|
|
"learning_rate": 1.0906343178550715e-07,
|
|
"loss": 0.30918222665786743,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 2.8226600985221673,
|
|
"grad_norm": 7.645494812767514,
|
|
"learning_rate": 1.0757989115696421e-07,
|
|
"loss": 0.46675896644592285,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 2.8238916256157633,
|
|
"grad_norm": 7.696373009746994,
|
|
"learning_rate": 1.0610639997888917e-07,
|
|
"loss": 0.2514066696166992,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 2.8251231527093594,
|
|
"grad_norm": 20.301202253116305,
|
|
"learning_rate": 1.0464296127795926e-07,
|
|
"loss": 0.37799739837646484,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 2.8263546798029555,
|
|
"grad_norm": 10.51342866650685,
|
|
"learning_rate": 1.0318957806020269e-07,
|
|
"loss": 1.170919418334961,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 2.8275862068965516,
|
|
"grad_norm": 10.322546313834785,
|
|
"learning_rate": 1.0174625331099363e-07,
|
|
"loss": 0.34683138132095337,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 2.8288177339901477,
|
|
"grad_norm": 13.218925485338286,
|
|
"learning_rate": 1.0031298999504557e-07,
|
|
"loss": 0.24154211580753326,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 2.8300492610837438,
|
|
"grad_norm": 11.94151576403668,
|
|
"learning_rate": 9.888979105640295e-08,
|
|
"loss": 0.3270137906074524,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 2.83128078817734,
|
|
"grad_norm": 10.157922840931477,
|
|
"learning_rate": 9.747665941843953e-08,
|
|
"loss": 0.33205774426460266,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 2.832512315270936,
|
|
"grad_norm": 15.674554832536234,
|
|
"learning_rate": 9.607359798384785e-08,
|
|
"loss": 1.5672454833984375,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 2.833743842364532,
|
|
"grad_norm": 7.89425528282641,
|
|
"learning_rate": 9.468060963463754e-08,
|
|
"loss": 0.1868615597486496,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 2.834975369458128,
|
|
"grad_norm": 16.06809449939127,
|
|
"learning_rate": 9.329769723212478e-08,
|
|
"loss": 0.3485974371433258,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 2.836206896551724,
|
|
"grad_norm": 22.06944110945676,
|
|
"learning_rate": 9.192486361693175e-08,
|
|
"loss": 0.5702242851257324,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 2.8374384236453203,
|
|
"grad_norm": 13.611203107193855,
|
|
"learning_rate": 9.056211160897555e-08,
|
|
"loss": 0.7004730105400085,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 2.8386699507389164,
|
|
"grad_norm": 10.23772277567979,
|
|
"learning_rate": 8.920944400746589e-08,
|
|
"loss": 0.29311710596084595,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 2.8399014778325125,
|
|
"grad_norm": 7.167372063418741,
|
|
"learning_rate": 8.786686359089747e-08,
|
|
"loss": 0.18041157722473145,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 2.8411330049261085,
|
|
"grad_norm": 8.672887051600437,
|
|
"learning_rate": 8.653437311704648e-08,
|
|
"loss": 0.2873387634754181,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 2.8423645320197046,
|
|
"grad_norm": 9.699021546064241,
|
|
"learning_rate": 8.521197532296188e-08,
|
|
"loss": 0.23781178891658783,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 2.8435960591133007,
|
|
"grad_norm": 11.643059711853965,
|
|
"learning_rate": 8.38996729249636e-08,
|
|
"loss": 0.5913131833076477,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 2.844827586206897,
|
|
"grad_norm": 12.799008291574818,
|
|
"learning_rate": 8.259746861863094e-08,
|
|
"loss": 0.9139914512634277,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 2.846059113300493,
|
|
"grad_norm": 10.980579183559623,
|
|
"learning_rate": 8.130536507880538e-08,
|
|
"loss": 0.22883841395378113,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 2.8472906403940885,
|
|
"grad_norm": 9.488904590414009,
|
|
"learning_rate": 8.002336495957664e-08,
|
|
"loss": 0.6467199325561523,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 2.8485221674876846,
|
|
"grad_norm": 17.044793614561804,
|
|
"learning_rate": 7.875147089428436e-08,
|
|
"loss": 0.48100385069847107,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 2.8497536945812807,
|
|
"grad_norm": 6.232324566569768,
|
|
"learning_rate": 7.748968549550761e-08,
|
|
"loss": 0.22535499930381775,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 2.850985221674877,
|
|
"grad_norm": 16.357795976490426,
|
|
"learning_rate": 7.623801135506148e-08,
|
|
"loss": 0.7971012592315674,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 2.852216748768473,
|
|
"grad_norm": 10.56546293503534,
|
|
"learning_rate": 7.499645104399156e-08,
|
|
"loss": 0.6965846419334412,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 2.853448275862069,
|
|
"grad_norm": 10.699552582949096,
|
|
"learning_rate": 7.376500711257062e-08,
|
|
"loss": 0.2827698588371277,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 2.854679802955665,
|
|
"grad_norm": 11.75504997847818,
|
|
"learning_rate": 7.254368209028862e-08,
|
|
"loss": 0.4453064203262329,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 2.855911330049261,
|
|
"grad_norm": 10.373311779049724,
|
|
"learning_rate": 7.133247848585268e-08,
|
|
"loss": 0.5363994836807251,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 10.742091428994968,
|
|
"learning_rate": 7.013139878717934e-08,
|
|
"loss": 0.33071067929267883,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 2.8583743842364533,
|
|
"grad_norm": 10.02135718464731,
|
|
"learning_rate": 6.894044546138845e-08,
|
|
"loss": 0.6118582487106323,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 2.8596059113300494,
|
|
"grad_norm": 11.952226631897975,
|
|
"learning_rate": 6.775962095480037e-08,
|
|
"loss": 0.4941851496696472,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 2.8608374384236455,
|
|
"grad_norm": 12.467253293652027,
|
|
"learning_rate": 6.65889276929299e-08,
|
|
"loss": 0.9043294191360474,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 2.862068965517241,
|
|
"grad_norm": 9.372107033246923,
|
|
"learning_rate": 6.542836808048181e-08,
|
|
"loss": 0.5352662801742554,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 2.863300492610837,
|
|
"grad_norm": 13.465637997675985,
|
|
"learning_rate": 6.427794450134529e-08,
|
|
"loss": 0.622706413269043,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 2.8645320197044333,
|
|
"grad_norm": 10.951531479275452,
|
|
"learning_rate": 6.313765931858785e-08,
|
|
"loss": 0.32065168023109436,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 2.8657635467980294,
|
|
"grad_norm": 11.940905797523131,
|
|
"learning_rate": 6.200751487445367e-08,
|
|
"loss": 0.5308477878570557,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 2.8669950738916254,
|
|
"grad_norm": 12.032315008603385,
|
|
"learning_rate": 6.088751349035693e-08,
|
|
"loss": 0.4006965756416321,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 2.8682266009852215,
|
|
"grad_norm": 14.936202143915887,
|
|
"learning_rate": 5.977765746687569e-08,
|
|
"loss": 0.29346001148223877,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 2.8694581280788176,
|
|
"grad_norm": 12.39243720991369,
|
|
"learning_rate": 5.8677949083749686e-08,
|
|
"loss": 0.17921757698059082,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 2.8706896551724137,
|
|
"grad_norm": 9.58038552158238,
|
|
"learning_rate": 5.758839059987531e-08,
|
|
"loss": 0.3909390866756439,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 2.87192118226601,
|
|
"grad_norm": 15.9782663440221,
|
|
"learning_rate": 5.650898425329676e-08,
|
|
"loss": 0.2947097420692444,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 2.873152709359606,
|
|
"grad_norm": 10.207214673211949,
|
|
"learning_rate": 5.5439732261209356e-08,
|
|
"loss": 0.27580755949020386,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 2.874384236453202,
|
|
"grad_norm": 10.944513423861029,
|
|
"learning_rate": 5.438063681994732e-08,
|
|
"loss": 0.5352618098258972,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 2.875615763546798,
|
|
"grad_norm": 11.026909219005717,
|
|
"learning_rate": 5.333170010498434e-08,
|
|
"loss": 0.4425346553325653,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 2.876847290640394,
|
|
"grad_norm": 10.718057032304046,
|
|
"learning_rate": 5.229292427092525e-08,
|
|
"loss": 0.3107433319091797,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 2.87807881773399,
|
|
"grad_norm": 12.247326551233483,
|
|
"learning_rate": 5.126431145150546e-08,
|
|
"loss": 0.8459264039993286,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 2.8793103448275863,
|
|
"grad_norm": 9.9858024833323,
|
|
"learning_rate": 5.024586375958429e-08,
|
|
"loss": 0.6122205257415771,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 2.8805418719211824,
|
|
"grad_norm": 8.326107009918898,
|
|
"learning_rate": 4.9237583287139454e-08,
|
|
"loss": 0.28234463930130005,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 2.8817733990147785,
|
|
"grad_norm": 9.707118891697133,
|
|
"learning_rate": 4.823947210526647e-08,
|
|
"loss": 0.26258403062820435,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 2.8830049261083746,
|
|
"grad_norm": 11.37690573459154,
|
|
"learning_rate": 4.72515322641709e-08,
|
|
"loss": 0.16676993668079376,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 2.8842364532019706,
|
|
"grad_norm": 10.744107147683183,
|
|
"learning_rate": 4.627376579316667e-08,
|
|
"loss": 0.5982980132102966,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 2.8854679802955667,
|
|
"grad_norm": 11.814730049244856,
|
|
"learning_rate": 4.530617470066834e-08,
|
|
"loss": 0.3576871156692505,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 2.886699507389163,
|
|
"grad_norm": 7.558098865292991,
|
|
"learning_rate": 4.4348760974192715e-08,
|
|
"loss": 0.22213858366012573,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 2.887931034482759,
|
|
"grad_norm": 31.227769055767126,
|
|
"learning_rate": 4.340152658034835e-08,
|
|
"loss": 0.7075624465942383,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 2.8891625615763545,
|
|
"grad_norm": 13.602269942674353,
|
|
"learning_rate": 4.246447346483662e-08,
|
|
"loss": 0.35476282238960266,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 2.8903940886699506,
|
|
"grad_norm": 11.66167288478714,
|
|
"learning_rate": 4.153760355244507e-08,
|
|
"loss": 0.4569534659385681,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 2.8916256157635467,
|
|
"grad_norm": 12.232619433370953,
|
|
"learning_rate": 4.062091874704355e-08,
|
|
"loss": 0.8425757884979248,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 2.892857142857143,
|
|
"grad_norm": 15.584381566055246,
|
|
"learning_rate": 3.971442093158195e-08,
|
|
"loss": 0.6543349623680115,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 2.894088669950739,
|
|
"grad_norm": 12.232909525407603,
|
|
"learning_rate": 3.8818111968083607e-08,
|
|
"loss": 0.4949587285518646,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 2.895320197044335,
|
|
"grad_norm": 28.009977519758436,
|
|
"learning_rate": 3.7931993697644664e-08,
|
|
"loss": 1.0205111503601074,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 2.896551724137931,
|
|
"grad_norm": 8.083430035021566,
|
|
"learning_rate": 3.7056067940427484e-08,
|
|
"loss": 0.429599404335022,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 2.897783251231527,
|
|
"grad_norm": 11.304307823971973,
|
|
"learning_rate": 3.6190336495659504e-08,
|
|
"loss": 0.6471319198608398,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 2.899014778325123,
|
|
"grad_norm": 11.052274245265034,
|
|
"learning_rate": 3.533480114162713e-08,
|
|
"loss": 0.6227458715438843,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 2.9002463054187193,
|
|
"grad_norm": 10.145305358695179,
|
|
"learning_rate": 3.448946363567296e-08,
|
|
"loss": 0.35620149970054626,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 2.9014778325123154,
|
|
"grad_norm": 9.735362530555188,
|
|
"learning_rate": 3.365432571419247e-08,
|
|
"loss": 0.41157659888267517,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 2.9027093596059115,
|
|
"grad_norm": 16.113614254695477,
|
|
"learning_rate": 3.282938909263122e-08,
|
|
"loss": 0.39660418033599854,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 2.903940886699507,
|
|
"grad_norm": 12.303598539070832,
|
|
"learning_rate": 3.201465546547988e-08,
|
|
"loss": 0.37891146540641785,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 2.905172413793103,
|
|
"grad_norm": 11.49013243084427,
|
|
"learning_rate": 3.121012650627031e-08,
|
|
"loss": 0.4459425210952759,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 2.9064039408866993,
|
|
"grad_norm": 12.062068468114942,
|
|
"learning_rate": 3.041580386757448e-08,
|
|
"loss": 0.4933587610721588,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 2.9076354679802954,
|
|
"grad_norm": 7.691939807180967,
|
|
"learning_rate": 2.9631689180999457e-08,
|
|
"loss": 0.16229723393917084,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 2.9088669950738915,
|
|
"grad_norm": 11.649633348013484,
|
|
"learning_rate": 2.885778405718409e-08,
|
|
"loss": 0.4784936308860779,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 2.9100985221674875,
|
|
"grad_norm": 20.64984541908695,
|
|
"learning_rate": 2.8094090085795112e-08,
|
|
"loss": 0.6622560620307922,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 2.9113300492610836,
|
|
"grad_norm": 9.783513206502265,
|
|
"learning_rate": 2.7340608835526584e-08,
|
|
"loss": 0.3672278821468353,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 2.9125615763546797,
|
|
"grad_norm": 6.04349473256102,
|
|
"learning_rate": 2.6597341854092685e-08,
|
|
"loss": 0.3247770667076111,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 2.913793103448276,
|
|
"grad_norm": 11.650085297412613,
|
|
"learning_rate": 2.586429066822771e-08,
|
|
"loss": 0.3467229902744293,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 2.915024630541872,
|
|
"grad_norm": 11.842612737683362,
|
|
"learning_rate": 2.514145678368163e-08,
|
|
"loss": 0.6725019812583923,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 2.916256157635468,
|
|
"grad_norm": 8.454338307427385,
|
|
"learning_rate": 2.4428841685217863e-08,
|
|
"loss": 0.6760755777359009,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 2.917487684729064,
|
|
"grad_norm": 13.555178809367312,
|
|
"learning_rate": 2.3726446836608298e-08,
|
|
"loss": 0.5354422330856323,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 2.91871921182266,
|
|
"grad_norm": 11.004737348047312,
|
|
"learning_rate": 2.3034273680632157e-08,
|
|
"loss": 0.3656280040740967,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 2.9199507389162562,
|
|
"grad_norm": 9.99595612427158,
|
|
"learning_rate": 2.235232363907269e-08,
|
|
"loss": 0.28186920285224915,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 2.9211822660098523,
|
|
"grad_norm": 16.789031513751276,
|
|
"learning_rate": 2.168059811271439e-08,
|
|
"loss": 0.31556010246276855,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 2.9224137931034484,
|
|
"grad_norm": 7.870447962098653,
|
|
"learning_rate": 2.101909848133743e-08,
|
|
"loss": 0.33978280425071716,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 2.9236453201970445,
|
|
"grad_norm": 13.322556254888749,
|
|
"learning_rate": 2.0367826103720457e-08,
|
|
"loss": 0.5645813941955566,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 2.9248768472906406,
|
|
"grad_norm": 6.936377752521131,
|
|
"learning_rate": 1.9726782317632255e-08,
|
|
"loss": 0.21976767480373383,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 2.9261083743842367,
|
|
"grad_norm": 16.201679118604396,
|
|
"learning_rate": 1.9095968439830637e-08,
|
|
"loss": 0.6068276166915894,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 2.9273399014778327,
|
|
"grad_norm": 10.683769815067068,
|
|
"learning_rate": 1.8475385766063002e-08,
|
|
"loss": 0.2844882607460022,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 2.928571428571429,
|
|
"grad_norm": 22.182288301690132,
|
|
"learning_rate": 1.786503557105912e-08,
|
|
"loss": 1.1885827779769897,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 2.9298029556650245,
|
|
"grad_norm": 8.221573464179809,
|
|
"learning_rate": 1.7264919108529455e-08,
|
|
"loss": 0.4241114854812622,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 2.9310344827586206,
|
|
"grad_norm": 10.23479597630979,
|
|
"learning_rate": 1.6675037611165735e-08,
|
|
"loss": 0.9062713980674744,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 2.9322660098522166,
|
|
"grad_norm": 9.83143734077978,
|
|
"learning_rate": 1.6095392290635393e-08,
|
|
"loss": 0.29996055364608765,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 2.9334975369458127,
|
|
"grad_norm": 9.191744534619497,
|
|
"learning_rate": 1.552598433757879e-08,
|
|
"loss": 0.3901692032814026,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 2.934729064039409,
|
|
"grad_norm": 10.314975796862411,
|
|
"learning_rate": 1.4966814921608674e-08,
|
|
"loss": 0.36974531412124634,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 2.935960591133005,
|
|
"grad_norm": 10.965587726479475,
|
|
"learning_rate": 1.441788519130738e-08,
|
|
"loss": 0.2913818359375,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 2.937192118226601,
|
|
"grad_norm": 26.225721932440074,
|
|
"learning_rate": 1.3879196274224626e-08,
|
|
"loss": 2.8897290229797363,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 2.938423645320197,
|
|
"grad_norm": 16.567199226805975,
|
|
"learning_rate": 1.335074927687141e-08,
|
|
"loss": 0.7396224141120911,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 2.939655172413793,
|
|
"grad_norm": 10.384159480919202,
|
|
"learning_rate": 1.2832545284724995e-08,
|
|
"loss": 0.2923913896083832,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 2.9408866995073892,
|
|
"grad_norm": 12.315507900916186,
|
|
"learning_rate": 1.2324585362220032e-08,
|
|
"loss": 0.60726398229599,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 2.9421182266009853,
|
|
"grad_norm": 10.077538225946919,
|
|
"learning_rate": 1.1826870552749669e-08,
|
|
"loss": 0.3081626892089844,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 2.9433497536945814,
|
|
"grad_norm": 15.192636407836343,
|
|
"learning_rate": 1.1339401878663337e-08,
|
|
"loss": 0.7774905562400818,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 2.9445812807881775,
|
|
"grad_norm": 12.649581445218459,
|
|
"learning_rate": 1.0862180341263962e-08,
|
|
"loss": 0.5568622350692749,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 2.945812807881773,
|
|
"grad_norm": 11.4557765341612,
|
|
"learning_rate": 1.039520692080409e-08,
|
|
"loss": 0.42753443121910095,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 2.947044334975369,
|
|
"grad_norm": 12.049826060673517,
|
|
"learning_rate": 9.938482576487551e-09,
|
|
"loss": 0.33313125371932983,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 2.9482758620689653,
|
|
"grad_norm": 11.358169603413613,
|
|
"learning_rate": 9.492008246466122e-09,
|
|
"loss": 0.4345099925994873,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 2.9495073891625614,
|
|
"grad_norm": 15.061185553672066,
|
|
"learning_rate": 9.055784847836202e-09,
|
|
"loss": 0.6844139695167542,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 2.9507389162561575,
|
|
"grad_norm": 12.25434358933355,
|
|
"learning_rate": 8.629813276637144e-09,
|
|
"loss": 0.4944530725479126,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 2.9519704433497536,
|
|
"grad_norm": 7.240836775147592,
|
|
"learning_rate": 8.214094407851814e-09,
|
|
"loss": 0.1517336368560791,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 2.9532019704433496,
|
|
"grad_norm": 11.570980194113849,
|
|
"learning_rate": 7.808629095402697e-09,
|
|
"loss": 0.24804279208183289,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 2.9544334975369457,
|
|
"grad_norm": 15.785024108321435,
|
|
"learning_rate": 7.413418172149689e-09,
|
|
"loss": 1.2773240804672241,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 2.955665024630542,
|
|
"grad_norm": 12.516388230034497,
|
|
"learning_rate": 7.028462449889528e-09,
|
|
"loss": 0.20905320346355438,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 2.956896551724138,
|
|
"grad_norm": 6.362652358430743,
|
|
"learning_rate": 6.6537627193558055e-09,
|
|
"loss": 0.24830211699008942,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 2.958128078817734,
|
|
"grad_norm": 9.391013644944394,
|
|
"learning_rate": 6.289319750212852e-09,
|
|
"loss": 0.30148234963417053,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 2.95935960591133,
|
|
"grad_norm": 11.036169214095409,
|
|
"learning_rate": 5.93513429105741e-09,
|
|
"loss": 0.7273882031440735,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 2.960591133004926,
|
|
"grad_norm": 10.956019864515577,
|
|
"learning_rate": 5.591207069417515e-09,
|
|
"loss": 0.4958484172821045,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 2.9618226600985222,
|
|
"grad_norm": 13.272684139309336,
|
|
"learning_rate": 5.257538791749173e-09,
|
|
"loss": 0.5852301120758057,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 2.9630541871921183,
|
|
"grad_norm": 15.300683310135565,
|
|
"learning_rate": 4.934130143435245e-09,
|
|
"loss": 0.5483534336090088,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 2.9642857142857144,
|
|
"grad_norm": 9.624016617554009,
|
|
"learning_rate": 4.6209817887848955e-09,
|
|
"loss": 0.49854928255081177,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 2.9655172413793105,
|
|
"grad_norm": 8.615173379839112,
|
|
"learning_rate": 4.318094371031922e-09,
|
|
"loss": 0.9770829677581787,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 2.9667487684729066,
|
|
"grad_norm": 15.370084776473758,
|
|
"learning_rate": 4.025468512333098e-09,
|
|
"loss": 0.4265647530555725,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 2.9679802955665027,
|
|
"grad_norm": 12.632393723486729,
|
|
"learning_rate": 3.743104813767051e-09,
|
|
"loss": 0.6890873908996582,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 2.9692118226600988,
|
|
"grad_norm": 8.772985107195037,
|
|
"learning_rate": 3.471003855332611e-09,
|
|
"loss": 0.28604504466056824,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 2.970443349753695,
|
|
"grad_norm": 9.587235477416659,
|
|
"learning_rate": 3.2091661959487986e-09,
|
|
"loss": 0.3280025124549866,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 2.9716748768472905,
|
|
"grad_norm": 9.74052346916064,
|
|
"learning_rate": 2.9575923734520562e-09,
|
|
"loss": 0.23375985026359558,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 2.9729064039408866,
|
|
"grad_norm": 14.377712378651319,
|
|
"learning_rate": 2.7162829045979113e-09,
|
|
"loss": 0.5062013864517212,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 2.9741379310344827,
|
|
"grad_norm": 10.486023439825937,
|
|
"learning_rate": 2.4852382850554245e-09,
|
|
"loss": 0.46517398953437805,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 2.9753694581280787,
|
|
"grad_norm": 7.705201332847603,
|
|
"learning_rate": 2.264458989410523e-09,
|
|
"loss": 0.43281105160713196,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 2.976600985221675,
|
|
"grad_norm": 9.481633319521942,
|
|
"learning_rate": 2.0539454711626663e-09,
|
|
"loss": 0.6278485655784607,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 2.977832512315271,
|
|
"grad_norm": 12.691647261969463,
|
|
"learning_rate": 1.8536981627254036e-09,
|
|
"loss": 0.3320518136024475,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 2.979064039408867,
|
|
"grad_norm": 9.582038617142,
|
|
"learning_rate": 1.6637174754230435e-09,
|
|
"loss": 0.4568738341331482,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 2.980295566502463,
|
|
"grad_norm": 10.563009615677867,
|
|
"learning_rate": 1.4840037994923173e-09,
|
|
"loss": 0.24025380611419678,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 2.981527093596059,
|
|
"grad_norm": 14.650292148384931,
|
|
"learning_rate": 1.3145575040801605e-09,
|
|
"loss": 0.33217573165893555,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 2.9827586206896552,
|
|
"grad_norm": 23.286828169967034,
|
|
"learning_rate": 1.1553789372453771e-09,
|
|
"loss": 1.5295354127883911,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 2.9839901477832513,
|
|
"grad_norm": 16.800662700378666,
|
|
"learning_rate": 1.0064684259525337e-09,
|
|
"loss": 0.6207250952720642,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 2.9852216748768474,
|
|
"grad_norm": 20.655163645870832,
|
|
"learning_rate": 8.678262760775102e-10,
|
|
"loss": 0.4011062681674957,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 2.9864532019704435,
|
|
"grad_norm": 12.812116716093689,
|
|
"learning_rate": 7.394527724030598e-10,
|
|
"loss": 0.8355351090431213,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 2.987684729064039,
|
|
"grad_norm": 13.524667045497342,
|
|
"learning_rate": 6.213481786199182e-10,
|
|
"loss": 0.6552157998085022,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 2.9889162561576352,
|
|
"grad_norm": 9.071239617590464,
|
|
"learning_rate": 5.13512737324029e-10,
|
|
"loss": 0.4416411519050598,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 2.9901477832512313,
|
|
"grad_norm": 12.103653519709662,
|
|
"learning_rate": 4.159466700187631e-10,
|
|
"loss": 0.3720128834247589,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 2.9913793103448274,
|
|
"grad_norm": 7.981239501743612,
|
|
"learning_rate": 3.2865017711380955e-10,
|
|
"loss": 0.6710848212242126,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 2.9926108374384235,
|
|
"grad_norm": 11.769326063023964,
|
|
"learning_rate": 2.516234379235094e-10,
|
|
"loss": 0.7640970349311829,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 2.9938423645320196,
|
|
"grad_norm": 11.664052062324599,
|
|
"learning_rate": 1.848666106674113e-10,
|
|
"loss": 0.5783921480178833,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 2.9950738916256157,
|
|
"grad_norm": 11.283478806003906,
|
|
"learning_rate": 1.2837983246916098e-10,
|
|
"loss": 0.411626935005188,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 2.9963054187192117,
|
|
"grad_norm": 11.703360380276939,
|
|
"learning_rate": 8.216321935816673e-11,
|
|
"loss": 0.529446005821228,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 2.997536945812808,
|
|
"grad_norm": 9.632699414961296,
|
|
"learning_rate": 4.6216866266823867e-11,
|
|
"loss": 0.44549500942230225,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 2.998768472906404,
|
|
"grad_norm": 9.699682514575105,
|
|
"learning_rate": 2.0540847032179955e-11,
|
|
"loss": 0.2854122519493103,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 6.925750902905979,
|
|
"learning_rate": 5.135214394824672e-12,
|
|
"loss": 0.4455873966217041,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 2436,
|
|
"total_flos": 6456127242240.0,
|
|
"train_loss": 1.6602046456561104,
|
|
"train_runtime": 2865.3381,
|
|
"train_samples_per_second": 3.4,
|
|
"train_steps_per_second": 0.85
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 2436,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 6456127242240.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|