10453 lines
256 KiB
JSON
10453 lines
256 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9999327369341494,
|
|
"eval_steps": 500,
|
|
"global_step": 7433,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0006726306585054147,
|
|
"grad_norm": 29.793027079973278,
|
|
"learning_rate": 2.016129032258064e-06,
|
|
"loss": 3.0338,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0013452613170108294,
|
|
"grad_norm": 29.256907968213007,
|
|
"learning_rate": 4.032258064516128e-06,
|
|
"loss": 3.0206,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.002017891975516244,
|
|
"grad_norm": 15.261520394111779,
|
|
"learning_rate": 6.0483870967741925e-06,
|
|
"loss": 2.7838,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.002690522634021659,
|
|
"grad_norm": 9.515391114280666,
|
|
"learning_rate": 8.064516129032257e-06,
|
|
"loss": 2.5087,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.0033631532925270735,
|
|
"grad_norm": 6.1699236436111615,
|
|
"learning_rate": 1.0080645161290321e-05,
|
|
"loss": 2.1504,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.004035783951032488,
|
|
"grad_norm": 3.33300114769491,
|
|
"learning_rate": 1.2096774193548385e-05,
|
|
"loss": 1.7603,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.004708414609537903,
|
|
"grad_norm": 2.380295712226883,
|
|
"learning_rate": 1.4112903225806451e-05,
|
|
"loss": 1.6398,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.005381045268043318,
|
|
"grad_norm": 1.2705897972397782,
|
|
"learning_rate": 1.6129032258064513e-05,
|
|
"loss": 1.4737,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.006053675926548732,
|
|
"grad_norm": 1.0972481996311119,
|
|
"learning_rate": 1.814516129032258e-05,
|
|
"loss": 1.3779,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.006726306585054147,
|
|
"grad_norm": 0.8863957157122089,
|
|
"learning_rate": 2.0161290322580642e-05,
|
|
"loss": 1.2916,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.007398937243559562,
|
|
"grad_norm": 0.7177830723122377,
|
|
"learning_rate": 2.2177419354838708e-05,
|
|
"loss": 1.2852,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.008071567902064977,
|
|
"grad_norm": 0.5895990622701546,
|
|
"learning_rate": 2.419354838709677e-05,
|
|
"loss": 1.2467,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.00874419856057039,
|
|
"grad_norm": 0.6482412600733727,
|
|
"learning_rate": 2.620967741935484e-05,
|
|
"loss": 1.2336,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.009416829219075806,
|
|
"grad_norm": 0.45626769818341484,
|
|
"learning_rate": 2.8225806451612902e-05,
|
|
"loss": 1.1711,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.01008945987758122,
|
|
"grad_norm": 0.4598615730787672,
|
|
"learning_rate": 3.0241935483870964e-05,
|
|
"loss": 1.1733,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.010762090536086635,
|
|
"grad_norm": 0.4275974805901917,
|
|
"learning_rate": 3.225806451612903e-05,
|
|
"loss": 1.1692,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.01143472119459205,
|
|
"grad_norm": 0.2961863467328374,
|
|
"learning_rate": 3.427419354838709e-05,
|
|
"loss": 1.1361,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.012107351853097465,
|
|
"grad_norm": 0.3011832758903724,
|
|
"learning_rate": 3.629032258064516e-05,
|
|
"loss": 1.1497,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.012779982511602879,
|
|
"grad_norm": 0.3437144549576709,
|
|
"learning_rate": 3.8306451612903224e-05,
|
|
"loss": 1.115,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.013452613170108294,
|
|
"grad_norm": 0.38338441022677405,
|
|
"learning_rate": 4.0322580645161284e-05,
|
|
"loss": 1.134,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.014125243828613708,
|
|
"grad_norm": 0.3458095713498744,
|
|
"learning_rate": 4.2338709677419356e-05,
|
|
"loss": 1.0928,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.014797874487119124,
|
|
"grad_norm": 0.213297032520373,
|
|
"learning_rate": 4.4354838709677415e-05,
|
|
"loss": 1.143,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.015470505145624537,
|
|
"grad_norm": 0.15456137343431237,
|
|
"learning_rate": 4.637096774193548e-05,
|
|
"loss": 1.11,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.016143135804129953,
|
|
"grad_norm": 0.17049538367849387,
|
|
"learning_rate": 4.838709677419354e-05,
|
|
"loss": 1.11,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.016815766462635367,
|
|
"grad_norm": 0.16056803823178314,
|
|
"learning_rate": 5.0403225806451606e-05,
|
|
"loss": 1.0855,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.01748839712114078,
|
|
"grad_norm": 0.16118382096312336,
|
|
"learning_rate": 5.241935483870968e-05,
|
|
"loss": 1.0714,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.018161027779646195,
|
|
"grad_norm": 0.1132867285571746,
|
|
"learning_rate": 5.443548387096774e-05,
|
|
"loss": 1.0775,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.018833658438151612,
|
|
"grad_norm": 0.11714635540888474,
|
|
"learning_rate": 5.6451612903225804e-05,
|
|
"loss": 1.1172,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.019506289096657026,
|
|
"grad_norm": 0.14223271120783545,
|
|
"learning_rate": 5.846774193548386e-05,
|
|
"loss": 1.025,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.02017891975516244,
|
|
"grad_norm": 0.11556328672795584,
|
|
"learning_rate": 6.048387096774193e-05,
|
|
"loss": 1.0493,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.020851550413667853,
|
|
"grad_norm": 0.10980588262602826,
|
|
"learning_rate": 6.25e-05,
|
|
"loss": 1.0856,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.02152418107217327,
|
|
"grad_norm": 0.11923628645400541,
|
|
"learning_rate": 6.451612903225805e-05,
|
|
"loss": 1.088,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.022196811730678685,
|
|
"grad_norm": 0.1302600360663136,
|
|
"learning_rate": 6.653225806451612e-05,
|
|
"loss": 1.074,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.0228694423891841,
|
|
"grad_norm": 0.10841395305583203,
|
|
"learning_rate": 6.854838709677419e-05,
|
|
"loss": 1.0825,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.023542073047689512,
|
|
"grad_norm": 0.10691292450010598,
|
|
"learning_rate": 7.056451612903225e-05,
|
|
"loss": 1.0185,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.02421470370619493,
|
|
"grad_norm": 0.10288434679024688,
|
|
"learning_rate": 7.258064516129032e-05,
|
|
"loss": 1.0621,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.024887334364700343,
|
|
"grad_norm": 0.09551744684673671,
|
|
"learning_rate": 7.459677419354838e-05,
|
|
"loss": 1.0671,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.025559965023205757,
|
|
"grad_norm": 0.1023530781591793,
|
|
"learning_rate": 7.661290322580645e-05,
|
|
"loss": 1.0542,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.02623259568171117,
|
|
"grad_norm": 0.10650285325092443,
|
|
"learning_rate": 7.86290322580645e-05,
|
|
"loss": 1.0716,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.02690522634021659,
|
|
"grad_norm": 0.09426817941239474,
|
|
"learning_rate": 8.064516129032257e-05,
|
|
"loss": 1.004,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.027577856998722002,
|
|
"grad_norm": 0.09989428312797478,
|
|
"learning_rate": 8.266129032258063e-05,
|
|
"loss": 1.0739,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.028250487657227416,
|
|
"grad_norm": 0.10006362944609468,
|
|
"learning_rate": 8.467741935483871e-05,
|
|
"loss": 1.0905,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.02892311831573283,
|
|
"grad_norm": 0.10481768262243495,
|
|
"learning_rate": 8.669354838709678e-05,
|
|
"loss": 1.0209,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.029595748974238247,
|
|
"grad_norm": 0.09265084528231703,
|
|
"learning_rate": 8.870967741935483e-05,
|
|
"loss": 1.0309,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.03026837963274366,
|
|
"grad_norm": 0.08616737627661364,
|
|
"learning_rate": 9.07258064516129e-05,
|
|
"loss": 1.0233,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.030941010291249075,
|
|
"grad_norm": 0.09416043226383441,
|
|
"learning_rate": 9.274193548387096e-05,
|
|
"loss": 1.0251,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.03161364094975449,
|
|
"grad_norm": 0.10034605553663441,
|
|
"learning_rate": 9.475806451612903e-05,
|
|
"loss": 1.0013,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.032286271608259906,
|
|
"grad_norm": 0.0959718573339108,
|
|
"learning_rate": 9.677419354838708e-05,
|
|
"loss": 1.0215,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.032958902266765316,
|
|
"grad_norm": 0.10210970867228082,
|
|
"learning_rate": 9.879032258064515e-05,
|
|
"loss": 1.0518,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.033631532925270734,
|
|
"grad_norm": 0.09862230935521245,
|
|
"learning_rate": 0.00010080645161290321,
|
|
"loss": 1.0957,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.03430416358377615,
|
|
"grad_norm": 0.10083093939252007,
|
|
"learning_rate": 0.00010282258064516128,
|
|
"loss": 1.0558,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.03497679424228156,
|
|
"grad_norm": 0.09415129429211958,
|
|
"learning_rate": 0.00010483870967741936,
|
|
"loss": 1.0521,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.03564942490078698,
|
|
"grad_norm": 0.11114031366094976,
|
|
"learning_rate": 0.00010685483870967741,
|
|
"loss": 1.0847,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.03632205555929239,
|
|
"grad_norm": 0.09713797329341636,
|
|
"learning_rate": 0.00010887096774193548,
|
|
"loss": 1.0728,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.036994686217797806,
|
|
"grad_norm": 0.09788369320757713,
|
|
"learning_rate": 0.00011088709677419354,
|
|
"loss": 1.0346,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.037667316876303224,
|
|
"grad_norm": 0.1006296151589254,
|
|
"learning_rate": 0.00011290322580645161,
|
|
"loss": 1.0475,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.038339947534808634,
|
|
"grad_norm": 0.09942300500167914,
|
|
"learning_rate": 0.00011491935483870966,
|
|
"loss": 1.0172,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.03901257819331405,
|
|
"grad_norm": 0.0934491829625178,
|
|
"learning_rate": 0.00011693548387096773,
|
|
"loss": 1.0506,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.03968520885181947,
|
|
"grad_norm": 0.09551251502862168,
|
|
"learning_rate": 0.00011895161290322579,
|
|
"loss": 1.0173,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.04035783951032488,
|
|
"grad_norm": 0.09171601005317345,
|
|
"learning_rate": 0.00012096774193548386,
|
|
"loss": 1.0274,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.041030470168830296,
|
|
"grad_norm": 0.09273487710305259,
|
|
"learning_rate": 0.00012298387096774192,
|
|
"loss": 0.995,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.04170310082733571,
|
|
"grad_norm": 0.09862330057801429,
|
|
"learning_rate": 0.000125,
|
|
"loss": 1.0348,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.042375731485841124,
|
|
"grad_norm": 0.09362404636564037,
|
|
"learning_rate": 0.00012701612903225805,
|
|
"loss": 1.0167,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.04304836214434654,
|
|
"grad_norm": 0.09329273833910302,
|
|
"learning_rate": 0.0001290322580645161,
|
|
"loss": 1.0835,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.04372099280285195,
|
|
"grad_norm": 0.09298449638448188,
|
|
"learning_rate": 0.0001310483870967742,
|
|
"loss": 1.0176,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.04439362346135737,
|
|
"grad_norm": 0.09347384249973195,
|
|
"learning_rate": 0.00013306451612903224,
|
|
"loss": 0.9619,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.045066254119862786,
|
|
"grad_norm": 0.13463693712447972,
|
|
"learning_rate": 0.00013508064516129032,
|
|
"loss": 0.9868,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.0457388847783682,
|
|
"grad_norm": 0.09815201596450862,
|
|
"learning_rate": 0.00013709677419354837,
|
|
"loss": 1.0736,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.046411515436873614,
|
|
"grad_norm": 0.09232625821163153,
|
|
"learning_rate": 0.00013911290322580642,
|
|
"loss": 1.0299,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.047084146095379024,
|
|
"grad_norm": 0.09070277309304493,
|
|
"learning_rate": 0.0001411290322580645,
|
|
"loss": 1.0336,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.04775677675388444,
|
|
"grad_norm": 0.10932321230103388,
|
|
"learning_rate": 0.00014314516129032258,
|
|
"loss": 1.0073,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.04842940741238986,
|
|
"grad_norm": 0.09657601874101734,
|
|
"learning_rate": 0.00014516129032258063,
|
|
"loss": 1.0251,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.04910203807089527,
|
|
"grad_norm": 0.09877927764481637,
|
|
"learning_rate": 0.0001471774193548387,
|
|
"loss": 1.0507,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.04977466872940069,
|
|
"grad_norm": 0.09274278341834487,
|
|
"learning_rate": 0.00014919354838709677,
|
|
"loss": 1.0534,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.050447299387906104,
|
|
"grad_norm": 0.09198545780463178,
|
|
"learning_rate": 0.00015120967741935482,
|
|
"loss": 1.0185,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.051119930046411514,
|
|
"grad_norm": 0.08891910715711743,
|
|
"learning_rate": 0.0001532258064516129,
|
|
"loss": 1.0039,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.05179256070491693,
|
|
"grad_norm": 0.09078315297270034,
|
|
"learning_rate": 0.00015524193548387095,
|
|
"loss": 1.0444,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.05246519136342234,
|
|
"grad_norm": 0.08977914921659898,
|
|
"learning_rate": 0.000157258064516129,
|
|
"loss": 1.0976,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.05313782202192776,
|
|
"grad_norm": 0.09433437227793554,
|
|
"learning_rate": 0.00015927419354838708,
|
|
"loss": 1.0502,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.05381045268043318,
|
|
"grad_norm": 0.08762290810061227,
|
|
"learning_rate": 0.00016129032258064513,
|
|
"loss": 1.0349,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.05448308333893859,
|
|
"grad_norm": 0.0933428344700274,
|
|
"learning_rate": 0.00016330645161290319,
|
|
"loss": 1.0371,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.055155713997444004,
|
|
"grad_norm": 0.09147179314981382,
|
|
"learning_rate": 0.00016532258064516127,
|
|
"loss": 1.0665,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.055828344655949415,
|
|
"grad_norm": 0.09293037669639792,
|
|
"learning_rate": 0.00016733870967741932,
|
|
"loss": 1.0305,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.05650097531445483,
|
|
"grad_norm": 0.09036496873154756,
|
|
"learning_rate": 0.00016935483870967742,
|
|
"loss": 1.0543,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.05717360597296025,
|
|
"grad_norm": 0.09167654419704568,
|
|
"learning_rate": 0.00017137096774193548,
|
|
"loss": 1.0301,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.05784623663146566,
|
|
"grad_norm": 0.09209240875308733,
|
|
"learning_rate": 0.00017338709677419356,
|
|
"loss": 1.0729,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.05851886728997108,
|
|
"grad_norm": 0.135579302744148,
|
|
"learning_rate": 0.0001754032258064516,
|
|
"loss": 1.0798,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.059191497948476494,
|
|
"grad_norm": 0.09653916343025865,
|
|
"learning_rate": 0.00017741935483870966,
|
|
"loss": 1.0937,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.059864128606981905,
|
|
"grad_norm": 0.09958629951032674,
|
|
"learning_rate": 0.00017943548387096774,
|
|
"loss": 1.078,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.06053675926548732,
|
|
"grad_norm": 0.08629494311396081,
|
|
"learning_rate": 0.0001814516129032258,
|
|
"loss": 1.0798,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.06120938992399273,
|
|
"grad_norm": 0.09869583205407412,
|
|
"learning_rate": 0.00018346774193548385,
|
|
"loss": 1.0374,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.06188202058249815,
|
|
"grad_norm": 0.09053280646063235,
|
|
"learning_rate": 0.00018548387096774192,
|
|
"loss": 1.0505,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.06255465124100357,
|
|
"grad_norm": 0.08150427237605223,
|
|
"learning_rate": 0.00018749999999999998,
|
|
"loss": 1.0606,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.06322728189950898,
|
|
"grad_norm": 0.0901461935028475,
|
|
"learning_rate": 0.00018951612903225806,
|
|
"loss": 1.042,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.06389991255801439,
|
|
"grad_norm": 0.08190865253332126,
|
|
"learning_rate": 0.0001915322580645161,
|
|
"loss": 1.0209,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.06457254321651981,
|
|
"grad_norm": 0.08607633580363463,
|
|
"learning_rate": 0.00019354838709677416,
|
|
"loss": 0.9894,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.06524517387502522,
|
|
"grad_norm": 0.09241895549323972,
|
|
"learning_rate": 0.00019556451612903224,
|
|
"loss": 1.0602,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.06591780453353063,
|
|
"grad_norm": 0.08596935339404507,
|
|
"learning_rate": 0.0001975806451612903,
|
|
"loss": 1.0719,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.06659043519203606,
|
|
"grad_norm": 0.08488359435021268,
|
|
"learning_rate": 0.00019959677419354837,
|
|
"loss": 1.0252,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.06726306585054147,
|
|
"grad_norm": 0.09196056231065085,
|
|
"learning_rate": 0.00020161290322580642,
|
|
"loss": 1.0454,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.06793569650904688,
|
|
"grad_norm": 0.11419199766934089,
|
|
"learning_rate": 0.00020362903225806448,
|
|
"loss": 1.049,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.0686083271675523,
|
|
"grad_norm": 0.08906626167489842,
|
|
"learning_rate": 0.00020564516129032256,
|
|
"loss": 1.0105,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.06928095782605771,
|
|
"grad_norm": 0.08517561915403676,
|
|
"learning_rate": 0.00020766129032258064,
|
|
"loss": 1.0855,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.06995358848456312,
|
|
"grad_norm": 0.0903550456898868,
|
|
"learning_rate": 0.00020967741935483871,
|
|
"loss": 1.0575,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.07062621914306855,
|
|
"grad_norm": 0.08278762418068097,
|
|
"learning_rate": 0.00021169354838709677,
|
|
"loss": 1.0535,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.07129884980157396,
|
|
"grad_norm": 0.08481194574291347,
|
|
"learning_rate": 0.00021370967741935482,
|
|
"loss": 1.0293,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.07197148046007937,
|
|
"grad_norm": 0.08984307347575393,
|
|
"learning_rate": 0.0002157258064516129,
|
|
"loss": 1.0219,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.07264411111858478,
|
|
"grad_norm": 0.09076249423258674,
|
|
"learning_rate": 0.00021774193548387095,
|
|
"loss": 1.0587,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.0733167417770902,
|
|
"grad_norm": 0.1002173498829235,
|
|
"learning_rate": 0.00021975806451612903,
|
|
"loss": 1.037,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.07398937243559561,
|
|
"grad_norm": 0.08872604532857477,
|
|
"learning_rate": 0.00022177419354838708,
|
|
"loss": 1.0333,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.07466200309410102,
|
|
"grad_norm": 0.08375526502846077,
|
|
"learning_rate": 0.00022379032258064514,
|
|
"loss": 1.059,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.07533463375260645,
|
|
"grad_norm": 0.09978318424394747,
|
|
"learning_rate": 0.00022580645161290321,
|
|
"loss": 1.0845,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.07600726441111186,
|
|
"grad_norm": 0.07782302685062692,
|
|
"learning_rate": 0.00022782258064516127,
|
|
"loss": 0.9958,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.07667989506961727,
|
|
"grad_norm": 0.0856006706991171,
|
|
"learning_rate": 0.00022983870967741932,
|
|
"loss": 1.0523,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.07735252572812269,
|
|
"grad_norm": 0.08571466890563395,
|
|
"learning_rate": 0.0002318548387096774,
|
|
"loss": 1.1096,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.0780251563866281,
|
|
"grad_norm": 0.08709457854528387,
|
|
"learning_rate": 0.00023387096774193545,
|
|
"loss": 1.0607,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.07869778704513351,
|
|
"grad_norm": 0.09018186960178792,
|
|
"learning_rate": 0.00023588709677419353,
|
|
"loss": 1.0724,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.07937041770363894,
|
|
"grad_norm": 0.08199549738270526,
|
|
"learning_rate": 0.00023790322580645158,
|
|
"loss": 1.0743,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.08004304836214435,
|
|
"grad_norm": 0.080357031829569,
|
|
"learning_rate": 0.00023991935483870964,
|
|
"loss": 1.0353,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.08071567902064976,
|
|
"grad_norm": 0.07898024579852181,
|
|
"learning_rate": 0.00024193548387096771,
|
|
"loss": 1.0322,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.08138830967915518,
|
|
"grad_norm": 0.08594691467762515,
|
|
"learning_rate": 0.0002439516129032258,
|
|
"loss": 1.026,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.08206094033766059,
|
|
"grad_norm": 0.07960732979223949,
|
|
"learning_rate": 0.00024596774193548385,
|
|
"loss": 1.0701,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.082733570996166,
|
|
"grad_norm": 0.07748692647646714,
|
|
"learning_rate": 0.00024798387096774195,
|
|
"loss": 1.0279,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.08340620165467141,
|
|
"grad_norm": 0.09702102839233316,
|
|
"learning_rate": 0.00025,
|
|
"loss": 1.0221,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.08407883231317684,
|
|
"grad_norm": 0.08104610825395912,
|
|
"learning_rate": 0.00025201612903225806,
|
|
"loss": 1.0486,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.08475146297168225,
|
|
"grad_norm": 0.08063025008619103,
|
|
"learning_rate": 0.0002540322580645161,
|
|
"loss": 1.0459,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.08542409363018766,
|
|
"grad_norm": 0.08087778063873288,
|
|
"learning_rate": 0.00025604838709677416,
|
|
"loss": 1.0908,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.08609672428869308,
|
|
"grad_norm": 0.07888634968036065,
|
|
"learning_rate": 0.0002580645161290322,
|
|
"loss": 1.0408,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.0867693549471985,
|
|
"grad_norm": 0.08297792160081463,
|
|
"learning_rate": 0.0002600806451612903,
|
|
"loss": 1.1192,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.0874419856057039,
|
|
"grad_norm": 0.08286481962331686,
|
|
"learning_rate": 0.0002620967741935484,
|
|
"loss": 1.0719,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.08811461626420933,
|
|
"grad_norm": 0.07572488265547365,
|
|
"learning_rate": 0.0002641129032258064,
|
|
"loss": 1.0561,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.08878724692271474,
|
|
"grad_norm": 0.08337241862121214,
|
|
"learning_rate": 0.0002661290322580645,
|
|
"loss": 1.1061,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.08945987758122015,
|
|
"grad_norm": 0.07959536841791225,
|
|
"learning_rate": 0.00026814516129032253,
|
|
"loss": 1.0959,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.09013250823972557,
|
|
"grad_norm": 0.07879200275084125,
|
|
"learning_rate": 0.00027016129032258064,
|
|
"loss": 1.0907,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.09080513889823098,
|
|
"grad_norm": 0.08437472314091585,
|
|
"learning_rate": 0.0002721774193548387,
|
|
"loss": 1.0902,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.0914777695567364,
|
|
"grad_norm": 0.07936113375420903,
|
|
"learning_rate": 0.00027419354838709674,
|
|
"loss": 1.0857,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.0921504002152418,
|
|
"grad_norm": 0.07404043199519923,
|
|
"learning_rate": 0.0002762096774193548,
|
|
"loss": 1.022,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.09282303087374723,
|
|
"grad_norm": 0.07966312939128797,
|
|
"learning_rate": 0.00027822580645161285,
|
|
"loss": 1.0562,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.09349566153225264,
|
|
"grad_norm": 0.07988901185413153,
|
|
"learning_rate": 0.00028024193548387095,
|
|
"loss": 1.035,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.09416829219075805,
|
|
"grad_norm": 0.0753613113842339,
|
|
"learning_rate": 0.000282258064516129,
|
|
"loss": 1.0645,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.09484092284926347,
|
|
"grad_norm": 0.07626937908013373,
|
|
"learning_rate": 0.0002842741935483871,
|
|
"loss": 1.0377,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.09551355350776888,
|
|
"grad_norm": 0.09235167533834955,
|
|
"learning_rate": 0.00028629032258064516,
|
|
"loss": 1.0979,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.0961861841662743,
|
|
"grad_norm": 0.08055166929289474,
|
|
"learning_rate": 0.0002883064516129032,
|
|
"loss": 1.0852,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.09685881482477972,
|
|
"grad_norm": 0.08429213138249105,
|
|
"learning_rate": 0.00029032258064516127,
|
|
"loss": 1.0447,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.09753144548328513,
|
|
"grad_norm": 0.08437607034953815,
|
|
"learning_rate": 0.0002923387096774193,
|
|
"loss": 1.0558,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.09820407614179054,
|
|
"grad_norm": 0.07492047494320839,
|
|
"learning_rate": 0.0002943548387096774,
|
|
"loss": 1.0291,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.09887670680029596,
|
|
"grad_norm": 0.0787931854781473,
|
|
"learning_rate": 0.0002963709677419355,
|
|
"loss": 1.0582,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.09954933745880137,
|
|
"grad_norm": 0.07645123746511935,
|
|
"learning_rate": 0.00029838709677419353,
|
|
"loss": 1.0174,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.10022196811730678,
|
|
"grad_norm": 0.07409129763787864,
|
|
"learning_rate": 0.00029999998345607283,
|
|
"loss": 1.079,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.10089459877581221,
|
|
"grad_norm": 0.08041650749931936,
|
|
"learning_rate": 0.00029999940441900595,
|
|
"loss": 1.0439,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.10156722943431762,
|
|
"grad_norm": 0.08027960443856536,
|
|
"learning_rate": 0.0002999979981892312,
|
|
"loss": 1.047,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.10223986009282303,
|
|
"grad_norm": 0.07906827754778097,
|
|
"learning_rate": 0.0002999957647745034,
|
|
"loss": 1.0961,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.10291249075132844,
|
|
"grad_norm": 0.071215050684659,
|
|
"learning_rate": 0.00029999270418713906,
|
|
"loss": 1.0405,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.10358512140983386,
|
|
"grad_norm": 0.0829151410610896,
|
|
"learning_rate": 0.00029998881644401624,
|
|
"loss": 1.0923,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.10425775206833927,
|
|
"grad_norm": 0.07164078011185154,
|
|
"learning_rate": 0.00029998410156657437,
|
|
"loss": 1.0274,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.10493038272684468,
|
|
"grad_norm": 0.07588880429132575,
|
|
"learning_rate": 0.0002999785595808143,
|
|
"loss": 1.0823,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.10560301338535011,
|
|
"grad_norm": 0.079369424589866,
|
|
"learning_rate": 0.0002999721905172982,
|
|
"loss": 1.0376,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.10627564404385552,
|
|
"grad_norm": 0.07662095383626488,
|
|
"learning_rate": 0.0002999649944111491,
|
|
"loss": 1.0539,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.10694827470236093,
|
|
"grad_norm": 0.07486801172848384,
|
|
"learning_rate": 0.0002999569713020509,
|
|
"loss": 1.0783,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.10762090536086635,
|
|
"grad_norm": 0.0766666428812244,
|
|
"learning_rate": 0.00029994812123424814,
|
|
"loss": 1.0635,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.10829353601937176,
|
|
"grad_norm": 0.0754261513974334,
|
|
"learning_rate": 0.00029993844425654586,
|
|
"loss": 1.0546,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.10896616667787717,
|
|
"grad_norm": 0.07071826590853397,
|
|
"learning_rate": 0.000299927940422309,
|
|
"loss": 1.1055,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.1096387973363826,
|
|
"grad_norm": 0.07693148525549479,
|
|
"learning_rate": 0.0002999166097894625,
|
|
"loss": 1.0925,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.11031142799488801,
|
|
"grad_norm": 0.0723848682308289,
|
|
"learning_rate": 0.0002999044524204907,
|
|
"loss": 1.0711,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.11098405865339342,
|
|
"grad_norm": 0.07549989385866508,
|
|
"learning_rate": 0.0002998914683824371,
|
|
"loss": 1.0759,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.11165668931189883,
|
|
"grad_norm": 0.08028495278710619,
|
|
"learning_rate": 0.00029987765774690397,
|
|
"loss": 1.0693,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.11232931997040425,
|
|
"grad_norm": 0.06771708980583116,
|
|
"learning_rate": 0.00029986302059005206,
|
|
"loss": 1.0791,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.11300195062890966,
|
|
"grad_norm": 0.07284561060447658,
|
|
"learning_rate": 0.00029984755699259994,
|
|
"loss": 1.0383,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.11367458128741507,
|
|
"grad_norm": 0.07793144682196305,
|
|
"learning_rate": 0.00029983126703982387,
|
|
"loss": 1.0343,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.1143472119459205,
|
|
"grad_norm": 0.0773418539344277,
|
|
"learning_rate": 0.000299814150821557,
|
|
"loss": 1.0565,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.11501984260442591,
|
|
"grad_norm": 0.07164210767629635,
|
|
"learning_rate": 0.00029979620843218917,
|
|
"loss": 1.0236,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.11569247326293132,
|
|
"grad_norm": 0.07393499209078849,
|
|
"learning_rate": 0.0002997774399706661,
|
|
"loss": 1.0753,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.11636510392143674,
|
|
"grad_norm": 0.07288026009996475,
|
|
"learning_rate": 0.0002997578455404892,
|
|
"loss": 1.0438,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.11703773457994215,
|
|
"grad_norm": 0.0954759009693307,
|
|
"learning_rate": 0.0002997374252497147,
|
|
"loss": 1.0868,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.11771036523844756,
|
|
"grad_norm": 0.07327833047011302,
|
|
"learning_rate": 0.00029971617921095305,
|
|
"loss": 1.0715,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.11838299589695299,
|
|
"grad_norm": 0.07131786035588028,
|
|
"learning_rate": 0.0002996941075413686,
|
|
"loss": 1.0803,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.1190556265554584,
|
|
"grad_norm": 0.08956736884401889,
|
|
"learning_rate": 0.0002996712103626786,
|
|
"loss": 1.0627,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.11972825721396381,
|
|
"grad_norm": 0.07934523762735987,
|
|
"learning_rate": 0.0002996474878011529,
|
|
"loss": 1.0543,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.12040088787246923,
|
|
"grad_norm": 0.09002705521503765,
|
|
"learning_rate": 0.00029962293998761263,
|
|
"loss": 1.0609,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.12107351853097464,
|
|
"grad_norm": 0.06925053859969088,
|
|
"learning_rate": 0.0002995975670574303,
|
|
"loss": 1.0624,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.12174614918948005,
|
|
"grad_norm": 0.07512999142720386,
|
|
"learning_rate": 0.00029957136915052845,
|
|
"loss": 1.0619,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.12241877984798546,
|
|
"grad_norm": 0.0715274327128957,
|
|
"learning_rate": 0.0002995443464113791,
|
|
"loss": 1.0458,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.12309141050649089,
|
|
"grad_norm": 0.07036352659794959,
|
|
"learning_rate": 0.000299516498989003,
|
|
"loss": 1.0875,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.1237640411649963,
|
|
"grad_norm": 0.07413577353443873,
|
|
"learning_rate": 0.0002994878270369685,
|
|
"loss": 1.0649,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.12443667182350171,
|
|
"grad_norm": 0.07516574985840074,
|
|
"learning_rate": 0.0002994583307133913,
|
|
"loss": 1.0298,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.12510930248200713,
|
|
"grad_norm": 0.07127442173766656,
|
|
"learning_rate": 0.00029942801018093283,
|
|
"loss": 1.0719,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.12578193314051253,
|
|
"grad_norm": 0.07197516123092482,
|
|
"learning_rate": 0.0002993968656068,
|
|
"loss": 1.0403,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.12645456379901795,
|
|
"grad_norm": 0.07093702068308229,
|
|
"learning_rate": 0.0002993648971627438,
|
|
"loss": 1.0304,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.12712719445752338,
|
|
"grad_norm": 0.0718158231945994,
|
|
"learning_rate": 0.00029933210502505893,
|
|
"loss": 1.0646,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.12779982511602878,
|
|
"grad_norm": 0.07700946884853352,
|
|
"learning_rate": 0.00029929848937458196,
|
|
"loss": 1.051,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.1284724557745342,
|
|
"grad_norm": 0.37155780391202864,
|
|
"learning_rate": 0.0002992640503966913,
|
|
"loss": 1.0834,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.12914508643303962,
|
|
"grad_norm": 0.20536638900723664,
|
|
"learning_rate": 0.0002992287882813053,
|
|
"loss": 1.1027,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.12981771709154502,
|
|
"grad_norm": 0.08841263116800245,
|
|
"learning_rate": 0.00029919270322288215,
|
|
"loss": 1.0939,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.13049034775005044,
|
|
"grad_norm": 0.6429288189128298,
|
|
"learning_rate": 0.00029915579542041763,
|
|
"loss": 1.0454,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.13116297840855587,
|
|
"grad_norm": 0.07893043114765416,
|
|
"learning_rate": 0.00029911806507744513,
|
|
"loss": 1.0522,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.13183560906706127,
|
|
"grad_norm": 0.08700899117062741,
|
|
"learning_rate": 0.0002990795124020339,
|
|
"loss": 1.0802,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.1325082397255667,
|
|
"grad_norm": 0.40838005866687616,
|
|
"learning_rate": 0.0002990401376067881,
|
|
"loss": 1.5534,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.13318087038407211,
|
|
"grad_norm": 0.10787306069567282,
|
|
"learning_rate": 0.00029899994090884545,
|
|
"loss": 1.1244,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.1338535010425775,
|
|
"grad_norm": 4.0271804629424635,
|
|
"learning_rate": 0.0002989589225298763,
|
|
"loss": 1.0778,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.13452613170108293,
|
|
"grad_norm": 0.1798230372096084,
|
|
"learning_rate": 0.00029891708269608235,
|
|
"loss": 1.0863,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.13519876235958836,
|
|
"grad_norm": 0.0918137691588807,
|
|
"learning_rate": 0.00029887442163819503,
|
|
"loss": 1.1321,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.13587139301809376,
|
|
"grad_norm": 0.2945178807632444,
|
|
"learning_rate": 0.0002988309395914749,
|
|
"loss": 1.115,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.13654402367659918,
|
|
"grad_norm": 0.5933305026235249,
|
|
"learning_rate": 0.00029878663679570964,
|
|
"loss": 1.0957,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.1372166543351046,
|
|
"grad_norm": 0.17535925174542233,
|
|
"learning_rate": 0.00029874151349521336,
|
|
"loss": 1.0946,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.13788928499361,
|
|
"grad_norm": 0.11591544009308001,
|
|
"learning_rate": 0.0002986955699388247,
|
|
"loss": 1.1157,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.13856191565211542,
|
|
"grad_norm": 0.0927251025295262,
|
|
"learning_rate": 0.0002986488063799058,
|
|
"loss": 1.091,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.13923454631062085,
|
|
"grad_norm": 0.07932226343469499,
|
|
"learning_rate": 0.000298601223076341,
|
|
"loss": 1.0534,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.13990717696912625,
|
|
"grad_norm": 0.0807602717471893,
|
|
"learning_rate": 0.000298552820290535,
|
|
"loss": 1.0309,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.14057980762763167,
|
|
"grad_norm": 0.09433877161816717,
|
|
"learning_rate": 0.00029850359828941176,
|
|
"loss": 1.0476,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.1412524382861371,
|
|
"grad_norm": 0.7121539088215401,
|
|
"learning_rate": 0.0002984535573444129,
|
|
"loss": 1.0698,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.1419250689446425,
|
|
"grad_norm": 0.08321713836368394,
|
|
"learning_rate": 0.00029840269773149614,
|
|
"loss": 1.0677,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.14259769960314791,
|
|
"grad_norm": 0.09079845364417001,
|
|
"learning_rate": 0.00029835101973113397,
|
|
"loss": 1.058,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.14327033026165334,
|
|
"grad_norm": 0.0707607351743187,
|
|
"learning_rate": 0.000298298523628312,
|
|
"loss": 1.0503,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.14394296092015874,
|
|
"grad_norm": 0.14627547315644027,
|
|
"learning_rate": 0.0002982452097125273,
|
|
"loss": 1.045,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.14461559157866416,
|
|
"grad_norm": 0.07536850562776565,
|
|
"learning_rate": 0.0002981910782777869,
|
|
"loss": 1.0376,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.14528822223716956,
|
|
"grad_norm": 0.06861636061492025,
|
|
"learning_rate": 0.0002981361296226063,
|
|
"loss": 1.0692,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.14596085289567498,
|
|
"grad_norm": 0.0709747455258586,
|
|
"learning_rate": 0.0002980803640500073,
|
|
"loss": 1.0789,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.1466334835541804,
|
|
"grad_norm": 0.07359845843812414,
|
|
"learning_rate": 0.0002980237818675172,
|
|
"loss": 1.0958,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.1473061142126858,
|
|
"grad_norm": 0.06901465022817592,
|
|
"learning_rate": 0.00029796638338716624,
|
|
"loss": 1.024,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.14797874487119123,
|
|
"grad_norm": 0.06673500731770182,
|
|
"learning_rate": 0.00029790816892548644,
|
|
"loss": 1.0705,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.14865137552969665,
|
|
"grad_norm": 0.07469404586580403,
|
|
"learning_rate": 0.00029784913880350947,
|
|
"loss": 1.0511,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.14932400618820205,
|
|
"grad_norm": 0.07010042708592439,
|
|
"learning_rate": 0.00029778929334676535,
|
|
"loss": 1.0391,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.14999663684670747,
|
|
"grad_norm": 0.0686148252448679,
|
|
"learning_rate": 0.0002977286328852802,
|
|
"loss": 1.0535,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.1506692675052129,
|
|
"grad_norm": 0.07242929663095796,
|
|
"learning_rate": 0.00029766715775357447,
|
|
"loss": 0.9996,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.1513418981637183,
|
|
"grad_norm": 0.07237289997413665,
|
|
"learning_rate": 0.00029760486829066157,
|
|
"loss": 1.0542,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.15201452882222372,
|
|
"grad_norm": 0.06706892168069356,
|
|
"learning_rate": 0.00029754176484004537,
|
|
"loss": 1.0491,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.15268715948072914,
|
|
"grad_norm": 0.070480319021014,
|
|
"learning_rate": 0.00029747784774971866,
|
|
"loss": 1.0674,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.15335979013923454,
|
|
"grad_norm": 0.06989655990759643,
|
|
"learning_rate": 0.00029741311737216126,
|
|
"loss": 1.0303,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.15403242079773996,
|
|
"grad_norm": 0.06540017208658754,
|
|
"learning_rate": 0.0002973475740643378,
|
|
"loss": 1.0451,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.15470505145624538,
|
|
"grad_norm": 0.06689774497179028,
|
|
"learning_rate": 0.0002972812181876961,
|
|
"loss": 1.0409,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.15537768211475078,
|
|
"grad_norm": 0.06655596147731474,
|
|
"learning_rate": 0.00029721405010816487,
|
|
"loss": 1.0401,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.1560503127732562,
|
|
"grad_norm": 0.06680398472117724,
|
|
"learning_rate": 0.00029714607019615193,
|
|
"loss": 1.0009,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.15672294343176163,
|
|
"grad_norm": 0.06987909994212581,
|
|
"learning_rate": 0.000297077278826542,
|
|
"loss": 1.0814,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.15739557409026703,
|
|
"grad_norm": 0.0688888114655529,
|
|
"learning_rate": 0.00029700767637869476,
|
|
"loss": 1.0194,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.15806820474877245,
|
|
"grad_norm": 0.07635128442413575,
|
|
"learning_rate": 0.0002969372632364426,
|
|
"loss": 1.027,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.15874083540727787,
|
|
"grad_norm": 0.06901775101746678,
|
|
"learning_rate": 0.0002968660397880886,
|
|
"loss": 0.9823,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.15941346606578327,
|
|
"grad_norm": 0.06666991934971492,
|
|
"learning_rate": 0.0002967940064264045,
|
|
"loss": 1.0149,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.1600860967242887,
|
|
"grad_norm": 0.06881649593440277,
|
|
"learning_rate": 0.00029672116354862837,
|
|
"loss": 1.0462,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.16075872738279412,
|
|
"grad_norm": 0.07225944057318469,
|
|
"learning_rate": 0.0002966475115564624,
|
|
"loss": 1.0607,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.16143135804129952,
|
|
"grad_norm": 0.07486073617413597,
|
|
"learning_rate": 0.0002965730508560709,
|
|
"loss": 1.0394,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.16210398869980494,
|
|
"grad_norm": 0.07577029538496009,
|
|
"learning_rate": 0.0002964977818580777,
|
|
"loss": 1.0071,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.16277661935831036,
|
|
"grad_norm": 0.06698327059024388,
|
|
"learning_rate": 0.0002964217049775642,
|
|
"loss": 1.0587,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.16344925001681576,
|
|
"grad_norm": 0.06454076686592887,
|
|
"learning_rate": 0.00029634482063406725,
|
|
"loss": 1.0545,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.16412188067532119,
|
|
"grad_norm": 0.06876206027401735,
|
|
"learning_rate": 0.00029626712925157604,
|
|
"loss": 0.9889,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.16479451133382658,
|
|
"grad_norm": 0.07048318796162506,
|
|
"learning_rate": 0.0002961886312585307,
|
|
"loss": 1.0433,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.165467141992332,
|
|
"grad_norm": 0.06525242272303788,
|
|
"learning_rate": 0.0002961093270878194,
|
|
"loss": 1.0634,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.16613977265083743,
|
|
"grad_norm": 0.06852185934917507,
|
|
"learning_rate": 0.0002960292171767761,
|
|
"loss": 1.0391,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.16681240330934283,
|
|
"grad_norm": 0.06860996659742673,
|
|
"learning_rate": 0.0002959483019671781,
|
|
"loss": 1.0555,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.16748503396784825,
|
|
"grad_norm": 0.08453865171661228,
|
|
"learning_rate": 0.0002958665819052436,
|
|
"loss": 1.0307,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.16815766462635368,
|
|
"grad_norm": 0.07555088886637626,
|
|
"learning_rate": 0.00029578405744162936,
|
|
"loss": 1.0488,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.16883029528485907,
|
|
"grad_norm": 0.06406919323423478,
|
|
"learning_rate": 0.0002957007290314281,
|
|
"loss": 1.0251,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.1695029259433645,
|
|
"grad_norm": 0.06549772066839298,
|
|
"learning_rate": 0.00029561659713416596,
|
|
"loss": 1.0424,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.17017555660186992,
|
|
"grad_norm": 0.06632082776805458,
|
|
"learning_rate": 0.00029553166221380004,
|
|
"loss": 1.0585,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.17084818726037532,
|
|
"grad_norm": 0.06748134536180718,
|
|
"learning_rate": 0.00029544592473871597,
|
|
"loss": 1.0221,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.17152081791888074,
|
|
"grad_norm": 0.06825285055234158,
|
|
"learning_rate": 0.0002953593851817249,
|
|
"loss": 1.0438,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.17219344857738617,
|
|
"grad_norm": 0.06203129710524907,
|
|
"learning_rate": 0.00029527204402006143,
|
|
"loss": 1.0114,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.17286607923589156,
|
|
"grad_norm": 0.0722647514508147,
|
|
"learning_rate": 0.0002951839017353806,
|
|
"loss": 0.9961,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.173538709894397,
|
|
"grad_norm": 0.06839703114950606,
|
|
"learning_rate": 0.0002950949588137553,
|
|
"loss": 1.068,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.1742113405529024,
|
|
"grad_norm": 0.06504863220824372,
|
|
"learning_rate": 0.00029500521574567386,
|
|
"loss": 1.0338,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.1748839712114078,
|
|
"grad_norm": 0.06912815184187844,
|
|
"learning_rate": 0.00029491467302603694,
|
|
"loss": 1.075,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.17555660186991323,
|
|
"grad_norm": 0.06795709704635283,
|
|
"learning_rate": 0.0002948233311541549,
|
|
"loss": 1.0522,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.17622923252841866,
|
|
"grad_norm": 0.07980869749638134,
|
|
"learning_rate": 0.00029473119063374545,
|
|
"loss": 1.0135,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.17690186318692405,
|
|
"grad_norm": 0.0631244748578968,
|
|
"learning_rate": 0.00029463825197293027,
|
|
"loss": 1.0729,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.17757449384542948,
|
|
"grad_norm": 0.06703926037394037,
|
|
"learning_rate": 0.0002945445156842327,
|
|
"loss": 1.0336,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.1782471245039349,
|
|
"grad_norm": 0.06522291398196874,
|
|
"learning_rate": 0.0002944499822845746,
|
|
"loss": 1.0136,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.1789197551624403,
|
|
"grad_norm": 0.0685652259526568,
|
|
"learning_rate": 0.00029435465229527355,
|
|
"loss": 1.0532,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.17959238582094572,
|
|
"grad_norm": 0.06121295547267051,
|
|
"learning_rate": 0.0002942585262420402,
|
|
"loss": 1.0215,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.18026501647945115,
|
|
"grad_norm": 0.06860811463441442,
|
|
"learning_rate": 0.00029416160465497516,
|
|
"loss": 1.0204,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.18093764713795654,
|
|
"grad_norm": 0.06445083042599647,
|
|
"learning_rate": 0.000294063888068566,
|
|
"loss": 1.0451,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.18161027779646197,
|
|
"grad_norm": 0.06632986737605773,
|
|
"learning_rate": 0.0002939653770216845,
|
|
"loss": 0.9969,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.1822829084549674,
|
|
"grad_norm": 0.0682696802304684,
|
|
"learning_rate": 0.00029386607205758374,
|
|
"loss": 1.0251,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.1829555391134728,
|
|
"grad_norm": 0.06474417456320194,
|
|
"learning_rate": 0.00029376597372389473,
|
|
"loss": 0.988,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.1836281697719782,
|
|
"grad_norm": 0.06702069019563696,
|
|
"learning_rate": 0.00029366508257262373,
|
|
"loss": 1.0201,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.1843008004304836,
|
|
"grad_norm": 0.07355768046137742,
|
|
"learning_rate": 0.00029356339916014916,
|
|
"loss": 1.0264,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.18497343108898903,
|
|
"grad_norm": 0.06503361777179631,
|
|
"learning_rate": 0.00029346092404721846,
|
|
"loss": 1.0451,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.18564606174749446,
|
|
"grad_norm": 0.06906367933360327,
|
|
"learning_rate": 0.0002933576577989449,
|
|
"loss": 1.0196,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.18631869240599985,
|
|
"grad_norm": 0.07061676537966292,
|
|
"learning_rate": 0.0002932536009848047,
|
|
"loss": 1.0301,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.18699132306450528,
|
|
"grad_norm": 0.069484640759308,
|
|
"learning_rate": 0.00029314875417863373,
|
|
"loss": 1.0168,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.1876639537230107,
|
|
"grad_norm": 0.07052322125572345,
|
|
"learning_rate": 0.0002930431179586244,
|
|
"loss": 1.0015,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.1883365843815161,
|
|
"grad_norm": 0.0658786556485662,
|
|
"learning_rate": 0.0002929366929073224,
|
|
"loss": 1.0006,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.18900921504002152,
|
|
"grad_norm": 0.06732232716188208,
|
|
"learning_rate": 0.00029282947961162357,
|
|
"loss": 1.0667,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.18968184569852695,
|
|
"grad_norm": 0.06984081400541851,
|
|
"learning_rate": 0.00029272147866277057,
|
|
"loss": 1.0306,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.19035447635703234,
|
|
"grad_norm": 0.06366943481097173,
|
|
"learning_rate": 0.00029261269065634986,
|
|
"loss": 1.0238,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.19102710701553777,
|
|
"grad_norm": 0.0612748756213378,
|
|
"learning_rate": 0.00029250311619228805,
|
|
"loss": 1.0256,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.1916997376740432,
|
|
"grad_norm": 0.06771279040627849,
|
|
"learning_rate": 0.0002923927558748489,
|
|
"loss": 1.0281,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.1923723683325486,
|
|
"grad_norm": 0.06617871674134063,
|
|
"learning_rate": 0.0002922816103126298,
|
|
"loss": 1.0184,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.193044998991054,
|
|
"grad_norm": 0.0638741626749275,
|
|
"learning_rate": 0.0002921696801185585,
|
|
"loss": 1.049,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.19371762964955944,
|
|
"grad_norm": 0.07508489340820632,
|
|
"learning_rate": 0.0002920569659098898,
|
|
"loss": 1.0629,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.19439026030806483,
|
|
"grad_norm": 0.06868936170957156,
|
|
"learning_rate": 0.0002919434683082018,
|
|
"loss": 1.0554,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.19506289096657026,
|
|
"grad_norm": 0.06950639681678793,
|
|
"learning_rate": 0.0002918291879393931,
|
|
"loss": 1.0253,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.19573552162507568,
|
|
"grad_norm": 0.06524073101638589,
|
|
"learning_rate": 0.0002917141254336787,
|
|
"loss": 1.0272,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.19640815228358108,
|
|
"grad_norm": 0.06648087678063275,
|
|
"learning_rate": 0.00029159828142558694,
|
|
"loss": 0.9573,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.1970807829420865,
|
|
"grad_norm": 0.06488313142830643,
|
|
"learning_rate": 0.0002914816565539557,
|
|
"loss": 1.0286,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.19775341360059193,
|
|
"grad_norm": 0.07163591621516463,
|
|
"learning_rate": 0.00029136425146192925,
|
|
"loss": 0.987,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.19842604425909732,
|
|
"grad_norm": 0.0736966578106896,
|
|
"learning_rate": 0.00029124606679695436,
|
|
"loss": 1.0321,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.19909867491760275,
|
|
"grad_norm": 0.06765832425648322,
|
|
"learning_rate": 0.00029112710321077697,
|
|
"loss": 1.0299,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.19977130557610817,
|
|
"grad_norm": 0.06835624737932443,
|
|
"learning_rate": 0.00029100736135943833,
|
|
"loss": 1.014,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.20044393623461357,
|
|
"grad_norm": 0.06535609384863643,
|
|
"learning_rate": 0.0002908868419032717,
|
|
"loss": 0.9961,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.201116566893119,
|
|
"grad_norm": 0.06399387489597937,
|
|
"learning_rate": 0.0002907655455068985,
|
|
"loss": 1.0251,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.20178919755162442,
|
|
"grad_norm": 0.06910761394744257,
|
|
"learning_rate": 0.0002906434728392247,
|
|
"loss": 1.0211,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.2024618282101298,
|
|
"grad_norm": 0.06771869394836479,
|
|
"learning_rate": 0.00029052062457343697,
|
|
"loss": 1.0672,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.20313445886863524,
|
|
"grad_norm": 0.06512325002739387,
|
|
"learning_rate": 0.0002903970013869994,
|
|
"loss": 0.9796,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.20380708952714063,
|
|
"grad_norm": 0.06907395495907703,
|
|
"learning_rate": 0.0002902726039616493,
|
|
"loss": 1.0199,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.20447972018564606,
|
|
"grad_norm": 0.07845817619134507,
|
|
"learning_rate": 0.0002901474329833937,
|
|
"loss": 1.036,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.20515235084415148,
|
|
"grad_norm": 0.06464732031438017,
|
|
"learning_rate": 0.00029002148914250553,
|
|
"loss": 1.0068,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.20582498150265688,
|
|
"grad_norm": 0.06627647931212392,
|
|
"learning_rate": 0.00028989477313351957,
|
|
"loss": 0.9713,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.2064976121611623,
|
|
"grad_norm": 0.06628903796474898,
|
|
"learning_rate": 0.00028976728565522915,
|
|
"loss": 1.0242,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.20717024281966773,
|
|
"grad_norm": 0.07099236238385558,
|
|
"learning_rate": 0.00028963902741068175,
|
|
"loss": 1.0052,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.20784287347817312,
|
|
"grad_norm": 0.06704768653098384,
|
|
"learning_rate": 0.0002895099991071754,
|
|
"loss": 1.0003,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.20851550413667855,
|
|
"grad_norm": 0.06714193079186036,
|
|
"learning_rate": 0.00028938020145625467,
|
|
"loss": 1.0034,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.20918813479518397,
|
|
"grad_norm": 0.07669111095324174,
|
|
"learning_rate": 0.00028924963517370703,
|
|
"loss": 1.0353,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.20986076545368937,
|
|
"grad_norm": 0.06543093269181684,
|
|
"learning_rate": 0.0002891183009795584,
|
|
"loss": 1.0281,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.2105333961121948,
|
|
"grad_norm": 0.06620556706376186,
|
|
"learning_rate": 0.0002889861995980696,
|
|
"loss": 0.9855,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.21120602677070022,
|
|
"grad_norm": 0.06987041404693957,
|
|
"learning_rate": 0.0002888533317577322,
|
|
"loss": 1.0137,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.2118786574292056,
|
|
"grad_norm": 0.06420643002717302,
|
|
"learning_rate": 0.00028871969819126446,
|
|
"loss": 0.9949,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.21255128808771104,
|
|
"grad_norm": 0.06880594384530382,
|
|
"learning_rate": 0.00028858529963560745,
|
|
"loss": 1.0173,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.21322391874621646,
|
|
"grad_norm": 0.0747143183769185,
|
|
"learning_rate": 0.00028845013683192073,
|
|
"loss": 1.0239,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.21389654940472186,
|
|
"grad_norm": 0.06767835738030381,
|
|
"learning_rate": 0.00028831421052557854,
|
|
"loss": 1.0345,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.21456918006322728,
|
|
"grad_norm": 0.06245317417171576,
|
|
"learning_rate": 0.0002881775214661656,
|
|
"loss": 1.046,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.2152418107217327,
|
|
"grad_norm": 0.06851847320305292,
|
|
"learning_rate": 0.0002880400704074727,
|
|
"loss": 1.0237,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.2159144413802381,
|
|
"grad_norm": 0.07060354312063585,
|
|
"learning_rate": 0.00028790185810749307,
|
|
"loss": 1.0544,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.21658707203874353,
|
|
"grad_norm": 0.06780678570608949,
|
|
"learning_rate": 0.0002877628853284177,
|
|
"loss": 1.0454,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.21725970269724895,
|
|
"grad_norm": 0.06415115958536001,
|
|
"learning_rate": 0.00028762315283663146,
|
|
"loss": 1.0533,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.21793233335575435,
|
|
"grad_norm": 0.06810949401736772,
|
|
"learning_rate": 0.0002874826614027087,
|
|
"loss": 1.0502,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.21860496401425977,
|
|
"grad_norm": 0.06495370571278798,
|
|
"learning_rate": 0.0002873414118014092,
|
|
"loss": 0.9889,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.2192775946727652,
|
|
"grad_norm": 0.06650525610263389,
|
|
"learning_rate": 0.0002871994048116735,
|
|
"loss": 0.9953,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.2199502253312706,
|
|
"grad_norm": 0.9281345660239769,
|
|
"learning_rate": 0.0002870566412166192,
|
|
"loss": 0.9753,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.22062285598977602,
|
|
"grad_norm": 0.07874999810678028,
|
|
"learning_rate": 0.000286913121803536,
|
|
"loss": 1.0589,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.22129548664828144,
|
|
"grad_norm": 0.07137195124766428,
|
|
"learning_rate": 0.00028676884736388166,
|
|
"loss": 1.0262,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.22196811730678684,
|
|
"grad_norm": 0.07407503743741842,
|
|
"learning_rate": 0.0002866238186932781,
|
|
"loss": 1.062,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.22264074796529226,
|
|
"grad_norm": 0.07384616098410135,
|
|
"learning_rate": 0.0002864780365915059,
|
|
"loss": 1.0539,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.22331337862379766,
|
|
"grad_norm": 0.07446398508616892,
|
|
"learning_rate": 0.0002863315018625011,
|
|
"loss": 0.9937,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.22398600928230308,
|
|
"grad_norm": 0.0667414804323104,
|
|
"learning_rate": 0.0002861842153143499,
|
|
"loss": 0.9915,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.2246586399408085,
|
|
"grad_norm": 0.07501767013141597,
|
|
"learning_rate": 0.0002860361777592845,
|
|
"loss": 1.032,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.2253312705993139,
|
|
"grad_norm": 0.07397962895213549,
|
|
"learning_rate": 0.0002858873900136787,
|
|
"loss": 1.023,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.22600390125781933,
|
|
"grad_norm": 0.06986271250274151,
|
|
"learning_rate": 0.0002857378528980435,
|
|
"loss": 1.0069,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.22667653191632475,
|
|
"grad_norm": 0.06758485727346657,
|
|
"learning_rate": 0.0002855875672370222,
|
|
"loss": 1.0125,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.22734916257483015,
|
|
"grad_norm": 0.06966621104334436,
|
|
"learning_rate": 0.00028543653385938603,
|
|
"loss": 1.019,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.22802179323333557,
|
|
"grad_norm": 0.07355147933025005,
|
|
"learning_rate": 0.00028528475359802975,
|
|
"loss": 1.0433,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.228694423891841,
|
|
"grad_norm": 0.06265452252311694,
|
|
"learning_rate": 0.0002851322272899668,
|
|
"loss": 1.0184,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.2293670545503464,
|
|
"grad_norm": 0.061438088188945056,
|
|
"learning_rate": 0.0002849789557763249,
|
|
"loss": 1.0061,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.23003968520885182,
|
|
"grad_norm": 0.06445178157108626,
|
|
"learning_rate": 0.00028482493990234127,
|
|
"loss": 0.9983,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.23071231586735724,
|
|
"grad_norm": 0.06827810304381737,
|
|
"learning_rate": 0.000284670180517358,
|
|
"loss": 1.0497,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.23138494652586264,
|
|
"grad_norm": 0.06271824849288654,
|
|
"learning_rate": 0.0002845146784748173,
|
|
"loss": 1.0258,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.23205757718436806,
|
|
"grad_norm": 0.06209741624879291,
|
|
"learning_rate": 0.00028435843463225707,
|
|
"loss": 1.0015,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.2327302078428735,
|
|
"grad_norm": 0.06712008833902464,
|
|
"learning_rate": 0.0002842014498513057,
|
|
"loss": 1.0297,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.23340283850137888,
|
|
"grad_norm": 0.07663574411261381,
|
|
"learning_rate": 0.00028404372499767793,
|
|
"loss": 1.0363,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.2340754691598843,
|
|
"grad_norm": 0.07360436071600215,
|
|
"learning_rate": 0.00028388526094116933,
|
|
"loss": 1.0156,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.23474809981838973,
|
|
"grad_norm": 0.06983865341864652,
|
|
"learning_rate": 0.0002837260585556523,
|
|
"loss": 1.0211,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.23542073047689513,
|
|
"grad_norm": 0.06827222444417715,
|
|
"learning_rate": 0.0002835661187190705,
|
|
"loss": 1.0142,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.23609336113540055,
|
|
"grad_norm": 0.057534870796608614,
|
|
"learning_rate": 0.00028340544231343466,
|
|
"loss": 0.9828,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.23676599179390598,
|
|
"grad_norm": 0.062030787408893256,
|
|
"learning_rate": 0.0002832440302248173,
|
|
"loss": 1.0166,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.23743862245241137,
|
|
"grad_norm": 0.06976421841182824,
|
|
"learning_rate": 0.0002830818833433479,
|
|
"loss": 1.0838,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.2381112531109168,
|
|
"grad_norm": 0.06119115702007277,
|
|
"learning_rate": 0.0002829190025632082,
|
|
"loss": 1.0102,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.23878388376942222,
|
|
"grad_norm": 0.06903918932060034,
|
|
"learning_rate": 0.000282755388782627,
|
|
"loss": 1.0068,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.23945651442792762,
|
|
"grad_norm": 0.07303208628213309,
|
|
"learning_rate": 0.0002825910429038755,
|
|
"loss": 1.0236,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.24012914508643304,
|
|
"grad_norm": 0.06557501175714182,
|
|
"learning_rate": 0.00028242596583326194,
|
|
"loss": 1.0096,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.24080177574493847,
|
|
"grad_norm": 0.07024712893766441,
|
|
"learning_rate": 0.00028226015848112693,
|
|
"loss": 1.024,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.24147440640344386,
|
|
"grad_norm": 0.061836689683176946,
|
|
"learning_rate": 0.00028209362176183833,
|
|
"loss": 1.0118,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.2421470370619493,
|
|
"grad_norm": 0.06288178365841733,
|
|
"learning_rate": 0.00028192635659378623,
|
|
"loss": 0.9944,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.24281966772045468,
|
|
"grad_norm": 0.0653498569458799,
|
|
"learning_rate": 0.0002817583638993778,
|
|
"loss": 0.9821,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.2434922983789601,
|
|
"grad_norm": 0.06501875393694694,
|
|
"learning_rate": 0.0002815896446050322,
|
|
"loss": 0.979,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.24416492903746553,
|
|
"grad_norm": 0.06860382803225752,
|
|
"learning_rate": 0.0002814201996411757,
|
|
"loss": 1.0661,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.24483755969597093,
|
|
"grad_norm": 0.06882483957372922,
|
|
"learning_rate": 0.0002812500299422362,
|
|
"loss": 0.9763,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.24551019035447635,
|
|
"grad_norm": 0.06819349685039716,
|
|
"learning_rate": 0.0002810791364466383,
|
|
"loss": 0.9679,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.24618282101298178,
|
|
"grad_norm": 0.06755766217610122,
|
|
"learning_rate": 0.0002809075200967981,
|
|
"loss": 1.0079,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.24685545167148717,
|
|
"grad_norm": 0.06646837866500027,
|
|
"learning_rate": 0.000280735181839118,
|
|
"loss": 0.9782,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.2475280823299926,
|
|
"grad_norm": 0.06901054481657276,
|
|
"learning_rate": 0.00028056212262398143,
|
|
"loss": 0.9849,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.24820071298849802,
|
|
"grad_norm": 0.06470902262220722,
|
|
"learning_rate": 0.0002803883434057477,
|
|
"loss": 1.0168,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.24887334364700342,
|
|
"grad_norm": 0.06359268644716512,
|
|
"learning_rate": 0.00028021384514274655,
|
|
"loss": 1.016,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.24954597430550884,
|
|
"grad_norm": 0.06467268670269033,
|
|
"learning_rate": 0.0002800386287972731,
|
|
"loss": 1.0076,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.25021860496401427,
|
|
"grad_norm": 0.0632393566846638,
|
|
"learning_rate": 0.0002798626953355825,
|
|
"loss": 0.9818,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.2508912356225197,
|
|
"grad_norm": 0.06540791283180712,
|
|
"learning_rate": 0.0002796860457278843,
|
|
"loss": 1.0428,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.25156386628102506,
|
|
"grad_norm": 0.06094576123930132,
|
|
"learning_rate": 0.0002795086809483376,
|
|
"loss": 0.944,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.2522364969395305,
|
|
"grad_norm": 0.06411498342379629,
|
|
"learning_rate": 0.0002793306019750452,
|
|
"loss": 1.0099,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.2529091275980359,
|
|
"grad_norm": 0.06860575565519289,
|
|
"learning_rate": 0.00027915180979004855,
|
|
"loss": 0.9889,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.25358175825654133,
|
|
"grad_norm": 0.07330880194764075,
|
|
"learning_rate": 0.00027897230537932225,
|
|
"loss": 1.037,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.25425438891504676,
|
|
"grad_norm": 0.06400885002099273,
|
|
"learning_rate": 0.0002787920897327684,
|
|
"loss": 1.0211,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.2549270195735522,
|
|
"grad_norm": 0.06447537245876075,
|
|
"learning_rate": 0.0002786111638442115,
|
|
"loss": 0.9675,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.25559965023205755,
|
|
"grad_norm": 0.06438920821051117,
|
|
"learning_rate": 0.00027842952871139255,
|
|
"loss": 1.0014,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.256272280890563,
|
|
"grad_norm": 0.06337500273733042,
|
|
"learning_rate": 0.000278247185335964,
|
|
"loss": 0.9479,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 0.2569449115490684,
|
|
"grad_norm": 0.06693550599643627,
|
|
"learning_rate": 0.0002780641347234839,
|
|
"loss": 0.9948,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.2576175422075738,
|
|
"grad_norm": 0.06189864322039309,
|
|
"learning_rate": 0.0002778803778834105,
|
|
"loss": 0.9892,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 0.25829017286607925,
|
|
"grad_norm": 0.06327884986591356,
|
|
"learning_rate": 0.00027769591582909654,
|
|
"loss": 0.9805,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.2589628035245847,
|
|
"grad_norm": 0.06719861601589218,
|
|
"learning_rate": 0.0002775107495777839,
|
|
"loss": 0.9874,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.25963543418309004,
|
|
"grad_norm": 0.0802158924797212,
|
|
"learning_rate": 0.00027732488015059777,
|
|
"loss": 1.0113,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.26030806484159547,
|
|
"grad_norm": 0.06727053907609576,
|
|
"learning_rate": 0.00027713830857254107,
|
|
"loss": 0.9994,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 0.2609806955001009,
|
|
"grad_norm": 0.06258712788033971,
|
|
"learning_rate": 0.0002769510358724889,
|
|
"loss": 1.0189,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.2616533261586063,
|
|
"grad_norm": 0.06977543060346805,
|
|
"learning_rate": 0.00027676306308318285,
|
|
"loss": 1.0124,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 0.26232595681711174,
|
|
"grad_norm": 0.07132869843251438,
|
|
"learning_rate": 0.00027657439124122504,
|
|
"loss": 1.0114,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.26299858747561716,
|
|
"grad_norm": 0.06589196655607017,
|
|
"learning_rate": 0.00027638502138707286,
|
|
"loss": 0.9763,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 0.26367121813412253,
|
|
"grad_norm": 0.0628837134210337,
|
|
"learning_rate": 0.0002761949545650328,
|
|
"loss": 1.022,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.26434384879262796,
|
|
"grad_norm": 0.06500486647928407,
|
|
"learning_rate": 0.00027600419182325503,
|
|
"loss": 0.9963,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 0.2650164794511334,
|
|
"grad_norm": 0.06352366459995422,
|
|
"learning_rate": 0.0002758127342137273,
|
|
"loss": 1.0398,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.2656891101096388,
|
|
"grad_norm": 0.06734294053419544,
|
|
"learning_rate": 0.00027562058279226943,
|
|
"loss": 0.991,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.26636174076814423,
|
|
"grad_norm": 0.05908395334931715,
|
|
"learning_rate": 0.00027542773861852736,
|
|
"loss": 0.9446,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.26703437142664965,
|
|
"grad_norm": 0.06394425508709521,
|
|
"learning_rate": 0.0002752342027559672,
|
|
"loss": 0.9973,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 0.267707002085155,
|
|
"grad_norm": 0.061889140536584404,
|
|
"learning_rate": 0.0002750399762718696,
|
|
"loss": 0.9833,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.26837963274366045,
|
|
"grad_norm": 0.06101925340274876,
|
|
"learning_rate": 0.0002748450602373237,
|
|
"loss": 0.9978,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 0.26905226340216587,
|
|
"grad_norm": 0.060092029180786675,
|
|
"learning_rate": 0.00027464945572722117,
|
|
"loss": 0.9893,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.2697248940606713,
|
|
"grad_norm": 0.06441845729424575,
|
|
"learning_rate": 0.0002744531638202506,
|
|
"loss": 1.027,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 0.2703975247191767,
|
|
"grad_norm": 0.07685133272762304,
|
|
"learning_rate": 0.00027425618559889103,
|
|
"loss": 1.0008,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.2710701553776821,
|
|
"grad_norm": 0.07296855729895588,
|
|
"learning_rate": 0.0002740585221494065,
|
|
"loss": 1.0134,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 0.2717427860361875,
|
|
"grad_norm": 0.06305663285637599,
|
|
"learning_rate": 0.00027386017456183977,
|
|
"loss": 0.9751,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.27241541669469294,
|
|
"grad_norm": 0.06665022031648576,
|
|
"learning_rate": 0.00027366114393000634,
|
|
"loss": 1.0051,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.27308804735319836,
|
|
"grad_norm": 0.06831910827083325,
|
|
"learning_rate": 0.00027346143135148845,
|
|
"loss": 0.9834,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.2737606780117038,
|
|
"grad_norm": 0.0790830090918589,
|
|
"learning_rate": 0.0002732610379276292,
|
|
"loss": 1.0133,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 0.2744333086702092,
|
|
"grad_norm": 0.061991084813353935,
|
|
"learning_rate": 0.0002730599647635261,
|
|
"loss": 1.0148,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.2751059393287146,
|
|
"grad_norm": 0.06401059184328592,
|
|
"learning_rate": 0.0002728582129680251,
|
|
"loss": 0.9944,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 0.27577856998722,
|
|
"grad_norm": 0.06976597880172361,
|
|
"learning_rate": 0.00027265578365371496,
|
|
"loss": 0.9849,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.2764512006457254,
|
|
"grad_norm": 0.058809321177156884,
|
|
"learning_rate": 0.0002724526779369204,
|
|
"loss": 0.963,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 0.27712383130423085,
|
|
"grad_norm": 0.0653799780787328,
|
|
"learning_rate": 0.00027224889693769615,
|
|
"loss": 1.0104,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.2777964619627363,
|
|
"grad_norm": 0.06621560992168146,
|
|
"learning_rate": 0.0002720444417798212,
|
|
"loss": 1.0291,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 0.2784690926212417,
|
|
"grad_norm": 0.07406952786150485,
|
|
"learning_rate": 0.0002718393135907922,
|
|
"loss": 0.9909,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.27914172327974707,
|
|
"grad_norm": 0.06411491189711067,
|
|
"learning_rate": 0.00027163351350181704,
|
|
"loss": 0.9848,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.2798143539382525,
|
|
"grad_norm": 0.06793287903099283,
|
|
"learning_rate": 0.0002714270426478093,
|
|
"loss": 0.9862,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.2804869845967579,
|
|
"grad_norm": 0.06071468451699804,
|
|
"learning_rate": 0.00027121990216738133,
|
|
"loss": 1.0121,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 0.28115961525526334,
|
|
"grad_norm": 0.06439036739512706,
|
|
"learning_rate": 0.00027101209320283824,
|
|
"loss": 0.9859,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.28183224591376876,
|
|
"grad_norm": 0.05944129800789135,
|
|
"learning_rate": 0.00027080361690017175,
|
|
"loss": 0.985,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 0.2825048765722742,
|
|
"grad_norm": 0.061243133513179636,
|
|
"learning_rate": 0.0002705944744090536,
|
|
"loss": 0.9901,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.28317750723077956,
|
|
"grad_norm": 0.06746937900880097,
|
|
"learning_rate": 0.0002703846668828292,
|
|
"loss": 1.0113,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 0.283850137889285,
|
|
"grad_norm": 0.05961664999737001,
|
|
"learning_rate": 0.00027017419547851167,
|
|
"loss": 1.0125,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.2845227685477904,
|
|
"grad_norm": 0.06395608178278261,
|
|
"learning_rate": 0.000269963061356775,
|
|
"loss": 1.0041,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 0.28519539920629583,
|
|
"grad_norm": 0.0662373002194877,
|
|
"learning_rate": 0.0002697512656819477,
|
|
"loss": 0.9957,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.28586802986480125,
|
|
"grad_norm": 0.06121788566565536,
|
|
"learning_rate": 0.0002695388096220068,
|
|
"loss": 1.0034,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.2865406605233067,
|
|
"grad_norm": 0.12099988704064925,
|
|
"learning_rate": 0.00026932569434857104,
|
|
"loss": 0.9866,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.28721329118181205,
|
|
"grad_norm": 0.0821229785976475,
|
|
"learning_rate": 0.00026911192103689426,
|
|
"loss": 1.0447,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 0.28788592184031747,
|
|
"grad_norm": 0.0709363771493444,
|
|
"learning_rate": 0.00026889749086585934,
|
|
"loss": 1.0257,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.2885585524988229,
|
|
"grad_norm": 0.07815479270365608,
|
|
"learning_rate": 0.00026868240501797154,
|
|
"loss": 1.0101,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 0.2892311831573283,
|
|
"grad_norm": 0.06846619759519718,
|
|
"learning_rate": 0.00026846666467935184,
|
|
"loss": 0.9875,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.28990381381583374,
|
|
"grad_norm": 0.06393262884579075,
|
|
"learning_rate": 0.00026825027103973047,
|
|
"loss": 0.9773,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 0.2905764444743391,
|
|
"grad_norm": 0.06516709589071833,
|
|
"learning_rate": 0.00026803322529244056,
|
|
"loss": 1.0289,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.29124907513284454,
|
|
"grad_norm": 0.06385243590151152,
|
|
"learning_rate": 0.0002678155286344111,
|
|
"loss": 0.9987,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 0.29192170579134996,
|
|
"grad_norm": 0.06202180097824563,
|
|
"learning_rate": 0.00026759718226616094,
|
|
"loss": 1.0024,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.2925943364498554,
|
|
"grad_norm": 0.06175463083001653,
|
|
"learning_rate": 0.00026737818739179156,
|
|
"loss": 0.966,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.2932669671083608,
|
|
"grad_norm": 0.0657893166600458,
|
|
"learning_rate": 0.00026715854521898094,
|
|
"loss": 0.9561,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.29393959776686623,
|
|
"grad_norm": 0.06518043702884813,
|
|
"learning_rate": 0.0002669382569589765,
|
|
"loss": 1.0009,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 0.2946122284253716,
|
|
"grad_norm": 0.060097592389166545,
|
|
"learning_rate": 0.00026671732382658873,
|
|
"loss": 0.958,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.295284859083877,
|
|
"grad_norm": 0.06436827662279306,
|
|
"learning_rate": 0.0002664957470401842,
|
|
"loss": 1.0057,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 0.29595748974238245,
|
|
"grad_norm": 0.06429485213345078,
|
|
"learning_rate": 0.0002662735278216793,
|
|
"loss": 1.0076,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.2966301204008879,
|
|
"grad_norm": 0.06111361270816539,
|
|
"learning_rate": 0.0002660506673965329,
|
|
"loss": 1.0002,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 0.2973027510593933,
|
|
"grad_norm": 0.06077882997745099,
|
|
"learning_rate": 0.00026582716699373996,
|
|
"loss": 0.9766,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.2979753817178987,
|
|
"grad_norm": 0.06458898622394665,
|
|
"learning_rate": 0.0002656030278458248,
|
|
"loss": 0.9988,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 0.2986480123764041,
|
|
"grad_norm": 0.06430844883272462,
|
|
"learning_rate": 0.0002653782511888341,
|
|
"loss": 0.9967,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.2993206430349095,
|
|
"grad_norm": 0.06247648417092113,
|
|
"learning_rate": 0.0002651528382623302,
|
|
"loss": 0.9935,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.29999327369341494,
|
|
"grad_norm": 0.06232942636976054,
|
|
"learning_rate": 0.0002649267903093842,
|
|
"loss": 0.9849,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.30066590435192037,
|
|
"grad_norm": 0.0625250684904268,
|
|
"learning_rate": 0.0002647001085765692,
|
|
"loss": 0.9912,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 0.3013385350104258,
|
|
"grad_norm": 0.05955858192998459,
|
|
"learning_rate": 0.0002644727943139534,
|
|
"loss": 0.9351,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.3020111656689312,
|
|
"grad_norm": 0.06612187680155705,
|
|
"learning_rate": 0.000264244848775093,
|
|
"loss": 0.9876,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 0.3026837963274366,
|
|
"grad_norm": 0.06173310312205808,
|
|
"learning_rate": 0.00026401627321702556,
|
|
"loss": 0.9625,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.303356426985942,
|
|
"grad_norm": 0.06917659803618215,
|
|
"learning_rate": 0.00026378706890026307,
|
|
"loss": 1.0174,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 0.30402905764444743,
|
|
"grad_norm": 0.06556532003559523,
|
|
"learning_rate": 0.00026355723708878484,
|
|
"loss": 0.961,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.30470168830295286,
|
|
"grad_norm": 0.06287181767310183,
|
|
"learning_rate": 0.00026332677905003047,
|
|
"loss": 0.9994,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 0.3053743189614583,
|
|
"grad_norm": 0.06342147196329556,
|
|
"learning_rate": 0.00026309569605489306,
|
|
"loss": 0.9949,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.3060469496199637,
|
|
"grad_norm": 0.06730056184618043,
|
|
"learning_rate": 0.00026286398937771225,
|
|
"loss": 0.9572,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.3067195802784691,
|
|
"grad_norm": 0.0602458232782407,
|
|
"learning_rate": 0.00026263166029626676,
|
|
"loss": 1.0175,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.3073922109369745,
|
|
"grad_norm": 0.0578357423452784,
|
|
"learning_rate": 0.000262398710091768,
|
|
"loss": 0.9709,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 0.3080648415954799,
|
|
"grad_norm": 0.060253136424880616,
|
|
"learning_rate": 0.00026216514004885237,
|
|
"loss": 0.981,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.30873747225398535,
|
|
"grad_norm": 0.06306283740453837,
|
|
"learning_rate": 0.00026193095145557455,
|
|
"loss": 0.9884,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 0.30941010291249077,
|
|
"grad_norm": 0.06721710931703574,
|
|
"learning_rate": 0.0002616961456034004,
|
|
"loss": 0.9555,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.31008273357099614,
|
|
"grad_norm": 0.06139546272042707,
|
|
"learning_rate": 0.0002614607237871996,
|
|
"loss": 0.9812,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 0.31075536422950156,
|
|
"grad_norm": 0.06423824892716638,
|
|
"learning_rate": 0.00026122468730523866,
|
|
"loss": 0.9589,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.311427994888007,
|
|
"grad_norm": 0.07227792403797496,
|
|
"learning_rate": 0.0002609880374591738,
|
|
"loss": 0.9587,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 0.3121006255465124,
|
|
"grad_norm": 0.06874247357671834,
|
|
"learning_rate": 0.0002607507755540438,
|
|
"loss": 0.9866,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.31277325620501784,
|
|
"grad_norm": 0.06658784787531048,
|
|
"learning_rate": 0.0002605129028982626,
|
|
"loss": 0.9238,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.31344588686352326,
|
|
"grad_norm": 0.06167580860405572,
|
|
"learning_rate": 0.0002602744208036122,
|
|
"loss": 1.0139,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.31411851752202863,
|
|
"grad_norm": 0.059141851611564725,
|
|
"learning_rate": 0.00026003533058523555,
|
|
"loss": 1.0216,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 0.31479114818053405,
|
|
"grad_norm": 0.059896676536393356,
|
|
"learning_rate": 0.00025979563356162905,
|
|
"loss": 0.9906,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.3154637788390395,
|
|
"grad_norm": 0.06304581093570669,
|
|
"learning_rate": 0.0002595553310546356,
|
|
"loss": 1.0064,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 0.3161364094975449,
|
|
"grad_norm": 0.0654071698153169,
|
|
"learning_rate": 0.00025931442438943686,
|
|
"loss": 1.0005,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.3168090401560503,
|
|
"grad_norm": 0.0850720234118959,
|
|
"learning_rate": 0.00025907291489454646,
|
|
"loss": 0.9737,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 0.31748167081455575,
|
|
"grad_norm": 0.05842749891613876,
|
|
"learning_rate": 0.0002588308039018023,
|
|
"loss": 1.0032,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.3181543014730611,
|
|
"grad_norm": 0.06233792615496135,
|
|
"learning_rate": 0.00025858809274635923,
|
|
"loss": 0.9542,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 0.31882693213156654,
|
|
"grad_norm": 0.05905164630122034,
|
|
"learning_rate": 0.000258344782766682,
|
|
"loss": 0.9624,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.31949956279007197,
|
|
"grad_norm": 0.0700392215816944,
|
|
"learning_rate": 0.0002581008753045375,
|
|
"loss": 0.9903,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.3201721934485774,
|
|
"grad_norm": 0.06257351219582429,
|
|
"learning_rate": 0.00025785637170498753,
|
|
"loss": 0.9664,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.3208448241070828,
|
|
"grad_norm": 0.06323178744828126,
|
|
"learning_rate": 0.0002576112733163815,
|
|
"loss": 0.9698,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 0.32151745476558824,
|
|
"grad_norm": 0.06424742383987511,
|
|
"learning_rate": 0.00025736558149034867,
|
|
"loss": 0.9562,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.3221900854240936,
|
|
"grad_norm": 0.06672423896058365,
|
|
"learning_rate": 0.00025711929758179107,
|
|
"loss": 1.0332,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 0.32286271608259903,
|
|
"grad_norm": 0.0670913173713535,
|
|
"learning_rate": 0.00025687242294887574,
|
|
"loss": 0.9857,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.32353534674110446,
|
|
"grad_norm": 0.06892018721458289,
|
|
"learning_rate": 0.0002566249589530274,
|
|
"loss": 0.9551,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 0.3242079773996099,
|
|
"grad_norm": 0.06009175251349125,
|
|
"learning_rate": 0.00025637690695892094,
|
|
"loss": 1.0093,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.3248806080581153,
|
|
"grad_norm": 0.08176920992681376,
|
|
"learning_rate": 0.0002561282683344737,
|
|
"loss": 1.0043,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 0.32555323871662073,
|
|
"grad_norm": 0.07145448290324558,
|
|
"learning_rate": 0.00025587904445083823,
|
|
"loss": 1.0117,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.3262258693751261,
|
|
"grad_norm": 0.07543609879058265,
|
|
"learning_rate": 0.00025562923668239455,
|
|
"loss": 1.0457,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.3268985000336315,
|
|
"grad_norm": 0.06829416483656771,
|
|
"learning_rate": 0.0002553788464067425,
|
|
"loss": 0.9526,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.32757113069213695,
|
|
"grad_norm": 0.06395861434359994,
|
|
"learning_rate": 0.00025512787500469426,
|
|
"loss": 0.9813,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 0.32824376135064237,
|
|
"grad_norm": 0.0611865196811849,
|
|
"learning_rate": 0.00025487632386026686,
|
|
"loss": 0.974,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.3289163920091478,
|
|
"grad_norm": 0.06491018496652956,
|
|
"learning_rate": 0.0002546241943606742,
|
|
"loss": 0.9992,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 0.32958902266765316,
|
|
"grad_norm": 0.06292898906856961,
|
|
"learning_rate": 0.00025437148789631984,
|
|
"loss": 1.0456,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.3302616533261586,
|
|
"grad_norm": 0.06167564316715784,
|
|
"learning_rate": 0.0002541182058607887,
|
|
"loss": 0.9629,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 0.330934283984664,
|
|
"grad_norm": 0.06391041486032593,
|
|
"learning_rate": 0.00025386434965084015,
|
|
"loss": 1.0035,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.33160691464316944,
|
|
"grad_norm": 0.06787356716894612,
|
|
"learning_rate": 0.00025360992066639985,
|
|
"loss": 1.0041,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 0.33227954530167486,
|
|
"grad_norm": 0.06086575812752024,
|
|
"learning_rate": 0.0002533549203105519,
|
|
"loss": 0.9999,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.3329521759601803,
|
|
"grad_norm": 0.06057902002599989,
|
|
"learning_rate": 0.00025309934998953156,
|
|
"loss": 0.9367,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.33362480661868565,
|
|
"grad_norm": 0.0593042754107753,
|
|
"learning_rate": 0.0002528432111127171,
|
|
"loss": 0.9398,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.3342974372771911,
|
|
"grad_norm": 0.059538222557524834,
|
|
"learning_rate": 0.0002525865050926222,
|
|
"loss": 1.0005,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 0.3349700679356965,
|
|
"grad_norm": 0.06011503757302019,
|
|
"learning_rate": 0.00025232923334488804,
|
|
"loss": 1.0205,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.3356426985942019,
|
|
"grad_norm": 0.06122430156326717,
|
|
"learning_rate": 0.0002520713972882758,
|
|
"loss": 1.0145,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 0.33631532925270735,
|
|
"grad_norm": 0.06805159146871266,
|
|
"learning_rate": 0.00025181299834465854,
|
|
"loss": 1.001,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.3369879599112128,
|
|
"grad_norm": 0.057493415342664264,
|
|
"learning_rate": 0.00025155403793901323,
|
|
"loss": 0.9765,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 0.33766059056971814,
|
|
"grad_norm": 0.06235990972901437,
|
|
"learning_rate": 0.0002512945174994134,
|
|
"loss": 0.9828,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.33833322122822357,
|
|
"grad_norm": 0.06022070223153401,
|
|
"learning_rate": 0.00025103443845702077,
|
|
"loss": 1.0284,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 0.339005851886729,
|
|
"grad_norm": 0.06135291469414431,
|
|
"learning_rate": 0.0002507738022460776,
|
|
"loss": 0.9375,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.3396784825452344,
|
|
"grad_norm": 0.0643989781631382,
|
|
"learning_rate": 0.0002505126103038989,
|
|
"loss": 0.9855,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.34035111320373984,
|
|
"grad_norm": 0.06378266187767027,
|
|
"learning_rate": 0.000250250864070864,
|
|
"loss": 0.9545,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.34102374386224527,
|
|
"grad_norm": 0.0673159597514603,
|
|
"learning_rate": 0.0002499885649904092,
|
|
"loss": 1.0326,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 0.34169637452075063,
|
|
"grad_norm": 0.06067930335234511,
|
|
"learning_rate": 0.0002497257145090195,
|
|
"loss": 0.9806,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.34236900517925606,
|
|
"grad_norm": 0.059146219957598896,
|
|
"learning_rate": 0.0002494623140762207,
|
|
"loss": 0.9508,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 0.3430416358377615,
|
|
"grad_norm": 0.06451800724463314,
|
|
"learning_rate": 0.00024919836514457127,
|
|
"loss": 0.9896,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.3437142664962669,
|
|
"grad_norm": 0.06081402928938052,
|
|
"learning_rate": 0.0002489338691696546,
|
|
"loss": 0.9383,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 0.34438689715477233,
|
|
"grad_norm": 0.06009484154209872,
|
|
"learning_rate": 0.00024866882761007076,
|
|
"loss": 0.9952,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.34505952781327776,
|
|
"grad_norm": 0.05973548410026698,
|
|
"learning_rate": 0.00024840324192742846,
|
|
"loss": 0.9796,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 0.3457321584717831,
|
|
"grad_norm": 0.06553152032293165,
|
|
"learning_rate": 0.00024813711358633717,
|
|
"loss": 0.9923,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.34640478913028855,
|
|
"grad_norm": 0.06471026587362512,
|
|
"learning_rate": 0.00024787044405439885,
|
|
"loss": 1.0067,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.347077419788794,
|
|
"grad_norm": 0.05757825937888166,
|
|
"learning_rate": 0.0002476032348021999,
|
|
"loss": 0.9595,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.3477500504472994,
|
|
"grad_norm": 0.05837414362073145,
|
|
"learning_rate": 0.0002473354873033033,
|
|
"loss": 0.998,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 0.3484226811058048,
|
|
"grad_norm": 0.061813392737187556,
|
|
"learning_rate": 0.00024706720303423993,
|
|
"loss": 0.967,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.3490953117643102,
|
|
"grad_norm": 0.0634130054981169,
|
|
"learning_rate": 0.000246798383474501,
|
|
"loss": 0.9489,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 0.3497679424228156,
|
|
"grad_norm": 0.06617696567513282,
|
|
"learning_rate": 0.0002465290301065296,
|
|
"loss": 0.9629,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.35044057308132104,
|
|
"grad_norm": 0.060723179362491476,
|
|
"learning_rate": 0.00024625914441571265,
|
|
"loss": 0.9565,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 0.35111320373982646,
|
|
"grad_norm": 0.0612745323214066,
|
|
"learning_rate": 0.0002459887278903724,
|
|
"loss": 0.9811,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 0.3517858343983319,
|
|
"grad_norm": 0.0655966405158917,
|
|
"learning_rate": 0.00024571778202175877,
|
|
"loss": 0.9754,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 0.3524584650568373,
|
|
"grad_norm": 0.06727151630166943,
|
|
"learning_rate": 0.0002454463083040405,
|
|
"loss": 0.9293,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 0.3531310957153427,
|
|
"grad_norm": 0.06178442765486125,
|
|
"learning_rate": 0.00024517430823429764,
|
|
"loss": 1.0013,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.3538037263738481,
|
|
"grad_norm": 0.06683142790810723,
|
|
"learning_rate": 0.00024490178331251246,
|
|
"loss": 0.9749,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 0.35447635703235353,
|
|
"grad_norm": 0.06054586886009189,
|
|
"learning_rate": 0.0002446287350415618,
|
|
"loss": 0.9478,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 0.35514898769085895,
|
|
"grad_norm": 0.05950647200965788,
|
|
"learning_rate": 0.0002443551649272086,
|
|
"loss": 0.9934,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 0.3558216183493644,
|
|
"grad_norm": 0.0625836813067742,
|
|
"learning_rate": 0.00024408107447809353,
|
|
"loss": 0.9213,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 0.3564942490078698,
|
|
"grad_norm": 0.05991503892499088,
|
|
"learning_rate": 0.00024380646520572675,
|
|
"loss": 0.9831,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.35716687966637517,
|
|
"grad_norm": 0.05370777920413785,
|
|
"learning_rate": 0.0002435313386244795,
|
|
"loss": 0.9479,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 0.3578395103248806,
|
|
"grad_norm": 0.06270981502346962,
|
|
"learning_rate": 0.00024325569625157587,
|
|
"loss": 0.9965,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 0.358512140983386,
|
|
"grad_norm": 0.0613724918822416,
|
|
"learning_rate": 0.00024297953960708416,
|
|
"loss": 0.9595,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 0.35918477164189144,
|
|
"grad_norm": 0.06038112429006151,
|
|
"learning_rate": 0.00024270287021390898,
|
|
"loss": 0.9411,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 0.35985740230039687,
|
|
"grad_norm": 0.06111430494806048,
|
|
"learning_rate": 0.00024242568959778236,
|
|
"loss": 0.9649,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.3605300329589023,
|
|
"grad_norm": 0.05995428682403505,
|
|
"learning_rate": 0.0002421479992872556,
|
|
"loss": 1.0257,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 0.36120266361740766,
|
|
"grad_norm": 0.06396379465235519,
|
|
"learning_rate": 0.0002418698008136908,
|
|
"loss": 0.9543,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 0.3618752942759131,
|
|
"grad_norm": 0.06323410022200342,
|
|
"learning_rate": 0.00024159109571125236,
|
|
"loss": 0.9651,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 0.3625479249344185,
|
|
"grad_norm": 0.06167970738868046,
|
|
"learning_rate": 0.00024131188551689852,
|
|
"loss": 0.979,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 0.36322055559292393,
|
|
"grad_norm": 0.06687815516918413,
|
|
"learning_rate": 0.0002410321717703731,
|
|
"loss": 0.9907,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.36389318625142936,
|
|
"grad_norm": 0.05564819242650189,
|
|
"learning_rate": 0.00024075195601419659,
|
|
"loss": 0.9618,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 0.3645658169099348,
|
|
"grad_norm": 0.05748296768430323,
|
|
"learning_rate": 0.00024047123979365804,
|
|
"loss": 0.9354,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 0.36523844756844015,
|
|
"grad_norm": 0.0631764311365665,
|
|
"learning_rate": 0.0002401900246568063,
|
|
"loss": 0.9555,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 0.3659110782269456,
|
|
"grad_norm": 0.05880985537442802,
|
|
"learning_rate": 0.0002399083121544416,
|
|
"loss": 0.9891,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 0.366583708885451,
|
|
"grad_norm": 0.061391215929407515,
|
|
"learning_rate": 0.00023962610384010706,
|
|
"loss": 0.9924,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.3672563395439564,
|
|
"grad_norm": 0.06316691200369454,
|
|
"learning_rate": 0.0002393434012700798,
|
|
"loss": 0.9731,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 0.36792897020246185,
|
|
"grad_norm": 0.06400332296272057,
|
|
"learning_rate": 0.00023906020600336273,
|
|
"loss": 0.9717,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 0.3686016008609672,
|
|
"grad_norm": 0.06081035933701875,
|
|
"learning_rate": 0.0002387765196016758,
|
|
"loss": 0.9974,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 0.36927423151947264,
|
|
"grad_norm": 0.05692689703123599,
|
|
"learning_rate": 0.0002384923436294474,
|
|
"loss": 0.9584,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 0.36994686217797806,
|
|
"grad_norm": 0.05920016044645724,
|
|
"learning_rate": 0.00023820767965380567,
|
|
"loss": 0.9977,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.3706194928364835,
|
|
"grad_norm": 0.06108926211201016,
|
|
"learning_rate": 0.0002379225292445699,
|
|
"loss": 0.955,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 0.3712921234949889,
|
|
"grad_norm": 0.060480343091332565,
|
|
"learning_rate": 0.00023763689397424202,
|
|
"loss": 0.9811,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 0.37196475415349434,
|
|
"grad_norm": 0.06059292270073528,
|
|
"learning_rate": 0.00023735077541799766,
|
|
"loss": 0.9839,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 0.3726373848119997,
|
|
"grad_norm": 0.0609211390213739,
|
|
"learning_rate": 0.00023706417515367763,
|
|
"loss": 1.0154,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 0.37331001547050513,
|
|
"grad_norm": 0.058067385580532836,
|
|
"learning_rate": 0.00023677709476177915,
|
|
"loss": 0.9757,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.37398264612901055,
|
|
"grad_norm": 0.07045648483439977,
|
|
"learning_rate": 0.00023648953582544732,
|
|
"loss": 1.0288,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 0.374655276787516,
|
|
"grad_norm": 0.06288843858803263,
|
|
"learning_rate": 0.00023620149993046612,
|
|
"loss": 0.99,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 0.3753279074460214,
|
|
"grad_norm": 0.058383628447802906,
|
|
"learning_rate": 0.00023591298866524973,
|
|
"loss": 0.9501,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 0.3760005381045268,
|
|
"grad_norm": 0.06067878041524621,
|
|
"learning_rate": 0.00023562400362083394,
|
|
"loss": 1.0125,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 0.3766731687630322,
|
|
"grad_norm": 0.05983019573461252,
|
|
"learning_rate": 0.00023533454639086722,
|
|
"loss": 0.9508,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.3773457994215376,
|
|
"grad_norm": 0.06531517886917629,
|
|
"learning_rate": 0.00023504461857160202,
|
|
"loss": 1.0286,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 0.37801843008004304,
|
|
"grad_norm": 0.05526918386931232,
|
|
"learning_rate": 0.0002347542217618858,
|
|
"loss": 0.9792,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 0.37869106073854847,
|
|
"grad_norm": 0.06536819488016454,
|
|
"learning_rate": 0.00023446335756315237,
|
|
"loss": 0.9562,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 0.3793636913970539,
|
|
"grad_norm": 0.05798352559617903,
|
|
"learning_rate": 0.0002341720275794132,
|
|
"loss": 0.9227,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 0.3800363220555593,
|
|
"grad_norm": 0.065861968577201,
|
|
"learning_rate": 0.00023388023341724815,
|
|
"loss": 0.9576,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.3807089527140647,
|
|
"grad_norm": 0.07654494776838282,
|
|
"learning_rate": 0.00023358797668579704,
|
|
"loss": 0.9635,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 0.3813815833725701,
|
|
"grad_norm": 0.05681789783018427,
|
|
"learning_rate": 0.00023329525899675043,
|
|
"loss": 0.9992,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 0.38205421403107553,
|
|
"grad_norm": 0.061087863865119885,
|
|
"learning_rate": 0.00023300208196434105,
|
|
"loss": 0.9922,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 0.38272684468958096,
|
|
"grad_norm": 0.06490394468959859,
|
|
"learning_rate": 0.00023270844720533468,
|
|
"loss": 0.9434,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 0.3833994753480864,
|
|
"grad_norm": 0.06590103407762701,
|
|
"learning_rate": 0.0002324143563390212,
|
|
"loss": 0.9662,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.3840721060065918,
|
|
"grad_norm": 0.061039736014859715,
|
|
"learning_rate": 0.00023211981098720592,
|
|
"loss": 0.9156,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 0.3847447366650972,
|
|
"grad_norm": 0.06153018865866922,
|
|
"learning_rate": 0.00023182481277420048,
|
|
"loss": 1.0125,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 0.3854173673236026,
|
|
"grad_norm": 0.05678690696784472,
|
|
"learning_rate": 0.00023152936332681363,
|
|
"loss": 0.9962,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 0.386089997982108,
|
|
"grad_norm": 0.05884783737931574,
|
|
"learning_rate": 0.0002312334642743428,
|
|
"loss": 0.9485,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 0.38676262864061345,
|
|
"grad_norm": 0.06427259534288689,
|
|
"learning_rate": 0.00023093711724856477,
|
|
"loss": 0.9418,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.3874352592991189,
|
|
"grad_norm": 0.06429957247409737,
|
|
"learning_rate": 0.0002306403238837266,
|
|
"loss": 0.9483,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 0.38810788995762424,
|
|
"grad_norm": 0.062021118991297704,
|
|
"learning_rate": 0.00023034308581653686,
|
|
"loss": 0.9411,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 0.38878052061612967,
|
|
"grad_norm": 0.06410320324942427,
|
|
"learning_rate": 0.0002300454046861565,
|
|
"loss": 0.9566,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 0.3894531512746351,
|
|
"grad_norm": 0.06153174109726368,
|
|
"learning_rate": 0.00022974728213418977,
|
|
"loss": 0.9353,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 0.3901257819331405,
|
|
"grad_norm": 0.06383339692853464,
|
|
"learning_rate": 0.00022944871980467514,
|
|
"loss": 0.9259,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.39079841259164594,
|
|
"grad_norm": 0.06314785752506272,
|
|
"learning_rate": 0.0002291497193440764,
|
|
"loss": 0.9627,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 0.39147104325015136,
|
|
"grad_norm": 0.060212140876222744,
|
|
"learning_rate": 0.00022885028240127351,
|
|
"loss": 1.0133,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 0.39214367390865673,
|
|
"grad_norm": 0.06021180941945565,
|
|
"learning_rate": 0.0002285504106275533,
|
|
"loss": 0.9555,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 0.39281630456716216,
|
|
"grad_norm": 0.05937678999742469,
|
|
"learning_rate": 0.00022825010567660065,
|
|
"loss": 0.9888,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 0.3934889352256676,
|
|
"grad_norm": 0.06756328132509674,
|
|
"learning_rate": 0.00022794936920448927,
|
|
"loss": 0.9709,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.394161565884173,
|
|
"grad_norm": 0.05770914377377636,
|
|
"learning_rate": 0.0002276482028696725,
|
|
"loss": 0.9196,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 0.39483419654267843,
|
|
"grad_norm": 0.06259513664165894,
|
|
"learning_rate": 0.00022734660833297426,
|
|
"loss": 0.9551,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 0.39550682720118385,
|
|
"grad_norm": 0.06495754463275584,
|
|
"learning_rate": 0.00022704458725757975,
|
|
"loss": 0.9212,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 0.3961794578596892,
|
|
"grad_norm": 0.057088746499664306,
|
|
"learning_rate": 0.0002267421413090266,
|
|
"loss": 0.9333,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 0.39685208851819465,
|
|
"grad_norm": 0.06493801528794958,
|
|
"learning_rate": 0.0002264392721551952,
|
|
"loss": 0.9824,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.39752471917670007,
|
|
"grad_norm": 0.06280692484375891,
|
|
"learning_rate": 0.00022613598146629992,
|
|
"loss": 0.96,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 0.3981973498352055,
|
|
"grad_norm": 0.0686225385390284,
|
|
"learning_rate": 0.00022583227091487975,
|
|
"loss": 0.9956,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 0.3988699804937109,
|
|
"grad_norm": 0.06267398055791588,
|
|
"learning_rate": 0.00022552814217578898,
|
|
"loss": 0.9574,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 0.39954261115221634,
|
|
"grad_norm": 0.05951574707871612,
|
|
"learning_rate": 0.00022522359692618815,
|
|
"loss": 0.9197,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 0.4002152418107217,
|
|
"grad_norm": 0.06398428054392459,
|
|
"learning_rate": 0.00022491863684553462,
|
|
"loss": 1.0082,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.40088787246922714,
|
|
"grad_norm": 0.0700256713940724,
|
|
"learning_rate": 0.0002246132636155734,
|
|
"loss": 0.9425,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 0.40156050312773256,
|
|
"grad_norm": 0.06169910527354521,
|
|
"learning_rate": 0.000224307478920328,
|
|
"loss": 0.9251,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 0.402233133786238,
|
|
"grad_norm": 0.05819021747648658,
|
|
"learning_rate": 0.00022400128444609085,
|
|
"loss": 1.0028,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 0.4029057644447434,
|
|
"grad_norm": 0.060335882535438916,
|
|
"learning_rate": 0.00022369468188141424,
|
|
"loss": 0.955,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 0.40357839510324883,
|
|
"grad_norm": 0.061545586889936825,
|
|
"learning_rate": 0.00022338767291710091,
|
|
"loss": 0.9493,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.4042510257617542,
|
|
"grad_norm": 0.05466365110860337,
|
|
"learning_rate": 0.0002230802592461948,
|
|
"loss": 0.9248,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 0.4049236564202596,
|
|
"grad_norm": 0.057841302052886304,
|
|
"learning_rate": 0.00022277244256397157,
|
|
"loss": 0.9261,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.40559628707876505,
|
|
"grad_norm": 0.06424399123898437,
|
|
"learning_rate": 0.00022246422456792948,
|
|
"loss": 0.9429,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 0.4062689177372705,
|
|
"grad_norm": 0.0552749410063343,
|
|
"learning_rate": 0.00022215560695777967,
|
|
"loss": 0.9515,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.4069415483957759,
|
|
"grad_norm": 0.06265094401317216,
|
|
"learning_rate": 0.00022184659143543724,
|
|
"loss": 0.9341,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.40761417905428127,
|
|
"grad_norm": 0.05546283825158401,
|
|
"learning_rate": 0.00022153717970501148,
|
|
"loss": 1.016,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.4082868097127867,
|
|
"grad_norm": 0.06269519935456656,
|
|
"learning_rate": 0.00022122737347279677,
|
|
"loss": 0.9844,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 0.4089594403712921,
|
|
"grad_norm": 0.06296574489581559,
|
|
"learning_rate": 0.00022091717444726281,
|
|
"loss": 0.9714,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.40963207102979754,
|
|
"grad_norm": 0.09543369741779884,
|
|
"learning_rate": 0.0002206065843390456,
|
|
"loss": 0.9487,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 0.41030470168830296,
|
|
"grad_norm": 0.0636053950815713,
|
|
"learning_rate": 0.0002202956048609378,
|
|
"loss": 0.9656,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.4109773323468084,
|
|
"grad_norm": 0.05681442271993179,
|
|
"learning_rate": 0.0002199842377278792,
|
|
"loss": 0.9436,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 0.41164996300531376,
|
|
"grad_norm": 0.05973400372173602,
|
|
"learning_rate": 0.00021967248465694746,
|
|
"loss": 0.9305,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.4123225936638192,
|
|
"grad_norm": 0.06157069248280991,
|
|
"learning_rate": 0.0002193603473673485,
|
|
"loss": 0.8928,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 0.4129952243223246,
|
|
"grad_norm": 0.05798487431085614,
|
|
"learning_rate": 0.00021904782758040708,
|
|
"loss": 0.9809,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 0.41366785498083003,
|
|
"grad_norm": 0.06072960578084768,
|
|
"learning_rate": 0.00021873492701955736,
|
|
"loss": 0.9573,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.41434048563933545,
|
|
"grad_norm": 0.05940975893380244,
|
|
"learning_rate": 0.0002184216474103332,
|
|
"loss": 0.9243,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 0.4150131162978409,
|
|
"grad_norm": 0.054497510448308945,
|
|
"learning_rate": 0.00021810799048035885,
|
|
"loss": 0.9216,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 0.41568574695634625,
|
|
"grad_norm": 0.08550065849959732,
|
|
"learning_rate": 0.00021779395795933944,
|
|
"loss": 0.9765,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 0.41635837761485167,
|
|
"grad_norm": 0.059486110560569884,
|
|
"learning_rate": 0.0002174795515790512,
|
|
"loss": 0.9201,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 0.4170310082733571,
|
|
"grad_norm": 0.05891019294903501,
|
|
"learning_rate": 0.00021716477307333204,
|
|
"loss": 0.9368,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.4177036389318625,
|
|
"grad_norm": 0.06073346780088524,
|
|
"learning_rate": 0.00021684962417807218,
|
|
"loss": 0.9416,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 0.41837626959036794,
|
|
"grad_norm": 0.06057976675059688,
|
|
"learning_rate": 0.0002165341066312043,
|
|
"loss": 0.9726,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 0.41904890024887337,
|
|
"grad_norm": 0.06057164967765705,
|
|
"learning_rate": 0.00021621822217269404,
|
|
"loss": 0.9431,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 0.41972153090737874,
|
|
"grad_norm": 0.058279122248688205,
|
|
"learning_rate": 0.00021590197254453043,
|
|
"loss": 0.908,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 0.42039416156588416,
|
|
"grad_norm": 0.0568996491174471,
|
|
"learning_rate": 0.00021558535949071632,
|
|
"loss": 0.9896,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.4210667922243896,
|
|
"grad_norm": 0.059337189805589746,
|
|
"learning_rate": 0.00021526838475725875,
|
|
"loss": 0.9769,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 0.421739422882895,
|
|
"grad_norm": 0.05438214173444285,
|
|
"learning_rate": 0.00021495105009215924,
|
|
"loss": 0.9555,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 0.42241205354140043,
|
|
"grad_norm": 0.06703375158787411,
|
|
"learning_rate": 0.00021463335724540415,
|
|
"loss": 0.9732,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 0.42308468419990586,
|
|
"grad_norm": 0.062204374471896154,
|
|
"learning_rate": 0.00021431530796895516,
|
|
"loss": 0.9394,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 0.4237573148584112,
|
|
"grad_norm": 0.05834404775925495,
|
|
"learning_rate": 0.00021399690401673958,
|
|
"loss": 0.9172,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.42442994551691665,
|
|
"grad_norm": 0.05859617011162029,
|
|
"learning_rate": 0.0002136781471446405,
|
|
"loss": 0.915,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 0.4251025761754221,
|
|
"grad_norm": 0.06096509769736175,
|
|
"learning_rate": 0.0002133590391104873,
|
|
"loss": 1.0003,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 0.4257752068339275,
|
|
"grad_norm": 0.0598832189207119,
|
|
"learning_rate": 0.00021303958167404594,
|
|
"loss": 0.9468,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 0.4264478374924329,
|
|
"grad_norm": 0.08446921453288947,
|
|
"learning_rate": 0.00021271977659700916,
|
|
"loss": 0.9523,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 0.4271204681509383,
|
|
"grad_norm": 0.06424965869527009,
|
|
"learning_rate": 0.00021239962564298674,
|
|
"loss": 0.978,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 0.4277930988094437,
|
|
"grad_norm": 0.05617639752428042,
|
|
"learning_rate": 0.00021207913057749603,
|
|
"loss": 0.9014,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 0.42846572946794914,
|
|
"grad_norm": 0.059893940523906626,
|
|
"learning_rate": 0.00021175829316795182,
|
|
"loss": 0.9579,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 0.42913836012645457,
|
|
"grad_norm": 0.06181936765038198,
|
|
"learning_rate": 0.00021143711518365694,
|
|
"loss": 0.9371,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 0.42981099078496,
|
|
"grad_norm": 0.06379488040498672,
|
|
"learning_rate": 0.00021111559839579236,
|
|
"loss": 0.9499,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 0.4304836214434654,
|
|
"grad_norm": 0.06857337809733563,
|
|
"learning_rate": 0.00021079374457740735,
|
|
"loss": 0.9653,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.4311562521019708,
|
|
"grad_norm": 0.05352630790450643,
|
|
"learning_rate": 0.0002104715555034099,
|
|
"loss": 0.9247,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 0.4318288827604762,
|
|
"grad_norm": 0.06406514593002467,
|
|
"learning_rate": 0.0002101490329505567,
|
|
"loss": 0.9402,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 0.43250151341898163,
|
|
"grad_norm": 0.066879962816942,
|
|
"learning_rate": 0.00020982617869744354,
|
|
"loss": 0.9366,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 0.43317414407748706,
|
|
"grad_norm": 0.05966424809748197,
|
|
"learning_rate": 0.00020950299452449534,
|
|
"loss": 1.0069,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 0.4338467747359925,
|
|
"grad_norm": 0.06577677126311732,
|
|
"learning_rate": 0.0002091794822139565,
|
|
"loss": 0.9677,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 0.4345194053944979,
|
|
"grad_norm": 0.06266553082204902,
|
|
"learning_rate": 0.00020885564354988084,
|
|
"loss": 0.9236,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 0.4351920360530033,
|
|
"grad_norm": 0.05941360402735012,
|
|
"learning_rate": 0.0002085314803181221,
|
|
"loss": 0.9181,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 0.4358646667115087,
|
|
"grad_norm": 0.06190268094887974,
|
|
"learning_rate": 0.00020820699430632375,
|
|
"loss": 0.9551,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 0.4365372973700141,
|
|
"grad_norm": 0.06056805532726624,
|
|
"learning_rate": 0.00020788218730390933,
|
|
"loss": 0.8976,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 0.43720992802851955,
|
|
"grad_norm": 0.06124181758108306,
|
|
"learning_rate": 0.00020755706110207246,
|
|
"loss": 0.8889,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.43788255868702497,
|
|
"grad_norm": 0.06113599870511463,
|
|
"learning_rate": 0.0002072316174937671,
|
|
"loss": 0.9618,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 0.4385551893455304,
|
|
"grad_norm": 0.06416322533929952,
|
|
"learning_rate": 0.0002069058582736976,
|
|
"loss": 1.0092,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 0.43922782000403576,
|
|
"grad_norm": 0.05978927844116775,
|
|
"learning_rate": 0.00020657978523830876,
|
|
"loss": 0.9675,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 0.4399004506625412,
|
|
"grad_norm": 0.062194118475185425,
|
|
"learning_rate": 0.00020625340018577592,
|
|
"loss": 0.9499,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 0.4405730813210466,
|
|
"grad_norm": 0.05696297738645936,
|
|
"learning_rate": 0.00020592670491599522,
|
|
"loss": 0.9493,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 0.44124571197955204,
|
|
"grad_norm": 0.05793758859996811,
|
|
"learning_rate": 0.00020559970123057339,
|
|
"loss": 0.935,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 0.44191834263805746,
|
|
"grad_norm": 0.05837746992606914,
|
|
"learning_rate": 0.000205272390932818,
|
|
"loss": 0.8965,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 0.4425909732965629,
|
|
"grad_norm": 0.058159737597925955,
|
|
"learning_rate": 0.0002049447758277275,
|
|
"loss": 0.9659,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 0.44326360395506825,
|
|
"grad_norm": 0.05827695355235013,
|
|
"learning_rate": 0.0002046168577219813,
|
|
"loss": 0.9865,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 0.4439362346135737,
|
|
"grad_norm": 0.05878679479381314,
|
|
"learning_rate": 0.00020428863842392961,
|
|
"loss": 0.9387,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.4446088652720791,
|
|
"grad_norm": 0.05644539332794492,
|
|
"learning_rate": 0.0002039601197435837,
|
|
"loss": 0.9081,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 0.4452814959305845,
|
|
"grad_norm": 0.06743915523773947,
|
|
"learning_rate": 0.00020363130349260585,
|
|
"loss": 0.9593,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 0.44595412658908995,
|
|
"grad_norm": 0.05757563715919714,
|
|
"learning_rate": 0.00020330219148429927,
|
|
"loss": 0.8999,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 0.4466267572475953,
|
|
"grad_norm": 0.05965160618003564,
|
|
"learning_rate": 0.00020297278553359812,
|
|
"loss": 0.9624,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 0.44729938790610074,
|
|
"grad_norm": 0.06549241702388957,
|
|
"learning_rate": 0.0002026430874570577,
|
|
"loss": 0.9429,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 0.44797201856460617,
|
|
"grad_norm": 0.06099252478931159,
|
|
"learning_rate": 0.0002023130990728442,
|
|
"loss": 0.9091,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 0.4486446492231116,
|
|
"grad_norm": 0.06214228049542274,
|
|
"learning_rate": 0.0002019828222007247,
|
|
"loss": 0.9672,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 0.449317279881617,
|
|
"grad_norm": 0.06099156836145796,
|
|
"learning_rate": 0.0002016522586620572,
|
|
"loss": 0.9447,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 0.44998991054012244,
|
|
"grad_norm": 0.05582309593148562,
|
|
"learning_rate": 0.0002013214102797807,
|
|
"loss": 0.892,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 0.4506625411986278,
|
|
"grad_norm": 0.06172945054743475,
|
|
"learning_rate": 0.0002009902788784049,
|
|
"loss": 0.9256,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.45133517185713323,
|
|
"grad_norm": 0.0703038249721671,
|
|
"learning_rate": 0.00020065886628400012,
|
|
"loss": 0.9613,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 0.45200780251563866,
|
|
"grad_norm": 0.05656485171471553,
|
|
"learning_rate": 0.0002003271743241876,
|
|
"loss": 0.994,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 0.4526804331741441,
|
|
"grad_norm": 0.06050217278714377,
|
|
"learning_rate": 0.00019999520482812905,
|
|
"loss": 0.9483,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 0.4533530638326495,
|
|
"grad_norm": 0.05935441754881465,
|
|
"learning_rate": 0.00019966295962651676,
|
|
"loss": 0.9528,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 0.45402569449115493,
|
|
"grad_norm": 0.06499672588065591,
|
|
"learning_rate": 0.0001993304405515633,
|
|
"loss": 0.9232,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 0.4546983251496603,
|
|
"grad_norm": 0.06326505765450459,
|
|
"learning_rate": 0.00019899764943699167,
|
|
"loss": 0.924,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 0.4553709558081657,
|
|
"grad_norm": 0.06670527494401302,
|
|
"learning_rate": 0.00019866458811802513,
|
|
"loss": 0.9689,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 0.45604358646667115,
|
|
"grad_norm": 0.05756123229797278,
|
|
"learning_rate": 0.00019833125843137685,
|
|
"loss": 0.9486,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 0.45671621712517657,
|
|
"grad_norm": 0.057811629402424236,
|
|
"learning_rate": 0.00019799766221524002,
|
|
"loss": 0.9232,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 0.457388847783682,
|
|
"grad_norm": 0.0574321256942919,
|
|
"learning_rate": 0.00019766380130927772,
|
|
"loss": 0.899,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.4580614784421874,
|
|
"grad_norm": 0.05998915305723974,
|
|
"learning_rate": 0.00019732967755461264,
|
|
"loss": 0.9645,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 0.4587341091006928,
|
|
"grad_norm": 0.053707341836902525,
|
|
"learning_rate": 0.00019699529279381688,
|
|
"loss": 0.9359,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 0.4594067397591982,
|
|
"grad_norm": 0.062177399130418344,
|
|
"learning_rate": 0.0001966606488709022,
|
|
"loss": 0.9692,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 0.46007937041770364,
|
|
"grad_norm": 0.06399892597430598,
|
|
"learning_rate": 0.00019632574763130914,
|
|
"loss": 0.9866,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 0.46075200107620906,
|
|
"grad_norm": 0.057829963118091195,
|
|
"learning_rate": 0.0001959905909218976,
|
|
"loss": 0.9061,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 0.4614246317347145,
|
|
"grad_norm": 0.05680345513459258,
|
|
"learning_rate": 0.00019565518059093607,
|
|
"loss": 0.8951,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 0.4620972623932199,
|
|
"grad_norm": 0.057098569579280174,
|
|
"learning_rate": 0.00019531951848809177,
|
|
"loss": 0.9231,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 0.4627698930517253,
|
|
"grad_norm": 0.057724603331952985,
|
|
"learning_rate": 0.0001949836064644204,
|
|
"loss": 0.8883,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 0.4634425237102307,
|
|
"grad_norm": 0.07527210764389476,
|
|
"learning_rate": 0.0001946474463723558,
|
|
"loss": 0.9226,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 0.4641151543687361,
|
|
"grad_norm": 0.06129756698317641,
|
|
"learning_rate": 0.00019431104006569977,
|
|
"loss": 0.9479,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.46478778502724155,
|
|
"grad_norm": 0.05622430370986196,
|
|
"learning_rate": 0.00019397438939961208,
|
|
"loss": 0.9239,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 0.465460415685747,
|
|
"grad_norm": 0.062249854993252765,
|
|
"learning_rate": 0.00019363749623059985,
|
|
"loss": 0.9662,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 0.46613304634425234,
|
|
"grad_norm": 0.05761845543442503,
|
|
"learning_rate": 0.00019330036241650768,
|
|
"loss": 0.9294,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 0.46680567700275777,
|
|
"grad_norm": 0.10323423472788318,
|
|
"learning_rate": 0.0001929629898165071,
|
|
"loss": 0.9805,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 0.4674783076612632,
|
|
"grad_norm": 0.06118143312492509,
|
|
"learning_rate": 0.00019262538029108663,
|
|
"loss": 0.9449,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 0.4681509383197686,
|
|
"grad_norm": 0.06057337443243259,
|
|
"learning_rate": 0.00019228753570204113,
|
|
"loss": 0.8763,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 0.46882356897827404,
|
|
"grad_norm": 0.06086236617653455,
|
|
"learning_rate": 0.00019194945791246192,
|
|
"loss": 0.8968,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 0.46949619963677947,
|
|
"grad_norm": 0.059555741757573326,
|
|
"learning_rate": 0.00019161114878672635,
|
|
"loss": 0.9706,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 0.47016883029528483,
|
|
"grad_norm": 0.058668845924034846,
|
|
"learning_rate": 0.0001912726101904873,
|
|
"loss": 0.921,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 0.47084146095379026,
|
|
"grad_norm": 0.058784578173225625,
|
|
"learning_rate": 0.0001909338439906633,
|
|
"loss": 0.9497,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.4715140916122957,
|
|
"grad_norm": 0.05866558280501008,
|
|
"learning_rate": 0.00019059485205542802,
|
|
"loss": 0.9753,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 0.4721867222708011,
|
|
"grad_norm": 0.05491405268934744,
|
|
"learning_rate": 0.0001902556362541999,
|
|
"loss": 0.943,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 0.47285935292930653,
|
|
"grad_norm": 0.059976784247428534,
|
|
"learning_rate": 0.0001899161984576319,
|
|
"loss": 0.9678,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 0.47353198358781196,
|
|
"grad_norm": 0.058020248226877215,
|
|
"learning_rate": 0.00018957654053760128,
|
|
"loss": 0.9183,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 0.4742046142463173,
|
|
"grad_norm": 0.06000157115028833,
|
|
"learning_rate": 0.00018923666436719918,
|
|
"loss": 0.9166,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 0.47487724490482275,
|
|
"grad_norm": 0.05711578525715549,
|
|
"learning_rate": 0.0001888965718207204,
|
|
"loss": 0.8866,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 0.4755498755633282,
|
|
"grad_norm": 0.05726576470343737,
|
|
"learning_rate": 0.0001885562647736527,
|
|
"loss": 0.9193,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 0.4762225062218336,
|
|
"grad_norm": 0.056688406578091884,
|
|
"learning_rate": 0.00018821574510266702,
|
|
"loss": 0.9215,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 0.476895136880339,
|
|
"grad_norm": 0.05845990276691751,
|
|
"learning_rate": 0.0001878750146856067,
|
|
"loss": 0.9221,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 0.47756776753884445,
|
|
"grad_norm": 0.0635662486938678,
|
|
"learning_rate": 0.00018753407540147743,
|
|
"loss": 0.8874,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.4782403981973498,
|
|
"grad_norm": 0.056415473310414646,
|
|
"learning_rate": 0.00018719292913043644,
|
|
"loss": 0.9473,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 0.47891302885585524,
|
|
"grad_norm": 0.05832846701805165,
|
|
"learning_rate": 0.0001868515777537827,
|
|
"loss": 1.0012,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 0.47958565951436066,
|
|
"grad_norm": 0.05842768032767335,
|
|
"learning_rate": 0.00018651002315394607,
|
|
"loss": 0.9591,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 0.4802582901728661,
|
|
"grad_norm": 0.05949799045924495,
|
|
"learning_rate": 0.0001861682672144773,
|
|
"loss": 0.9184,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 0.4809309208313715,
|
|
"grad_norm": 0.05882141302826352,
|
|
"learning_rate": 0.0001858263118200372,
|
|
"loss": 0.8963,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 0.48160355148987694,
|
|
"grad_norm": 0.05878082425498393,
|
|
"learning_rate": 0.0001854841588563868,
|
|
"loss": 0.9397,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 0.4822761821483823,
|
|
"grad_norm": 0.06226290012513929,
|
|
"learning_rate": 0.00018514181021037638,
|
|
"loss": 0.916,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 0.48294881280688773,
|
|
"grad_norm": 0.056647741034773165,
|
|
"learning_rate": 0.0001847992677699355,
|
|
"loss": 0.9266,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 0.48362144346539315,
|
|
"grad_norm": 0.05797267421634493,
|
|
"learning_rate": 0.0001844565334240624,
|
|
"loss": 0.9084,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 0.4842940741238986,
|
|
"grad_norm": 0.05974632387924615,
|
|
"learning_rate": 0.00018411360906281363,
|
|
"loss": 0.9442,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.484966704782404,
|
|
"grad_norm": 0.05888007684789391,
|
|
"learning_rate": 0.00018377049657729348,
|
|
"loss": 0.9507,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 0.48563933544090937,
|
|
"grad_norm": 0.06275261115925915,
|
|
"learning_rate": 0.00018342719785964382,
|
|
"loss": 0.9249,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 0.4863119660994148,
|
|
"grad_norm": 0.05780095763360672,
|
|
"learning_rate": 0.00018308371480303348,
|
|
"loss": 0.9342,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 0.4869845967579202,
|
|
"grad_norm": 0.059559111340805826,
|
|
"learning_rate": 0.00018274004930164786,
|
|
"loss": 0.9419,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 0.48765722741642564,
|
|
"grad_norm": 0.0719883639093611,
|
|
"learning_rate": 0.00018239620325067842,
|
|
"loss": 0.9123,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 0.48832985807493107,
|
|
"grad_norm": 0.05652059169463742,
|
|
"learning_rate": 0.00018205217854631232,
|
|
"loss": 0.8824,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 0.4890024887334365,
|
|
"grad_norm": 0.05780157013574382,
|
|
"learning_rate": 0.00018170797708572204,
|
|
"loss": 0.9345,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 0.48967511939194186,
|
|
"grad_norm": 0.05728822335624413,
|
|
"learning_rate": 0.00018136360076705463,
|
|
"loss": 0.8981,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 0.4903477500504473,
|
|
"grad_norm": 0.06019083234834377,
|
|
"learning_rate": 0.0001810190514894214,
|
|
"loss": 0.889,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 0.4910203807089527,
|
|
"grad_norm": 0.05914213146336189,
|
|
"learning_rate": 0.00018067433115288774,
|
|
"loss": 0.9162,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.49169301136745813,
|
|
"grad_norm": 0.05671214044848169,
|
|
"learning_rate": 0.0001803294416584621,
|
|
"loss": 0.9193,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 0.49236564202596356,
|
|
"grad_norm": 0.05880394037256467,
|
|
"learning_rate": 0.00017998438490808588,
|
|
"loss": 0.9218,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 0.493038272684469,
|
|
"grad_norm": 0.052818215027820234,
|
|
"learning_rate": 0.00017963916280462275,
|
|
"loss": 0.8766,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 0.49371090334297435,
|
|
"grad_norm": 0.055708114418970923,
|
|
"learning_rate": 0.00017929377725184843,
|
|
"loss": 0.9543,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 0.4943835340014798,
|
|
"grad_norm": 0.06351901627480289,
|
|
"learning_rate": 0.0001789482301544398,
|
|
"loss": 0.9742,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 0.4950561646599852,
|
|
"grad_norm": 0.05461654119012564,
|
|
"learning_rate": 0.00017860252341796474,
|
|
"loss": 0.8991,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 0.4957287953184906,
|
|
"grad_norm": 0.056349511534200064,
|
|
"learning_rate": 0.0001782566589488714,
|
|
"loss": 0.9212,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 0.49640142597699605,
|
|
"grad_norm": 0.05485006315481513,
|
|
"learning_rate": 0.0001779106386544778,
|
|
"loss": 0.9113,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 0.49707405663550147,
|
|
"grad_norm": 0.05974053898813881,
|
|
"learning_rate": 0.00017756446444296129,
|
|
"loss": 0.9165,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 0.49774668729400684,
|
|
"grad_norm": 0.05854153642359085,
|
|
"learning_rate": 0.000177218138223348,
|
|
"loss": 0.8972,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.49841931795251226,
|
|
"grad_norm": 0.06770077007123941,
|
|
"learning_rate": 0.00017687166190550233,
|
|
"loss": 0.8938,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 0.4990919486110177,
|
|
"grad_norm": 0.06254808010672673,
|
|
"learning_rate": 0.00017652503740011642,
|
|
"loss": 0.9793,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 0.4997645792695231,
|
|
"grad_norm": 0.06010377767315021,
|
|
"learning_rate": 0.00017617826661869967,
|
|
"loss": 0.9486,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 0.5004372099280285,
|
|
"grad_norm": 0.05446285095195939,
|
|
"learning_rate": 0.000175831351473568,
|
|
"loss": 0.8987,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 0.501109840586534,
|
|
"grad_norm": 0.05784448976997943,
|
|
"learning_rate": 0.00017548429387783358,
|
|
"loss": 0.9282,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 0.5017824712450394,
|
|
"grad_norm": 0.054802892079251174,
|
|
"learning_rate": 0.00017513709574539408,
|
|
"loss": 0.899,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 0.5024551019035448,
|
|
"grad_norm": 0.061133507811646316,
|
|
"learning_rate": 0.00017478975899092218,
|
|
"loss": 0.9002,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 0.5031277325620501,
|
|
"grad_norm": 0.057132215529536755,
|
|
"learning_rate": 0.00017444228552985504,
|
|
"loss": 0.9633,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 0.5038003632205555,
|
|
"grad_norm": 0.05560545701980792,
|
|
"learning_rate": 0.00017409467727838368,
|
|
"loss": 0.924,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 0.504472993879061,
|
|
"grad_norm": 0.054372307623308136,
|
|
"learning_rate": 0.00017374693615344243,
|
|
"loss": 0.9158,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.5051456245375664,
|
|
"grad_norm": 0.06565578023323308,
|
|
"learning_rate": 0.00017339906407269845,
|
|
"loss": 0.8696,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 0.5058182551960718,
|
|
"grad_norm": 0.06099191037998514,
|
|
"learning_rate": 0.00017305106295454096,
|
|
"loss": 0.9452,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 0.5064908858545772,
|
|
"grad_norm": 0.056830514265882634,
|
|
"learning_rate": 0.0001727029347180708,
|
|
"loss": 0.8809,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 0.5071635165130827,
|
|
"grad_norm": 0.056703302827126156,
|
|
"learning_rate": 0.00017235468128308994,
|
|
"loss": 0.9237,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 0.5078361471715881,
|
|
"grad_norm": 0.07203208057997011,
|
|
"learning_rate": 0.00017200630457009066,
|
|
"loss": 0.9571,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 0.5085087778300935,
|
|
"grad_norm": 0.057478081242506765,
|
|
"learning_rate": 0.00017165780650024503,
|
|
"loss": 0.8937,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 0.5091814084885989,
|
|
"grad_norm": 0.053243619421471995,
|
|
"learning_rate": 0.00017130918899539447,
|
|
"loss": 0.8609,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 0.5098540391471044,
|
|
"grad_norm": 0.06516652067823515,
|
|
"learning_rate": 0.00017096045397803903,
|
|
"loss": 0.8968,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 0.5105266698056098,
|
|
"grad_norm": 0.05203291222802004,
|
|
"learning_rate": 0.00017061160337132673,
|
|
"loss": 0.9189,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 0.5111993004641151,
|
|
"grad_norm": 0.06306295515600813,
|
|
"learning_rate": 0.00017026263909904307,
|
|
"loss": 0.9705,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.5118719311226205,
|
|
"grad_norm": 0.05615502764015949,
|
|
"learning_rate": 0.0001699135630856004,
|
|
"loss": 0.8814,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 0.512544561781126,
|
|
"grad_norm": 0.0560452705368292,
|
|
"learning_rate": 0.00016956437725602715,
|
|
"loss": 0.9215,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 0.5132171924396314,
|
|
"grad_norm": 0.05568518863652634,
|
|
"learning_rate": 0.0001692150835359576,
|
|
"loss": 0.9557,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 0.5138898230981368,
|
|
"grad_norm": 0.06054613780853645,
|
|
"learning_rate": 0.00016886568385162073,
|
|
"loss": 0.9322,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 0.5145624537566422,
|
|
"grad_norm": 0.052409857268644666,
|
|
"learning_rate": 0.00016851618012983,
|
|
"loss": 0.9368,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 0.5152350844151476,
|
|
"grad_norm": 0.05444283087401524,
|
|
"learning_rate": 0.00016816657429797262,
|
|
"loss": 0.9647,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 0.5159077150736531,
|
|
"grad_norm": 0.06452949924040549,
|
|
"learning_rate": 0.00016781686828399897,
|
|
"loss": 0.9436,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 0.5165803457321585,
|
|
"grad_norm": 0.05645688357294275,
|
|
"learning_rate": 0.00016746706401641167,
|
|
"loss": 0.8871,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 0.5172529763906639,
|
|
"grad_norm": 0.05810377319641876,
|
|
"learning_rate": 0.00016711716342425538,
|
|
"loss": 0.9476,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 0.5179256070491693,
|
|
"grad_norm": 0.05461149675976878,
|
|
"learning_rate": 0.00016676716843710583,
|
|
"loss": 0.8799,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.5185982377076748,
|
|
"grad_norm": 0.0564223024642207,
|
|
"learning_rate": 0.00016641708098505943,
|
|
"loss": 0.9193,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 0.5192708683661801,
|
|
"grad_norm": 0.06066172101050625,
|
|
"learning_rate": 0.00016606690299872238,
|
|
"loss": 0.9004,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 0.5199434990246855,
|
|
"grad_norm": 0.05611140658907044,
|
|
"learning_rate": 0.00016571663640920013,
|
|
"loss": 0.9337,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 0.5206161296831909,
|
|
"grad_norm": 0.057689031864237836,
|
|
"learning_rate": 0.00016536628314808697,
|
|
"loss": 0.9112,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 0.5212887603416964,
|
|
"grad_norm": 0.055731587959152395,
|
|
"learning_rate": 0.0001650158451474549,
|
|
"loss": 0.898,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 0.5219613910002018,
|
|
"grad_norm": 0.06201233139255153,
|
|
"learning_rate": 0.0001646653243398433,
|
|
"loss": 0.9145,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 0.5226340216587072,
|
|
"grad_norm": 0.06107927805244479,
|
|
"learning_rate": 0.00016431472265824814,
|
|
"loss": 0.9161,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 0.5233066523172126,
|
|
"grad_norm": 0.0661283923149189,
|
|
"learning_rate": 0.00016396404203611166,
|
|
"loss": 0.9115,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 0.523979282975718,
|
|
"grad_norm": 0.0604974766358996,
|
|
"learning_rate": 0.00016361328440731113,
|
|
"loss": 0.9455,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 0.5246519136342235,
|
|
"grad_norm": 0.058113713123233206,
|
|
"learning_rate": 0.00016326245170614854,
|
|
"loss": 0.8781,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.5253245442927289,
|
|
"grad_norm": 0.06015481165291708,
|
|
"learning_rate": 0.00016291154586733998,
|
|
"loss": 0.8848,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 0.5259971749512343,
|
|
"grad_norm": 0.0559671255985385,
|
|
"learning_rate": 0.00016256056882600476,
|
|
"loss": 0.9371,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 0.5266698056097396,
|
|
"grad_norm": 0.056837421939094905,
|
|
"learning_rate": 0.00016220952251765492,
|
|
"loss": 0.85,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 0.5273424362682451,
|
|
"grad_norm": 0.05168059406474343,
|
|
"learning_rate": 0.00016185840887818445,
|
|
"loss": 0.9226,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 0.5280150669267505,
|
|
"grad_norm": 0.05857810372921838,
|
|
"learning_rate": 0.00016150722984385865,
|
|
"loss": 0.9395,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 0.5286876975852559,
|
|
"grad_norm": 0.05722621782286285,
|
|
"learning_rate": 0.00016115598735130343,
|
|
"loss": 0.9425,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 0.5293603282437613,
|
|
"grad_norm": 0.059103865719039965,
|
|
"learning_rate": 0.00016080468333749478,
|
|
"loss": 0.8452,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 0.5300329589022668,
|
|
"grad_norm": 0.05555055554042676,
|
|
"learning_rate": 0.00016045331973974766,
|
|
"loss": 0.9027,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 0.5307055895607722,
|
|
"grad_norm": 0.052513345803769935,
|
|
"learning_rate": 0.00016010189849570595,
|
|
"loss": 0.8805,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 0.5313782202192776,
|
|
"grad_norm": 0.06037608421554789,
|
|
"learning_rate": 0.00015975042154333125,
|
|
"loss": 0.937,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.532050850877783,
|
|
"grad_norm": 0.056481515391140207,
|
|
"learning_rate": 0.0001593988908208924,
|
|
"loss": 0.943,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 0.5327234815362885,
|
|
"grad_norm": 0.06184729512471909,
|
|
"learning_rate": 0.00015904730826695474,
|
|
"loss": 0.9505,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 0.5333961121947939,
|
|
"grad_norm": 0.06127002318323847,
|
|
"learning_rate": 0.00015869567582036946,
|
|
"loss": 0.8742,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 0.5340687428532993,
|
|
"grad_norm": 0.06185433303343996,
|
|
"learning_rate": 0.00015834399542026298,
|
|
"loss": 0.9058,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 0.5347413735118046,
|
|
"grad_norm": 0.06027891444038201,
|
|
"learning_rate": 0.00015799226900602598,
|
|
"loss": 0.8915,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 0.53541400417031,
|
|
"grad_norm": 0.057040976959674576,
|
|
"learning_rate": 0.00015764049851730306,
|
|
"loss": 0.9281,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 0.5360866348288155,
|
|
"grad_norm": 0.07354905892891744,
|
|
"learning_rate": 0.00015728868589398178,
|
|
"loss": 0.8798,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 0.5367592654873209,
|
|
"grad_norm": 0.053310299991943276,
|
|
"learning_rate": 0.0001569368330761821,
|
|
"loss": 0.8899,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 0.5374318961458263,
|
|
"grad_norm": 0.0628036632961106,
|
|
"learning_rate": 0.0001565849420042456,
|
|
"loss": 0.9191,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 0.5381045268043317,
|
|
"grad_norm": 0.05421995436525058,
|
|
"learning_rate": 0.00015623301461872488,
|
|
"loss": 0.9471,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.5387771574628372,
|
|
"grad_norm": 0.052719478039766475,
|
|
"learning_rate": 0.00015588105286037276,
|
|
"loss": 0.8718,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 0.5394497881213426,
|
|
"grad_norm": 0.0544131184510201,
|
|
"learning_rate": 0.00015552905867013156,
|
|
"loss": 0.9153,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 0.540122418779848,
|
|
"grad_norm": 0.055134853031770285,
|
|
"learning_rate": 0.00015517703398912255,
|
|
"loss": 0.8935,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 0.5407950494383534,
|
|
"grad_norm": 0.06099844314567582,
|
|
"learning_rate": 0.00015482498075863513,
|
|
"loss": 0.8387,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 0.5414676800968589,
|
|
"grad_norm": 0.06712533260751871,
|
|
"learning_rate": 0.00015447290092011602,
|
|
"loss": 0.939,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 0.5421403107553642,
|
|
"grad_norm": 0.05720276550328766,
|
|
"learning_rate": 0.00015412079641515878,
|
|
"loss": 0.8824,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 0.5428129414138696,
|
|
"grad_norm": 0.06011803041726182,
|
|
"learning_rate": 0.00015376866918549308,
|
|
"loss": 0.9513,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 0.543485572072375,
|
|
"grad_norm": 0.059287881649759205,
|
|
"learning_rate": 0.00015341652117297372,
|
|
"loss": 0.9054,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 0.5441582027308804,
|
|
"grad_norm": 0.05299022648963555,
|
|
"learning_rate": 0.0001530643543195702,
|
|
"loss": 0.9335,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 0.5448308333893859,
|
|
"grad_norm": 0.054676871019681765,
|
|
"learning_rate": 0.00015271217056735592,
|
|
"loss": 0.8307,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.5455034640478913,
|
|
"grad_norm": 0.05936475254102487,
|
|
"learning_rate": 0.00015235997185849754,
|
|
"loss": 0.9142,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 0.5461760947063967,
|
|
"grad_norm": 0.05743243525838816,
|
|
"learning_rate": 0.00015200776013524404,
|
|
"loss": 0.8874,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 0.5468487253649021,
|
|
"grad_norm": 0.056156997238234564,
|
|
"learning_rate": 0.0001516555373399162,
|
|
"loss": 0.867,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 0.5475213560234076,
|
|
"grad_norm": 0.054258410598457395,
|
|
"learning_rate": 0.0001513033054148961,
|
|
"loss": 0.898,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 0.548193986681913,
|
|
"grad_norm": 0.053619280218852455,
|
|
"learning_rate": 0.00015095106630261593,
|
|
"loss": 0.9184,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 0.5488666173404184,
|
|
"grad_norm": 0.05479069536605181,
|
|
"learning_rate": 0.0001505988219455475,
|
|
"loss": 0.848,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 0.5495392479989238,
|
|
"grad_norm": 0.04972042649918088,
|
|
"learning_rate": 0.00015024657428619156,
|
|
"loss": 0.9102,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 0.5502118786574292,
|
|
"grad_norm": 0.0537833564245492,
|
|
"learning_rate": 0.00014989432526706735,
|
|
"loss": 0.8817,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 0.5508845093159346,
|
|
"grad_norm": 0.05831550073111491,
|
|
"learning_rate": 0.00014954207683070116,
|
|
"loss": 0.8746,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 0.55155713997444,
|
|
"grad_norm": 0.05715884422720536,
|
|
"learning_rate": 0.00014918983091961638,
|
|
"loss": 0.8898,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.5522297706329454,
|
|
"grad_norm": 0.05902152296123058,
|
|
"learning_rate": 0.0001488375894763224,
|
|
"loss": 0.9082,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 0.5529024012914509,
|
|
"grad_norm": 0.058375481662671105,
|
|
"learning_rate": 0.0001484853544433039,
|
|
"loss": 0.8988,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 0.5535750319499563,
|
|
"grad_norm": 0.05339178022779875,
|
|
"learning_rate": 0.0001481331277630103,
|
|
"loss": 0.8585,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 0.5542476626084617,
|
|
"grad_norm": 0.05713255377942084,
|
|
"learning_rate": 0.00014778091137784493,
|
|
"loss": 0.8841,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 0.5549202932669671,
|
|
"grad_norm": 0.05830955458693526,
|
|
"learning_rate": 0.00014742870723015433,
|
|
"loss": 0.828,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 0.5555929239254725,
|
|
"grad_norm": 0.06080721903708676,
|
|
"learning_rate": 0.0001470765172622176,
|
|
"loss": 0.8978,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 0.556265554583978,
|
|
"grad_norm": 0.05958603835036692,
|
|
"learning_rate": 0.00014672434341623549,
|
|
"loss": 0.9092,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 0.5569381852424834,
|
|
"grad_norm": 0.05825095975769997,
|
|
"learning_rate": 0.00014637218763432003,
|
|
"loss": 0.8763,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 0.5576108159009888,
|
|
"grad_norm": 0.0587468065652045,
|
|
"learning_rate": 0.00014602005185848364,
|
|
"loss": 0.8766,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 0.5582834465594941,
|
|
"grad_norm": 0.056558891133557256,
|
|
"learning_rate": 0.00014566793803062823,
|
|
"loss": 0.9658,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.5589560772179996,
|
|
"grad_norm": 0.05207206688489681,
|
|
"learning_rate": 0.0001453158480925348,
|
|
"loss": 0.9154,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 0.559628707876505,
|
|
"grad_norm": 0.06268612193351983,
|
|
"learning_rate": 0.00014496378398585262,
|
|
"loss": 0.8945,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 0.5603013385350104,
|
|
"grad_norm": 0.055810120355651985,
|
|
"learning_rate": 0.00014461174765208843,
|
|
"loss": 0.8514,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 0.5609739691935158,
|
|
"grad_norm": 0.06012035951220586,
|
|
"learning_rate": 0.00014425974103259592,
|
|
"loss": 0.9309,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 0.5616465998520213,
|
|
"grad_norm": 0.05205807171138911,
|
|
"learning_rate": 0.00014390776606856481,
|
|
"loss": 0.8314,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 0.5623192305105267,
|
|
"grad_norm": 0.0542122704361648,
|
|
"learning_rate": 0.00014355582470101033,
|
|
"loss": 0.9146,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 0.5629918611690321,
|
|
"grad_norm": 0.06119537195488446,
|
|
"learning_rate": 0.00014320391887076244,
|
|
"loss": 0.9141,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 0.5636644918275375,
|
|
"grad_norm": 0.052147743194251195,
|
|
"learning_rate": 0.00014285205051845499,
|
|
"loss": 0.8538,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 0.564337122486043,
|
|
"grad_norm": 0.058745044900606766,
|
|
"learning_rate": 0.0001425002215845153,
|
|
"loss": 0.9293,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 0.5650097531445484,
|
|
"grad_norm": 0.059321637563402155,
|
|
"learning_rate": 0.00014214843400915325,
|
|
"loss": 0.9303,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.5656823838030537,
|
|
"grad_norm": 0.05701045172370044,
|
|
"learning_rate": 0.00014179668973235068,
|
|
"loss": 0.8771,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 0.5663550144615591,
|
|
"grad_norm": 0.06196077189437652,
|
|
"learning_rate": 0.00014144499069385064,
|
|
"loss": 0.8745,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 0.5670276451200645,
|
|
"grad_norm": 0.0574145705590886,
|
|
"learning_rate": 0.00014109333883314667,
|
|
"loss": 0.9236,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 0.56770027577857,
|
|
"grad_norm": 0.06338155467612702,
|
|
"learning_rate": 0.00014074173608947214,
|
|
"loss": 0.9423,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 0.5683729064370754,
|
|
"grad_norm": 0.06248233624602792,
|
|
"learning_rate": 0.0001403901844017897,
|
|
"loss": 0.841,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 0.5690455370955808,
|
|
"grad_norm": 0.0694864531170763,
|
|
"learning_rate": 0.00014003868570878022,
|
|
"loss": 0.9356,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 0.5697181677540862,
|
|
"grad_norm": 0.06348150388439985,
|
|
"learning_rate": 0.00013968724194883252,
|
|
"loss": 0.9308,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 0.5703907984125917,
|
|
"grad_norm": 0.061117572773617,
|
|
"learning_rate": 0.00013933585506003228,
|
|
"loss": 0.8817,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 0.5710634290710971,
|
|
"grad_norm": 0.05989230491533799,
|
|
"learning_rate": 0.00013898452698015177,
|
|
"loss": 0.9248,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 0.5717360597296025,
|
|
"grad_norm": 0.05943495368109778,
|
|
"learning_rate": 0.00013863325964663884,
|
|
"loss": 0.917,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.5724086903881079,
|
|
"grad_norm": 0.0582336713656894,
|
|
"learning_rate": 0.00013828205499660632,
|
|
"loss": 0.8604,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 0.5730813210466134,
|
|
"grad_norm": 0.05411543005433317,
|
|
"learning_rate": 0.0001379309149668214,
|
|
"loss": 0.8695,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 0.5737539517051187,
|
|
"grad_norm": 0.0581837735189759,
|
|
"learning_rate": 0.00013757984149369504,
|
|
"loss": 0.8793,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 0.5744265823636241,
|
|
"grad_norm": 0.05585377116595472,
|
|
"learning_rate": 0.0001372288365132709,
|
|
"loss": 0.8832,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 0.5750992130221295,
|
|
"grad_norm": 0.05516798560782244,
|
|
"learning_rate": 0.00013687790196121517,
|
|
"loss": 0.8876,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 0.5757718436806349,
|
|
"grad_norm": 0.05770652015506851,
|
|
"learning_rate": 0.0001365270397728054,
|
|
"loss": 0.9036,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 0.5764444743391404,
|
|
"grad_norm": 0.05511634842230322,
|
|
"learning_rate": 0.00013617625188292034,
|
|
"loss": 0.8413,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 0.5771171049976458,
|
|
"grad_norm": 0.0581917308559524,
|
|
"learning_rate": 0.00013582554022602896,
|
|
"loss": 0.879,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 0.5777897356561512,
|
|
"grad_norm": 0.057635907441933136,
|
|
"learning_rate": 0.00013547490673617964,
|
|
"loss": 0.8731,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 0.5784623663146566,
|
|
"grad_norm": 0.054839688661184126,
|
|
"learning_rate": 0.00013512435334698988,
|
|
"loss": 0.9481,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.5791349969731621,
|
|
"grad_norm": 0.05745991388972774,
|
|
"learning_rate": 0.00013477388199163544,
|
|
"loss": 0.8818,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 0.5798076276316675,
|
|
"grad_norm": 0.061297124958904645,
|
|
"learning_rate": 0.00013442349460283964,
|
|
"loss": 0.9059,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 0.5804802582901729,
|
|
"grad_norm": 0.06461719548175024,
|
|
"learning_rate": 0.00013407319311286277,
|
|
"loss": 0.8842,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 0.5811528889486782,
|
|
"grad_norm": 0.05003410246581846,
|
|
"learning_rate": 0.00013372297945349137,
|
|
"loss": 0.9352,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 0.5818255196071836,
|
|
"grad_norm": 0.05244921652553115,
|
|
"learning_rate": 0.00013337285555602773,
|
|
"loss": 0.921,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 0.5824981502656891,
|
|
"grad_norm": 0.05766957348830907,
|
|
"learning_rate": 0.00013302282335127914,
|
|
"loss": 0.8749,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 0.5831707809241945,
|
|
"grad_norm": 0.061301170245086384,
|
|
"learning_rate": 0.00013267288476954704,
|
|
"loss": 0.8933,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 0.5838434115826999,
|
|
"grad_norm": 0.05932960924176723,
|
|
"learning_rate": 0.00013232304174061674,
|
|
"loss": 0.9374,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 0.5845160422412053,
|
|
"grad_norm": 0.06319852513692248,
|
|
"learning_rate": 0.00013197329619374677,
|
|
"loss": 0.8601,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 0.5851886728997108,
|
|
"grad_norm": 0.055916865171505896,
|
|
"learning_rate": 0.00013162365005765764,
|
|
"loss": 0.9082,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.5858613035582162,
|
|
"grad_norm": 0.0545360860790789,
|
|
"learning_rate": 0.00013127410526052208,
|
|
"loss": 0.9341,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 0.5865339342167216,
|
|
"grad_norm": 0.054118565941251126,
|
|
"learning_rate": 0.00013092466372995366,
|
|
"loss": 0.8528,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 0.587206564875227,
|
|
"grad_norm": 0.05653106870225469,
|
|
"learning_rate": 0.00013057532739299668,
|
|
"loss": 0.8948,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 0.5878791955337325,
|
|
"grad_norm": 0.05401286588096964,
|
|
"learning_rate": 0.0001302260981761153,
|
|
"loss": 0.9151,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 0.5885518261922379,
|
|
"grad_norm": 0.06148934703285774,
|
|
"learning_rate": 0.0001298769780051828,
|
|
"loss": 0.9045,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 0.5892244568507432,
|
|
"grad_norm": 0.06093837315082601,
|
|
"learning_rate": 0.00012952796880547128,
|
|
"loss": 0.8844,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 0.5898970875092486,
|
|
"grad_norm": 0.05669194121807215,
|
|
"learning_rate": 0.0001291790725016409,
|
|
"loss": 0.92,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 0.590569718167754,
|
|
"grad_norm": 0.06158002592468912,
|
|
"learning_rate": 0.000128830291017729,
|
|
"loss": 0.876,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 0.5912423488262595,
|
|
"grad_norm": 0.05740935206115831,
|
|
"learning_rate": 0.00012848162627714,
|
|
"loss": 0.829,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 0.5919149794847649,
|
|
"grad_norm": 0.05772541420273194,
|
|
"learning_rate": 0.00012813308020263428,
|
|
"loss": 0.8735,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.5925876101432703,
|
|
"grad_norm": 0.06350978931255861,
|
|
"learning_rate": 0.00012778465471631806,
|
|
"loss": 0.9195,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 0.5932602408017758,
|
|
"grad_norm": 0.051441424187463246,
|
|
"learning_rate": 0.00012743635173963246,
|
|
"loss": 0.8725,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 0.5939328714602812,
|
|
"grad_norm": 0.05739880159869317,
|
|
"learning_rate": 0.0001270881731933429,
|
|
"loss": 0.9279,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 0.5946055021187866,
|
|
"grad_norm": 0.057953361597903576,
|
|
"learning_rate": 0.00012674012099752872,
|
|
"loss": 0.9029,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 0.595278132777292,
|
|
"grad_norm": 0.0587792183282691,
|
|
"learning_rate": 0.00012639219707157254,
|
|
"loss": 0.9057,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 0.5959507634357974,
|
|
"grad_norm": 0.06134378487549523,
|
|
"learning_rate": 0.00012604440333414946,
|
|
"loss": 0.887,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 0.5966233940943029,
|
|
"grad_norm": 0.06366789691162225,
|
|
"learning_rate": 0.0001256967417032168,
|
|
"loss": 0.9016,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 0.5972960247528082,
|
|
"grad_norm": 0.054919981047600736,
|
|
"learning_rate": 0.00012534921409600318,
|
|
"loss": 0.8743,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 0.5979686554113136,
|
|
"grad_norm": 0.16672557636164484,
|
|
"learning_rate": 0.00012500182242899827,
|
|
"loss": 0.9003,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 0.598641286069819,
|
|
"grad_norm": 0.05503535479792428,
|
|
"learning_rate": 0.00012465456861794204,
|
|
"loss": 0.8848,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.5993139167283245,
|
|
"grad_norm": 0.054776610640831065,
|
|
"learning_rate": 0.0001243074545778142,
|
|
"loss": 0.9217,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 0.5999865473868299,
|
|
"grad_norm": 0.05883313327981152,
|
|
"learning_rate": 0.00012396048222282374,
|
|
"loss": 0.8789,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 0.6006591780453353,
|
|
"grad_norm": 0.053191200550714766,
|
|
"learning_rate": 0.0001236136534663983,
|
|
"loss": 0.9423,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 0.6013318087038407,
|
|
"grad_norm": 0.05662708709001015,
|
|
"learning_rate": 0.0001232669702211735,
|
|
"loss": 0.8702,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 0.6020044393623462,
|
|
"grad_norm": 0.05705030841856459,
|
|
"learning_rate": 0.00012292043439898274,
|
|
"loss": 0.9329,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 0.6026770700208516,
|
|
"grad_norm": 0.06547197669722017,
|
|
"learning_rate": 0.00012257404791084616,
|
|
"loss": 0.9337,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 0.603349700679357,
|
|
"grad_norm": 0.05619282666130086,
|
|
"learning_rate": 0.00012222781266696056,
|
|
"loss": 0.9097,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 0.6040223313378624,
|
|
"grad_norm": 0.058899280210475524,
|
|
"learning_rate": 0.00012188173057668881,
|
|
"loss": 0.8789,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 0.6046949619963677,
|
|
"grad_norm": 0.05736846197147452,
|
|
"learning_rate": 0.00012153580354854885,
|
|
"loss": 0.9097,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 0.6053675926548732,
|
|
"grad_norm": 0.05425110212071724,
|
|
"learning_rate": 0.0001211900334902037,
|
|
"loss": 0.8751,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.6060402233133786,
|
|
"grad_norm": 0.05485136697764707,
|
|
"learning_rate": 0.00012084442230845087,
|
|
"loss": 0.8551,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 0.606712853971884,
|
|
"grad_norm": 0.06282812420498524,
|
|
"learning_rate": 0.00012049897190921143,
|
|
"loss": 0.9172,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 0.6073854846303894,
|
|
"grad_norm": 0.058713453811793,
|
|
"learning_rate": 0.00012015368419752009,
|
|
"loss": 0.9139,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 0.6080581152888949,
|
|
"grad_norm": 0.05588704652885199,
|
|
"learning_rate": 0.00011980856107751414,
|
|
"loss": 0.8895,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 0.6087307459474003,
|
|
"grad_norm": 0.05606004982577002,
|
|
"learning_rate": 0.0001194636044524234,
|
|
"loss": 0.8542,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 0.6094033766059057,
|
|
"grad_norm": 0.06075755945838316,
|
|
"learning_rate": 0.00011911881622455947,
|
|
"loss": 0.9091,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 0.6100760072644111,
|
|
"grad_norm": 0.05777711265416582,
|
|
"learning_rate": 0.0001187741982953052,
|
|
"loss": 0.8863,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 0.6107486379229166,
|
|
"grad_norm": 0.052534492649992555,
|
|
"learning_rate": 0.00011842975256510439,
|
|
"loss": 0.9213,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 0.611421268581422,
|
|
"grad_norm": 0.060979276869632684,
|
|
"learning_rate": 0.0001180854809334514,
|
|
"loss": 0.9091,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 0.6120938992399274,
|
|
"grad_norm": 0.05636369366484177,
|
|
"learning_rate": 0.0001177413852988801,
|
|
"loss": 0.9109,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.6127665298984327,
|
|
"grad_norm": 0.06375696295920616,
|
|
"learning_rate": 0.00011739746755895416,
|
|
"loss": 0.9199,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 0.6134391605569381,
|
|
"grad_norm": 0.06022822131153983,
|
|
"learning_rate": 0.00011705372961025602,
|
|
"loss": 0.9181,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 0.6141117912154436,
|
|
"grad_norm": 0.05818657066276916,
|
|
"learning_rate": 0.00011671017334837674,
|
|
"loss": 0.9297,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 0.614784421873949,
|
|
"grad_norm": 0.05707183156732193,
|
|
"learning_rate": 0.0001163668006679054,
|
|
"loss": 0.9006,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 0.6154570525324544,
|
|
"grad_norm": 0.057762504480045336,
|
|
"learning_rate": 0.00011602361346241869,
|
|
"loss": 0.9213,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 0.6161296831909598,
|
|
"grad_norm": 0.05622121629708367,
|
|
"learning_rate": 0.00011568061362447048,
|
|
"loss": 0.8442,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 0.6168023138494653,
|
|
"grad_norm": 0.06842696169362386,
|
|
"learning_rate": 0.00011533780304558146,
|
|
"loss": 0.91,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 0.6174749445079707,
|
|
"grad_norm": 0.0538174023674992,
|
|
"learning_rate": 0.0001149951836162284,
|
|
"loss": 0.8732,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 0.6181475751664761,
|
|
"grad_norm": 0.0559745986424301,
|
|
"learning_rate": 0.0001146527572258342,
|
|
"loss": 0.8556,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 0.6188202058249815,
|
|
"grad_norm": 0.05889316352261125,
|
|
"learning_rate": 0.00011431052576275704,
|
|
"loss": 0.8636,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.619492836483487,
|
|
"grad_norm": 0.0644317221183652,
|
|
"learning_rate": 0.00011396849111428026,
|
|
"loss": 0.8829,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 0.6201654671419923,
|
|
"grad_norm": 0.05378896021016429,
|
|
"learning_rate": 0.00011362665516660181,
|
|
"loss": 0.8692,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 0.6208380978004977,
|
|
"grad_norm": 0.06101966621474693,
|
|
"learning_rate": 0.00011328501980482382,
|
|
"loss": 0.9022,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 0.6215107284590031,
|
|
"grad_norm": 0.05748333047789456,
|
|
"learning_rate": 0.00011294358691294232,
|
|
"loss": 0.865,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 0.6221833591175085,
|
|
"grad_norm": 0.059256917830513484,
|
|
"learning_rate": 0.00011260235837383684,
|
|
"loss": 0.8582,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 0.622855989776014,
|
|
"grad_norm": 0.05943237627383429,
|
|
"learning_rate": 0.00011226133606925981,
|
|
"loss": 0.9069,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 0.6235286204345194,
|
|
"grad_norm": 0.054627263562646904,
|
|
"learning_rate": 0.00011192052187982654,
|
|
"loss": 0.9239,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 0.6242012510930248,
|
|
"grad_norm": 0.06107879674633574,
|
|
"learning_rate": 0.00011157991768500451,
|
|
"loss": 0.8936,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 0.6248738817515302,
|
|
"grad_norm": 0.058746911687480065,
|
|
"learning_rate": 0.00011123952536310321,
|
|
"loss": 0.8503,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 0.6255465124100357,
|
|
"grad_norm": 0.05687652600714167,
|
|
"learning_rate": 0.00011089934679126383,
|
|
"loss": 0.8842,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.6262191430685411,
|
|
"grad_norm": 0.05737409928479921,
|
|
"learning_rate": 0.00011055938384544861,
|
|
"loss": 0.8413,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 0.6268917737270465,
|
|
"grad_norm": 0.05281557762068941,
|
|
"learning_rate": 0.00011021963840043082,
|
|
"loss": 0.8714,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 0.6275644043855519,
|
|
"grad_norm": 0.05653456815014127,
|
|
"learning_rate": 0.00010988011232978433,
|
|
"loss": 0.8631,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 0.6282370350440573,
|
|
"grad_norm": 0.05820443994353081,
|
|
"learning_rate": 0.00010954080750587308,
|
|
"loss": 0.8458,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 0.6289096657025627,
|
|
"grad_norm": 0.058053700502948814,
|
|
"learning_rate": 0.00010920172579984113,
|
|
"loss": 0.8708,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 0.6295822963610681,
|
|
"grad_norm": 0.052586921573302296,
|
|
"learning_rate": 0.00010886286908160178,
|
|
"loss": 0.837,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 0.6302549270195735,
|
|
"grad_norm": 0.05836180559612674,
|
|
"learning_rate": 0.00010852423921982804,
|
|
"loss": 0.8881,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 0.630927557678079,
|
|
"grad_norm": 0.05330666907721731,
|
|
"learning_rate": 0.00010818583808194165,
|
|
"loss": 0.8655,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 0.6316001883365844,
|
|
"grad_norm": 0.05931093065925741,
|
|
"learning_rate": 0.00010784766753410292,
|
|
"loss": 0.867,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 0.6322728189950898,
|
|
"grad_norm": 0.059390965217927305,
|
|
"learning_rate": 0.00010750972944120074,
|
|
"loss": 0.9129,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.6329454496535952,
|
|
"grad_norm": 0.057009911395215766,
|
|
"learning_rate": 0.00010717202566684205,
|
|
"loss": 0.8754,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 0.6336180803121007,
|
|
"grad_norm": 0.05813916757602384,
|
|
"learning_rate": 0.00010683455807334149,
|
|
"loss": 0.9253,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 0.6342907109706061,
|
|
"grad_norm": 0.05554690018381647,
|
|
"learning_rate": 0.0001064973285217114,
|
|
"loss": 0.9079,
|
|
"step": 4715
|
|
},
|
|
{
|
|
"epoch": 0.6349633416291115,
|
|
"grad_norm": 0.05823337802695429,
|
|
"learning_rate": 0.0001061603388716513,
|
|
"loss": 0.8678,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 0.6356359722876169,
|
|
"grad_norm": 0.060183801311714434,
|
|
"learning_rate": 0.00010582359098153779,
|
|
"loss": 0.9334,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 0.6363086029461222,
|
|
"grad_norm": 0.06158375211136442,
|
|
"learning_rate": 0.00010548708670841432,
|
|
"loss": 0.8998,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 0.6369812336046277,
|
|
"grad_norm": 0.05841236712079292,
|
|
"learning_rate": 0.00010515082790798064,
|
|
"loss": 0.8454,
|
|
"step": 4735
|
|
},
|
|
{
|
|
"epoch": 0.6376538642631331,
|
|
"grad_norm": 0.058285702230372556,
|
|
"learning_rate": 0.000104814816434583,
|
|
"loss": 0.9201,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 0.6383264949216385,
|
|
"grad_norm": 0.05752261109466009,
|
|
"learning_rate": 0.00010447905414120385,
|
|
"loss": 0.9009,
|
|
"step": 4745
|
|
},
|
|
{
|
|
"epoch": 0.6389991255801439,
|
|
"grad_norm": 0.054666275354256486,
|
|
"learning_rate": 0.00010414354287945116,
|
|
"loss": 0.8836,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.6396717562386494,
|
|
"grad_norm": 0.054230923288243436,
|
|
"learning_rate": 0.00010380828449954886,
|
|
"loss": 0.9051,
|
|
"step": 4755
|
|
},
|
|
{
|
|
"epoch": 0.6403443868971548,
|
|
"grad_norm": 0.05811940329323424,
|
|
"learning_rate": 0.0001034732808503261,
|
|
"loss": 0.8842,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 0.6410170175556602,
|
|
"grad_norm": 0.06251179179779437,
|
|
"learning_rate": 0.00010313853377920744,
|
|
"loss": 0.8882,
|
|
"step": 4765
|
|
},
|
|
{
|
|
"epoch": 0.6416896482141656,
|
|
"grad_norm": 0.052634147322929405,
|
|
"learning_rate": 0.0001028040451322025,
|
|
"loss": 0.9015,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 0.642362278872671,
|
|
"grad_norm": 0.059163841667578786,
|
|
"learning_rate": 0.00010246981675389563,
|
|
"loss": 0.8905,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 0.6430349095311765,
|
|
"grad_norm": 0.055204378998180585,
|
|
"learning_rate": 0.00010213585048743608,
|
|
"loss": 0.8629,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 0.6437075401896818,
|
|
"grad_norm": 0.06100319448728361,
|
|
"learning_rate": 0.00010180214817452759,
|
|
"loss": 0.922,
|
|
"step": 4785
|
|
},
|
|
{
|
|
"epoch": 0.6443801708481872,
|
|
"grad_norm": 0.05767459460028311,
|
|
"learning_rate": 0.00010146871165541816,
|
|
"loss": 0.8928,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 0.6450528015066926,
|
|
"grad_norm": 0.05607341654418674,
|
|
"learning_rate": 0.0001011355427688902,
|
|
"loss": 0.8736,
|
|
"step": 4795
|
|
},
|
|
{
|
|
"epoch": 0.6457254321651981,
|
|
"grad_norm": 0.0535834405716432,
|
|
"learning_rate": 0.00010080264335225016,
|
|
"loss": 0.9161,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.6463980628237035,
|
|
"grad_norm": 0.056346526158196436,
|
|
"learning_rate": 0.00010047001524131844,
|
|
"loss": 0.883,
|
|
"step": 4805
|
|
},
|
|
{
|
|
"epoch": 0.6470706934822089,
|
|
"grad_norm": 0.05586293404952587,
|
|
"learning_rate": 0.00010013766027041936,
|
|
"loss": 0.8463,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 0.6477433241407143,
|
|
"grad_norm": 0.05723029408458351,
|
|
"learning_rate": 9.980558027237084e-05,
|
|
"loss": 0.8411,
|
|
"step": 4815
|
|
},
|
|
{
|
|
"epoch": 0.6484159547992198,
|
|
"grad_norm": 0.058448218690204695,
|
|
"learning_rate": 9.947377707847463e-05,
|
|
"loss": 0.8481,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 0.6490885854577252,
|
|
"grad_norm": 0.07509436198212023,
|
|
"learning_rate": 9.914225251850568e-05,
|
|
"loss": 0.8862,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 0.6497612161162306,
|
|
"grad_norm": 0.05253604166156805,
|
|
"learning_rate": 9.881100842070275e-05,
|
|
"loss": 0.8384,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 0.650433846774736,
|
|
"grad_norm": 0.054848894274745104,
|
|
"learning_rate": 9.848004661175775e-05,
|
|
"loss": 0.8584,
|
|
"step": 4835
|
|
},
|
|
{
|
|
"epoch": 0.6511064774332415,
|
|
"grad_norm": 0.06695121957621178,
|
|
"learning_rate": 9.814936891680581e-05,
|
|
"loss": 0.8816,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 0.6517791080917468,
|
|
"grad_norm": 0.05596682397619204,
|
|
"learning_rate": 9.78189771594154e-05,
|
|
"loss": 0.9007,
|
|
"step": 4845
|
|
},
|
|
{
|
|
"epoch": 0.6524517387502522,
|
|
"grad_norm": 0.05517815446096068,
|
|
"learning_rate": 9.748887316157814e-05,
|
|
"loss": 0.8934,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.6531243694087576,
|
|
"grad_norm": 0.06497381529538195,
|
|
"learning_rate": 9.715905874369865e-05,
|
|
"loss": 0.915,
|
|
"step": 4855
|
|
},
|
|
{
|
|
"epoch": 0.653797000067263,
|
|
"grad_norm": 0.056024264255754005,
|
|
"learning_rate": 9.682953572458477e-05,
|
|
"loss": 0.8571,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 0.6544696307257685,
|
|
"grad_norm": 0.08419868114514564,
|
|
"learning_rate": 9.650030592143723e-05,
|
|
"loss": 0.9008,
|
|
"step": 4865
|
|
},
|
|
{
|
|
"epoch": 0.6551422613842739,
|
|
"grad_norm": 0.055564563924577495,
|
|
"learning_rate": 9.61713711498399e-05,
|
|
"loss": 0.8797,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 0.6558148920427793,
|
|
"grad_norm": 0.062082572684302575,
|
|
"learning_rate": 9.58427332237497e-05,
|
|
"loss": 0.8628,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 0.6564875227012847,
|
|
"grad_norm": 0.060099200321734096,
|
|
"learning_rate": 9.551439395548624e-05,
|
|
"loss": 0.8484,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 0.6571601533597902,
|
|
"grad_norm": 0.058009190055296006,
|
|
"learning_rate": 9.518635515572253e-05,
|
|
"loss": 0.8501,
|
|
"step": 4885
|
|
},
|
|
{
|
|
"epoch": 0.6578327840182956,
|
|
"grad_norm": 0.05797176811552356,
|
|
"learning_rate": 9.48586186334745e-05,
|
|
"loss": 0.8473,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 0.658505414676801,
|
|
"grad_norm": 0.06380953263521327,
|
|
"learning_rate": 9.453118619609089e-05,
|
|
"loss": 0.8727,
|
|
"step": 4895
|
|
},
|
|
{
|
|
"epoch": 0.6591780453353063,
|
|
"grad_norm": 0.05688225891322997,
|
|
"learning_rate": 9.420405964924383e-05,
|
|
"loss": 0.8673,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.6598506759938118,
|
|
"grad_norm": 0.05613539116592728,
|
|
"learning_rate": 9.387724079691836e-05,
|
|
"loss": 0.8699,
|
|
"step": 4905
|
|
},
|
|
{
|
|
"epoch": 0.6605233066523172,
|
|
"grad_norm": 0.060851757847964476,
|
|
"learning_rate": 9.355073144140283e-05,
|
|
"loss": 0.8902,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 0.6611959373108226,
|
|
"grad_norm": 0.06414740795910594,
|
|
"learning_rate": 9.322453338327879e-05,
|
|
"loss": 0.8337,
|
|
"step": 4915
|
|
},
|
|
{
|
|
"epoch": 0.661868567969328,
|
|
"grad_norm": 0.059468629399861576,
|
|
"learning_rate": 9.289864842141101e-05,
|
|
"loss": 0.9023,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 0.6625411986278334,
|
|
"grad_norm": 0.05758556350462922,
|
|
"learning_rate": 9.257307835293778e-05,
|
|
"loss": 0.8525,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 0.6632138292863389,
|
|
"grad_norm": 0.06171673443618236,
|
|
"learning_rate": 9.224782497326085e-05,
|
|
"loss": 0.9572,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 0.6638864599448443,
|
|
"grad_norm": 0.062334143766405774,
|
|
"learning_rate": 9.192289007603538e-05,
|
|
"loss": 0.8436,
|
|
"step": 4935
|
|
},
|
|
{
|
|
"epoch": 0.6645590906033497,
|
|
"grad_norm": 0.05685720623051231,
|
|
"learning_rate": 9.159827545316043e-05,
|
|
"loss": 0.8374,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 0.6652317212618551,
|
|
"grad_norm": 0.05733734226480475,
|
|
"learning_rate": 9.127398289476871e-05,
|
|
"loss": 0.8669,
|
|
"step": 4945
|
|
},
|
|
{
|
|
"epoch": 0.6659043519203606,
|
|
"grad_norm": 0.06246178021253094,
|
|
"learning_rate": 9.095001418921694e-05,
|
|
"loss": 0.8825,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.666576982578866,
|
|
"grad_norm": 0.05680246697220381,
|
|
"learning_rate": 9.062637112307591e-05,
|
|
"loss": 0.8424,
|
|
"step": 4955
|
|
},
|
|
{
|
|
"epoch": 0.6672496132373713,
|
|
"grad_norm": 0.05684976523742173,
|
|
"learning_rate": 9.030305548112056e-05,
|
|
"loss": 0.8337,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 0.6679222438958767,
|
|
"grad_norm": 0.05869191623510943,
|
|
"learning_rate": 8.998006904632027e-05,
|
|
"loss": 0.8393,
|
|
"step": 4965
|
|
},
|
|
{
|
|
"epoch": 0.6685948745543822,
|
|
"grad_norm": 0.05499638998233168,
|
|
"learning_rate": 8.965741359982895e-05,
|
|
"loss": 0.8856,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 0.6692675052128876,
|
|
"grad_norm": 0.05407722242744602,
|
|
"learning_rate": 8.933509092097516e-05,
|
|
"loss": 0.8438,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 0.669940135871393,
|
|
"grad_norm": 0.05632782452701334,
|
|
"learning_rate": 8.901310278725254e-05,
|
|
"loss": 0.8714,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 0.6706127665298984,
|
|
"grad_norm": 0.052171899433051214,
|
|
"learning_rate": 8.869145097430955e-05,
|
|
"loss": 0.8876,
|
|
"step": 4985
|
|
},
|
|
{
|
|
"epoch": 0.6712853971884039,
|
|
"grad_norm": 0.06663532562647967,
|
|
"learning_rate": 8.837013725594021e-05,
|
|
"loss": 0.8599,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 0.6719580278469093,
|
|
"grad_norm": 0.05861717662104597,
|
|
"learning_rate": 8.804916340407401e-05,
|
|
"loss": 0.8277,
|
|
"step": 4995
|
|
},
|
|
{
|
|
"epoch": 0.6726306585054147,
|
|
"grad_norm": 0.05879425667066216,
|
|
"learning_rate": 8.772853118876615e-05,
|
|
"loss": 0.8587,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.6733032891639201,
|
|
"grad_norm": 0.061444791492560505,
|
|
"learning_rate": 8.740824237818783e-05,
|
|
"loss": 0.8554,
|
|
"step": 5005
|
|
},
|
|
{
|
|
"epoch": 0.6739759198224256,
|
|
"grad_norm": 0.05819734558194491,
|
|
"learning_rate": 8.708829873861664e-05,
|
|
"loss": 0.8355,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 0.674648550480931,
|
|
"grad_norm": 0.0520509008119525,
|
|
"learning_rate": 8.676870203442635e-05,
|
|
"loss": 0.8569,
|
|
"step": 5015
|
|
},
|
|
{
|
|
"epoch": 0.6753211811394363,
|
|
"grad_norm": 0.05681589132682784,
|
|
"learning_rate": 8.64494540280779e-05,
|
|
"loss": 0.8725,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 0.6759938117979417,
|
|
"grad_norm": 0.056879680943254775,
|
|
"learning_rate": 8.613055648010899e-05,
|
|
"loss": 0.8493,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 0.6766664424564471,
|
|
"grad_norm": 0.059375704514106294,
|
|
"learning_rate": 8.581201114912477e-05,
|
|
"loss": 0.8605,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 0.6773390731149526,
|
|
"grad_norm": 0.05894030965563083,
|
|
"learning_rate": 8.549381979178815e-05,
|
|
"loss": 0.8568,
|
|
"step": 5035
|
|
},
|
|
{
|
|
"epoch": 0.678011703773458,
|
|
"grad_norm": 0.054221362443161204,
|
|
"learning_rate": 8.517598416280985e-05,
|
|
"loss": 0.8593,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 0.6786843344319634,
|
|
"grad_norm": 0.051825037887154625,
|
|
"learning_rate": 8.485850601493885e-05,
|
|
"loss": 0.8618,
|
|
"step": 5045
|
|
},
|
|
{
|
|
"epoch": 0.6793569650904688,
|
|
"grad_norm": 0.05777524128307275,
|
|
"learning_rate": 8.4541387098953e-05,
|
|
"loss": 0.8643,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.6800295957489743,
|
|
"grad_norm": 0.05914658196865087,
|
|
"learning_rate": 8.422462916364875e-05,
|
|
"loss": 0.8348,
|
|
"step": 5055
|
|
},
|
|
{
|
|
"epoch": 0.6807022264074797,
|
|
"grad_norm": 0.055461026646056615,
|
|
"learning_rate": 8.390823395583218e-05,
|
|
"loss": 0.8929,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 0.6813748570659851,
|
|
"grad_norm": 0.05492126944791927,
|
|
"learning_rate": 8.35922032203089e-05,
|
|
"loss": 0.8758,
|
|
"step": 5065
|
|
},
|
|
{
|
|
"epoch": 0.6820474877244905,
|
|
"grad_norm": 0.060505610514154255,
|
|
"learning_rate": 8.327653869987462e-05,
|
|
"loss": 0.8925,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 0.6827201183829958,
|
|
"grad_norm": 0.05811965799556169,
|
|
"learning_rate": 8.296124213530556e-05,
|
|
"loss": 0.8233,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 0.6833927490415013,
|
|
"grad_norm": 0.05678640199443679,
|
|
"learning_rate": 8.264631526534875e-05,
|
|
"loss": 0.8505,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 0.6840653797000067,
|
|
"grad_norm": 0.05374131818929565,
|
|
"learning_rate": 8.233175982671241e-05,
|
|
"loss": 0.87,
|
|
"step": 5085
|
|
},
|
|
{
|
|
"epoch": 0.6847380103585121,
|
|
"grad_norm": 0.06337938406070896,
|
|
"learning_rate": 8.201757755405663e-05,
|
|
"loss": 0.8285,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 0.6854106410170175,
|
|
"grad_norm": 0.05307559383586948,
|
|
"learning_rate": 8.170377017998347e-05,
|
|
"loss": 0.8377,
|
|
"step": 5095
|
|
},
|
|
{
|
|
"epoch": 0.686083271675523,
|
|
"grad_norm": 0.06681792778316958,
|
|
"learning_rate": 8.139033943502764e-05,
|
|
"loss": 0.9141,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.6867559023340284,
|
|
"grad_norm": 0.060052245002273755,
|
|
"learning_rate": 8.107728704764678e-05,
|
|
"loss": 0.8644,
|
|
"step": 5105
|
|
},
|
|
{
|
|
"epoch": 0.6874285329925338,
|
|
"grad_norm": 0.057388420287701074,
|
|
"learning_rate": 8.076461474421212e-05,
|
|
"loss": 0.9123,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 0.6881011636510392,
|
|
"grad_norm": 0.05383822565809219,
|
|
"learning_rate": 8.045232424899889e-05,
|
|
"loss": 0.8784,
|
|
"step": 5115
|
|
},
|
|
{
|
|
"epoch": 0.6887737943095447,
|
|
"grad_norm": 0.05473684772662266,
|
|
"learning_rate": 8.014041728417671e-05,
|
|
"loss": 0.9026,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 0.6894464249680501,
|
|
"grad_norm": 0.05301092470468256,
|
|
"learning_rate": 7.982889556980006e-05,
|
|
"loss": 0.8791,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 0.6901190556265555,
|
|
"grad_norm": 0.056506205793787935,
|
|
"learning_rate": 7.951776082379924e-05,
|
|
"loss": 0.8834,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 0.6907916862850608,
|
|
"grad_norm": 0.059817759285931964,
|
|
"learning_rate": 7.920701476197025e-05,
|
|
"loss": 0.862,
|
|
"step": 5135
|
|
},
|
|
{
|
|
"epoch": 0.6914643169435662,
|
|
"grad_norm": 0.055806318389620795,
|
|
"learning_rate": 7.889665909796574e-05,
|
|
"loss": 0.8829,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 0.6921369476020717,
|
|
"grad_norm": 0.0625441497627379,
|
|
"learning_rate": 7.858669554328537e-05,
|
|
"loss": 0.8643,
|
|
"step": 5145
|
|
},
|
|
{
|
|
"epoch": 0.6928095782605771,
|
|
"grad_norm": 0.05220170307922808,
|
|
"learning_rate": 7.827712580726669e-05,
|
|
"loss": 0.8349,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.6934822089190825,
|
|
"grad_norm": 0.05474943327503433,
|
|
"learning_rate": 7.796795159707525e-05,
|
|
"loss": 0.863,
|
|
"step": 5155
|
|
},
|
|
{
|
|
"epoch": 0.694154839577588,
|
|
"grad_norm": 0.05222518858286006,
|
|
"learning_rate": 7.765917461769553e-05,
|
|
"loss": 0.8209,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 0.6948274702360934,
|
|
"grad_norm": 0.052445023606749054,
|
|
"learning_rate": 7.735079657192132e-05,
|
|
"loss": 0.8674,
|
|
"step": 5165
|
|
},
|
|
{
|
|
"epoch": 0.6955001008945988,
|
|
"grad_norm": 0.05143832891654637,
|
|
"learning_rate": 7.704281916034664e-05,
|
|
"loss": 0.8327,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 0.6961727315531042,
|
|
"grad_norm": 0.05618205697005048,
|
|
"learning_rate": 7.673524408135593e-05,
|
|
"loss": 0.8802,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 0.6968453622116096,
|
|
"grad_norm": 0.05484727707900561,
|
|
"learning_rate": 7.642807303111504e-05,
|
|
"loss": 0.8014,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 0.6975179928701151,
|
|
"grad_norm": 0.05398969664658753,
|
|
"learning_rate": 7.612130770356167e-05,
|
|
"loss": 0.8828,
|
|
"step": 5185
|
|
},
|
|
{
|
|
"epoch": 0.6981906235286204,
|
|
"grad_norm": 0.05740973161611498,
|
|
"learning_rate": 7.581494979039625e-05,
|
|
"loss": 0.8603,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 0.6988632541871258,
|
|
"grad_norm": 0.060089661288700226,
|
|
"learning_rate": 7.550900098107229e-05,
|
|
"loss": 0.9219,
|
|
"step": 5195
|
|
},
|
|
{
|
|
"epoch": 0.6995358848456312,
|
|
"grad_norm": 0.05315516282253319,
|
|
"learning_rate": 7.520346296278729e-05,
|
|
"loss": 0.8561,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.7002085155041367,
|
|
"grad_norm": 0.06405746424013456,
|
|
"learning_rate": 7.48983374204735e-05,
|
|
"loss": 0.8765,
|
|
"step": 5205
|
|
},
|
|
{
|
|
"epoch": 0.7008811461626421,
|
|
"grad_norm": 0.06621206104407008,
|
|
"learning_rate": 7.459362603678839e-05,
|
|
"loss": 0.8813,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 0.7015537768211475,
|
|
"grad_norm": 0.05691919117892703,
|
|
"learning_rate": 7.428933049210552e-05,
|
|
"loss": 0.8651,
|
|
"step": 5215
|
|
},
|
|
{
|
|
"epoch": 0.7022264074796529,
|
|
"grad_norm": 0.05275469160566453,
|
|
"learning_rate": 7.398545246450524e-05,
|
|
"loss": 0.8916,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 0.7028990381381583,
|
|
"grad_norm": 0.05944483436680081,
|
|
"learning_rate": 7.368199362976542e-05,
|
|
"loss": 0.8627,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 0.7035716687966638,
|
|
"grad_norm": 0.05490184754535832,
|
|
"learning_rate": 7.337895566135241e-05,
|
|
"loss": 0.831,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 0.7042442994551692,
|
|
"grad_norm": 0.053453524101461904,
|
|
"learning_rate": 7.307634023041139e-05,
|
|
"loss": 0.8457,
|
|
"step": 5235
|
|
},
|
|
{
|
|
"epoch": 0.7049169301136746,
|
|
"grad_norm": 0.056089856365205054,
|
|
"learning_rate": 7.277414900575749e-05,
|
|
"loss": 0.8812,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 0.70558956077218,
|
|
"grad_norm": 0.054477356366257466,
|
|
"learning_rate": 7.247238365386659e-05,
|
|
"loss": 0.8843,
|
|
"step": 5245
|
|
},
|
|
{
|
|
"epoch": 0.7062621914306854,
|
|
"grad_norm": 0.05635284790716959,
|
|
"learning_rate": 7.217104583886593e-05,
|
|
"loss": 0.8279,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.7069348220891908,
|
|
"grad_norm": 0.05640773573113742,
|
|
"learning_rate": 7.187013722252498e-05,
|
|
"loss": 0.837,
|
|
"step": 5255
|
|
},
|
|
{
|
|
"epoch": 0.7076074527476962,
|
|
"grad_norm": 0.05680088370065849,
|
|
"learning_rate": 7.15696594642466e-05,
|
|
"loss": 0.8603,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 0.7082800834062016,
|
|
"grad_norm": 0.057411411632092874,
|
|
"learning_rate": 7.126961422105722e-05,
|
|
"loss": 0.8384,
|
|
"step": 5265
|
|
},
|
|
{
|
|
"epoch": 0.7089527140647071,
|
|
"grad_norm": 0.05306386883430388,
|
|
"learning_rate": 7.097000314759847e-05,
|
|
"loss": 0.8616,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 0.7096253447232125,
|
|
"grad_norm": 0.0545156816122668,
|
|
"learning_rate": 7.067082789611752e-05,
|
|
"loss": 0.9009,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 0.7102979753817179,
|
|
"grad_norm": 0.05614266230435318,
|
|
"learning_rate": 7.037209011645806e-05,
|
|
"loss": 0.8161,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 0.7109706060402233,
|
|
"grad_norm": 0.05836083731921901,
|
|
"learning_rate": 7.007379145605155e-05,
|
|
"loss": 0.8508,
|
|
"step": 5285
|
|
},
|
|
{
|
|
"epoch": 0.7116432366987288,
|
|
"grad_norm": 0.05889834886106434,
|
|
"learning_rate": 6.977593355990762e-05,
|
|
"loss": 0.9123,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 0.7123158673572342,
|
|
"grad_norm": 0.0599010873399523,
|
|
"learning_rate": 6.947851807060526e-05,
|
|
"loss": 0.8549,
|
|
"step": 5295
|
|
},
|
|
{
|
|
"epoch": 0.7129884980157396,
|
|
"grad_norm": 0.05703117987548505,
|
|
"learning_rate": 6.918154662828397e-05,
|
|
"loss": 0.8999,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.713661128674245,
|
|
"grad_norm": 0.05747829883745667,
|
|
"learning_rate": 6.888502087063412e-05,
|
|
"loss": 0.87,
|
|
"step": 5305
|
|
},
|
|
{
|
|
"epoch": 0.7143337593327503,
|
|
"grad_norm": 0.053424673994670414,
|
|
"learning_rate": 6.858894243288863e-05,
|
|
"loss": 0.8663,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 0.7150063899912558,
|
|
"grad_norm": 0.06336136647095073,
|
|
"learning_rate": 6.829331294781356e-05,
|
|
"loss": 0.8631,
|
|
"step": 5315
|
|
},
|
|
{
|
|
"epoch": 0.7156790206497612,
|
|
"grad_norm": 0.058971758715485705,
|
|
"learning_rate": 6.799813404569887e-05,
|
|
"loss": 0.8914,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 0.7163516513082666,
|
|
"grad_norm": 0.05792271524665821,
|
|
"learning_rate": 6.770340735435007e-05,
|
|
"loss": 0.8691,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 0.717024281966772,
|
|
"grad_norm": 0.054830731560030654,
|
|
"learning_rate": 6.740913449907874e-05,
|
|
"loss": 0.8648,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 0.7176969126252775,
|
|
"grad_norm": 0.060764115904758366,
|
|
"learning_rate": 6.711531710269361e-05,
|
|
"loss": 0.8335,
|
|
"step": 5335
|
|
},
|
|
{
|
|
"epoch": 0.7183695432837829,
|
|
"grad_norm": 0.05910840524389887,
|
|
"learning_rate": 6.682195678549198e-05,
|
|
"loss": 0.839,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 0.7190421739422883,
|
|
"grad_norm": 0.05091175020470114,
|
|
"learning_rate": 6.652905516525032e-05,
|
|
"loss": 0.8276,
|
|
"step": 5345
|
|
},
|
|
{
|
|
"epoch": 0.7197148046007937,
|
|
"grad_norm": 0.06631137750724338,
|
|
"learning_rate": 6.623661385721553e-05,
|
|
"loss": 0.8601,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.7203874352592992,
|
|
"grad_norm": 0.056251177055676505,
|
|
"learning_rate": 6.594463447409631e-05,
|
|
"loss": 0.8637,
|
|
"step": 5355
|
|
},
|
|
{
|
|
"epoch": 0.7210600659178046,
|
|
"grad_norm": 0.060321727455837935,
|
|
"learning_rate": 6.56531186260536e-05,
|
|
"loss": 0.8488,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 0.7217326965763099,
|
|
"grad_norm": 0.06356804542613306,
|
|
"learning_rate": 6.536206792069246e-05,
|
|
"loss": 0.8258,
|
|
"step": 5365
|
|
},
|
|
{
|
|
"epoch": 0.7224053272348153,
|
|
"grad_norm": 0.05927222864540253,
|
|
"learning_rate": 6.507148396305285e-05,
|
|
"loss": 0.8802,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 0.7230779578933207,
|
|
"grad_norm": 0.060242024985454945,
|
|
"learning_rate": 6.478136835560043e-05,
|
|
"loss": 0.8515,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 0.7237505885518262,
|
|
"grad_norm": 0.0572408416108945,
|
|
"learning_rate": 6.44917226982185e-05,
|
|
"loss": 0.8853,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 0.7244232192103316,
|
|
"grad_norm": 0.055779812319483826,
|
|
"learning_rate": 6.420254858819853e-05,
|
|
"loss": 0.8551,
|
|
"step": 5385
|
|
},
|
|
{
|
|
"epoch": 0.725095849868837,
|
|
"grad_norm": 0.05372587428464241,
|
|
"learning_rate": 6.391384762023155e-05,
|
|
"loss": 0.8959,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 0.7257684805273424,
|
|
"grad_norm": 0.056265169274321215,
|
|
"learning_rate": 6.362562138639957e-05,
|
|
"loss": 0.8711,
|
|
"step": 5395
|
|
},
|
|
{
|
|
"epoch": 0.7264411111858479,
|
|
"grad_norm": 0.0643302407697523,
|
|
"learning_rate": 6.333787147616641e-05,
|
|
"loss": 0.851,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.7271137418443533,
|
|
"grad_norm": 0.06009641755807671,
|
|
"learning_rate": 6.305059947636921e-05,
|
|
"loss": 0.9158,
|
|
"step": 5405
|
|
},
|
|
{
|
|
"epoch": 0.7277863725028587,
|
|
"grad_norm": 0.057868353247351734,
|
|
"learning_rate": 6.276380697120974e-05,
|
|
"loss": 0.8374,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 0.7284590031613641,
|
|
"grad_norm": 0.051991130536517494,
|
|
"learning_rate": 6.24774955422452e-05,
|
|
"loss": 0.8388,
|
|
"step": 5415
|
|
},
|
|
{
|
|
"epoch": 0.7291316338198696,
|
|
"grad_norm": 0.05501086613762105,
|
|
"learning_rate": 6.21916667683802e-05,
|
|
"loss": 0.8288,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 0.7298042644783749,
|
|
"grad_norm": 0.05651661862407232,
|
|
"learning_rate": 6.190632222585747e-05,
|
|
"loss": 0.8497,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 0.7304768951368803,
|
|
"grad_norm": 0.05910681682465812,
|
|
"learning_rate": 6.162146348824935e-05,
|
|
"loss": 0.8451,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 0.7311495257953857,
|
|
"grad_norm": 0.05209037147565984,
|
|
"learning_rate": 6.133709212644934e-05,
|
|
"loss": 0.8186,
|
|
"step": 5435
|
|
},
|
|
{
|
|
"epoch": 0.7318221564538911,
|
|
"grad_norm": 0.061761656354733095,
|
|
"learning_rate": 6.105320970866307e-05,
|
|
"loss": 0.8711,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 0.7324947871123966,
|
|
"grad_norm": 0.053209654344044566,
|
|
"learning_rate": 6.076981780039982e-05,
|
|
"loss": 0.8419,
|
|
"step": 5445
|
|
},
|
|
{
|
|
"epoch": 0.733167417770902,
|
|
"grad_norm": 0.06048025162698968,
|
|
"learning_rate": 6.048691796446396e-05,
|
|
"loss": 0.8751,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.7338400484294074,
|
|
"grad_norm": 0.05364619597271036,
|
|
"learning_rate": 6.0204511760946156e-05,
|
|
"loss": 0.8346,
|
|
"step": 5455
|
|
},
|
|
{
|
|
"epoch": 0.7345126790879128,
|
|
"grad_norm": 0.05362076203885372,
|
|
"learning_rate": 5.992260074721506e-05,
|
|
"loss": 0.8472,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 0.7351853097464183,
|
|
"grad_norm": 0.05416794573637957,
|
|
"learning_rate": 5.964118647790836e-05,
|
|
"loss": 0.8705,
|
|
"step": 5465
|
|
},
|
|
{
|
|
"epoch": 0.7358579404049237,
|
|
"grad_norm": 0.057308449167418285,
|
|
"learning_rate": 5.936027050492436e-05,
|
|
"loss": 0.8262,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 0.7365305710634291,
|
|
"grad_norm": 0.05537632404183107,
|
|
"learning_rate": 5.907985437741361e-05,
|
|
"loss": 0.8502,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 0.7372032017219344,
|
|
"grad_norm": 0.054499705173430356,
|
|
"learning_rate": 5.879993964177006e-05,
|
|
"loss": 0.8962,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 0.7378758323804399,
|
|
"grad_norm": 0.058847549524604685,
|
|
"learning_rate": 5.8520527841622674e-05,
|
|
"loss": 0.8351,
|
|
"step": 5485
|
|
},
|
|
{
|
|
"epoch": 0.7385484630389453,
|
|
"grad_norm": 0.05696132922969094,
|
|
"learning_rate": 5.824162051782689e-05,
|
|
"loss": 0.912,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 0.7392210936974507,
|
|
"grad_norm": 0.054340906074491255,
|
|
"learning_rate": 5.7963219208456244e-05,
|
|
"loss": 0.8386,
|
|
"step": 5495
|
|
},
|
|
{
|
|
"epoch": 0.7398937243559561,
|
|
"grad_norm": 0.05690271575321237,
|
|
"learning_rate": 5.7685325448793715e-05,
|
|
"loss": 0.8477,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.7405663550144616,
|
|
"grad_norm": 0.05291001329815415,
|
|
"learning_rate": 5.7407940771323305e-05,
|
|
"loss": 0.838,
|
|
"step": 5505
|
|
},
|
|
{
|
|
"epoch": 0.741238985672967,
|
|
"grad_norm": 0.05659296345689606,
|
|
"learning_rate": 5.71310667057216e-05,
|
|
"loss": 0.8696,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 0.7419116163314724,
|
|
"grad_norm": 0.05997774398356355,
|
|
"learning_rate": 5.685470477884947e-05,
|
|
"loss": 0.8528,
|
|
"step": 5515
|
|
},
|
|
{
|
|
"epoch": 0.7425842469899778,
|
|
"grad_norm": 0.058931635556607694,
|
|
"learning_rate": 5.6578856514743393e-05,
|
|
"loss": 0.8782,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 0.7432568776484832,
|
|
"grad_norm": 0.06128548736268873,
|
|
"learning_rate": 5.6303523434607235e-05,
|
|
"loss": 0.8376,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 0.7439295083069887,
|
|
"grad_norm": 0.05660298846428016,
|
|
"learning_rate": 5.602870705680373e-05,
|
|
"loss": 0.849,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 0.7446021389654941,
|
|
"grad_norm": 0.059111841539666116,
|
|
"learning_rate": 5.575440889684638e-05,
|
|
"loss": 0.8678,
|
|
"step": 5535
|
|
},
|
|
{
|
|
"epoch": 0.7452747696239994,
|
|
"grad_norm": 0.07854228394149744,
|
|
"learning_rate": 5.5480630467390694e-05,
|
|
"loss": 0.8451,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 0.7459474002825048,
|
|
"grad_norm": 0.06125700566348329,
|
|
"learning_rate": 5.520737327822609e-05,
|
|
"loss": 0.8112,
|
|
"step": 5545
|
|
},
|
|
{
|
|
"epoch": 0.7466200309410103,
|
|
"grad_norm": 0.06171779829091447,
|
|
"learning_rate": 5.4934638836267705e-05,
|
|
"loss": 0.8246,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.7472926615995157,
|
|
"grad_norm": 0.05866009770905458,
|
|
"learning_rate": 5.4662428645547726e-05,
|
|
"loss": 0.8519,
|
|
"step": 5555
|
|
},
|
|
{
|
|
"epoch": 0.7479652922580211,
|
|
"grad_norm": 0.05666771692923985,
|
|
"learning_rate": 5.439074420720734e-05,
|
|
"loss": 0.8493,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 0.7486379229165265,
|
|
"grad_norm": 0.05432519494767404,
|
|
"learning_rate": 5.4119587019488426e-05,
|
|
"loss": 0.8884,
|
|
"step": 5565
|
|
},
|
|
{
|
|
"epoch": 0.749310553575032,
|
|
"grad_norm": 0.054351604297057335,
|
|
"learning_rate": 5.384895857772516e-05,
|
|
"loss": 0.8333,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 0.7499831842335374,
|
|
"grad_norm": 0.059333956268234095,
|
|
"learning_rate": 5.357886037433607e-05,
|
|
"loss": 0.8283,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 0.7506558148920428,
|
|
"grad_norm": 0.05336078672412873,
|
|
"learning_rate": 5.330929389881545e-05,
|
|
"loss": 0.8876,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 0.7513284455505482,
|
|
"grad_norm": 0.0537530672664337,
|
|
"learning_rate": 5.304026063772532e-05,
|
|
"loss": 0.8082,
|
|
"step": 5585
|
|
},
|
|
{
|
|
"epoch": 0.7520010762090537,
|
|
"grad_norm": 0.06417130967521156,
|
|
"learning_rate": 5.2771762074687324e-05,
|
|
"loss": 0.8607,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 0.7526737068675591,
|
|
"grad_norm": 0.0592025759553816,
|
|
"learning_rate": 5.250379969037433e-05,
|
|
"loss": 0.8574,
|
|
"step": 5595
|
|
},
|
|
{
|
|
"epoch": 0.7533463375260644,
|
|
"grad_norm": 0.0529486580965492,
|
|
"learning_rate": 5.2236374962502345e-05,
|
|
"loss": 0.8191,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.7540189681845698,
|
|
"grad_norm": 0.054655553837085674,
|
|
"learning_rate": 5.196948936582263e-05,
|
|
"loss": 0.8464,
|
|
"step": 5605
|
|
},
|
|
{
|
|
"epoch": 0.7546915988430752,
|
|
"grad_norm": 0.06556056478392819,
|
|
"learning_rate": 5.1703144372112934e-05,
|
|
"loss": 0.8156,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 0.7553642295015807,
|
|
"grad_norm": 0.061584993331036585,
|
|
"learning_rate": 5.143734145017016e-05,
|
|
"loss": 0.8543,
|
|
"step": 5615
|
|
},
|
|
{
|
|
"epoch": 0.7560368601600861,
|
|
"grad_norm": 0.055055753002702476,
|
|
"learning_rate": 5.1172082065801655e-05,
|
|
"loss": 0.8324,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 0.7567094908185915,
|
|
"grad_norm": 0.055015531712228206,
|
|
"learning_rate": 5.0907367681817405e-05,
|
|
"loss": 0.8223,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 0.7573821214770969,
|
|
"grad_norm": 0.05608683516437471,
|
|
"learning_rate": 5.064319975802199e-05,
|
|
"loss": 0.8603,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 0.7580547521356024,
|
|
"grad_norm": 0.055961769828895516,
|
|
"learning_rate": 5.0379579751206345e-05,
|
|
"loss": 0.8853,
|
|
"step": 5635
|
|
},
|
|
{
|
|
"epoch": 0.7587273827941078,
|
|
"grad_norm": 0.05225964844822088,
|
|
"learning_rate": 5.011650911513988e-05,
|
|
"loss": 0.9061,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 0.7594000134526132,
|
|
"grad_norm": 0.05236358496294752,
|
|
"learning_rate": 4.9853989300562524e-05,
|
|
"loss": 0.8287,
|
|
"step": 5645
|
|
},
|
|
{
|
|
"epoch": 0.7600726441111186,
|
|
"grad_norm": 0.048850458923294954,
|
|
"learning_rate": 4.9592021755176384e-05,
|
|
"loss": 0.813,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.760745274769624,
|
|
"grad_norm": 0.0553497239014274,
|
|
"learning_rate": 4.933060792363824e-05,
|
|
"loss": 0.8105,
|
|
"step": 5655
|
|
},
|
|
{
|
|
"epoch": 0.7614179054281294,
|
|
"grad_norm": 0.05655304760380461,
|
|
"learning_rate": 4.906974924755133e-05,
|
|
"loss": 0.8515,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 0.7620905360866348,
|
|
"grad_norm": 0.05777857275432363,
|
|
"learning_rate": 4.880944716545717e-05,
|
|
"loss": 0.8456,
|
|
"step": 5665
|
|
},
|
|
{
|
|
"epoch": 0.7627631667451402,
|
|
"grad_norm": 0.05635569506280685,
|
|
"learning_rate": 4.854970311282812e-05,
|
|
"loss": 0.8522,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 0.7634357974036456,
|
|
"grad_norm": 0.05834534565421247,
|
|
"learning_rate": 4.82905185220591e-05,
|
|
"loss": 0.8472,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 0.7641084280621511,
|
|
"grad_norm": 0.061697683149533326,
|
|
"learning_rate": 4.8031894822459736e-05,
|
|
"loss": 0.8145,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 0.7647810587206565,
|
|
"grad_norm": 0.05812977896957012,
|
|
"learning_rate": 4.777383344024672e-05,
|
|
"loss": 0.9117,
|
|
"step": 5685
|
|
},
|
|
{
|
|
"epoch": 0.7654536893791619,
|
|
"grad_norm": 0.06166103572138631,
|
|
"learning_rate": 4.751633579853561e-05,
|
|
"loss": 0.8913,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 0.7661263200376673,
|
|
"grad_norm": 0.06136708791864048,
|
|
"learning_rate": 4.7259403317333126e-05,
|
|
"loss": 0.8399,
|
|
"step": 5695
|
|
},
|
|
{
|
|
"epoch": 0.7667989506961728,
|
|
"grad_norm": 0.05596607951538446,
|
|
"learning_rate": 4.7003037413529574e-05,
|
|
"loss": 0.8296,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.7674715813546782,
|
|
"grad_norm": 0.05708602863383148,
|
|
"learning_rate": 4.674723950089038e-05,
|
|
"loss": 0.8412,
|
|
"step": 5705
|
|
},
|
|
{
|
|
"epoch": 0.7681442120131836,
|
|
"grad_norm": 0.05552110409211543,
|
|
"learning_rate": 4.649201099004904e-05,
|
|
"loss": 0.8032,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 0.7688168426716889,
|
|
"grad_norm": 0.059392941246110734,
|
|
"learning_rate": 4.6237353288498985e-05,
|
|
"loss": 0.8135,
|
|
"step": 5715
|
|
},
|
|
{
|
|
"epoch": 0.7694894733301944,
|
|
"grad_norm": 0.05703942924248708,
|
|
"learning_rate": 4.598326780058557e-05,
|
|
"loss": 0.8675,
|
|
"step": 5720
|
|
},
|
|
{
|
|
"epoch": 0.7701621039886998,
|
|
"grad_norm": 0.05451025292158218,
|
|
"learning_rate": 4.572975592749893e-05,
|
|
"loss": 0.8543,
|
|
"step": 5725
|
|
},
|
|
{
|
|
"epoch": 0.7708347346472052,
|
|
"grad_norm": 0.05389981922242321,
|
|
"learning_rate": 4.54768190672657e-05,
|
|
"loss": 0.8691,
|
|
"step": 5730
|
|
},
|
|
{
|
|
"epoch": 0.7715073653057106,
|
|
"grad_norm": 0.05844537688650812,
|
|
"learning_rate": 4.522445861474154e-05,
|
|
"loss": 0.8783,
|
|
"step": 5735
|
|
},
|
|
{
|
|
"epoch": 0.772179995964216,
|
|
"grad_norm": 0.05419960076549575,
|
|
"learning_rate": 4.4972675961603595e-05,
|
|
"loss": 0.8363,
|
|
"step": 5740
|
|
},
|
|
{
|
|
"epoch": 0.7728526266227215,
|
|
"grad_norm": 0.05644761781377002,
|
|
"learning_rate": 4.4721472496342495e-05,
|
|
"loss": 0.8797,
|
|
"step": 5745
|
|
},
|
|
{
|
|
"epoch": 0.7735252572812269,
|
|
"grad_norm": 0.055204582352275294,
|
|
"learning_rate": 4.447084960425484e-05,
|
|
"loss": 0.8793,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.7741978879397323,
|
|
"grad_norm": 0.05219581202676311,
|
|
"learning_rate": 4.422080866743582e-05,
|
|
"loss": 0.8228,
|
|
"step": 5755
|
|
},
|
|
{
|
|
"epoch": 0.7748705185982377,
|
|
"grad_norm": 0.05674250597271216,
|
|
"learning_rate": 4.3971351064770946e-05,
|
|
"loss": 0.887,
|
|
"step": 5760
|
|
},
|
|
{
|
|
"epoch": 0.7755431492567432,
|
|
"grad_norm": 0.056527448986813174,
|
|
"learning_rate": 4.372247817192922e-05,
|
|
"loss": 0.9162,
|
|
"step": 5765
|
|
},
|
|
{
|
|
"epoch": 0.7762157799152485,
|
|
"grad_norm": 0.05450427810949781,
|
|
"learning_rate": 4.347419136135504e-05,
|
|
"loss": 0.8248,
|
|
"step": 5770
|
|
},
|
|
{
|
|
"epoch": 0.7768884105737539,
|
|
"grad_norm": 0.054433127989288496,
|
|
"learning_rate": 4.322649200226067e-05,
|
|
"loss": 0.863,
|
|
"step": 5775
|
|
},
|
|
{
|
|
"epoch": 0.7775610412322593,
|
|
"grad_norm": 0.056552172499089434,
|
|
"learning_rate": 4.297938146061903e-05,
|
|
"loss": 0.8532,
|
|
"step": 5780
|
|
},
|
|
{
|
|
"epoch": 0.7782336718907648,
|
|
"grad_norm": 0.05481351515370127,
|
|
"learning_rate": 4.2732861099155695e-05,
|
|
"loss": 0.8644,
|
|
"step": 5785
|
|
},
|
|
{
|
|
"epoch": 0.7789063025492702,
|
|
"grad_norm": 0.0521662997394286,
|
|
"learning_rate": 4.248693227734166e-05,
|
|
"loss": 0.8873,
|
|
"step": 5790
|
|
},
|
|
{
|
|
"epoch": 0.7795789332077756,
|
|
"grad_norm": 0.05705457216343529,
|
|
"learning_rate": 4.2241596351385955e-05,
|
|
"loss": 0.8622,
|
|
"step": 5795
|
|
},
|
|
{
|
|
"epoch": 0.780251563866281,
|
|
"grad_norm": 0.052719950954204266,
|
|
"learning_rate": 4.1996854674227816e-05,
|
|
"loss": 0.8826,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.7809241945247865,
|
|
"grad_norm": 0.05724964717366726,
|
|
"learning_rate": 4.1752708595529444e-05,
|
|
"loss": 0.8377,
|
|
"step": 5805
|
|
},
|
|
{
|
|
"epoch": 0.7815968251832919,
|
|
"grad_norm": 0.057199861980904605,
|
|
"learning_rate": 4.15091594616686e-05,
|
|
"loss": 0.8544,
|
|
"step": 5810
|
|
},
|
|
{
|
|
"epoch": 0.7822694558417973,
|
|
"grad_norm": 0.06592489371919232,
|
|
"learning_rate": 4.1266208615730994e-05,
|
|
"loss": 0.8349,
|
|
"step": 5815
|
|
},
|
|
{
|
|
"epoch": 0.7829420865003027,
|
|
"grad_norm": 0.05890644319372499,
|
|
"learning_rate": 4.102385739750317e-05,
|
|
"loss": 0.7997,
|
|
"step": 5820
|
|
},
|
|
{
|
|
"epoch": 0.7836147171588081,
|
|
"grad_norm": 0.05862081496562069,
|
|
"learning_rate": 4.07821071434648e-05,
|
|
"loss": 0.8307,
|
|
"step": 5825
|
|
},
|
|
{
|
|
"epoch": 0.7842873478173135,
|
|
"grad_norm": 0.0547581105650986,
|
|
"learning_rate": 4.054095918678143e-05,
|
|
"loss": 0.8585,
|
|
"step": 5830
|
|
},
|
|
{
|
|
"epoch": 0.7849599784758189,
|
|
"grad_norm": 0.05812187289880372,
|
|
"learning_rate": 4.03004148572973e-05,
|
|
"loss": 0.7999,
|
|
"step": 5835
|
|
},
|
|
{
|
|
"epoch": 0.7856326091343243,
|
|
"grad_norm": 0.06039957695819741,
|
|
"learning_rate": 4.006047548152777e-05,
|
|
"loss": 0.8523,
|
|
"step": 5840
|
|
},
|
|
{
|
|
"epoch": 0.7863052397928297,
|
|
"grad_norm": 0.056273134561604085,
|
|
"learning_rate": 3.9821142382652066e-05,
|
|
"loss": 0.8957,
|
|
"step": 5845
|
|
},
|
|
{
|
|
"epoch": 0.7869778704513352,
|
|
"grad_norm": 0.059358267937848326,
|
|
"learning_rate": 3.9582416880506076e-05,
|
|
"loss": 0.8684,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.7876505011098406,
|
|
"grad_norm": 0.056033369088731466,
|
|
"learning_rate": 3.934430029157494e-05,
|
|
"loss": 0.8324,
|
|
"step": 5855
|
|
},
|
|
{
|
|
"epoch": 0.788323131768346,
|
|
"grad_norm": 0.05454574945735745,
|
|
"learning_rate": 3.910679392898601e-05,
|
|
"loss": 0.8172,
|
|
"step": 5860
|
|
},
|
|
{
|
|
"epoch": 0.7889957624268514,
|
|
"grad_norm": 0.05520965397535946,
|
|
"learning_rate": 3.886989910250131e-05,
|
|
"loss": 0.8256,
|
|
"step": 5865
|
|
},
|
|
{
|
|
"epoch": 0.7896683930853569,
|
|
"grad_norm": 0.05354408451523844,
|
|
"learning_rate": 3.863361711851047e-05,
|
|
"loss": 0.8403,
|
|
"step": 5870
|
|
},
|
|
{
|
|
"epoch": 0.7903410237438623,
|
|
"grad_norm": 0.06049601814389344,
|
|
"learning_rate": 3.839794928002363e-05,
|
|
"loss": 0.8738,
|
|
"step": 5875
|
|
},
|
|
{
|
|
"epoch": 0.7910136544023677,
|
|
"grad_norm": 0.054589926246424225,
|
|
"learning_rate": 3.8162896886664056e-05,
|
|
"loss": 0.862,
|
|
"step": 5880
|
|
},
|
|
{
|
|
"epoch": 0.7916862850608731,
|
|
"grad_norm": 0.07172856741569986,
|
|
"learning_rate": 3.7928461234661053e-05,
|
|
"loss": 0.9012,
|
|
"step": 5885
|
|
},
|
|
{
|
|
"epoch": 0.7923589157193784,
|
|
"grad_norm": 0.05802045149036376,
|
|
"learning_rate": 3.769464361684277e-05,
|
|
"loss": 0.8757,
|
|
"step": 5890
|
|
},
|
|
{
|
|
"epoch": 0.7930315463778839,
|
|
"grad_norm": 0.05608922145460016,
|
|
"learning_rate": 3.746144532262931e-05,
|
|
"loss": 0.8566,
|
|
"step": 5895
|
|
},
|
|
{
|
|
"epoch": 0.7937041770363893,
|
|
"grad_norm": 0.05737451720854343,
|
|
"learning_rate": 3.7228867638025225e-05,
|
|
"loss": 0.8482,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.7943768076948947,
|
|
"grad_norm": 0.06243507484604905,
|
|
"learning_rate": 3.699691184561271e-05,
|
|
"loss": 0.8555,
|
|
"step": 5905
|
|
},
|
|
{
|
|
"epoch": 0.7950494383534001,
|
|
"grad_norm": 0.05625355913625404,
|
|
"learning_rate": 3.6765579224544404e-05,
|
|
"loss": 0.8327,
|
|
"step": 5910
|
|
},
|
|
{
|
|
"epoch": 0.7957220690119056,
|
|
"grad_norm": 0.05399521331082984,
|
|
"learning_rate": 3.6534871050536515e-05,
|
|
"loss": 0.8364,
|
|
"step": 5915
|
|
},
|
|
{
|
|
"epoch": 0.796394699670411,
|
|
"grad_norm": 0.0595413280030016,
|
|
"learning_rate": 3.63047885958615e-05,
|
|
"loss": 0.8524,
|
|
"step": 5920
|
|
},
|
|
{
|
|
"epoch": 0.7970673303289164,
|
|
"grad_norm": 0.060737734887294295,
|
|
"learning_rate": 3.607533312934127e-05,
|
|
"loss": 0.8332,
|
|
"step": 5925
|
|
},
|
|
{
|
|
"epoch": 0.7977399609874218,
|
|
"grad_norm": 0.058576781066161154,
|
|
"learning_rate": 3.584650591634006e-05,
|
|
"loss": 0.8713,
|
|
"step": 5930
|
|
},
|
|
{
|
|
"epoch": 0.7984125916459273,
|
|
"grad_norm": 0.057089755190360525,
|
|
"learning_rate": 3.561830821875764e-05,
|
|
"loss": 0.8647,
|
|
"step": 5935
|
|
},
|
|
{
|
|
"epoch": 0.7990852223044327,
|
|
"grad_norm": 0.054430708296939447,
|
|
"learning_rate": 3.5390741295022096e-05,
|
|
"loss": 0.8696,
|
|
"step": 5940
|
|
},
|
|
{
|
|
"epoch": 0.799757852962938,
|
|
"grad_norm": 0.055218219076360806,
|
|
"learning_rate": 3.516380640008306e-05,
|
|
"loss": 0.8831,
|
|
"step": 5945
|
|
},
|
|
{
|
|
"epoch": 0.8004304836214434,
|
|
"grad_norm": 0.05691195406760957,
|
|
"learning_rate": 3.4937504785404836e-05,
|
|
"loss": 0.7638,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.8011031142799488,
|
|
"grad_norm": 0.057263267865896667,
|
|
"learning_rate": 3.471183769895931e-05,
|
|
"loss": 0.8816,
|
|
"step": 5955
|
|
},
|
|
{
|
|
"epoch": 0.8017757449384543,
|
|
"grad_norm": 0.06494993390956837,
|
|
"learning_rate": 3.448680638521922e-05,
|
|
"loss": 0.8197,
|
|
"step": 5960
|
|
},
|
|
{
|
|
"epoch": 0.8024483755969597,
|
|
"grad_norm": 0.05553648524493593,
|
|
"learning_rate": 3.426241208515122e-05,
|
|
"loss": 0.8664,
|
|
"step": 5965
|
|
},
|
|
{
|
|
"epoch": 0.8031210062554651,
|
|
"grad_norm": 0.05889635161842296,
|
|
"learning_rate": 3.4038656036209055e-05,
|
|
"loss": 0.8223,
|
|
"step": 5970
|
|
},
|
|
{
|
|
"epoch": 0.8037936369139705,
|
|
"grad_norm": 0.05031588773521702,
|
|
"learning_rate": 3.3815539472326864e-05,
|
|
"loss": 0.823,
|
|
"step": 5975
|
|
},
|
|
{
|
|
"epoch": 0.804466267572476,
|
|
"grad_norm": 0.05726274410373388,
|
|
"learning_rate": 3.35930636239121e-05,
|
|
"loss": 0.8331,
|
|
"step": 5980
|
|
},
|
|
{
|
|
"epoch": 0.8051388982309814,
|
|
"grad_norm": 0.05569140298750744,
|
|
"learning_rate": 3.3371229717838924e-05,
|
|
"loss": 0.8566,
|
|
"step": 5985
|
|
},
|
|
{
|
|
"epoch": 0.8058115288894868,
|
|
"grad_norm": 0.05460163820200784,
|
|
"learning_rate": 3.315003897744157e-05,
|
|
"loss": 0.8306,
|
|
"step": 5990
|
|
},
|
|
{
|
|
"epoch": 0.8064841595479922,
|
|
"grad_norm": 0.05846879661123691,
|
|
"learning_rate": 3.292949262250725e-05,
|
|
"loss": 0.7746,
|
|
"step": 5995
|
|
},
|
|
{
|
|
"epoch": 0.8071567902064977,
|
|
"grad_norm": 0.05494495492901229,
|
|
"learning_rate": 3.270959186926966e-05,
|
|
"loss": 0.7972,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.807829420865003,
|
|
"grad_norm": 0.05998506596591228,
|
|
"learning_rate": 3.249033793040244e-05,
|
|
"loss": 0.8058,
|
|
"step": 6005
|
|
},
|
|
{
|
|
"epoch": 0.8085020515235084,
|
|
"grad_norm": 0.06763589968444475,
|
|
"learning_rate": 3.227173201501195e-05,
|
|
"loss": 0.8724,
|
|
"step": 6010
|
|
},
|
|
{
|
|
"epoch": 0.8091746821820138,
|
|
"grad_norm": 0.05529737312453018,
|
|
"learning_rate": 3.20537753286312e-05,
|
|
"loss": 0.8504,
|
|
"step": 6015
|
|
},
|
|
{
|
|
"epoch": 0.8098473128405193,
|
|
"grad_norm": 0.058924905982349764,
|
|
"learning_rate": 3.183646907321282e-05,
|
|
"loss": 0.8331,
|
|
"step": 6020
|
|
},
|
|
{
|
|
"epoch": 0.8105199434990247,
|
|
"grad_norm": 0.054076196022565776,
|
|
"learning_rate": 3.161981444712251e-05,
|
|
"loss": 0.8423,
|
|
"step": 6025
|
|
},
|
|
{
|
|
"epoch": 0.8111925741575301,
|
|
"grad_norm": 0.055090039911914114,
|
|
"learning_rate": 3.140381264513263e-05,
|
|
"loss": 0.8456,
|
|
"step": 6030
|
|
},
|
|
{
|
|
"epoch": 0.8118652048160355,
|
|
"grad_norm": 0.05194250967465533,
|
|
"learning_rate": 3.118846485841528e-05,
|
|
"loss": 0.8186,
|
|
"step": 6035
|
|
},
|
|
{
|
|
"epoch": 0.812537835474541,
|
|
"grad_norm": 0.055299499667346826,
|
|
"learning_rate": 3.097377227453592e-05,
|
|
"loss": 0.8192,
|
|
"step": 6040
|
|
},
|
|
{
|
|
"epoch": 0.8132104661330464,
|
|
"grad_norm": 0.05702324547815417,
|
|
"learning_rate": 3.075973607744703e-05,
|
|
"loss": 0.7962,
|
|
"step": 6045
|
|
},
|
|
{
|
|
"epoch": 0.8138830967915518,
|
|
"grad_norm": 0.053357064367373114,
|
|
"learning_rate": 3.054635744748095e-05,
|
|
"loss": 0.8157,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.8145557274500572,
|
|
"grad_norm": 0.0540305307848929,
|
|
"learning_rate": 3.0333637561344094e-05,
|
|
"loss": 0.8531,
|
|
"step": 6055
|
|
},
|
|
{
|
|
"epoch": 0.8152283581085625,
|
|
"grad_norm": 0.06722887878787541,
|
|
"learning_rate": 3.0121577592110142e-05,
|
|
"loss": 0.8157,
|
|
"step": 6060
|
|
},
|
|
{
|
|
"epoch": 0.815900988767068,
|
|
"grad_norm": 0.05613468827920807,
|
|
"learning_rate": 2.9910178709213294e-05,
|
|
"loss": 0.8236,
|
|
"step": 6065
|
|
},
|
|
{
|
|
"epoch": 0.8165736194255734,
|
|
"grad_norm": 0.05757688307873639,
|
|
"learning_rate": 2.9699442078442404e-05,
|
|
"loss": 0.8626,
|
|
"step": 6070
|
|
},
|
|
{
|
|
"epoch": 0.8172462500840788,
|
|
"grad_norm": 0.05190644378889452,
|
|
"learning_rate": 2.948936886193407e-05,
|
|
"loss": 0.8699,
|
|
"step": 6075
|
|
},
|
|
{
|
|
"epoch": 0.8179188807425842,
|
|
"grad_norm": 0.06058364911518402,
|
|
"learning_rate": 2.927996021816641e-05,
|
|
"loss": 0.8555,
|
|
"step": 6080
|
|
},
|
|
{
|
|
"epoch": 0.8185915114010897,
|
|
"grad_norm": 0.05267663660873433,
|
|
"learning_rate": 2.9071217301952748e-05,
|
|
"loss": 0.8515,
|
|
"step": 6085
|
|
},
|
|
{
|
|
"epoch": 0.8192641420595951,
|
|
"grad_norm": 0.05663551603220171,
|
|
"learning_rate": 2.8863141264435118e-05,
|
|
"loss": 0.8551,
|
|
"step": 6090
|
|
},
|
|
{
|
|
"epoch": 0.8199367727181005,
|
|
"grad_norm": 0.06164283048494597,
|
|
"learning_rate": 2.865573325307786e-05,
|
|
"loss": 0.8508,
|
|
"step": 6095
|
|
},
|
|
{
|
|
"epoch": 0.8206094033766059,
|
|
"grad_norm": 0.05686398651550054,
|
|
"learning_rate": 2.8448994411661646e-05,
|
|
"loss": 0.8058,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.8212820340351114,
|
|
"grad_norm": 0.05699264568964854,
|
|
"learning_rate": 2.8242925880276546e-05,
|
|
"loss": 0.8845,
|
|
"step": 6105
|
|
},
|
|
{
|
|
"epoch": 0.8219546646936168,
|
|
"grad_norm": 0.047913505171422455,
|
|
"learning_rate": 2.803752879531647e-05,
|
|
"loss": 0.8271,
|
|
"step": 6110
|
|
},
|
|
{
|
|
"epoch": 0.8226272953521222,
|
|
"grad_norm": 0.057259062175757665,
|
|
"learning_rate": 2.7832804289472317e-05,
|
|
"loss": 0.8799,
|
|
"step": 6115
|
|
},
|
|
{
|
|
"epoch": 0.8232999260106275,
|
|
"grad_norm": 0.05428004232970161,
|
|
"learning_rate": 2.7628753491726018e-05,
|
|
"loss": 0.8545,
|
|
"step": 6120
|
|
},
|
|
{
|
|
"epoch": 0.8239725566691329,
|
|
"grad_norm": 0.05485478370375546,
|
|
"learning_rate": 2.7425377527344296e-05,
|
|
"loss": 0.8166,
|
|
"step": 6125
|
|
},
|
|
{
|
|
"epoch": 0.8246451873276384,
|
|
"grad_norm": 0.05280322376501419,
|
|
"learning_rate": 2.7222677517872366e-05,
|
|
"loss": 0.831,
|
|
"step": 6130
|
|
},
|
|
{
|
|
"epoch": 0.8253178179861438,
|
|
"grad_norm": 0.056970755995371276,
|
|
"learning_rate": 2.7020654581127737e-05,
|
|
"loss": 0.8742,
|
|
"step": 6135
|
|
},
|
|
{
|
|
"epoch": 0.8259904486446492,
|
|
"grad_norm": 0.055352006316194045,
|
|
"learning_rate": 2.681930983119425e-05,
|
|
"loss": 0.8013,
|
|
"step": 6140
|
|
},
|
|
{
|
|
"epoch": 0.8266630793031546,
|
|
"grad_norm": 0.05083832213731214,
|
|
"learning_rate": 2.6618644378415676e-05,
|
|
"loss": 0.8601,
|
|
"step": 6145
|
|
},
|
|
{
|
|
"epoch": 0.8273357099616601,
|
|
"grad_norm": 0.05455909665181691,
|
|
"learning_rate": 2.6418659329389723e-05,
|
|
"loss": 0.8594,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.8280083406201655,
|
|
"grad_norm": 0.056936005310754684,
|
|
"learning_rate": 2.6219355786961925e-05,
|
|
"loss": 0.8448,
|
|
"step": 6155
|
|
},
|
|
{
|
|
"epoch": 0.8286809712786709,
|
|
"grad_norm": 0.05372150118403549,
|
|
"learning_rate": 2.6020734850219556e-05,
|
|
"loss": 0.8402,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 0.8293536019371763,
|
|
"grad_norm": 0.06450196137290737,
|
|
"learning_rate": 2.5822797614485606e-05,
|
|
"loss": 0.8961,
|
|
"step": 6165
|
|
},
|
|
{
|
|
"epoch": 0.8300262325956818,
|
|
"grad_norm": 0.05987145868487474,
|
|
"learning_rate": 2.5625545171312634e-05,
|
|
"loss": 0.8573,
|
|
"step": 6170
|
|
},
|
|
{
|
|
"epoch": 0.8306988632541872,
|
|
"grad_norm": 0.05609755266031922,
|
|
"learning_rate": 2.5428978608476834e-05,
|
|
"loss": 0.8479,
|
|
"step": 6175
|
|
},
|
|
{
|
|
"epoch": 0.8313714939126925,
|
|
"grad_norm": 0.05986487697713568,
|
|
"learning_rate": 2.523309900997206e-05,
|
|
"loss": 0.8871,
|
|
"step": 6180
|
|
},
|
|
{
|
|
"epoch": 0.8320441245711979,
|
|
"grad_norm": 0.05605815696390431,
|
|
"learning_rate": 2.5037907456003757e-05,
|
|
"loss": 0.838,
|
|
"step": 6185
|
|
},
|
|
{
|
|
"epoch": 0.8327167552297033,
|
|
"grad_norm": 0.05605263224140276,
|
|
"learning_rate": 2.4843405022983064e-05,
|
|
"loss": 0.8451,
|
|
"step": 6190
|
|
},
|
|
{
|
|
"epoch": 0.8333893858882088,
|
|
"grad_norm": 0.051626633695471935,
|
|
"learning_rate": 2.4649592783520828e-05,
|
|
"loss": 0.8205,
|
|
"step": 6195
|
|
},
|
|
{
|
|
"epoch": 0.8340620165467142,
|
|
"grad_norm": 0.06010243950226696,
|
|
"learning_rate": 2.445647180642184e-05,
|
|
"loss": 0.852,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.8347346472052196,
|
|
"grad_norm": 0.05600829537038441,
|
|
"learning_rate": 2.426404315667873e-05,
|
|
"loss": 0.8239,
|
|
"step": 6205
|
|
},
|
|
{
|
|
"epoch": 0.835407277863725,
|
|
"grad_norm": 0.05559431056965923,
|
|
"learning_rate": 2.40723078954662e-05,
|
|
"loss": 0.8413,
|
|
"step": 6210
|
|
},
|
|
{
|
|
"epoch": 0.8360799085222305,
|
|
"grad_norm": 0.052606571948545705,
|
|
"learning_rate": 2.3881267080135145e-05,
|
|
"loss": 0.8483,
|
|
"step": 6215
|
|
},
|
|
{
|
|
"epoch": 0.8367525391807359,
|
|
"grad_norm": 0.057151174334509676,
|
|
"learning_rate": 2.3690921764206967e-05,
|
|
"loss": 0.848,
|
|
"step": 6220
|
|
},
|
|
{
|
|
"epoch": 0.8374251698392413,
|
|
"grad_norm": 0.05485937311783543,
|
|
"learning_rate": 2.3501272997367494e-05,
|
|
"loss": 0.8115,
|
|
"step": 6225
|
|
},
|
|
{
|
|
"epoch": 0.8380978004977467,
|
|
"grad_norm": 0.057679697757406556,
|
|
"learning_rate": 2.3312321825461405e-05,
|
|
"loss": 0.8605,
|
|
"step": 6230
|
|
},
|
|
{
|
|
"epoch": 0.838770431156252,
|
|
"grad_norm": 0.06015687395002475,
|
|
"learning_rate": 2.312406929048634e-05,
|
|
"loss": 0.7852,
|
|
"step": 6235
|
|
},
|
|
{
|
|
"epoch": 0.8394430618147575,
|
|
"grad_norm": 0.05621629174433919,
|
|
"learning_rate": 2.2936516430587322e-05,
|
|
"loss": 0.8688,
|
|
"step": 6240
|
|
},
|
|
{
|
|
"epoch": 0.8401156924732629,
|
|
"grad_norm": 0.05515376481880061,
|
|
"learning_rate": 2.27496642800508e-05,
|
|
"loss": 0.8153,
|
|
"step": 6245
|
|
},
|
|
{
|
|
"epoch": 0.8407883231317683,
|
|
"grad_norm": 0.07267256435070797,
|
|
"learning_rate": 2.2563513869299127e-05,
|
|
"loss": 0.8182,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.8414609537902737,
|
|
"grad_norm": 0.0545501771483703,
|
|
"learning_rate": 2.2378066224884746e-05,
|
|
"loss": 0.8485,
|
|
"step": 6255
|
|
},
|
|
{
|
|
"epoch": 0.8421335844487792,
|
|
"grad_norm": 0.05542398846337032,
|
|
"learning_rate": 2.2193322369484713e-05,
|
|
"loss": 0.8103,
|
|
"step": 6260
|
|
},
|
|
{
|
|
"epoch": 0.8428062151072846,
|
|
"grad_norm": 0.05775699421005842,
|
|
"learning_rate": 2.2009283321894844e-05,
|
|
"loss": 0.8355,
|
|
"step": 6265
|
|
},
|
|
{
|
|
"epoch": 0.84347884576579,
|
|
"grad_norm": 0.061116771629393135,
|
|
"learning_rate": 2.1825950097024213e-05,
|
|
"loss": 0.8278,
|
|
"step": 6270
|
|
},
|
|
{
|
|
"epoch": 0.8441514764242954,
|
|
"grad_norm": 0.0540819660202808,
|
|
"learning_rate": 2.1643323705889526e-05,
|
|
"loss": 0.8094,
|
|
"step": 6275
|
|
},
|
|
{
|
|
"epoch": 0.8448241070828009,
|
|
"grad_norm": 0.05737711537280462,
|
|
"learning_rate": 2.146140515560965e-05,
|
|
"loss": 0.8307,
|
|
"step": 6280
|
|
},
|
|
{
|
|
"epoch": 0.8454967377413063,
|
|
"grad_norm": 0.05656829425350953,
|
|
"learning_rate": 2.1280195449399835e-05,
|
|
"loss": 0.8335,
|
|
"step": 6285
|
|
},
|
|
{
|
|
"epoch": 0.8461693683998117,
|
|
"grad_norm": 0.05677701600352013,
|
|
"learning_rate": 2.1099695586566345e-05,
|
|
"loss": 0.8519,
|
|
"step": 6290
|
|
},
|
|
{
|
|
"epoch": 0.846841999058317,
|
|
"grad_norm": 0.059042745055333475,
|
|
"learning_rate": 2.0919906562500986e-05,
|
|
"loss": 0.8341,
|
|
"step": 6295
|
|
},
|
|
{
|
|
"epoch": 0.8475146297168225,
|
|
"grad_norm": 0.05840627836007293,
|
|
"learning_rate": 2.0740829368675484e-05,
|
|
"loss": 0.8285,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.8481872603753279,
|
|
"grad_norm": 0.057683231041837084,
|
|
"learning_rate": 2.0562464992636057e-05,
|
|
"loss": 0.8644,
|
|
"step": 6305
|
|
},
|
|
{
|
|
"epoch": 0.8488598910338333,
|
|
"grad_norm": 0.06940718187443026,
|
|
"learning_rate": 2.0384814417998037e-05,
|
|
"loss": 0.8138,
|
|
"step": 6310
|
|
},
|
|
{
|
|
"epoch": 0.8495325216923387,
|
|
"grad_norm": 0.05532173250390559,
|
|
"learning_rate": 2.0207878624440356e-05,
|
|
"loss": 0.823,
|
|
"step": 6315
|
|
},
|
|
{
|
|
"epoch": 0.8502051523508442,
|
|
"grad_norm": 0.05656647116535209,
|
|
"learning_rate": 2.0031658587700256e-05,
|
|
"loss": 0.8324,
|
|
"step": 6320
|
|
},
|
|
{
|
|
"epoch": 0.8508777830093496,
|
|
"grad_norm": 0.05855962244394195,
|
|
"learning_rate": 1.985615527956777e-05,
|
|
"loss": 0.8304,
|
|
"step": 6325
|
|
},
|
|
{
|
|
"epoch": 0.851550413667855,
|
|
"grad_norm": 0.053095341157430594,
|
|
"learning_rate": 1.968136966788041e-05,
|
|
"loss": 0.8146,
|
|
"step": 6330
|
|
},
|
|
{
|
|
"epoch": 0.8522230443263604,
|
|
"grad_norm": 0.05744344754191223,
|
|
"learning_rate": 1.9507302716517948e-05,
|
|
"loss": 0.8335,
|
|
"step": 6335
|
|
},
|
|
{
|
|
"epoch": 0.8528956749848658,
|
|
"grad_norm": 0.0658666167608361,
|
|
"learning_rate": 1.933395538539695e-05,
|
|
"loss": 0.8632,
|
|
"step": 6340
|
|
},
|
|
{
|
|
"epoch": 0.8535683056433713,
|
|
"grad_norm": 0.054351818029514914,
|
|
"learning_rate": 1.9161328630465466e-05,
|
|
"loss": 0.8447,
|
|
"step": 6345
|
|
},
|
|
{
|
|
"epoch": 0.8542409363018766,
|
|
"grad_norm": 0.054387469255191084,
|
|
"learning_rate": 1.8989423403698018e-05,
|
|
"loss": 0.8252,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.854913566960382,
|
|
"grad_norm": 0.054990218347926514,
|
|
"learning_rate": 1.88182406530899e-05,
|
|
"loss": 0.7834,
|
|
"step": 6355
|
|
},
|
|
{
|
|
"epoch": 0.8555861976188874,
|
|
"grad_norm": 0.05337737093629034,
|
|
"learning_rate": 1.8647781322652433e-05,
|
|
"loss": 0.8214,
|
|
"step": 6360
|
|
},
|
|
{
|
|
"epoch": 0.8562588282773929,
|
|
"grad_norm": 0.055254501098107256,
|
|
"learning_rate": 1.8478046352407428e-05,
|
|
"loss": 0.8073,
|
|
"step": 6365
|
|
},
|
|
{
|
|
"epoch": 0.8569314589358983,
|
|
"grad_norm": 0.05450744542884276,
|
|
"learning_rate": 1.830903667838209e-05,
|
|
"loss": 0.8043,
|
|
"step": 6370
|
|
},
|
|
{
|
|
"epoch": 0.8576040895944037,
|
|
"grad_norm": 0.05911858369289859,
|
|
"learning_rate": 1.8140753232604005e-05,
|
|
"loss": 0.8255,
|
|
"step": 6375
|
|
},
|
|
{
|
|
"epoch": 0.8582767202529091,
|
|
"grad_norm": 0.059893021233977536,
|
|
"learning_rate": 1.7973196943095718e-05,
|
|
"loss": 0.8285,
|
|
"step": 6380
|
|
},
|
|
{
|
|
"epoch": 0.8589493509114146,
|
|
"grad_norm": 0.055273372638828,
|
|
"learning_rate": 1.7806368733869846e-05,
|
|
"loss": 0.8306,
|
|
"step": 6385
|
|
},
|
|
{
|
|
"epoch": 0.85962198156992,
|
|
"grad_norm": 0.05735158659763661,
|
|
"learning_rate": 1.7640269524923966e-05,
|
|
"loss": 0.8784,
|
|
"step": 6390
|
|
},
|
|
{
|
|
"epoch": 0.8602946122284254,
|
|
"grad_norm": 0.05800089219667076,
|
|
"learning_rate": 1.747490023223529e-05,
|
|
"loss": 0.8554,
|
|
"step": 6395
|
|
},
|
|
{
|
|
"epoch": 0.8609672428869308,
|
|
"grad_norm": 0.058282597154273114,
|
|
"learning_rate": 1.7310261767755996e-05,
|
|
"loss": 0.8028,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.8616398735454363,
|
|
"grad_norm": 0.055664911895973175,
|
|
"learning_rate": 1.7146355039407987e-05,
|
|
"loss": 0.8483,
|
|
"step": 6405
|
|
},
|
|
{
|
|
"epoch": 0.8623125042039416,
|
|
"grad_norm": 0.058430396050760815,
|
|
"learning_rate": 1.6983180951077733e-05,
|
|
"loss": 0.8632,
|
|
"step": 6410
|
|
},
|
|
{
|
|
"epoch": 0.862985134862447,
|
|
"grad_norm": 0.056113876010569066,
|
|
"learning_rate": 1.6820740402611683e-05,
|
|
"loss": 0.8384,
|
|
"step": 6415
|
|
},
|
|
{
|
|
"epoch": 0.8636577655209524,
|
|
"grad_norm": 0.05606910031852883,
|
|
"learning_rate": 1.665903428981093e-05,
|
|
"loss": 0.869,
|
|
"step": 6420
|
|
},
|
|
{
|
|
"epoch": 0.8643303961794578,
|
|
"grad_norm": 0.05660103519452574,
|
|
"learning_rate": 1.6498063504426436e-05,
|
|
"loss": 0.875,
|
|
"step": 6425
|
|
},
|
|
{
|
|
"epoch": 0.8650030268379633,
|
|
"grad_norm": 0.05564907615214855,
|
|
"learning_rate": 1.6337828934154214e-05,
|
|
"loss": 0.8796,
|
|
"step": 6430
|
|
},
|
|
{
|
|
"epoch": 0.8656756574964687,
|
|
"grad_norm": 0.056719253319279316,
|
|
"learning_rate": 1.6178331462630147e-05,
|
|
"loss": 0.8531,
|
|
"step": 6435
|
|
},
|
|
{
|
|
"epoch": 0.8663482881549741,
|
|
"grad_norm": 0.054424041588265346,
|
|
"learning_rate": 1.6019571969425365e-05,
|
|
"loss": 0.8062,
|
|
"step": 6440
|
|
},
|
|
{
|
|
"epoch": 0.8670209188134795,
|
|
"grad_norm": 0.05628980367650593,
|
|
"learning_rate": 1.586155133004141e-05,
|
|
"loss": 0.8569,
|
|
"step": 6445
|
|
},
|
|
{
|
|
"epoch": 0.867693549471985,
|
|
"grad_norm": 0.058910883963393904,
|
|
"learning_rate": 1.5704270415905062e-05,
|
|
"loss": 0.8056,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.8683661801304904,
|
|
"grad_norm": 0.05506769991703955,
|
|
"learning_rate": 1.5547730094364013e-05,
|
|
"loss": 0.7787,
|
|
"step": 6455
|
|
},
|
|
{
|
|
"epoch": 0.8690388107889958,
|
|
"grad_norm": 0.05284393894902228,
|
|
"learning_rate": 1.5391931228681825e-05,
|
|
"loss": 0.8619,
|
|
"step": 6460
|
|
},
|
|
{
|
|
"epoch": 0.8697114414475012,
|
|
"grad_norm": 0.057806312978608075,
|
|
"learning_rate": 1.5236874678033046e-05,
|
|
"loss": 0.7836,
|
|
"step": 6465
|
|
},
|
|
{
|
|
"epoch": 0.8703840721060065,
|
|
"grad_norm": 0.05827049124793768,
|
|
"learning_rate": 1.508256129749878e-05,
|
|
"loss": 0.8171,
|
|
"step": 6470
|
|
},
|
|
{
|
|
"epoch": 0.871056702764512,
|
|
"grad_norm": 0.056373755145065804,
|
|
"learning_rate": 1.4928991938061763e-05,
|
|
"loss": 0.816,
|
|
"step": 6475
|
|
},
|
|
{
|
|
"epoch": 0.8717293334230174,
|
|
"grad_norm": 0.05053172144818605,
|
|
"learning_rate": 1.4776167446601661e-05,
|
|
"loss": 0.7998,
|
|
"step": 6480
|
|
},
|
|
{
|
|
"epoch": 0.8724019640815228,
|
|
"grad_norm": 0.05913567736346084,
|
|
"learning_rate": 1.462408866589061e-05,
|
|
"loss": 0.8285,
|
|
"step": 6485
|
|
},
|
|
{
|
|
"epoch": 0.8730745947400282,
|
|
"grad_norm": 0.053859356228762825,
|
|
"learning_rate": 1.4472756434588285e-05,
|
|
"loss": 0.8464,
|
|
"step": 6490
|
|
},
|
|
{
|
|
"epoch": 0.8737472253985337,
|
|
"grad_norm": 0.05635235866353657,
|
|
"learning_rate": 1.432217158723742e-05,
|
|
"loss": 0.8142,
|
|
"step": 6495
|
|
},
|
|
{
|
|
"epoch": 0.8744198560570391,
|
|
"grad_norm": 0.05531847109049426,
|
|
"learning_rate": 1.4172334954259312e-05,
|
|
"loss": 0.8459,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.8750924867155445,
|
|
"grad_norm": 0.05609083991899534,
|
|
"learning_rate": 1.4023247361948947e-05,
|
|
"loss": 0.8028,
|
|
"step": 6505
|
|
},
|
|
{
|
|
"epoch": 0.8757651173740499,
|
|
"grad_norm": 0.052740932814354304,
|
|
"learning_rate": 1.3874909632470794e-05,
|
|
"loss": 0.8435,
|
|
"step": 6510
|
|
},
|
|
{
|
|
"epoch": 0.8764377480325554,
|
|
"grad_norm": 0.05414838892939342,
|
|
"learning_rate": 1.3727322583853978e-05,
|
|
"loss": 0.8329,
|
|
"step": 6515
|
|
},
|
|
{
|
|
"epoch": 0.8771103786910608,
|
|
"grad_norm": 0.05193151913599139,
|
|
"learning_rate": 1.358048702998794e-05,
|
|
"loss": 0.8514,
|
|
"step": 6520
|
|
},
|
|
{
|
|
"epoch": 0.8777830093495661,
|
|
"grad_norm": 0.054388643704421494,
|
|
"learning_rate": 1.343440378061792e-05,
|
|
"loss": 0.8261,
|
|
"step": 6525
|
|
},
|
|
{
|
|
"epoch": 0.8784556400080715,
|
|
"grad_norm": 0.05483708117307544,
|
|
"learning_rate": 1.3289073641340404e-05,
|
|
"loss": 0.8181,
|
|
"step": 6530
|
|
},
|
|
{
|
|
"epoch": 0.879128270666577,
|
|
"grad_norm": 0.05704308117845801,
|
|
"learning_rate": 1.3144497413598786e-05,
|
|
"loss": 0.8303,
|
|
"step": 6535
|
|
},
|
|
{
|
|
"epoch": 0.8798009013250824,
|
|
"grad_norm": 0.0641318422436249,
|
|
"learning_rate": 1.3000675894678958e-05,
|
|
"loss": 0.8557,
|
|
"step": 6540
|
|
},
|
|
{
|
|
"epoch": 0.8804735319835878,
|
|
"grad_norm": 0.06452744180453934,
|
|
"learning_rate": 1.2857609877704766e-05,
|
|
"loss": 0.8754,
|
|
"step": 6545
|
|
},
|
|
{
|
|
"epoch": 0.8811461626420932,
|
|
"grad_norm": 0.05486306222600904,
|
|
"learning_rate": 1.2715300151633795e-05,
|
|
"loss": 0.8418,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 0.8818187933005986,
|
|
"grad_norm": 0.05682623509055596,
|
|
"learning_rate": 1.2573747501252929e-05,
|
|
"loss": 0.789,
|
|
"step": 6555
|
|
},
|
|
{
|
|
"epoch": 0.8824914239591041,
|
|
"grad_norm": 0.05793181026249577,
|
|
"learning_rate": 1.2432952707174077e-05,
|
|
"loss": 0.8419,
|
|
"step": 6560
|
|
},
|
|
{
|
|
"epoch": 0.8831640546176095,
|
|
"grad_norm": 0.05489821037443936,
|
|
"learning_rate": 1.2292916545829857e-05,
|
|
"loss": 0.799,
|
|
"step": 6565
|
|
},
|
|
{
|
|
"epoch": 0.8838366852761149,
|
|
"grad_norm": 0.055009781968734066,
|
|
"learning_rate": 1.2153639789469266e-05,
|
|
"loss": 0.8353,
|
|
"step": 6570
|
|
},
|
|
{
|
|
"epoch": 0.8845093159346203,
|
|
"grad_norm": 0.055488426737696446,
|
|
"learning_rate": 1.2015123206153438e-05,
|
|
"loss": 0.7957,
|
|
"step": 6575
|
|
},
|
|
{
|
|
"epoch": 0.8851819465931258,
|
|
"grad_norm": 0.056704982958688414,
|
|
"learning_rate": 1.1877367559751505e-05,
|
|
"loss": 0.8774,
|
|
"step": 6580
|
|
},
|
|
{
|
|
"epoch": 0.8858545772516311,
|
|
"grad_norm": 0.056620034329013004,
|
|
"learning_rate": 1.1740373609936243e-05,
|
|
"loss": 0.7935,
|
|
"step": 6585
|
|
},
|
|
{
|
|
"epoch": 0.8865272079101365,
|
|
"grad_norm": 0.05453856210095586,
|
|
"learning_rate": 1.1604142112179954e-05,
|
|
"loss": 0.8338,
|
|
"step": 6590
|
|
},
|
|
{
|
|
"epoch": 0.8871998385686419,
|
|
"grad_norm": 0.056687966623131726,
|
|
"learning_rate": 1.1468673817750268e-05,
|
|
"loss": 0.8078,
|
|
"step": 6595
|
|
},
|
|
{
|
|
"epoch": 0.8878724692271474,
|
|
"grad_norm": 0.05698668416033602,
|
|
"learning_rate": 1.13339694737061e-05,
|
|
"loss": 0.8264,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.8885450998856528,
|
|
"grad_norm": 0.06045248837509163,
|
|
"learning_rate": 1.1200029822893403e-05,
|
|
"loss": 0.8371,
|
|
"step": 6605
|
|
},
|
|
{
|
|
"epoch": 0.8892177305441582,
|
|
"grad_norm": 0.05550968614064584,
|
|
"learning_rate": 1.1066855603941116e-05,
|
|
"loss": 0.8438,
|
|
"step": 6610
|
|
},
|
|
{
|
|
"epoch": 0.8898903612026636,
|
|
"grad_norm": 0.0544441054388003,
|
|
"learning_rate": 1.0934447551257075e-05,
|
|
"loss": 0.7952,
|
|
"step": 6615
|
|
},
|
|
{
|
|
"epoch": 0.890562991861169,
|
|
"grad_norm": 0.059624142666569964,
|
|
"learning_rate": 1.0802806395024077e-05,
|
|
"loss": 0.8208,
|
|
"step": 6620
|
|
},
|
|
{
|
|
"epoch": 0.8912356225196745,
|
|
"grad_norm": 0.16363131712067192,
|
|
"learning_rate": 1.0671932861195653e-05,
|
|
"loss": 0.8434,
|
|
"step": 6625
|
|
},
|
|
{
|
|
"epoch": 0.8919082531781799,
|
|
"grad_norm": 0.06206283538299498,
|
|
"learning_rate": 1.0541827671492254e-05,
|
|
"loss": 0.8057,
|
|
"step": 6630
|
|
},
|
|
{
|
|
"epoch": 0.8925808838366853,
|
|
"grad_norm": 0.05836709662569859,
|
|
"learning_rate": 1.04124915433971e-05,
|
|
"loss": 0.8422,
|
|
"step": 6635
|
|
},
|
|
{
|
|
"epoch": 0.8932535144951906,
|
|
"grad_norm": 0.061930616062657796,
|
|
"learning_rate": 1.028392519015246e-05,
|
|
"loss": 0.8281,
|
|
"step": 6640
|
|
},
|
|
{
|
|
"epoch": 0.8939261451536961,
|
|
"grad_norm": 0.058062089585763935,
|
|
"learning_rate": 1.0156129320755407e-05,
|
|
"loss": 0.8381,
|
|
"step": 6645
|
|
},
|
|
{
|
|
"epoch": 0.8945987758122015,
|
|
"grad_norm": 0.05726651358791121,
|
|
"learning_rate": 1.0029104639954155e-05,
|
|
"loss": 0.803,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 0.8952714064707069,
|
|
"grad_norm": 0.05470645605167178,
|
|
"learning_rate": 9.90285184824413e-06,
|
|
"loss": 0.8297,
|
|
"step": 6655
|
|
},
|
|
{
|
|
"epoch": 0.8959440371292123,
|
|
"grad_norm": 0.05675315145907022,
|
|
"learning_rate": 9.777371641864001e-06,
|
|
"loss": 0.8213,
|
|
"step": 6660
|
|
},
|
|
{
|
|
"epoch": 0.8966166677877178,
|
|
"grad_norm": 0.058185966792208735,
|
|
"learning_rate": 9.652664712791908e-06,
|
|
"loss": 0.8086,
|
|
"step": 6665
|
|
},
|
|
{
|
|
"epoch": 0.8972892984462232,
|
|
"grad_norm": 0.05818770614969196,
|
|
"learning_rate": 9.528731748741669e-06,
|
|
"loss": 0.8059,
|
|
"step": 6670
|
|
},
|
|
{
|
|
"epoch": 0.8979619291047286,
|
|
"grad_norm": 0.0598088005724284,
|
|
"learning_rate": 9.405573433158935e-06,
|
|
"loss": 0.7718,
|
|
"step": 6675
|
|
},
|
|
{
|
|
"epoch": 0.898634559763234,
|
|
"grad_norm": 0.05579621293351216,
|
|
"learning_rate": 9.283190445217543e-06,
|
|
"loss": 0.8388,
|
|
"step": 6680
|
|
},
|
|
{
|
|
"epoch": 0.8993071904217395,
|
|
"grad_norm": 0.057509096259254926,
|
|
"learning_rate": 9.161583459815541e-06,
|
|
"loss": 0.8184,
|
|
"step": 6685
|
|
},
|
|
{
|
|
"epoch": 0.8999798210802449,
|
|
"grad_norm": 0.05567137387737026,
|
|
"learning_rate": 9.040753147571694e-06,
|
|
"loss": 0.825,
|
|
"step": 6690
|
|
},
|
|
{
|
|
"epoch": 0.9006524517387503,
|
|
"grad_norm": 0.054220374114114143,
|
|
"learning_rate": 8.920700174821704e-06,
|
|
"loss": 0.8298,
|
|
"step": 6695
|
|
},
|
|
{
|
|
"epoch": 0.9013250823972556,
|
|
"grad_norm": 0.05624449640927385,
|
|
"learning_rate": 8.801425203614403e-06,
|
|
"loss": 0.8378,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.901997713055761,
|
|
"grad_norm": 0.05358240234964744,
|
|
"learning_rate": 8.68292889170839e-06,
|
|
"loss": 0.862,
|
|
"step": 6705
|
|
},
|
|
{
|
|
"epoch": 0.9026703437142665,
|
|
"grad_norm": 0.05764638889180579,
|
|
"learning_rate": 8.565211892568147e-06,
|
|
"loss": 0.7911,
|
|
"step": 6710
|
|
},
|
|
{
|
|
"epoch": 0.9033429743727719,
|
|
"grad_norm": 0.06469252177667065,
|
|
"learning_rate": 8.448274855360493e-06,
|
|
"loss": 0.8209,
|
|
"step": 6715
|
|
},
|
|
{
|
|
"epoch": 0.9040156050312773,
|
|
"grad_norm": 0.05723782270004928,
|
|
"learning_rate": 8.33211842495114e-06,
|
|
"loss": 0.8373,
|
|
"step": 6720
|
|
},
|
|
{
|
|
"epoch": 0.9046882356897827,
|
|
"grad_norm": 0.05407480477711123,
|
|
"learning_rate": 8.216743241900958e-06,
|
|
"loss": 0.825,
|
|
"step": 6725
|
|
},
|
|
{
|
|
"epoch": 0.9053608663482882,
|
|
"grad_norm": 0.05634412862285222,
|
|
"learning_rate": 8.102149942462516e-06,
|
|
"loss": 0.8393,
|
|
"step": 6730
|
|
},
|
|
{
|
|
"epoch": 0.9060334970067936,
|
|
"grad_norm": 0.053895311446623226,
|
|
"learning_rate": 7.988339158576678e-06,
|
|
"loss": 0.8404,
|
|
"step": 6735
|
|
},
|
|
{
|
|
"epoch": 0.906706127665299,
|
|
"grad_norm": 0.05503723217321855,
|
|
"learning_rate": 7.875311517868848e-06,
|
|
"loss": 0.8447,
|
|
"step": 6740
|
|
},
|
|
{
|
|
"epoch": 0.9073787583238044,
|
|
"grad_norm": 0.05593172816905371,
|
|
"learning_rate": 7.763067643645798e-06,
|
|
"loss": 0.8082,
|
|
"step": 6745
|
|
},
|
|
{
|
|
"epoch": 0.9080513889823099,
|
|
"grad_norm": 0.061482767030254945,
|
|
"learning_rate": 7.651608154892124e-06,
|
|
"loss": 0.8367,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 0.9087240196408153,
|
|
"grad_norm": 0.05746238917932355,
|
|
"learning_rate": 7.540933666266719e-06,
|
|
"loss": 0.8386,
|
|
"step": 6755
|
|
},
|
|
{
|
|
"epoch": 0.9093966502993206,
|
|
"grad_norm": 0.05576392822962793,
|
|
"learning_rate": 7.431044788099583e-06,
|
|
"loss": 0.8405,
|
|
"step": 6760
|
|
},
|
|
{
|
|
"epoch": 0.910069280957826,
|
|
"grad_norm": 0.05616250631192827,
|
|
"learning_rate": 7.321942126388286e-06,
|
|
"loss": 0.8322,
|
|
"step": 6765
|
|
},
|
|
{
|
|
"epoch": 0.9107419116163314,
|
|
"grad_norm": 0.058014672803636855,
|
|
"learning_rate": 7.213626282794699e-06,
|
|
"loss": 0.8366,
|
|
"step": 6770
|
|
},
|
|
{
|
|
"epoch": 0.9114145422748369,
|
|
"grad_norm": 0.05772216240114444,
|
|
"learning_rate": 7.106097854641779e-06,
|
|
"loss": 0.8541,
|
|
"step": 6775
|
|
},
|
|
{
|
|
"epoch": 0.9120871729333423,
|
|
"grad_norm": 0.05757569186911316,
|
|
"learning_rate": 6.999357434910025e-06,
|
|
"loss": 0.856,
|
|
"step": 6780
|
|
},
|
|
{
|
|
"epoch": 0.9127598035918477,
|
|
"grad_norm": 0.05727236113439516,
|
|
"learning_rate": 6.893405612234426e-06,
|
|
"loss": 0.7808,
|
|
"step": 6785
|
|
},
|
|
{
|
|
"epoch": 0.9134324342503531,
|
|
"grad_norm": 0.05536651126384854,
|
|
"learning_rate": 6.788242970901187e-06,
|
|
"loss": 0.8185,
|
|
"step": 6790
|
|
},
|
|
{
|
|
"epoch": 0.9141050649088586,
|
|
"grad_norm": 0.05593060161696056,
|
|
"learning_rate": 6.68387009084434e-06,
|
|
"loss": 0.8486,
|
|
"step": 6795
|
|
},
|
|
{
|
|
"epoch": 0.914777695567364,
|
|
"grad_norm": 0.053834765840133,
|
|
"learning_rate": 6.580287547642771e-06,
|
|
"loss": 0.8146,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.9154503262258694,
|
|
"grad_norm": 0.05870029877629974,
|
|
"learning_rate": 6.47749591251695e-06,
|
|
"loss": 0.8048,
|
|
"step": 6805
|
|
},
|
|
{
|
|
"epoch": 0.9161229568843748,
|
|
"grad_norm": 0.05721706302420605,
|
|
"learning_rate": 6.3754957523256715e-06,
|
|
"loss": 0.8188,
|
|
"step": 6810
|
|
},
|
|
{
|
|
"epoch": 0.9167955875428802,
|
|
"grad_norm": 0.055140825623670245,
|
|
"learning_rate": 6.274287629563119e-06,
|
|
"loss": 0.7844,
|
|
"step": 6815
|
|
},
|
|
{
|
|
"epoch": 0.9174682182013856,
|
|
"grad_norm": 0.051653691682714734,
|
|
"learning_rate": 6.173872102355654e-06,
|
|
"loss": 0.805,
|
|
"step": 6820
|
|
},
|
|
{
|
|
"epoch": 0.918140848859891,
|
|
"grad_norm": 0.05876727045069556,
|
|
"learning_rate": 6.074249724458735e-06,
|
|
"loss": 0.8785,
|
|
"step": 6825
|
|
},
|
|
{
|
|
"epoch": 0.9188134795183964,
|
|
"grad_norm": 0.054597347339118626,
|
|
"learning_rate": 5.975421045253953e-06,
|
|
"loss": 0.8406,
|
|
"step": 6830
|
|
},
|
|
{
|
|
"epoch": 0.9194861101769018,
|
|
"grad_norm": 0.0523982140672833,
|
|
"learning_rate": 5.877386609745832e-06,
|
|
"loss": 0.8333,
|
|
"step": 6835
|
|
},
|
|
{
|
|
"epoch": 0.9201587408354073,
|
|
"grad_norm": 0.0538127071729553,
|
|
"learning_rate": 5.780146958559017e-06,
|
|
"loss": 0.8413,
|
|
"step": 6840
|
|
},
|
|
{
|
|
"epoch": 0.9208313714939127,
|
|
"grad_norm": 0.05806274114630895,
|
|
"learning_rate": 5.683702627935194e-06,
|
|
"loss": 0.8516,
|
|
"step": 6845
|
|
},
|
|
{
|
|
"epoch": 0.9215040021524181,
|
|
"grad_norm": 0.05360779533579333,
|
|
"learning_rate": 5.58805414973007e-06,
|
|
"loss": 0.8544,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 0.9221766328109235,
|
|
"grad_norm": 0.055601450532106245,
|
|
"learning_rate": 5.4932020514106e-06,
|
|
"loss": 0.8552,
|
|
"step": 6855
|
|
},
|
|
{
|
|
"epoch": 0.922849263469429,
|
|
"grad_norm": 0.0521116662169681,
|
|
"learning_rate": 5.3991468560519666e-06,
|
|
"loss": 0.8574,
|
|
"step": 6860
|
|
},
|
|
{
|
|
"epoch": 0.9235218941279344,
|
|
"grad_norm": 0.052641462098686344,
|
|
"learning_rate": 5.305889082334652e-06,
|
|
"loss": 0.86,
|
|
"step": 6865
|
|
},
|
|
{
|
|
"epoch": 0.9241945247864398,
|
|
"grad_norm": 0.0593752389096462,
|
|
"learning_rate": 5.213429244541756e-06,
|
|
"loss": 0.84,
|
|
"step": 6870
|
|
},
|
|
{
|
|
"epoch": 0.9248671554449451,
|
|
"grad_norm": 0.05770582668727739,
|
|
"learning_rate": 5.121767852555963e-06,
|
|
"loss": 0.8699,
|
|
"step": 6875
|
|
},
|
|
{
|
|
"epoch": 0.9255397861034506,
|
|
"grad_norm": 0.05826386563520872,
|
|
"learning_rate": 5.030905411856851e-06,
|
|
"loss": 0.8269,
|
|
"step": 6880
|
|
},
|
|
{
|
|
"epoch": 0.926212416761956,
|
|
"grad_norm": 0.05588686227140018,
|
|
"learning_rate": 4.940842423518082e-06,
|
|
"loss": 0.8338,
|
|
"step": 6885
|
|
},
|
|
{
|
|
"epoch": 0.9268850474204614,
|
|
"grad_norm": 0.059415826857863695,
|
|
"learning_rate": 4.851579384204602e-06,
|
|
"loss": 0.8057,
|
|
"step": 6890
|
|
},
|
|
{
|
|
"epoch": 0.9275576780789668,
|
|
"grad_norm": 0.05721555947001062,
|
|
"learning_rate": 4.763116786169929e-06,
|
|
"loss": 0.8111,
|
|
"step": 6895
|
|
},
|
|
{
|
|
"epoch": 0.9282303087374723,
|
|
"grad_norm": 0.06965978230318905,
|
|
"learning_rate": 4.675455117253452e-06,
|
|
"loss": 0.8653,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.9289029393959777,
|
|
"grad_norm": 0.05263142191055551,
|
|
"learning_rate": 4.58859486087767e-06,
|
|
"loss": 0.7804,
|
|
"step": 6905
|
|
},
|
|
{
|
|
"epoch": 0.9295755700544831,
|
|
"grad_norm": 0.054579198109746645,
|
|
"learning_rate": 4.502536496045672e-06,
|
|
"loss": 0.8243,
|
|
"step": 6910
|
|
},
|
|
{
|
|
"epoch": 0.9302482007129885,
|
|
"grad_norm": 0.05758805432002544,
|
|
"learning_rate": 4.417280497338349e-06,
|
|
"loss": 0.8117,
|
|
"step": 6915
|
|
},
|
|
{
|
|
"epoch": 0.930920831371494,
|
|
"grad_norm": 0.05901453910245508,
|
|
"learning_rate": 4.3328273349117985e-06,
|
|
"loss": 0.7957,
|
|
"step": 6920
|
|
},
|
|
{
|
|
"epoch": 0.9315934620299994,
|
|
"grad_norm": 0.05998448102095731,
|
|
"learning_rate": 4.249177474494858e-06,
|
|
"loss": 0.8475,
|
|
"step": 6925
|
|
},
|
|
{
|
|
"epoch": 0.9322660926885047,
|
|
"grad_norm": 0.05962514390284862,
|
|
"learning_rate": 4.166331377386361e-06,
|
|
"loss": 0.8389,
|
|
"step": 6930
|
|
},
|
|
{
|
|
"epoch": 0.9329387233470101,
|
|
"grad_norm": 0.05075854568401546,
|
|
"learning_rate": 4.084289500452686e-06,
|
|
"loss": 0.8383,
|
|
"step": 6935
|
|
},
|
|
{
|
|
"epoch": 0.9336113540055155,
|
|
"grad_norm": 0.056229295540856775,
|
|
"learning_rate": 4.003052296125275e-06,
|
|
"loss": 0.8432,
|
|
"step": 6940
|
|
},
|
|
{
|
|
"epoch": 0.934283984664021,
|
|
"grad_norm": 0.05707159182418883,
|
|
"learning_rate": 3.922620212398053e-06,
|
|
"loss": 0.7909,
|
|
"step": 6945
|
|
},
|
|
{
|
|
"epoch": 0.9349566153225264,
|
|
"grad_norm": 0.056476160275041105,
|
|
"learning_rate": 3.842993692824997e-06,
|
|
"loss": 0.7794,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 0.9356292459810318,
|
|
"grad_norm": 0.0575531338498515,
|
|
"learning_rate": 3.7641731765176875e-06,
|
|
"loss": 0.8389,
|
|
"step": 6955
|
|
},
|
|
{
|
|
"epoch": 0.9363018766395372,
|
|
"grad_norm": 0.06345716898715516,
|
|
"learning_rate": 3.6861590981428936e-06,
|
|
"loss": 0.8821,
|
|
"step": 6960
|
|
},
|
|
{
|
|
"epoch": 0.9369745072980427,
|
|
"grad_norm": 0.05436220392225888,
|
|
"learning_rate": 3.6089518879201918e-06,
|
|
"loss": 0.8533,
|
|
"step": 6965
|
|
},
|
|
{
|
|
"epoch": 0.9376471379565481,
|
|
"grad_norm": 0.05591349911377824,
|
|
"learning_rate": 3.5325519716195184e-06,
|
|
"loss": 0.8549,
|
|
"step": 6970
|
|
},
|
|
{
|
|
"epoch": 0.9383197686150535,
|
|
"grad_norm": 0.058008597775419855,
|
|
"learning_rate": 3.4569597705589368e-06,
|
|
"loss": 0.8246,
|
|
"step": 6975
|
|
},
|
|
{
|
|
"epoch": 0.9389923992735589,
|
|
"grad_norm": 0.05621372976671557,
|
|
"learning_rate": 3.3821757016021746e-06,
|
|
"loss": 0.7854,
|
|
"step": 6980
|
|
},
|
|
{
|
|
"epoch": 0.9396650299320644,
|
|
"grad_norm": 0.05591057307278753,
|
|
"learning_rate": 3.3082001771564724e-06,
|
|
"loss": 0.7782,
|
|
"step": 6985
|
|
},
|
|
{
|
|
"epoch": 0.9403376605905697,
|
|
"grad_norm": 0.05748855166025494,
|
|
"learning_rate": 3.235033605170223e-06,
|
|
"loss": 0.8704,
|
|
"step": 6990
|
|
},
|
|
{
|
|
"epoch": 0.9410102912490751,
|
|
"grad_norm": 0.061305295309165904,
|
|
"learning_rate": 3.162676389130686e-06,
|
|
"loss": 0.8486,
|
|
"step": 6995
|
|
},
|
|
{
|
|
"epoch": 0.9416829219075805,
|
|
"grad_norm": 0.05261450189267661,
|
|
"learning_rate": 3.091128928061909e-06,
|
|
"loss": 0.8399,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.9423555525660859,
|
|
"grad_norm": 0.05918033527471021,
|
|
"learning_rate": 3.020391616522344e-06,
|
|
"loss": 0.9212,
|
|
"step": 7005
|
|
},
|
|
{
|
|
"epoch": 0.9430281832245914,
|
|
"grad_norm": 0.056455106699066976,
|
|
"learning_rate": 2.950464844602818e-06,
|
|
"loss": 0.8073,
|
|
"step": 7010
|
|
},
|
|
{
|
|
"epoch": 0.9437008138830968,
|
|
"grad_norm": 0.055853500334889054,
|
|
"learning_rate": 2.881348997924282e-06,
|
|
"loss": 0.8696,
|
|
"step": 7015
|
|
},
|
|
{
|
|
"epoch": 0.9443734445416022,
|
|
"grad_norm": 0.061415441559629934,
|
|
"learning_rate": 2.8130444576357323e-06,
|
|
"loss": 0.8452,
|
|
"step": 7020
|
|
},
|
|
{
|
|
"epoch": 0.9450460752001076,
|
|
"grad_norm": 0.06526320016491569,
|
|
"learning_rate": 2.7455516004121436e-06,
|
|
"loss": 0.7945,
|
|
"step": 7025
|
|
},
|
|
{
|
|
"epoch": 0.9457187058586131,
|
|
"grad_norm": 0.05844190506558609,
|
|
"learning_rate": 2.6788707984523207e-06,
|
|
"loss": 0.9219,
|
|
"step": 7030
|
|
},
|
|
{
|
|
"epoch": 0.9463913365171185,
|
|
"grad_norm": 0.055208528689147815,
|
|
"learning_rate": 2.6130024194768675e-06,
|
|
"loss": 0.8225,
|
|
"step": 7035
|
|
},
|
|
{
|
|
"epoch": 0.9470639671756239,
|
|
"grad_norm": 0.05708207902042239,
|
|
"learning_rate": 2.5479468267261715e-06,
|
|
"loss": 0.8278,
|
|
"step": 7040
|
|
},
|
|
{
|
|
"epoch": 0.9477365978341293,
|
|
"grad_norm": 0.05520554814739056,
|
|
"learning_rate": 2.4837043789584233e-06,
|
|
"loss": 0.8559,
|
|
"step": 7045
|
|
},
|
|
{
|
|
"epoch": 0.9484092284926346,
|
|
"grad_norm": 0.05526467988369699,
|
|
"learning_rate": 2.420275430447566e-06,
|
|
"loss": 0.822,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 0.9490818591511401,
|
|
"grad_norm": 0.04965099416809,
|
|
"learning_rate": 2.357660330981448e-06,
|
|
"loss": 0.7798,
|
|
"step": 7055
|
|
},
|
|
{
|
|
"epoch": 0.9497544898096455,
|
|
"grad_norm": 0.053339771340983035,
|
|
"learning_rate": 2.2958594258597583e-06,
|
|
"loss": 0.7858,
|
|
"step": 7060
|
|
},
|
|
{
|
|
"epoch": 0.9504271204681509,
|
|
"grad_norm": 0.06424545288393499,
|
|
"learning_rate": 2.2348730558922613e-06,
|
|
"loss": 0.8881,
|
|
"step": 7065
|
|
},
|
|
{
|
|
"epoch": 0.9510997511266563,
|
|
"grad_norm": 0.05476638703664176,
|
|
"learning_rate": 2.174701557396846e-06,
|
|
"loss": 0.822,
|
|
"step": 7070
|
|
},
|
|
{
|
|
"epoch": 0.9517723817851618,
|
|
"grad_norm": 0.05866256833078084,
|
|
"learning_rate": 2.1153452621976153e-06,
|
|
"loss": 0.8408,
|
|
"step": 7075
|
|
},
|
|
{
|
|
"epoch": 0.9524450124436672,
|
|
"grad_norm": 0.05569889642062564,
|
|
"learning_rate": 2.056804497623199e-06,
|
|
"loss": 0.81,
|
|
"step": 7080
|
|
},
|
|
{
|
|
"epoch": 0.9531176431021726,
|
|
"grad_norm": 0.057235768610266455,
|
|
"learning_rate": 1.999079586504826e-06,
|
|
"loss": 0.8469,
|
|
"step": 7085
|
|
},
|
|
{
|
|
"epoch": 0.953790273760678,
|
|
"grad_norm": 0.05737113793545201,
|
|
"learning_rate": 1.94217084717459e-06,
|
|
"loss": 0.8727,
|
|
"step": 7090
|
|
},
|
|
{
|
|
"epoch": 0.9544629044191835,
|
|
"grad_norm": 0.05474868616850645,
|
|
"learning_rate": 1.8860785934637357e-06,
|
|
"loss": 0.7925,
|
|
"step": 7095
|
|
},
|
|
{
|
|
"epoch": 0.9551355350776889,
|
|
"grad_norm": 0.0595122324063722,
|
|
"learning_rate": 1.8308031347007923e-06,
|
|
"loss": 0.8016,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.9558081657361942,
|
|
"grad_norm": 0.055035263805029065,
|
|
"learning_rate": 1.7763447757100425e-06,
|
|
"loss": 0.8676,
|
|
"step": 7105
|
|
},
|
|
{
|
|
"epoch": 0.9564807963946996,
|
|
"grad_norm": 0.05590246554884426,
|
|
"learning_rate": 1.7227038168097395e-06,
|
|
"loss": 0.873,
|
|
"step": 7110
|
|
},
|
|
{
|
|
"epoch": 0.957153427053205,
|
|
"grad_norm": 0.05119675154618015,
|
|
"learning_rate": 1.66988055381041e-06,
|
|
"loss": 0.8029,
|
|
"step": 7115
|
|
},
|
|
{
|
|
"epoch": 0.9578260577117105,
|
|
"grad_norm": 0.05234276265076411,
|
|
"learning_rate": 1.6178752780133864e-06,
|
|
"loss": 0.8318,
|
|
"step": 7120
|
|
},
|
|
{
|
|
"epoch": 0.9584986883702159,
|
|
"grad_norm": 0.05323151931453215,
|
|
"learning_rate": 1.5666882762090272e-06,
|
|
"loss": 0.8052,
|
|
"step": 7125
|
|
},
|
|
{
|
|
"epoch": 0.9591713190287213,
|
|
"grad_norm": 0.05694376211991154,
|
|
"learning_rate": 1.5163198306751834e-06,
|
|
"loss": 0.8961,
|
|
"step": 7130
|
|
},
|
|
{
|
|
"epoch": 0.9598439496872267,
|
|
"grad_norm": 0.05719101441637811,
|
|
"learning_rate": 1.4667702191757502e-06,
|
|
"loss": 0.8327,
|
|
"step": 7135
|
|
},
|
|
{
|
|
"epoch": 0.9605165803457322,
|
|
"grad_norm": 0.05715755903971462,
|
|
"learning_rate": 1.4180397149589352e-06,
|
|
"loss": 0.8363,
|
|
"step": 7140
|
|
},
|
|
{
|
|
"epoch": 0.9611892110042376,
|
|
"grad_norm": 0.056060246764845556,
|
|
"learning_rate": 1.3701285867559586e-06,
|
|
"loss": 0.8023,
|
|
"step": 7145
|
|
},
|
|
{
|
|
"epoch": 0.961861841662743,
|
|
"grad_norm": 0.05502048962304145,
|
|
"learning_rate": 1.3230370987794558e-06,
|
|
"loss": 0.8221,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 0.9625344723212484,
|
|
"grad_norm": 0.05675516018646223,
|
|
"learning_rate": 1.2767655107219942e-06,
|
|
"loss": 0.79,
|
|
"step": 7155
|
|
},
|
|
{
|
|
"epoch": 0.9632071029797539,
|
|
"grad_norm": 0.06612529899480675,
|
|
"learning_rate": 1.2313140777547414e-06,
|
|
"loss": 0.8517,
|
|
"step": 7160
|
|
},
|
|
{
|
|
"epoch": 0.9638797336382592,
|
|
"grad_norm": 0.05363591253086607,
|
|
"learning_rate": 1.1866830505259828e-06,
|
|
"loss": 0.8356,
|
|
"step": 7165
|
|
},
|
|
{
|
|
"epoch": 0.9645523642967646,
|
|
"grad_norm": 0.05922830366383967,
|
|
"learning_rate": 1.1428726751597561e-06,
|
|
"loss": 0.8425,
|
|
"step": 7170
|
|
},
|
|
{
|
|
"epoch": 0.96522499495527,
|
|
"grad_norm": 0.05594096747574841,
|
|
"learning_rate": 1.0998831932545194e-06,
|
|
"loss": 0.8429,
|
|
"step": 7175
|
|
},
|
|
{
|
|
"epoch": 0.9658976256137755,
|
|
"grad_norm": 0.05487500145092262,
|
|
"learning_rate": 1.0577148418817848e-06,
|
|
"loss": 0.776,
|
|
"step": 7180
|
|
},
|
|
{
|
|
"epoch": 0.9665702562722809,
|
|
"grad_norm": 0.05596838295843446,
|
|
"learning_rate": 1.0163678535848041e-06,
|
|
"loss": 0.8421,
|
|
"step": 7185
|
|
},
|
|
{
|
|
"epoch": 0.9672428869307863,
|
|
"grad_norm": 0.05441153542171681,
|
|
"learning_rate": 9.75842456377368e-07,
|
|
"loss": 0.8411,
|
|
"step": 7190
|
|
},
|
|
{
|
|
"epoch": 0.9679155175892917,
|
|
"grad_norm": 0.05366297165683922,
|
|
"learning_rate": 9.361388737424258e-07,
|
|
"loss": 0.824,
|
|
"step": 7195
|
|
},
|
|
{
|
|
"epoch": 0.9685881482477972,
|
|
"grad_norm": 0.05545163132286662,
|
|
"learning_rate": 8.972573246309345e-07,
|
|
"loss": 0.8556,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.9692607789063026,
|
|
"grad_norm": 0.064116228334137,
|
|
"learning_rate": 8.591980234606777e-07,
|
|
"loss": 0.7905,
|
|
"step": 7205
|
|
},
|
|
{
|
|
"epoch": 0.969933409564808,
|
|
"grad_norm": 0.05312151035526173,
|
|
"learning_rate": 8.219611801149495e-07,
|
|
"loss": 0.8268,
|
|
"step": 7210
|
|
},
|
|
{
|
|
"epoch": 0.9706060402233134,
|
|
"grad_norm": 0.058194773519087764,
|
|
"learning_rate": 7.855469999415719e-07,
|
|
"loss": 0.7896,
|
|
"step": 7215
|
|
},
|
|
{
|
|
"epoch": 0.9712786708818187,
|
|
"grad_norm": 0.05863705966020045,
|
|
"learning_rate": 7.49955683751613e-07,
|
|
"loss": 0.8392,
|
|
"step": 7220
|
|
},
|
|
{
|
|
"epoch": 0.9719513015403242,
|
|
"grad_norm": 0.05436753504886398,
|
|
"learning_rate": 7.151874278183701e-07,
|
|
"loss": 0.8249,
|
|
"step": 7225
|
|
},
|
|
{
|
|
"epoch": 0.9726239321988296,
|
|
"grad_norm": 0.05899966125168415,
|
|
"learning_rate": 6.812424238762714e-07,
|
|
"loss": 0.8125,
|
|
"step": 7230
|
|
},
|
|
{
|
|
"epoch": 0.973296562857335,
|
|
"grad_norm": 0.05519895444874772,
|
|
"learning_rate": 6.481208591197773e-07,
|
|
"loss": 0.8619,
|
|
"step": 7235
|
|
},
|
|
{
|
|
"epoch": 0.9739691935158404,
|
|
"grad_norm": 0.05253771806298885,
|
|
"learning_rate": 6.158229162023798e-07,
|
|
"loss": 0.8025,
|
|
"step": 7240
|
|
},
|
|
{
|
|
"epoch": 0.9746418241743459,
|
|
"grad_norm": 0.0536469386569897,
|
|
"learning_rate": 5.843487732356545e-07,
|
|
"loss": 0.8377,
|
|
"step": 7245
|
|
},
|
|
{
|
|
"epoch": 0.9753144548328513,
|
|
"grad_norm": 0.0531695013404097,
|
|
"learning_rate": 5.536986037881108e-07,
|
|
"loss": 0.8034,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 0.9759870854913567,
|
|
"grad_norm": 0.06873102864152862,
|
|
"learning_rate": 5.238725768844265e-07,
|
|
"loss": 0.8184,
|
|
"step": 7255
|
|
},
|
|
{
|
|
"epoch": 0.9766597161498621,
|
|
"grad_norm": 0.05324596611103951,
|
|
"learning_rate": 4.94870857004398e-07,
|
|
"loss": 0.8976,
|
|
"step": 7260
|
|
},
|
|
{
|
|
"epoch": 0.9773323468083676,
|
|
"grad_norm": 0.057897135420761366,
|
|
"learning_rate": 4.666936040820746e-07,
|
|
"loss": 0.813,
|
|
"step": 7265
|
|
},
|
|
{
|
|
"epoch": 0.978004977466873,
|
|
"grad_norm": 0.0570970584749799,
|
|
"learning_rate": 4.3934097350484254e-07,
|
|
"loss": 0.8261,
|
|
"step": 7270
|
|
},
|
|
{
|
|
"epoch": 0.9786776081253784,
|
|
"grad_norm": 0.05240797601588687,
|
|
"learning_rate": 4.128131161126258e-07,
|
|
"loss": 0.7956,
|
|
"step": 7275
|
|
},
|
|
{
|
|
"epoch": 0.9793502387838837,
|
|
"grad_norm": 0.057826394271235156,
|
|
"learning_rate": 3.8711017819698655e-07,
|
|
"loss": 0.8371,
|
|
"step": 7280
|
|
},
|
|
{
|
|
"epoch": 0.9800228694423891,
|
|
"grad_norm": 0.06015396248293072,
|
|
"learning_rate": 3.6223230150040914e-07,
|
|
"loss": 0.8334,
|
|
"step": 7285
|
|
},
|
|
{
|
|
"epoch": 0.9806955001008946,
|
|
"grad_norm": 0.06055468276551505,
|
|
"learning_rate": 3.3817962321540104e-07,
|
|
"loss": 0.7761,
|
|
"step": 7290
|
|
},
|
|
{
|
|
"epoch": 0.9813681307594,
|
|
"grad_norm": 0.05544436920380559,
|
|
"learning_rate": 3.149522759838263e-07,
|
|
"loss": 0.8265,
|
|
"step": 7295
|
|
},
|
|
{
|
|
"epoch": 0.9820407614179054,
|
|
"grad_norm": 0.06544097337785165,
|
|
"learning_rate": 2.9255038789613993e-07,
|
|
"loss": 0.8964,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.9827133920764108,
|
|
"grad_norm": 0.0518140128621406,
|
|
"learning_rate": 2.709740824906881e-07,
|
|
"loss": 0.8337,
|
|
"step": 7305
|
|
},
|
|
{
|
|
"epoch": 0.9833860227349163,
|
|
"grad_norm": 0.05706722221557735,
|
|
"learning_rate": 2.502234787530255e-07,
|
|
"loss": 0.7966,
|
|
"step": 7310
|
|
},
|
|
{
|
|
"epoch": 0.9840586533934217,
|
|
"grad_norm": 0.056752158307538485,
|
|
"learning_rate": 2.3029869111528265e-07,
|
|
"loss": 0.8182,
|
|
"step": 7315
|
|
},
|
|
{
|
|
"epoch": 0.9847312840519271,
|
|
"grad_norm": 0.05552321850596303,
|
|
"learning_rate": 2.111998294554662e-07,
|
|
"loss": 0.8017,
|
|
"step": 7320
|
|
},
|
|
{
|
|
"epoch": 0.9854039147104325,
|
|
"grad_norm": 0.059757292747136675,
|
|
"learning_rate": 1.929269990969262e-07,
|
|
"loss": 0.8744,
|
|
"step": 7325
|
|
},
|
|
{
|
|
"epoch": 0.986076545368938,
|
|
"grad_norm": 0.056043523845769314,
|
|
"learning_rate": 1.754803008077399e-07,
|
|
"loss": 0.838,
|
|
"step": 7330
|
|
},
|
|
{
|
|
"epoch": 0.9867491760274433,
|
|
"grad_norm": 0.053170486374633105,
|
|
"learning_rate": 1.588598308001787e-07,
|
|
"loss": 0.8019,
|
|
"step": 7335
|
|
},
|
|
{
|
|
"epoch": 0.9874218066859487,
|
|
"grad_norm": 0.057704518479353664,
|
|
"learning_rate": 1.430656807301256e-07,
|
|
"loss": 0.805,
|
|
"step": 7340
|
|
},
|
|
{
|
|
"epoch": 0.9880944373444541,
|
|
"grad_norm": 0.05413218505886977,
|
|
"learning_rate": 1.2809793769665844e-07,
|
|
"loss": 0.8237,
|
|
"step": 7345
|
|
},
|
|
{
|
|
"epoch": 0.9887670680029595,
|
|
"grad_norm": 0.057060373317335183,
|
|
"learning_rate": 1.1395668424148408e-07,
|
|
"loss": 0.8282,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 0.989439698661465,
|
|
"grad_norm": 0.0596101384204014,
|
|
"learning_rate": 1.0064199834852182e-07,
|
|
"loss": 0.8736,
|
|
"step": 7355
|
|
},
|
|
{
|
|
"epoch": 0.9901123293199704,
|
|
"grad_norm": 0.05133772973983179,
|
|
"learning_rate": 8.815395344347054e-08,
|
|
"loss": 0.7716,
|
|
"step": 7360
|
|
},
|
|
{
|
|
"epoch": 0.9907849599784758,
|
|
"grad_norm": 0.057739414768695714,
|
|
"learning_rate": 7.649261839340893e-08,
|
|
"loss": 0.8556,
|
|
"step": 7365
|
|
},
|
|
{
|
|
"epoch": 0.9914575906369812,
|
|
"grad_norm": 0.05337934101490364,
|
|
"learning_rate": 6.56580575063792e-08,
|
|
"loss": 0.7968,
|
|
"step": 7370
|
|
},
|
|
{
|
|
"epoch": 0.9921302212954867,
|
|
"grad_norm": 0.06044145462857247,
|
|
"learning_rate": 5.565033053108736e-08,
|
|
"loss": 0.8404,
|
|
"step": 7375
|
|
},
|
|
{
|
|
"epoch": 0.9928028519539921,
|
|
"grad_norm": 0.055178278745134306,
|
|
"learning_rate": 4.6469492656570074e-08,
|
|
"loss": 0.8422,
|
|
"step": 7380
|
|
},
|
|
{
|
|
"epoch": 0.9934754826124975,
|
|
"grad_norm": 0.0556030772132154,
|
|
"learning_rate": 3.811559451182833e-08,
|
|
"loss": 0.8267,
|
|
"step": 7385
|
|
},
|
|
{
|
|
"epoch": 0.9941481132710029,
|
|
"grad_norm": 0.0588301847035514,
|
|
"learning_rate": 3.0588682165594294e-08,
|
|
"loss": 0.8654,
|
|
"step": 7390
|
|
},
|
|
{
|
|
"epoch": 0.9948207439295083,
|
|
"grad_norm": 0.05942396313597498,
|
|
"learning_rate": 2.3888797126081494e-08,
|
|
"loss": 0.7917,
|
|
"step": 7395
|
|
},
|
|
{
|
|
"epoch": 0.9954933745880137,
|
|
"grad_norm": 0.05141832056196206,
|
|
"learning_rate": 1.8015976340751693e-08,
|
|
"loss": 0.7945,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.9961660052465191,
|
|
"grad_norm": 0.06354419407035226,
|
|
"learning_rate": 1.2970252196098373e-08,
|
|
"loss": 0.8445,
|
|
"step": 7405
|
|
},
|
|
{
|
|
"epoch": 0.9968386359050245,
|
|
"grad_norm": 0.05794706209934556,
|
|
"learning_rate": 8.751652517463569e-09,
|
|
"loss": 0.7889,
|
|
"step": 7410
|
|
},
|
|
{
|
|
"epoch": 0.99751126656353,
|
|
"grad_norm": 0.05731964589174625,
|
|
"learning_rate": 5.360200568904627e-09,
|
|
"loss": 0.819,
|
|
"step": 7415
|
|
},
|
|
{
|
|
"epoch": 0.9981838972220354,
|
|
"grad_norm": 0.052763476093352656,
|
|
"learning_rate": 2.795915053077635e-09,
|
|
"loss": 0.8117,
|
|
"step": 7420
|
|
},
|
|
{
|
|
"epoch": 0.9988565278805408,
|
|
"grad_norm": 0.058025404251231066,
|
|
"learning_rate": 1.0588101110708958e-09,
|
|
"loss": 0.7894,
|
|
"step": 7425
|
|
},
|
|
{
|
|
"epoch": 0.9995291585390462,
|
|
"grad_norm": 0.053894275277534875,
|
|
"learning_rate": 1.488953224049183e-10,
|
|
"loss": 0.8043,
|
|
"step": 7430
|
|
},
|
|
{
|
|
"epoch": 0.9999327369341494,
|
|
"eval_loss": 0.8264116644859314,
|
|
"eval_runtime": 195.7739,
|
|
"eval_samples_per_second": 255.795,
|
|
"eval_steps_per_second": 7.994,
|
|
"step": 7433
|
|
},
|
|
{
|
|
"epoch": 0.9999327369341494,
|
|
"step": 7433,
|
|
"total_flos": 416264403386368.0,
|
|
"train_loss": 0.9400932153108766,
|
|
"train_runtime": 15499.1561,
|
|
"train_samples_per_second": 61.389,
|
|
"train_steps_per_second": 0.48
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 7433,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 416264403386368.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|