6923 lines
193 KiB
JSON
6923 lines
193 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3125,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008009611533840609,
|
|
"grad_norm": 17.524433163649007,
|
|
"learning_rate": 5.111821086261981e-07,
|
|
"loss": 1.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3438943922519684,
|
|
"step": 5,
|
|
"valid_targets_mean": 4098.5,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 0.016019223067681217,
|
|
"grad_norm": 13.791076615656825,
|
|
"learning_rate": 1.1501597444089457e-06,
|
|
"loss": 1.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21005761623382568,
|
|
"step": 10,
|
|
"valid_targets_mean": 2511.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 0.024028834601521828,
|
|
"grad_norm": 5.517946865057316,
|
|
"learning_rate": 1.7891373801916933e-06,
|
|
"loss": 1.0183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18308956921100616,
|
|
"step": 15,
|
|
"valid_targets_mean": 2126.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 0.032038446135362435,
|
|
"grad_norm": 2.744227608142671,
|
|
"learning_rate": 2.428115015974441e-06,
|
|
"loss": 0.8124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13750654458999634,
|
|
"step": 20,
|
|
"valid_targets_mean": 2022.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 0.040048057669203045,
|
|
"grad_norm": 1.5332207285725141,
|
|
"learning_rate": 3.0670926517571885e-06,
|
|
"loss": 0.7519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20823931694030762,
|
|
"step": 25,
|
|
"valid_targets_mean": 3344.0,
|
|
"valid_targets_min": 2137
|
|
},
|
|
{
|
|
"epoch": 0.048057669203043656,
|
|
"grad_norm": 1.1218212946382582,
|
|
"learning_rate": 3.7060702875399364e-06,
|
|
"loss": 0.6987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1677684783935547,
|
|
"step": 30,
|
|
"valid_targets_mean": 3821.8,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 0.05606728073688426,
|
|
"grad_norm": 1.0902409229649108,
|
|
"learning_rate": 4.345047923322684e-06,
|
|
"loss": 0.642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15482103824615479,
|
|
"step": 35,
|
|
"valid_targets_mean": 2872.0,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 0.06407689227072487,
|
|
"grad_norm": 0.9182419812835773,
|
|
"learning_rate": 4.984025559105431e-06,
|
|
"loss": 0.6346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18592140078544617,
|
|
"step": 40,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 0.07208650380456548,
|
|
"grad_norm": 0.7485580986041784,
|
|
"learning_rate": 5.623003194888179e-06,
|
|
"loss": 0.5798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.153937429189682,
|
|
"step": 45,
|
|
"valid_targets_mean": 4791.2,
|
|
"valid_targets_min": 4097
|
|
},
|
|
{
|
|
"epoch": 0.08009611533840609,
|
|
"grad_norm": 0.6482153629550863,
|
|
"learning_rate": 6.261980830670928e-06,
|
|
"loss": 0.5628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11947568506002426,
|
|
"step": 50,
|
|
"valid_targets_mean": 5747.0,
|
|
"valid_targets_min": 2284
|
|
},
|
|
{
|
|
"epoch": 0.0881057268722467,
|
|
"grad_norm": 0.6395493563858011,
|
|
"learning_rate": 6.900958466453675e-06,
|
|
"loss": 0.5212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1530144214630127,
|
|
"step": 55,
|
|
"valid_targets_mean": 3851.0,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 0.09611533840608731,
|
|
"grad_norm": 0.7819288140459203,
|
|
"learning_rate": 7.5399361022364225e-06,
|
|
"loss": 0.5565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609588861465454,
|
|
"step": 60,
|
|
"valid_targets_mean": 3773.2,
|
|
"valid_targets_min": 3192
|
|
},
|
|
{
|
|
"epoch": 0.10412494993992791,
|
|
"grad_norm": 0.6245397470393927,
|
|
"learning_rate": 8.17891373801917e-06,
|
|
"loss": 0.5051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.117870032787323,
|
|
"step": 65,
|
|
"valid_targets_mean": 3683.5,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 0.11213456147376852,
|
|
"grad_norm": 0.8396655982440933,
|
|
"learning_rate": 8.817891373801917e-06,
|
|
"loss": 0.508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10804140567779541,
|
|
"step": 70,
|
|
"valid_targets_mean": 1953.5,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 0.12014417300760913,
|
|
"grad_norm": 0.7741005218378155,
|
|
"learning_rate": 9.456869009584665e-06,
|
|
"loss": 0.5028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1475619077682495,
|
|
"step": 75,
|
|
"valid_targets_mean": 3450.8,
|
|
"valid_targets_min": 1888
|
|
},
|
|
{
|
|
"epoch": 0.12815378454144974,
|
|
"grad_norm": 0.7554461782154397,
|
|
"learning_rate": 1.0095846645367413e-05,
|
|
"loss": 0.4603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14573875069618225,
|
|
"step": 80,
|
|
"valid_targets_mean": 3919.8,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 0.13616339607529035,
|
|
"grad_norm": 0.7681935214787825,
|
|
"learning_rate": 1.073482428115016e-05,
|
|
"loss": 0.501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10919628292322159,
|
|
"step": 85,
|
|
"valid_targets_mean": 2428.2,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 0.14417300760913096,
|
|
"grad_norm": 0.7291840806567966,
|
|
"learning_rate": 1.1373801916932907e-05,
|
|
"loss": 0.4883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1210450530052185,
|
|
"step": 90,
|
|
"valid_targets_mean": 3008.2,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 0.15218261914297157,
|
|
"grad_norm": 0.7991480370876204,
|
|
"learning_rate": 1.2012779552715656e-05,
|
|
"loss": 0.4764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10408658534288406,
|
|
"step": 95,
|
|
"valid_targets_mean": 2879.5,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 0.16019223067681218,
|
|
"grad_norm": 0.7778175313973651,
|
|
"learning_rate": 1.2651757188498404e-05,
|
|
"loss": 0.4679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09021638333797455,
|
|
"step": 100,
|
|
"valid_targets_mean": 2469.2,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 0.1682018422106528,
|
|
"grad_norm": 0.749376443613068,
|
|
"learning_rate": 1.329073482428115e-05,
|
|
"loss": 0.4604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10945254564285278,
|
|
"step": 105,
|
|
"valid_targets_mean": 3259.2,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 0.1762114537444934,
|
|
"grad_norm": 0.6734827486969247,
|
|
"learning_rate": 1.39297124600639e-05,
|
|
"loss": 0.4609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07655281573534012,
|
|
"step": 110,
|
|
"valid_targets_mean": 2490.2,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 0.184221065278334,
|
|
"grad_norm": 0.6203007637869049,
|
|
"learning_rate": 1.4568690095846648e-05,
|
|
"loss": 0.451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0996263325214386,
|
|
"step": 115,
|
|
"valid_targets_mean": 4032.2,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 0.19223067681217462,
|
|
"grad_norm": 0.7375660347377841,
|
|
"learning_rate": 1.5207667731629394e-05,
|
|
"loss": 0.4778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14520315825939178,
|
|
"step": 120,
|
|
"valid_targets_mean": 3729.0,
|
|
"valid_targets_min": 2157
|
|
},
|
|
{
|
|
"epoch": 0.2002402883460152,
|
|
"grad_norm": 0.7457476200844877,
|
|
"learning_rate": 1.584664536741214e-05,
|
|
"loss": 0.4563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11521410942077637,
|
|
"step": 125,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 0.20824989987985582,
|
|
"grad_norm": 0.6839947819029605,
|
|
"learning_rate": 1.648562300319489e-05,
|
|
"loss": 0.4297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0838354155421257,
|
|
"step": 130,
|
|
"valid_targets_mean": 3178.5,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 0.21625951141369643,
|
|
"grad_norm": 0.678046886513861,
|
|
"learning_rate": 1.712460063897764e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07396633177995682,
|
|
"step": 135,
|
|
"valid_targets_mean": 2994.8,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 0.22426912294753704,
|
|
"grad_norm": 0.74075175797063,
|
|
"learning_rate": 1.7763578274760385e-05,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11071508377790451,
|
|
"step": 140,
|
|
"valid_targets_mean": 4503.5,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 0.23227873448137765,
|
|
"grad_norm": 0.8125730060591674,
|
|
"learning_rate": 1.840255591054313e-05,
|
|
"loss": 0.4104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10540525615215302,
|
|
"step": 145,
|
|
"valid_targets_mean": 2806.2,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 0.24028834601521826,
|
|
"grad_norm": 0.6272955067502476,
|
|
"learning_rate": 1.904153354632588e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08774794638156891,
|
|
"step": 150,
|
|
"valid_targets_mean": 4892.8,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 0.24829795754905887,
|
|
"grad_norm": 0.7677314792983804,
|
|
"learning_rate": 1.9680511182108627e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16529104113578796,
|
|
"step": 155,
|
|
"valid_targets_mean": 6856.2,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 0.2563075690828995,
|
|
"grad_norm": 0.48470359020567205,
|
|
"learning_rate": 2.0319488817891376e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06480596959590912,
|
|
"step": 160,
|
|
"valid_targets_mean": 5445.0,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 0.2643171806167401,
|
|
"grad_norm": 0.48969999605349884,
|
|
"learning_rate": 2.0958466453674126e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0826011598110199,
|
|
"step": 165,
|
|
"valid_targets_mean": 6826.8,
|
|
"valid_targets_min": 4640
|
|
},
|
|
{
|
|
"epoch": 0.2723267921505807,
|
|
"grad_norm": 0.4331959509451022,
|
|
"learning_rate": 2.1597444089456872e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051865704357624054,
|
|
"step": 170,
|
|
"valid_targets_mean": 7376.2,
|
|
"valid_targets_min": 6135
|
|
},
|
|
{
|
|
"epoch": 0.2803364036844213,
|
|
"grad_norm": 0.42788740496031336,
|
|
"learning_rate": 2.2236421725239618e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06763521581888199,
|
|
"step": 175,
|
|
"valid_targets_mean": 6002.2,
|
|
"valid_targets_min": 3847
|
|
},
|
|
{
|
|
"epoch": 0.2883460152182619,
|
|
"grad_norm": 0.4289823169889131,
|
|
"learning_rate": 2.2875399361022364e-05,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06470870971679688,
|
|
"step": 180,
|
|
"valid_targets_mean": 7146.2,
|
|
"valid_targets_min": 5036
|
|
},
|
|
{
|
|
"epoch": 0.29635562675210253,
|
|
"grad_norm": 0.41245976807501994,
|
|
"learning_rate": 2.3514376996805114e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08640505373477936,
|
|
"step": 185,
|
|
"valid_targets_mean": 8201.5,
|
|
"valid_targets_min": 6022
|
|
},
|
|
{
|
|
"epoch": 0.30436523828594314,
|
|
"grad_norm": 0.4803959953905529,
|
|
"learning_rate": 2.415335463258786e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06353765726089478,
|
|
"step": 190,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 0.31237484981978375,
|
|
"grad_norm": 0.45388022821951873,
|
|
"learning_rate": 2.4792332268370606e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06412840634584427,
|
|
"step": 195,
|
|
"valid_targets_mean": 6249.8,
|
|
"valid_targets_min": 3601
|
|
},
|
|
{
|
|
"epoch": 0.32038446135362436,
|
|
"grad_norm": 0.4687748214526049,
|
|
"learning_rate": 2.543130990415336e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07363644242286682,
|
|
"step": 200,
|
|
"valid_targets_mean": 7850.5,
|
|
"valid_targets_min": 3289
|
|
},
|
|
{
|
|
"epoch": 0.32839407288746497,
|
|
"grad_norm": 0.5083076512294771,
|
|
"learning_rate": 2.6070287539936105e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0684114471077919,
|
|
"step": 205,
|
|
"valid_targets_mean": 6368.5,
|
|
"valid_targets_min": 4071
|
|
},
|
|
{
|
|
"epoch": 0.3364036844213056,
|
|
"grad_norm": 0.40768837508146905,
|
|
"learning_rate": 2.670926517571885e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059518154710531235,
|
|
"step": 210,
|
|
"valid_targets_mean": 7169.2,
|
|
"valid_targets_min": 5138
|
|
},
|
|
{
|
|
"epoch": 0.3444132959551462,
|
|
"grad_norm": 0.4497342007741914,
|
|
"learning_rate": 2.73482428115016e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07923542708158493,
|
|
"step": 215,
|
|
"valid_targets_mean": 6420.8,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 0.3524229074889868,
|
|
"grad_norm": 0.4430236516195157,
|
|
"learning_rate": 2.7987220447284347e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06357500702142715,
|
|
"step": 220,
|
|
"valid_targets_mean": 6447.0,
|
|
"valid_targets_min": 5443
|
|
},
|
|
{
|
|
"epoch": 0.3604325190228274,
|
|
"grad_norm": 0.5039479276174045,
|
|
"learning_rate": 2.8626198083067093e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05941087007522583,
|
|
"step": 225,
|
|
"valid_targets_mean": 4346.2,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 0.368442130556668,
|
|
"grad_norm": 0.5190787000804311,
|
|
"learning_rate": 2.9265175718849843e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08084723353385925,
|
|
"step": 230,
|
|
"valid_targets_mean": 6363.5,
|
|
"valid_targets_min": 4955
|
|
},
|
|
{
|
|
"epoch": 0.37645174209050863,
|
|
"grad_norm": 0.47927283189955944,
|
|
"learning_rate": 2.9904153354632592e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04771065711975098,
|
|
"step": 235,
|
|
"valid_targets_mean": 3673.2,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 0.38446135362434924,
|
|
"grad_norm": 0.4799428869705874,
|
|
"learning_rate": 3.054313099041534e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027770165354013443,
|
|
"step": 240,
|
|
"valid_targets_mean": 4228.0,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 0.39247096515818986,
|
|
"grad_norm": 0.44181161602539337,
|
|
"learning_rate": 3.1182108626198084e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0667654275894165,
|
|
"step": 245,
|
|
"valid_targets_mean": 5465.5,
|
|
"valid_targets_min": 3720
|
|
},
|
|
{
|
|
"epoch": 0.4004805766920304,
|
|
"grad_norm": 0.5042049928793526,
|
|
"learning_rate": 3.1821086261980834e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0638137236237526,
|
|
"step": 250,
|
|
"valid_targets_mean": 4442.2,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 0.408490188225871,
|
|
"grad_norm": 0.4505100655959601,
|
|
"learning_rate": 3.246006389776358e-05,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07805617153644562,
|
|
"step": 255,
|
|
"valid_targets_mean": 6434.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 0.41649979975971163,
|
|
"grad_norm": 0.418500902175401,
|
|
"learning_rate": 3.3099041533546326e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06286820769309998,
|
|
"step": 260,
|
|
"valid_targets_mean": 6182.2,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 0.42450941129355224,
|
|
"grad_norm": 0.4323866186161622,
|
|
"learning_rate": 3.3738019169329076e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042258359491825104,
|
|
"step": 265,
|
|
"valid_targets_mean": 4245.0,
|
|
"valid_targets_min": 3545
|
|
},
|
|
{
|
|
"epoch": 0.43251902282739285,
|
|
"grad_norm": 0.40981401226106484,
|
|
"learning_rate": 3.4376996805111825e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07183978706598282,
|
|
"step": 270,
|
|
"valid_targets_mean": 6278.5,
|
|
"valid_targets_min": 3918
|
|
},
|
|
{
|
|
"epoch": 0.44052863436123346,
|
|
"grad_norm": 0.423960771111804,
|
|
"learning_rate": 3.5015974440894575e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06508942693471909,
|
|
"step": 275,
|
|
"valid_targets_mean": 6698.0,
|
|
"valid_targets_min": 5070
|
|
},
|
|
{
|
|
"epoch": 0.4485382458950741,
|
|
"grad_norm": 0.40417683152073197,
|
|
"learning_rate": 3.565495207667732e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06127385050058365,
|
|
"step": 280,
|
|
"valid_targets_mean": 5043.5,
|
|
"valid_targets_min": 2590
|
|
},
|
|
{
|
|
"epoch": 0.4565478574289147,
|
|
"grad_norm": 0.3799429136441951,
|
|
"learning_rate": 3.629392971246007e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04474610090255737,
|
|
"step": 285,
|
|
"valid_targets_mean": 4594.5,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 0.4645574689627553,
|
|
"grad_norm": 0.3773742386639677,
|
|
"learning_rate": 3.6932907348242816e-05,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05980531871318817,
|
|
"step": 290,
|
|
"valid_targets_mean": 7859.2,
|
|
"valid_targets_min": 4849
|
|
},
|
|
{
|
|
"epoch": 0.4725670804965959,
|
|
"grad_norm": 0.3848518901466792,
|
|
"learning_rate": 3.757188498402556e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05975812301039696,
|
|
"step": 295,
|
|
"valid_targets_mean": 7530.8,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 0.4805766920304365,
|
|
"grad_norm": 0.44308841029727336,
|
|
"learning_rate": 3.821086261980831e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04050041735172272,
|
|
"step": 300,
|
|
"valid_targets_mean": 4505.0,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 0.4885863035642771,
|
|
"grad_norm": 0.38012743452252434,
|
|
"learning_rate": 3.884984025559106e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040553368628025055,
|
|
"step": 305,
|
|
"valid_targets_mean": 4934.5,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 0.49659591509811774,
|
|
"grad_norm": 0.4024780251407713,
|
|
"learning_rate": 3.94888178913738e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06877285242080688,
|
|
"step": 310,
|
|
"valid_targets_mean": 6368.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 0.5046055266319583,
|
|
"grad_norm": 1.0715117997185921,
|
|
"learning_rate": 3.9999987518434296e-05,
|
|
"loss": 0.4689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1336270272731781,
|
|
"step": 315,
|
|
"valid_targets_mean": 1458.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 0.512615138165799,
|
|
"grad_norm": 1.0590000700764914,
|
|
"learning_rate": 3.999955066527015e-05,
|
|
"loss": 0.5348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10927103459835052,
|
|
"step": 320,
|
|
"valid_targets_mean": 1471.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.5206247496996396,
|
|
"grad_norm": 0.8116978896480239,
|
|
"learning_rate": 3.999848974939926e-05,
|
|
"loss": 0.5388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1355082392692566,
|
|
"step": 325,
|
|
"valid_targets_mean": 2936.8,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 0.5286343612334802,
|
|
"grad_norm": 0.876911719502306,
|
|
"learning_rate": 3.999680480392626e-05,
|
|
"loss": 0.5552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10081252455711365,
|
|
"step": 330,
|
|
"valid_targets_mean": 1157.2,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 0.5366439727673208,
|
|
"grad_norm": 0.8664540361552933,
|
|
"learning_rate": 3.999449588142792e-05,
|
|
"loss": 0.5724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15301242470741272,
|
|
"step": 335,
|
|
"valid_targets_mean": 2150.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 0.5446535843011614,
|
|
"grad_norm": 0.8006272038893535,
|
|
"learning_rate": 3.9991563053951476e-05,
|
|
"loss": 0.5475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1246386244893074,
|
|
"step": 340,
|
|
"valid_targets_mean": 2603.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 0.552663195835002,
|
|
"grad_norm": 0.8031126749788109,
|
|
"learning_rate": 3.99880064130124e-05,
|
|
"loss": 0.5417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11496362090110779,
|
|
"step": 345,
|
|
"valid_targets_mean": 2382.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 0.5606728073688426,
|
|
"grad_norm": 0.8333356903374727,
|
|
"learning_rate": 3.9983826069591535e-05,
|
|
"loss": 0.5749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1094886064529419,
|
|
"step": 350,
|
|
"valid_targets_mean": 1733.0,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 0.5686824189026832,
|
|
"grad_norm": 0.7607251322047641,
|
|
"learning_rate": 3.997902215413163e-05,
|
|
"loss": 0.5677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0879240408539772,
|
|
"step": 355,
|
|
"valid_targets_mean": 1715.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.5766920304365238,
|
|
"grad_norm": 0.8016683392992084,
|
|
"learning_rate": 3.997359481653327e-05,
|
|
"loss": 0.534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1602325439453125,
|
|
"step": 360,
|
|
"valid_targets_mean": 2182.5,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 0.5847016419703644,
|
|
"grad_norm": 0.7685762079336828,
|
|
"learning_rate": 3.996754422615023e-05,
|
|
"loss": 0.5213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1331164836883545,
|
|
"step": 365,
|
|
"valid_targets_mean": 2525.8,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 0.5927112535042051,
|
|
"grad_norm": 0.8222133849094888,
|
|
"learning_rate": 3.996087057178411e-05,
|
|
"loss": 0.5252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.120429128408432,
|
|
"step": 370,
|
|
"valid_targets_mean": 1936.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 0.6007208650380457,
|
|
"grad_norm": 0.7537570438948556,
|
|
"learning_rate": 3.995357406167856e-05,
|
|
"loss": 0.5467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08994294703006744,
|
|
"step": 375,
|
|
"valid_targets_mean": 1548.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 0.6087304765718863,
|
|
"grad_norm": 0.699698039067041,
|
|
"learning_rate": 3.994565492351267e-05,
|
|
"loss": 0.5521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09777212142944336,
|
|
"step": 380,
|
|
"valid_targets_mean": 1888.8,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 0.6167400881057269,
|
|
"grad_norm": 0.9159463193411346,
|
|
"learning_rate": 3.993711340439394e-05,
|
|
"loss": 0.5714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1483481228351593,
|
|
"step": 385,
|
|
"valid_targets_mean": 1614.0,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 0.6247496996395675,
|
|
"grad_norm": 0.8301734628985116,
|
|
"learning_rate": 3.9927949770850535e-05,
|
|
"loss": 0.5509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278407871723175,
|
|
"step": 390,
|
|
"valid_targets_mean": 1651.2,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 0.6327593111734081,
|
|
"grad_norm": 0.8527521853162238,
|
|
"learning_rate": 3.991816430882297e-05,
|
|
"loss": 0.5728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09603852033615112,
|
|
"step": 395,
|
|
"valid_targets_mean": 1275.0,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 0.6407689227072487,
|
|
"grad_norm": 0.8414754998739568,
|
|
"learning_rate": 3.9907757323655206e-05,
|
|
"loss": 0.568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15311065316200256,
|
|
"step": 400,
|
|
"valid_targets_mean": 2261.0,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 0.6487785342410893,
|
|
"grad_norm": 0.8277792024831763,
|
|
"learning_rate": 3.98967291400851e-05,
|
|
"loss": 0.5683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14796672761440277,
|
|
"step": 405,
|
|
"valid_targets_mean": 1702.5,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 0.6567881457749299,
|
|
"grad_norm": 0.7634008706047728,
|
|
"learning_rate": 3.98850801022343e-05,
|
|
"loss": 0.5594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1123025044798851,
|
|
"step": 410,
|
|
"valid_targets_mean": 2216.8,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 0.6647977573087706,
|
|
"grad_norm": 0.7163659116552932,
|
|
"learning_rate": 3.987281057359746e-05,
|
|
"loss": 0.557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09656307846307755,
|
|
"step": 415,
|
|
"valid_targets_mean": 1605.5,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 0.6728073688426112,
|
|
"grad_norm": 0.8287714493864086,
|
|
"learning_rate": 3.985992093703096e-05,
|
|
"loss": 0.5517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16317743062973022,
|
|
"step": 420,
|
|
"valid_targets_mean": 2072.8,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 0.6808169803764518,
|
|
"grad_norm": 0.8636810924003531,
|
|
"learning_rate": 3.98464115947409e-05,
|
|
"loss": 0.5803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13711334764957428,
|
|
"step": 425,
|
|
"valid_targets_mean": 1854.5,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 0.6888265919102924,
|
|
"grad_norm": 0.8278214363356415,
|
|
"learning_rate": 3.9832282968270595e-05,
|
|
"loss": 0.5195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.142492413520813,
|
|
"step": 430,
|
|
"valid_targets_mean": 1483.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 0.696836203444133,
|
|
"grad_norm": 0.8164741678197476,
|
|
"learning_rate": 3.9817535498487385e-05,
|
|
"loss": 0.544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14797070622444153,
|
|
"step": 435,
|
|
"valid_targets_mean": 2120.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 0.7048458149779736,
|
|
"grad_norm": 0.8495515388005855,
|
|
"learning_rate": 3.980216964556892e-05,
|
|
"loss": 0.5663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23450730741024017,
|
|
"step": 440,
|
|
"valid_targets_mean": 2185.5,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 0.7128554265118142,
|
|
"grad_norm": 0.878402828181315,
|
|
"learning_rate": 3.978618588898873e-05,
|
|
"loss": 0.5732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09538668394088745,
|
|
"step": 445,
|
|
"valid_targets_mean": 1273.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.7208650380456548,
|
|
"grad_norm": 0.8933161814235442,
|
|
"learning_rate": 3.976958472750137e-05,
|
|
"loss": 0.5465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12326841056346893,
|
|
"step": 450,
|
|
"valid_targets_mean": 1165.2,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 0.7288746495794954,
|
|
"grad_norm": 0.6783810157909197,
|
|
"learning_rate": 3.9752366679126754e-05,
|
|
"loss": 0.5309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07751099765300751,
|
|
"step": 455,
|
|
"valid_targets_mean": 1887.8,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 0.736884261113336,
|
|
"grad_norm": 0.7699873120100638,
|
|
"learning_rate": 3.973453228113405e-05,
|
|
"loss": 0.5663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1369703710079193,
|
|
"step": 460,
|
|
"valid_targets_mean": 1764.0,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 0.7448938726471767,
|
|
"grad_norm": 0.8853422949675895,
|
|
"learning_rate": 3.971608209002489e-05,
|
|
"loss": 0.5489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13540863990783691,
|
|
"step": 465,
|
|
"valid_targets_mean": 1403.2,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 0.7529034841810173,
|
|
"grad_norm": 0.44655508883118616,
|
|
"learning_rate": 3.969701668151603e-05,
|
|
"loss": 0.4561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09126046299934387,
|
|
"step": 470,
|
|
"valid_targets_mean": 6643.5,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 0.7609130957148579,
|
|
"grad_norm": 0.5078199385340368,
|
|
"learning_rate": 3.9677336650521336e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0833105817437172,
|
|
"step": 475,
|
|
"valid_targets_mean": 5136.0,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 0.7689227072486985,
|
|
"grad_norm": 0.5021395222355512,
|
|
"learning_rate": 3.9657042611133294e-05,
|
|
"loss": 0.3397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09859202802181244,
|
|
"step": 480,
|
|
"valid_targets_mean": 5650.8,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 0.7769323187825391,
|
|
"grad_norm": 0.4883558771934247,
|
|
"learning_rate": 3.963613519660379e-05,
|
|
"loss": 0.3538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07809653878211975,
|
|
"step": 485,
|
|
"valid_targets_mean": 4949.2,
|
|
"valid_targets_min": 4191
|
|
},
|
|
{
|
|
"epoch": 0.7849419303163797,
|
|
"grad_norm": 0.5020629163058918,
|
|
"learning_rate": 3.961461505932435e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0639006495475769,
|
|
"step": 490,
|
|
"valid_targets_mean": 3846.0,
|
|
"valid_targets_min": 2321
|
|
},
|
|
{
|
|
"epoch": 0.7929515418502202,
|
|
"grad_norm": 0.4727270622396367,
|
|
"learning_rate": 3.959248287080583e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057286497205495834,
|
|
"step": 495,
|
|
"valid_targets_mean": 4102.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 0.8009611533840608,
|
|
"grad_norm": 0.41752348597532946,
|
|
"learning_rate": 3.9569739321657416e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293354481458664,
|
|
"step": 500,
|
|
"valid_targets_mean": 7856.2,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 0.8089707649179014,
|
|
"grad_norm": 0.4343725347376789,
|
|
"learning_rate": 3.9546385121565095e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07614593207836151,
|
|
"step": 505,
|
|
"valid_targets_mean": 5124.0,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 0.816980376451742,
|
|
"grad_norm": 0.3920375897840942,
|
|
"learning_rate": 3.952242099926951e-05,
|
|
"loss": 0.3849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09289588779211044,
|
|
"step": 510,
|
|
"valid_targets_mean": 6862.0,
|
|
"valid_targets_min": 3586
|
|
},
|
|
{
|
|
"epoch": 0.8249899879855827,
|
|
"grad_norm": 0.4957796209542788,
|
|
"learning_rate": 3.9497847702543196e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14882716536521912,
|
|
"step": 515,
|
|
"valid_targets_mean": 7773.0,
|
|
"valid_targets_min": 5273
|
|
},
|
|
{
|
|
"epoch": 0.8329995995194233,
|
|
"grad_norm": 0.397926379405522,
|
|
"learning_rate": 3.94726659981673e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13054664433002472,
|
|
"step": 520,
|
|
"valid_targets_mean": 7539.5,
|
|
"valid_targets_min": 4721
|
|
},
|
|
{
|
|
"epoch": 0.8410092110532639,
|
|
"grad_norm": 0.4698789118508684,
|
|
"learning_rate": 3.94468766719076e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07119232416152954,
|
|
"step": 525,
|
|
"valid_targets_mean": 5395.0,
|
|
"valid_targets_min": 4460
|
|
},
|
|
{
|
|
"epoch": 0.8490188225871045,
|
|
"grad_norm": 0.44083859758639976,
|
|
"learning_rate": 3.942048052849001e-05,
|
|
"loss": 0.3547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07542470097541809,
|
|
"step": 530,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 4366
|
|
},
|
|
{
|
|
"epoch": 0.8570284341209451,
|
|
"grad_norm": 0.4064353929780661,
|
|
"learning_rate": 3.939347839157548e-05,
|
|
"loss": 0.3307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0994889885187149,
|
|
"step": 535,
|
|
"valid_targets_mean": 6880.8,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 0.8650380456547857,
|
|
"grad_norm": 0.43492108039142363,
|
|
"learning_rate": 3.9365871103734264e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0719805359840393,
|
|
"step": 540,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 0.8730476571886263,
|
|
"grad_norm": 0.4116445882010372,
|
|
"learning_rate": 3.933765952641965e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0633612796664238,
|
|
"step": 545,
|
|
"valid_targets_mean": 4575.0,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"grad_norm": 0.3855189568502111,
|
|
"learning_rate": 3.930884453994109e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07632459700107574,
|
|
"step": 550,
|
|
"valid_targets_mean": 6189.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 0.8890668802563075,
|
|
"grad_norm": 0.42830818087488276,
|
|
"learning_rate": 3.9279427043436706e-05,
|
|
"loss": 0.3388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05154121667146683,
|
|
"step": 555,
|
|
"valid_targets_mean": 4232.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 0.8970764917901481,
|
|
"grad_norm": 0.44086980844757234,
|
|
"learning_rate": 3.924940795484525e-05,
|
|
"loss": 0.3752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04488445445895195,
|
|
"step": 560,
|
|
"valid_targets_mean": 4608.8,
|
|
"valid_targets_min": 3264
|
|
},
|
|
{
|
|
"epoch": 0.9050861033239888,
|
|
"grad_norm": 0.44901611078209913,
|
|
"learning_rate": 3.9218788210877436e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08685051649808884,
|
|
"step": 565,
|
|
"valid_targets_mean": 5691.5,
|
|
"valid_targets_min": 4828
|
|
},
|
|
{
|
|
"epoch": 0.9130957148578294,
|
|
"grad_norm": 0.5317518227054663,
|
|
"learning_rate": 3.918756876698676e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11217975616455078,
|
|
"step": 570,
|
|
"valid_targets_mean": 5483.0,
|
|
"valid_targets_min": 4351
|
|
},
|
|
{
|
|
"epoch": 0.92110532639167,
|
|
"grad_norm": 0.45017655796610734,
|
|
"learning_rate": 3.9155750597339634e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05694647133350372,
|
|
"step": 575,
|
|
"valid_targets_mean": 3804.2,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 0.9291149379255106,
|
|
"grad_norm": 0.4962053716194564,
|
|
"learning_rate": 3.912333469478502e-05,
|
|
"loss": 0.4225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06532659381628036,
|
|
"step": 580,
|
|
"valid_targets_mean": 3795.5,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 0.9371245494593512,
|
|
"grad_norm": 0.40043319267184296,
|
|
"learning_rate": 3.909032207082344e-05,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1120402067899704,
|
|
"step": 585,
|
|
"valid_targets_mean": 6354.0,
|
|
"valid_targets_min": 4890
|
|
},
|
|
{
|
|
"epoch": 0.9451341609931918,
|
|
"grad_norm": 0.4154947705040722,
|
|
"learning_rate": 3.90567137555754e-05,
|
|
"loss": 0.3439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08378976583480835,
|
|
"step": 590,
|
|
"valid_targets_mean": 6546.8,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 0.9531437725270324,
|
|
"grad_norm": 0.4132102301128767,
|
|
"learning_rate": 3.9022510797749286e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05636051297187805,
|
|
"step": 595,
|
|
"valid_targets_mean": 4817.8,
|
|
"valid_targets_min": 3153
|
|
},
|
|
{
|
|
"epoch": 0.961153384060873,
|
|
"grad_norm": 0.467721089889869,
|
|
"learning_rate": 3.898771426460859e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03947315365076065,
|
|
"step": 600,
|
|
"valid_targets_mean": 2771.0,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 0.9691629955947136,
|
|
"grad_norm": 0.4114945150121572,
|
|
"learning_rate": 3.8952325241938635e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05305478349328041,
|
|
"step": 605,
|
|
"valid_targets_mean": 4317.0,
|
|
"valid_targets_min": 3829
|
|
},
|
|
{
|
|
"epoch": 0.9771726071285542,
|
|
"grad_norm": 0.4127210926839282,
|
|
"learning_rate": 3.8916344834012695e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08185027539730072,
|
|
"step": 610,
|
|
"valid_targets_mean": 6788.8,
|
|
"valid_targets_min": 5244
|
|
},
|
|
{
|
|
"epoch": 0.9851822186623949,
|
|
"grad_norm": 0.36656119473928644,
|
|
"learning_rate": 3.887977416355754e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.090815469622612,
|
|
"step": 615,
|
|
"valid_targets_mean": 7649.8,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 0.9931918301962355,
|
|
"grad_norm": 0.3656087584839383,
|
|
"learning_rate": 3.884261437171838e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07569041103124619,
|
|
"step": 620,
|
|
"valid_targets_mean": 6744.8,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.3778984159114088,
|
|
"learning_rate": 3.8804866618023284e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3383905291557312,
|
|
"step": 625,
|
|
"valid_targets_mean": 6467.2,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 1.0080096115338406,
|
|
"grad_norm": 0.6540415801457766,
|
|
"learning_rate": 3.876653208034698e-05,
|
|
"loss": 0.5045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14651647210121155,
|
|
"step": 630,
|
|
"valid_targets_mean": 4098.5,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 1.0160192230676812,
|
|
"grad_norm": 0.670491013973996,
|
|
"learning_rate": 3.8727611954874114e-05,
|
|
"loss": 0.4707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08398725092411041,
|
|
"step": 635,
|
|
"valid_targets_mean": 2511.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 1.0240288346015218,
|
|
"grad_norm": 0.6697280920586458,
|
|
"learning_rate": 3.8688107456061904e-05,
|
|
"loss": 0.4665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0767315998673439,
|
|
"step": 640,
|
|
"valid_targets_mean": 2126.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 1.0320384461353624,
|
|
"grad_norm": 0.5480779649177725,
|
|
"learning_rate": 3.864801981660227e-05,
|
|
"loss": 0.4398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06988725066184998,
|
|
"step": 645,
|
|
"valid_targets_mean": 2022.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 1.040048057669203,
|
|
"grad_norm": 1.5276225140316455,
|
|
"learning_rate": 3.860735028738337e-05,
|
|
"loss": 0.4399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12660488486289978,
|
|
"step": 650,
|
|
"valid_targets_mean": 3344.0,
|
|
"valid_targets_min": 2137
|
|
},
|
|
{
|
|
"epoch": 1.0480576692030437,
|
|
"grad_norm": 0.5607461303631824,
|
|
"learning_rate": 3.856610013745051e-05,
|
|
"loss": 0.4441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11083018779754639,
|
|
"step": 655,
|
|
"valid_targets_mean": 3821.8,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 1.0560672807368843,
|
|
"grad_norm": 0.6148964798902606,
|
|
"learning_rate": 3.852427065396665e-05,
|
|
"loss": 0.4323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10052657127380371,
|
|
"step": 660,
|
|
"valid_targets_mean": 2872.0,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 1.0640768922707249,
|
|
"grad_norm": 0.6025272441927578,
|
|
"learning_rate": 3.848186314217213e-05,
|
|
"loss": 0.4469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1355205774307251,
|
|
"step": 665,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 1.0720865038045655,
|
|
"grad_norm": 0.5398610129881493,
|
|
"learning_rate": 3.843887892534402e-05,
|
|
"loss": 0.4252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11434533447027206,
|
|
"step": 670,
|
|
"valid_targets_mean": 4791.2,
|
|
"valid_targets_min": 4097
|
|
},
|
|
{
|
|
"epoch": 1.080096115338406,
|
|
"grad_norm": 0.5485960316575847,
|
|
"learning_rate": 3.8395319344754776e-05,
|
|
"loss": 0.4291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09059801697731018,
|
|
"step": 675,
|
|
"valid_targets_mean": 5747.0,
|
|
"valid_targets_min": 2284
|
|
},
|
|
{
|
|
"epoch": 1.0881057268722467,
|
|
"grad_norm": 0.468736374374983,
|
|
"learning_rate": 3.8351185759630435e-05,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12052180618047714,
|
|
"step": 680,
|
|
"valid_targets_mean": 3851.0,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 1.0961153384060873,
|
|
"grad_norm": 0.5075671388343633,
|
|
"learning_rate": 3.830647954710816e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12471823394298553,
|
|
"step": 685,
|
|
"valid_targets_mean": 3773.2,
|
|
"valid_targets_min": 3192
|
|
},
|
|
{
|
|
"epoch": 1.104124949939928,
|
|
"grad_norm": 0.542371825724921,
|
|
"learning_rate": 3.826120210219331e-05,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09522342681884766,
|
|
"step": 690,
|
|
"valid_targets_mean": 3683.5,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 1.1121345614737685,
|
|
"grad_norm": 0.5740717370073322,
|
|
"learning_rate": 3.8215354837715836e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08507896959781647,
|
|
"step": 695,
|
|
"valid_targets_mean": 1953.5,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 1.1201441730076092,
|
|
"grad_norm": 0.6219703005275402,
|
|
"learning_rate": 3.816893918428631e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1159299984574318,
|
|
"step": 700,
|
|
"valid_targets_mean": 3450.8,
|
|
"valid_targets_min": 1888
|
|
},
|
|
{
|
|
"epoch": 1.1281537845414498,
|
|
"grad_norm": 0.5137511597239844,
|
|
"learning_rate": 3.8121956590251153e-05,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11531707644462585,
|
|
"step": 705,
|
|
"valid_targets_mean": 3919.8,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 1.1361633960752904,
|
|
"grad_norm": 0.5316121477902497,
|
|
"learning_rate": 3.8074408521647576e-05,
|
|
"loss": 0.4024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09123504161834717,
|
|
"step": 710,
|
|
"valid_targets_mean": 2428.2,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 1.144173007609131,
|
|
"grad_norm": 0.6440911348167736,
|
|
"learning_rate": 3.802629646215771e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09770418703556061,
|
|
"step": 715,
|
|
"valid_targets_mean": 3008.2,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 1.1521826191429716,
|
|
"grad_norm": 0.5830717340810724,
|
|
"learning_rate": 3.79776219130624e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08197665214538574,
|
|
"step": 720,
|
|
"valid_targets_mean": 2879.5,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 1.1601922306768122,
|
|
"grad_norm": 0.659966822814018,
|
|
"learning_rate": 3.792838639319431e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07291534543037415,
|
|
"step": 725,
|
|
"valid_targets_mean": 2469.2,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 1.1682018422106528,
|
|
"grad_norm": 0.5267377530665949,
|
|
"learning_rate": 3.787859143889054e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08884596824645996,
|
|
"step": 730,
|
|
"valid_targets_mean": 3259.2,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 1.1762114537444934,
|
|
"grad_norm": 0.5989583064517962,
|
|
"learning_rate": 3.782823860394469e-05,
|
|
"loss": 0.3745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06123165041208267,
|
|
"step": 735,
|
|
"valid_targets_mean": 2490.2,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 1.184221065278334,
|
|
"grad_norm": 0.6087995893117895,
|
|
"learning_rate": 3.777732945955841e-05,
|
|
"loss": 0.3604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08103024959564209,
|
|
"step": 740,
|
|
"valid_targets_mean": 4032.2,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 1.1922306768121747,
|
|
"grad_norm": 0.571201837533488,
|
|
"learning_rate": 3.772586559429229e-05,
|
|
"loss": 0.3821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11829707026481628,
|
|
"step": 745,
|
|
"valid_targets_mean": 3729.0,
|
|
"valid_targets_min": 2157
|
|
},
|
|
{
|
|
"epoch": 1.2002402883460153,
|
|
"grad_norm": 0.5726585223379813,
|
|
"learning_rate": 3.767384861401636e-05,
|
|
"loss": 0.3573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09119193255901337,
|
|
"step": 750,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 1.2082498998798559,
|
|
"grad_norm": 0.5276392791546647,
|
|
"learning_rate": 3.762128014185998e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06674891710281372,
|
|
"step": 755,
|
|
"valid_targets_mean": 3178.5,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 1.2162595114136965,
|
|
"grad_norm": 0.5540802870692201,
|
|
"learning_rate": 3.7568161818161135e-05,
|
|
"loss": 0.3206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056200817227363586,
|
|
"step": 760,
|
|
"valid_targets_mean": 2994.8,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 1.224269122947537,
|
|
"grad_norm": 0.5013881756954743,
|
|
"learning_rate": 3.751449530041532e-05,
|
|
"loss": 0.3266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0877077728509903,
|
|
"step": 765,
|
|
"valid_targets_mean": 4503.5,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 1.2322787344813777,
|
|
"grad_norm": 0.5470496967715482,
|
|
"learning_rate": 3.7460282263223764e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08452261984348297,
|
|
"step": 770,
|
|
"valid_targets_mean": 2806.2,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 1.2402883460152183,
|
|
"grad_norm": 0.47823144060020073,
|
|
"learning_rate": 3.740552439824122e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06890635192394257,
|
|
"step": 775,
|
|
"valid_targets_mean": 4892.8,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 1.248297957549059,
|
|
"grad_norm": 0.4670948376548824,
|
|
"learning_rate": 3.735022341412314e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324836015701294,
|
|
"step": 780,
|
|
"valid_targets_mean": 6856.2,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 1.2563075690828995,
|
|
"grad_norm": 0.33391870396212114,
|
|
"learning_rate": 3.7294381036472386e-05,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04633893817663193,
|
|
"step": 785,
|
|
"valid_targets_mean": 5445.0,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 1.2643171806167401,
|
|
"grad_norm": 0.3824691764570469,
|
|
"learning_rate": 3.723799900778538e-05,
|
|
"loss": 0.2093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06105303764343262,
|
|
"step": 790,
|
|
"valid_targets_mean": 6826.8,
|
|
"valid_targets_min": 4640
|
|
},
|
|
{
|
|
"epoch": 1.2723267921505808,
|
|
"grad_norm": 0.34319355821154224,
|
|
"learning_rate": 3.7181079087397705e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038283996284008026,
|
|
"step": 795,
|
|
"valid_targets_mean": 7376.2,
|
|
"valid_targets_min": 6135
|
|
},
|
|
{
|
|
"epoch": 1.2803364036844214,
|
|
"grad_norm": 0.31316708165946167,
|
|
"learning_rate": 3.712362305142926e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048212647438049316,
|
|
"step": 800,
|
|
"valid_targets_mean": 6002.2,
|
|
"valid_targets_min": 3847
|
|
},
|
|
{
|
|
"epoch": 1.288346015218262,
|
|
"grad_norm": 0.3187211195978479,
|
|
"learning_rate": 3.706563269272878e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045798733830451965,
|
|
"step": 805,
|
|
"valid_targets_mean": 7146.2,
|
|
"valid_targets_min": 5036
|
|
},
|
|
{
|
|
"epoch": 1.2963556267521026,
|
|
"grad_norm": 0.3097544299041166,
|
|
"learning_rate": 3.700710982081794e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06357955932617188,
|
|
"step": 810,
|
|
"valid_targets_mean": 8201.5,
|
|
"valid_targets_min": 6022
|
|
},
|
|
{
|
|
"epoch": 1.3043652382859432,
|
|
"grad_norm": 0.4001263441869011,
|
|
"learning_rate": 3.694805626183486e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04611557722091675,
|
|
"step": 815,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 1.3123748498197838,
|
|
"grad_norm": 0.3508650587920551,
|
|
"learning_rate": 3.688847385847711e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04423719644546509,
|
|
"step": 820,
|
|
"valid_targets_mean": 6249.8,
|
|
"valid_targets_min": 3601
|
|
},
|
|
{
|
|
"epoch": 1.3203844613536244,
|
|
"grad_norm": 0.31301461756597554,
|
|
"learning_rate": 3.682836446994428e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053104691207408905,
|
|
"step": 825,
|
|
"valid_targets_mean": 7850.5,
|
|
"valid_targets_min": 3289
|
|
},
|
|
{
|
|
"epoch": 1.328394072887465,
|
|
"grad_norm": 0.4426008548210674,
|
|
"learning_rate": 3.676772997187989e-05,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05142742022871971,
|
|
"step": 830,
|
|
"valid_targets_mean": 6368.5,
|
|
"valid_targets_min": 4071
|
|
},
|
|
{
|
|
"epoch": 1.3364036844213056,
|
|
"grad_norm": 0.31870299327242313,
|
|
"learning_rate": 3.670657225631289e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0418720543384552,
|
|
"step": 835,
|
|
"valid_targets_mean": 7169.2,
|
|
"valid_targets_min": 5138
|
|
},
|
|
{
|
|
"epoch": 1.3444132959551462,
|
|
"grad_norm": 0.3332886650006749,
|
|
"learning_rate": 3.6644893231598635e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05265376716852188,
|
|
"step": 840,
|
|
"valid_targets_mean": 6420.8,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 1.3524229074889869,
|
|
"grad_norm": 0.3668485685901626,
|
|
"learning_rate": 3.658269482235932e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04361281543970108,
|
|
"step": 845,
|
|
"valid_targets_mean": 6447.0,
|
|
"valid_targets_min": 5443
|
|
},
|
|
{
|
|
"epoch": 1.3604325190228275,
|
|
"grad_norm": 0.3892622669877241,
|
|
"learning_rate": 3.651997896942394e-05,
|
|
"loss": 0.1654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038852378726005554,
|
|
"step": 850,
|
|
"valid_targets_mean": 4346.2,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 1.368442130556668,
|
|
"grad_norm": 0.3570410574615469,
|
|
"learning_rate": 3.645674762976769e-05,
|
|
"loss": 0.1611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05695415288209915,
|
|
"step": 855,
|
|
"valid_targets_mean": 6363.5,
|
|
"valid_targets_min": 4955
|
|
},
|
|
{
|
|
"epoch": 1.3764517420905087,
|
|
"grad_norm": 0.36785715621365156,
|
|
"learning_rate": 3.639300277645096e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03323707729578018,
|
|
"step": 860,
|
|
"valid_targets_mean": 3673.2,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 1.3844613536243493,
|
|
"grad_norm": 0.3615896328586642,
|
|
"learning_rate": 3.6328746398557715e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021509967744350433,
|
|
"step": 865,
|
|
"valid_targets_mean": 4228.0,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 1.39247096515819,
|
|
"grad_norm": 0.3411305058278392,
|
|
"learning_rate": 3.6263980501133466e-05,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044665221124887466,
|
|
"step": 870,
|
|
"valid_targets_mean": 5465.5,
|
|
"valid_targets_min": 3720
|
|
},
|
|
{
|
|
"epoch": 1.4004805766920305,
|
|
"grad_norm": 0.40543952314948417,
|
|
"learning_rate": 3.619870710512268e-05,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04113004356622696,
|
|
"step": 875,
|
|
"valid_targets_mean": 4442.2,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 1.408490188225871,
|
|
"grad_norm": 0.36998373211659047,
|
|
"learning_rate": 3.6132928247305713e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04625851660966873,
|
|
"step": 880,
|
|
"valid_targets_mean": 6434.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 1.4164997997597117,
|
|
"grad_norm": 0.5145814146263006,
|
|
"learning_rate": 3.60666459802353e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04299291968345642,
|
|
"step": 885,
|
|
"valid_targets_mean": 6182.2,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 1.4245094112935521,
|
|
"grad_norm": 0.37451151287524986,
|
|
"learning_rate": 3.599986237217245e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027214203029870987,
|
|
"step": 890,
|
|
"valid_targets_mean": 4245.0,
|
|
"valid_targets_min": 3545
|
|
},
|
|
{
|
|
"epoch": 1.432519022827393,
|
|
"grad_norm": 0.41814789527319035,
|
|
"learning_rate": 3.593257950702194e-05,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04474259540438652,
|
|
"step": 895,
|
|
"valid_targets_mean": 6278.5,
|
|
"valid_targets_min": 3918
|
|
},
|
|
{
|
|
"epoch": 1.4405286343612334,
|
|
"grad_norm": 0.3428465487447553,
|
|
"learning_rate": 3.586479948426728e-05,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04373568296432495,
|
|
"step": 900,
|
|
"valid_targets_mean": 6698.0,
|
|
"valid_targets_min": 5070
|
|
},
|
|
{
|
|
"epoch": 1.4485382458950742,
|
|
"grad_norm": 0.33563980324527376,
|
|
"learning_rate": 3.579652441890523e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03625631332397461,
|
|
"step": 905,
|
|
"valid_targets_mean": 5043.5,
|
|
"valid_targets_min": 2590
|
|
},
|
|
{
|
|
"epoch": 1.4565478574289146,
|
|
"grad_norm": 0.3518876031868258,
|
|
"learning_rate": 3.572775644137974e-05,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026623956859111786,
|
|
"step": 910,
|
|
"valid_targets_mean": 4594.5,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 1.4645574689627554,
|
|
"grad_norm": 0.30312239686656134,
|
|
"learning_rate": 3.5658497697515534e-05,
|
|
"loss": 0.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03494893014431,
|
|
"step": 915,
|
|
"valid_targets_mean": 7859.2,
|
|
"valid_targets_min": 4849
|
|
},
|
|
{
|
|
"epoch": 1.4725670804965958,
|
|
"grad_norm": 0.331985193897914,
|
|
"learning_rate": 3.558875034845113e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037756018340587616,
|
|
"step": 920,
|
|
"valid_targets_mean": 7530.8,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 1.4805766920304366,
|
|
"grad_norm": 0.3521284010779265,
|
|
"learning_rate": 3.551851657057139e-05,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02649506740272045,
|
|
"step": 925,
|
|
"valid_targets_mean": 4505.0,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 1.488586303564277,
|
|
"grad_norm": 0.35028382974942723,
|
|
"learning_rate": 3.544779855543963e-05,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02385680563747883,
|
|
"step": 930,
|
|
"valid_targets_mean": 4934.5,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 1.4965959150981178,
|
|
"grad_norm": 0.31886205961601105,
|
|
"learning_rate": 3.5376598509729226e-05,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04172584414482117,
|
|
"step": 935,
|
|
"valid_targets_mean": 6368.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 1.5046055266319582,
|
|
"grad_norm": 0.8471282489868537,
|
|
"learning_rate": 3.5304918655154754e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0702909380197525,
|
|
"step": 940,
|
|
"valid_targets_mean": 1458.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 1.512615138165799,
|
|
"grad_norm": 0.8671161699571933,
|
|
"learning_rate": 3.523276122840266e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06322944164276123,
|
|
"step": 945,
|
|
"valid_targets_mean": 1471.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 1.5206247496996395,
|
|
"grad_norm": 0.7773757858910911,
|
|
"learning_rate": 3.516012848106149e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07790671288967133,
|
|
"step": 950,
|
|
"valid_targets_mean": 2936.8,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 1.5286343612334803,
|
|
"grad_norm": 0.8973199474923843,
|
|
"learning_rate": 3.5087022679551614e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0557282492518425,
|
|
"step": 955,
|
|
"valid_targets_mean": 1157.2,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 1.5366439727673207,
|
|
"grad_norm": 0.690015217683748,
|
|
"learning_rate": 3.5013446105054486e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0893687903881073,
|
|
"step": 960,
|
|
"valid_targets_mean": 2150.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 1.5446535843011615,
|
|
"grad_norm": 0.7339091055812118,
|
|
"learning_rate": 3.493940105344152e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07210154831409454,
|
|
"step": 965,
|
|
"valid_targets_mean": 2603.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 1.552663195835002,
|
|
"grad_norm": 0.7569751024456212,
|
|
"learning_rate": 3.4864889835202366e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06868860870599747,
|
|
"step": 970,
|
|
"valid_targets_mean": 2382.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.5606728073688427,
|
|
"grad_norm": 0.9601245376814217,
|
|
"learning_rate": 3.4789914775372905e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06296397745609283,
|
|
"step": 975,
|
|
"valid_targets_mean": 1733.0,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.5686824189026831,
|
|
"grad_norm": 0.887417144137006,
|
|
"learning_rate": 3.471447821346264e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0519888773560524,
|
|
"step": 980,
|
|
"valid_targets_mean": 1715.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.576692030436524,
|
|
"grad_norm": 0.8142446228724348,
|
|
"learning_rate": 3.463858250338168e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08111092448234558,
|
|
"step": 985,
|
|
"valid_targets_mean": 2182.5,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 1.5847016419703643,
|
|
"grad_norm": 0.7205618097281907,
|
|
"learning_rate": 3.4562230013367374e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07731902599334717,
|
|
"step": 990,
|
|
"valid_targets_mean": 2525.8,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 1.5927112535042052,
|
|
"grad_norm": 0.7107631066283354,
|
|
"learning_rate": 3.448542312591032e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061693351715803146,
|
|
"step": 995,
|
|
"valid_targets_mean": 1936.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 1.6007208650380456,
|
|
"grad_norm": 0.8053744888014461,
|
|
"learning_rate": 3.440816423768007e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04471175745129585,
|
|
"step": 1000,
|
|
"valid_targets_mean": 1548.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 1.6087304765718864,
|
|
"grad_norm": 0.7869269711916793,
|
|
"learning_rate": 3.433045575945031e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05572526156902313,
|
|
"step": 1005,
|
|
"valid_targets_mean": 1888.8,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 1.6167400881057268,
|
|
"grad_norm": 0.8745482042579645,
|
|
"learning_rate": 3.42523001160237e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08253291249275208,
|
|
"step": 1010,
|
|
"valid_targets_mean": 1614.0,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 1.6247496996395676,
|
|
"grad_norm": 0.8199147187959038,
|
|
"learning_rate": 3.417369974615615e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0696907788515091,
|
|
"step": 1015,
|
|
"valid_targets_mean": 1651.2,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 1.632759311173408,
|
|
"grad_norm": 0.8264543519517318,
|
|
"learning_rate": 3.409465710248074e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05947771668434143,
|
|
"step": 1020,
|
|
"valid_targets_mean": 1275.0,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 1.6407689227072488,
|
|
"grad_norm": 0.8173543212112805,
|
|
"learning_rate": 3.401517465143119e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08402656018733978,
|
|
"step": 1025,
|
|
"valid_targets_mean": 2261.0,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 1.6487785342410892,
|
|
"grad_norm": 0.7615035506195951,
|
|
"learning_rate": 3.393525487316489e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0804925411939621,
|
|
"step": 1030,
|
|
"valid_targets_mean": 1702.5,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 1.65678814577493,
|
|
"grad_norm": 0.7194767644202842,
|
|
"learning_rate": 3.385490026148554e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05836835131049156,
|
|
"step": 1035,
|
|
"valid_targets_mean": 2216.8,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 1.6647977573087704,
|
|
"grad_norm": 0.8427938633703873,
|
|
"learning_rate": 3.377411332376529e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050775591284036636,
|
|
"step": 1040,
|
|
"valid_targets_mean": 1605.5,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 1.6728073688426113,
|
|
"grad_norm": 0.7674414905329175,
|
|
"learning_rate": 3.369289658086651e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0904841274023056,
|
|
"step": 1045,
|
|
"valid_targets_mean": 2072.8,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 1.6808169803764517,
|
|
"grad_norm": 0.7557004176765384,
|
|
"learning_rate": 3.3611252567063184e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07004745304584503,
|
|
"step": 1050,
|
|
"valid_targets_mean": 1854.5,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 1.6888265919102925,
|
|
"grad_norm": 0.7884523665676797,
|
|
"learning_rate": 3.352918382996174e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0772811770439148,
|
|
"step": 1055,
|
|
"valid_targets_mean": 1483.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 1.6968362034441329,
|
|
"grad_norm": 0.7896248171495808,
|
|
"learning_rate": 3.344669293042163e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08026602864265442,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2120.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 1.7048458149779737,
|
|
"grad_norm": 0.7144565309384396,
|
|
"learning_rate": 3.336378244247539e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12764765322208405,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2185.5,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 1.712855426511814,
|
|
"grad_norm": 0.8574017902006749,
|
|
"learning_rate": 3.3280454953248326e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04705497995018959,
|
|
"step": 1070,
|
|
"valid_targets_mean": 1273.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 1.720865038045655,
|
|
"grad_norm": 0.8535038458217312,
|
|
"learning_rate": 3.3196713062877765e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0675487071275711,
|
|
"step": 1075,
|
|
"valid_targets_mean": 1165.2,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 1.7288746495794953,
|
|
"grad_norm": 0.7090750674060404,
|
|
"learning_rate": 3.311255938443196e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04536786675453186,
|
|
"step": 1080,
|
|
"valid_targets_mean": 1887.8,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 1.7368842611133362,
|
|
"grad_norm": 0.7550930006357505,
|
|
"learning_rate": 3.3027996543828524e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07553685456514359,
|
|
"step": 1085,
|
|
"valid_targets_mean": 1764.0,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 1.7448938726471765,
|
|
"grad_norm": 0.7879869664096272,
|
|
"learning_rate": 3.2943027179752494e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06396374106407166,
|
|
"step": 1090,
|
|
"valid_targets_mean": 1403.2,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 1.7529034841810174,
|
|
"grad_norm": 0.4644175625063066,
|
|
"learning_rate": 3.285765394357401e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05714649707078934,
|
|
"step": 1095,
|
|
"valid_targets_mean": 6643.5,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 1.7609130957148578,
|
|
"grad_norm": 0.3670680081906914,
|
|
"learning_rate": 3.277187949926556e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05411025881767273,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5136.0,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 1.7689227072486986,
|
|
"grad_norm": 0.440062643480158,
|
|
"learning_rate": 3.268570652331888e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06591090559959412,
|
|
"step": 1105,
|
|
"valid_targets_mean": 5650.8,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 1.776932318782539,
|
|
"grad_norm": 0.5056997967574931,
|
|
"learning_rate": 3.2599137704661405e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052409976720809937,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4949.2,
|
|
"valid_targets_min": 4191
|
|
},
|
|
{
|
|
"epoch": 1.7849419303163798,
|
|
"grad_norm": 0.40418285292107164,
|
|
"learning_rate": 3.251217574457239e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04364973306655884,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3846.0,
|
|
"valid_targets_min": 2321
|
|
},
|
|
{
|
|
"epoch": 1.7929515418502202,
|
|
"grad_norm": 0.4085740918906434,
|
|
"learning_rate": 3.242482335659861e-05,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039606157690286636,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4102.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 1.8009611533840608,
|
|
"grad_norm": 0.3466067802333226,
|
|
"learning_rate": 3.2337083266469687e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08883285522460938,
|
|
"step": 1125,
|
|
"valid_targets_mean": 7856.2,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 1.8089707649179014,
|
|
"grad_norm": 0.4096473717065439,
|
|
"learning_rate": 3.224895821201304e-05,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052127398550510406,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5124.0,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 1.816980376451742,
|
|
"grad_norm": 0.3375084885386615,
|
|
"learning_rate": 3.2160450943068446e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06557562947273254,
|
|
"step": 1135,
|
|
"valid_targets_mean": 6862.0,
|
|
"valid_targets_min": 3586
|
|
},
|
|
{
|
|
"epoch": 1.8249899879855827,
|
|
"grad_norm": 0.3768644661913977,
|
|
"learning_rate": 3.207156422140225e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09596361964941025,
|
|
"step": 1140,
|
|
"valid_targets_mean": 7773.0,
|
|
"valid_targets_min": 5273
|
|
},
|
|
{
|
|
"epoch": 1.8329995995194233,
|
|
"grad_norm": 0.3769074047931234,
|
|
"learning_rate": 3.198230082062115e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08482344448566437,
|
|
"step": 1145,
|
|
"valid_targets_mean": 7539.5,
|
|
"valid_targets_min": 4721
|
|
},
|
|
{
|
|
"epoch": 1.8410092110532639,
|
|
"grad_norm": 0.3824428416071478,
|
|
"learning_rate": 3.189266352608574e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0491480827331543,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5395.0,
|
|
"valid_targets_min": 4460
|
|
},
|
|
{
|
|
"epoch": 1.8490188225871045,
|
|
"grad_norm": 0.3764306614236919,
|
|
"learning_rate": 3.180265513482345e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053960904479026794,
|
|
"step": 1155,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 4366
|
|
},
|
|
{
|
|
"epoch": 1.857028434120945,
|
|
"grad_norm": 0.4130188483471904,
|
|
"learning_rate": 3.171227845544143e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06659761071205139,
|
|
"step": 1160,
|
|
"valid_targets_mean": 6880.8,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 1.8650380456547857,
|
|
"grad_norm": 0.3647472930778772,
|
|
"learning_rate": 3.162153630803877e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04995249956846237,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 1.8730476571886263,
|
|
"grad_norm": 0.38128674860337136,
|
|
"learning_rate": 3.153043152411861e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03858102858066559,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4575.0,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 1.881057268722467,
|
|
"grad_norm": 0.3771475582590147,
|
|
"learning_rate": 3.14389669464997e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051282938569784164,
|
|
"step": 1175,
|
|
"valid_targets_mean": 6189.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 1.8890668802563075,
|
|
"grad_norm": 0.5069801645266406,
|
|
"learning_rate": 3.134714542922777e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03462839871644974,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4232.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 1.8970764917901481,
|
|
"grad_norm": 0.49585469324020964,
|
|
"learning_rate": 3.1254969837486425e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03316638618707657,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4608.8,
|
|
"valid_targets_min": 3264
|
|
},
|
|
{
|
|
"epoch": 1.9050861033239888,
|
|
"grad_norm": 0.5478687259184984,
|
|
"learning_rate": 3.116244304750774e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06075169891119003,
|
|
"step": 1190,
|
|
"valid_targets_mean": 5691.5,
|
|
"valid_targets_min": 4828
|
|
},
|
|
{
|
|
"epoch": 1.9130957148578294,
|
|
"grad_norm": 0.4217404547945008,
|
|
"learning_rate": 3.106956794648254e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07265351712703705,
|
|
"step": 1195,
|
|
"valid_targets_mean": 5483.0,
|
|
"valid_targets_min": 4351
|
|
},
|
|
{
|
|
"epoch": 1.92110532639167,
|
|
"grad_norm": 0.44670505247868375,
|
|
"learning_rate": 3.097634743247026e-05,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03720955550670624,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3804.2,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 1.9291149379255106,
|
|
"grad_norm": 0.5486976422445344,
|
|
"learning_rate": 3.08827844143086e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0461294949054718,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3795.5,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 1.9371245494593512,
|
|
"grad_norm": 0.36375087247363924,
|
|
"learning_rate": 3.078888181152264e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06845322251319885,
|
|
"step": 1210,
|
|
"valid_targets_mean": 6354.0,
|
|
"valid_targets_min": 4890
|
|
},
|
|
{
|
|
"epoch": 1.9451341609931918,
|
|
"grad_norm": 0.31766609851581823,
|
|
"learning_rate": 3.0694642554233855e-05,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05521411448717117,
|
|
"step": 1215,
|
|
"valid_targets_mean": 6546.8,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 1.9531437725270324,
|
|
"grad_norm": 0.44289071832133264,
|
|
"learning_rate": 3.0600069583068594e-05,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039409711956977844,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4817.8,
|
|
"valid_targets_min": 3153
|
|
},
|
|
{
|
|
"epoch": 1.961153384060873,
|
|
"grad_norm": 0.38501679113676557,
|
|
"learning_rate": 3.0505165849066394e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027895990759134293,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2771.0,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 1.9691629955947136,
|
|
"grad_norm": 0.37214495698923594,
|
|
"learning_rate": 3.040993431358782e-05,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03417789191007614,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4317.0,
|
|
"valid_targets_min": 3829
|
|
},
|
|
{
|
|
"epoch": 1.9771726071285542,
|
|
"grad_norm": 0.37548478676503183,
|
|
"learning_rate": 3.031437794822215e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05814184620976448,
|
|
"step": 1235,
|
|
"valid_targets_mean": 6788.8,
|
|
"valid_targets_min": 5244
|
|
},
|
|
{
|
|
"epoch": 1.9851822186623949,
|
|
"grad_norm": 0.43492838647071513,
|
|
"learning_rate": 3.021849973469455e-05,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06505689024925232,
|
|
"step": 1240,
|
|
"valid_targets_mean": 7649.8,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 1.9931918301962355,
|
|
"grad_norm": 0.3219125417544361,
|
|
"learning_rate": 3.012230266477313e-05,
|
|
"loss": 0.2017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05035298317670822,
|
|
"step": 1245,
|
|
"valid_targets_mean": 6744.8,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.43012023957486334,
|
|
"learning_rate": 3.0025789740175502e-05,
|
|
"loss": 0.2049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19862064719200134,
|
|
"step": 1250,
|
|
"valid_targets_mean": 6467.2,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 2.0080096115338404,
|
|
"grad_norm": 0.5087007255740122,
|
|
"learning_rate": 2.9928963972475186e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09383691847324371,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4098.5,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 2.016019223067681,
|
|
"grad_norm": 0.5461003370807352,
|
|
"learning_rate": 2.9831828383007585e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049090676009655,
|
|
"step": 1260,
|
|
"valid_targets_mean": 2511.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 2.0240288346015216,
|
|
"grad_norm": 0.6559640812707666,
|
|
"learning_rate": 2.9734386002775754e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042685024440288544,
|
|
"step": 1265,
|
|
"valid_targets_mean": 2126.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 2.0320384461353624,
|
|
"grad_norm": 0.527827013617546,
|
|
"learning_rate": 2.963663987235577e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03850069269537926,
|
|
"step": 1270,
|
|
"valid_targets_mean": 2022.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 2.040048057669203,
|
|
"grad_norm": 0.6972734498811939,
|
|
"learning_rate": 2.95385930418019e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07021202147006989,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3344.0,
|
|
"valid_targets_min": 2137
|
|
},
|
|
{
|
|
"epoch": 2.0480576692030437,
|
|
"grad_norm": 0.5467052056125071,
|
|
"learning_rate": 2.9440248570551406e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07065360248088837,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3821.8,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 2.056067280736884,
|
|
"grad_norm": 0.5620347458576342,
|
|
"learning_rate": 2.934160952732907e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05946531146764755,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2872.0,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 2.064076892270725,
|
|
"grad_norm": 0.5830364344453041,
|
|
"learning_rate": 2.9242678990051462e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08494800329208374,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 2.0720865038045653,
|
|
"grad_norm": 0.4447704188169296,
|
|
"learning_rate": 2.9143460045730886e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07532425224781036,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4791.2,
|
|
"valid_targets_min": 4097
|
|
},
|
|
{
|
|
"epoch": 2.080096115338406,
|
|
"grad_norm": 0.5352388808512131,
|
|
"learning_rate": 2.9043955790379035e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058120422065258026,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5747.0,
|
|
"valid_targets_min": 2284
|
|
},
|
|
{
|
|
"epoch": 2.0881057268722465,
|
|
"grad_norm": 0.5389046846621328,
|
|
"learning_rate": 2.8944169328910427e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07434704899787903,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3851.0,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 2.0961153384060873,
|
|
"grad_norm": 0.4994504617385715,
|
|
"learning_rate": 2.884410377504547e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07392135262489319,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3773.2,
|
|
"valid_targets_min": 3192
|
|
},
|
|
{
|
|
"epoch": 2.1041249499399277,
|
|
"grad_norm": 0.5544435323199706,
|
|
"learning_rate": 2.8743762251213333e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06358776986598969,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3683.5,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 2.1121345614737685,
|
|
"grad_norm": 0.4996926643940453,
|
|
"learning_rate": 2.8643147888454507e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04209335520863533,
|
|
"step": 1320,
|
|
"valid_targets_mean": 1953.5,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 2.120144173007609,
|
|
"grad_norm": 0.5743092579229245,
|
|
"learning_rate": 2.854226382632312e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06564871966838837,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3450.8,
|
|
"valid_targets_min": 1888
|
|
},
|
|
{
|
|
"epoch": 2.1281537845414498,
|
|
"grad_norm": 0.4681651346845681,
|
|
"learning_rate": 2.844111321278893e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0644155889749527,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3919.8,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 2.13616339607529,
|
|
"grad_norm": 0.4748951088906524,
|
|
"learning_rate": 2.833969920413913e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058069877326488495,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2428.2,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 2.144173007609131,
|
|
"grad_norm": 0.6302923323020396,
|
|
"learning_rate": 2.8238024964879857e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05584725737571716,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3008.2,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 2.1521826191429714,
|
|
"grad_norm": 0.5553004896998627,
|
|
"learning_rate": 2.8136093667637438e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048354048281908035,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2879.5,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 2.160192230676812,
|
|
"grad_norm": 0.6382997686758338,
|
|
"learning_rate": 2.8033908493059394e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03803035989403725,
|
|
"step": 1350,
|
|
"valid_targets_mean": 2469.2,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 2.1682018422106526,
|
|
"grad_norm": 0.48059260406372195,
|
|
"learning_rate": 2.793147262971519e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051980093121528625,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3259.2,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 2.1762114537444934,
|
|
"grad_norm": 0.603141141677437,
|
|
"learning_rate": 2.7828789273996748e-05,
|
|
"loss": 0.2176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0365680530667305,
|
|
"step": 1360,
|
|
"valid_targets_mean": 2490.2,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 2.184221065278334,
|
|
"grad_norm": 0.5356664926650465,
|
|
"learning_rate": 2.7725861630018703e-05,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052938062697649,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4032.2,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 2.1922306768121747,
|
|
"grad_norm": 0.6189786776708307,
|
|
"learning_rate": 2.7622692909518423e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06646661460399628,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3729.0,
|
|
"valid_targets_min": 2157
|
|
},
|
|
{
|
|
"epoch": 2.200240288346015,
|
|
"grad_norm": 0.5503316967584895,
|
|
"learning_rate": 2.7519286331755766e-05,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05545353889465332,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 2.208249899879856,
|
|
"grad_norm": 0.647026048962965,
|
|
"learning_rate": 2.7415645123412672e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04211638867855072,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3178.5,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 2.2162595114136963,
|
|
"grad_norm": 0.5512583643007355,
|
|
"learning_rate": 2.731177251849246e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034008294343948364,
|
|
"step": 1385,
|
|
"valid_targets_mean": 2994.8,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 2.224269122947537,
|
|
"grad_norm": 0.4662315623419887,
|
|
"learning_rate": 2.7207671758218884e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055893898010253906,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4503.5,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 2.2322787344813775,
|
|
"grad_norm": 0.5036944024152561,
|
|
"learning_rate": 2.710334609093504e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046944573521614075,
|
|
"step": 1395,
|
|
"valid_targets_mean": 2806.2,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 2.2402883460152183,
|
|
"grad_norm": 0.4909282197847471,
|
|
"learning_rate": 2.699879877200198e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04708274453878403,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4892.8,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 2.2482979575490587,
|
|
"grad_norm": 0.5043872342700252,
|
|
"learning_rate": 2.6894033063697143e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0814126655459404,
|
|
"step": 1405,
|
|
"valid_targets_mean": 6856.2,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 2.2563075690828995,
|
|
"grad_norm": 0.3381939537284356,
|
|
"learning_rate": 2.6789052235112554e-05,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028490839526057243,
|
|
"step": 1410,
|
|
"valid_targets_mean": 5445.0,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 2.26431718061674,
|
|
"grad_norm": 0.4007220259103596,
|
|
"learning_rate": 2.66838595620528e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036405034363269806,
|
|
"step": 1415,
|
|
"valid_targets_mean": 6826.8,
|
|
"valid_targets_min": 4640
|
|
},
|
|
{
|
|
"epoch": 2.2723267921505808,
|
|
"grad_norm": 0.3587794251403167,
|
|
"learning_rate": 2.6578458326932842e-05,
|
|
"loss": 0.1088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026125647127628326,
|
|
"step": 1420,
|
|
"valid_targets_mean": 7376.2,
|
|
"valid_targets_min": 6135
|
|
},
|
|
{
|
|
"epoch": 2.280336403684421,
|
|
"grad_norm": 0.2909755535765704,
|
|
"learning_rate": 2.6472851818675583e-05,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027100814506411552,
|
|
"step": 1425,
|
|
"valid_targets_mean": 6002.2,
|
|
"valid_targets_min": 3847
|
|
},
|
|
{
|
|
"epoch": 2.288346015218262,
|
|
"grad_norm": 0.29383984408757513,
|
|
"learning_rate": 2.6367043332609223e-05,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025835495442152023,
|
|
"step": 1430,
|
|
"valid_targets_mean": 7146.2,
|
|
"valid_targets_min": 5036
|
|
},
|
|
{
|
|
"epoch": 2.2963556267521024,
|
|
"grad_norm": 0.35016460933283705,
|
|
"learning_rate": 2.6261036170364448e-05,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04111432284116745,
|
|
"step": 1435,
|
|
"valid_targets_mean": 8201.5,
|
|
"valid_targets_min": 6022
|
|
},
|
|
{
|
|
"epoch": 2.304365238285943,
|
|
"grad_norm": 0.37486430031683204,
|
|
"learning_rate": 2.6154833639771415e-05,
|
|
"loss": 0.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02866670861840248,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 2.3123748498197836,
|
|
"grad_norm": 0.32521074210879475,
|
|
"learning_rate": 2.6048439054756492e-05,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02288370579481125,
|
|
"step": 1445,
|
|
"valid_targets_mean": 6249.8,
|
|
"valid_targets_min": 3601
|
|
},
|
|
{
|
|
"epoch": 2.3203844613536244,
|
|
"grad_norm": 0.3621401597593272,
|
|
"learning_rate": 2.594185573523892e-05,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03147714585065842,
|
|
"step": 1450,
|
|
"valid_targets_mean": 7850.5,
|
|
"valid_targets_min": 3289
|
|
},
|
|
{
|
|
"epoch": 2.328394072887465,
|
|
"grad_norm": 0.43632351944593695,
|
|
"learning_rate": 2.583508700702716e-05,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03067987971007824,
|
|
"step": 1455,
|
|
"valid_targets_mean": 6368.5,
|
|
"valid_targets_min": 4071
|
|
},
|
|
{
|
|
"epoch": 2.3364036844213056,
|
|
"grad_norm": 0.3475038301504849,
|
|
"learning_rate": 2.572813620171513e-05,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02492939494550228,
|
|
"step": 1460,
|
|
"valid_targets_mean": 7169.2,
|
|
"valid_targets_min": 5138
|
|
},
|
|
{
|
|
"epoch": 2.344413295955146,
|
|
"grad_norm": 0.4184304241975851,
|
|
"learning_rate": 2.5621006656578267e-05,
|
|
"loss": 0.0946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029697462916374207,
|
|
"step": 1465,
|
|
"valid_targets_mean": 6420.8,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 2.352422907488987,
|
|
"grad_norm": 0.36783401265339605,
|
|
"learning_rate": 2.5513701714469373e-05,
|
|
"loss": 0.1012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02509601227939129,
|
|
"step": 1470,
|
|
"valid_targets_mean": 6447.0,
|
|
"valid_targets_min": 5443
|
|
},
|
|
{
|
|
"epoch": 2.3604325190228272,
|
|
"grad_norm": 0.39488525338618313,
|
|
"learning_rate": 2.540622472371429e-05,
|
|
"loss": 0.0945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02181481197476387,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4346.2,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 2.368442130556668,
|
|
"grad_norm": 0.33149806126300474,
|
|
"learning_rate": 2.5298579038007478e-05,
|
|
"loss": 0.0966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034374285489320755,
|
|
"step": 1480,
|
|
"valid_targets_mean": 6363.5,
|
|
"valid_targets_min": 4955
|
|
},
|
|
{
|
|
"epoch": 2.3764517420905085,
|
|
"grad_norm": 0.33875141635774814,
|
|
"learning_rate": 2.519076801630727e-05,
|
|
"loss": 0.1086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01919686049222946,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3673.2,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 2.3844613536243493,
|
|
"grad_norm": 0.3715580253574201,
|
|
"learning_rate": 2.508279502273117e-05,
|
|
"loss": 0.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014164748601615429,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4228.0,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 2.3924709651581897,
|
|
"grad_norm": 0.3590860891477475,
|
|
"learning_rate": 2.4974663426450798e-05,
|
|
"loss": 0.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025156505405902863,
|
|
"step": 1495,
|
|
"valid_targets_mean": 5465.5,
|
|
"valid_targets_min": 3720
|
|
},
|
|
{
|
|
"epoch": 2.4004805766920305,
|
|
"grad_norm": 0.39599943649913555,
|
|
"learning_rate": 2.4866376601586798e-05,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02067527547478676,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4442.2,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 2.408490188225871,
|
|
"grad_norm": 0.3397094293439572,
|
|
"learning_rate": 2.475793792710352e-05,
|
|
"loss": 0.0874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02678208239376545,
|
|
"step": 1505,
|
|
"valid_targets_mean": 6434.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 2.4164997997597117,
|
|
"grad_norm": 0.4215477034408141,
|
|
"learning_rate": 2.4649350786703637e-05,
|
|
"loss": 0.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03145715594291687,
|
|
"step": 1510,
|
|
"valid_targets_mean": 6182.2,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 2.424509411293552,
|
|
"grad_norm": 0.5603112239679372,
|
|
"learning_rate": 2.45406185687225e-05,
|
|
"loss": 0.1018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014135487377643585,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4245.0,
|
|
"valid_targets_min": 3545
|
|
},
|
|
{
|
|
"epoch": 2.432519022827393,
|
|
"grad_norm": 0.3608640781521906,
|
|
"learning_rate": 2.443174466602246e-05,
|
|
"loss": 0.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026907628402113914,
|
|
"step": 1520,
|
|
"valid_targets_mean": 6278.5,
|
|
"valid_targets_min": 3918
|
|
},
|
|
{
|
|
"epoch": 2.4405286343612334,
|
|
"grad_norm": 0.3510303610507931,
|
|
"learning_rate": 2.4322732475886953e-05,
|
|
"loss": 0.0871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026553813368082047,
|
|
"step": 1525,
|
|
"valid_targets_mean": 6698.0,
|
|
"valid_targets_min": 5070
|
|
},
|
|
{
|
|
"epoch": 2.448538245895074,
|
|
"grad_norm": 0.35616008545843025,
|
|
"learning_rate": 2.4213585399914528e-05,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022685253992676735,
|
|
"step": 1530,
|
|
"valid_targets_mean": 5043.5,
|
|
"valid_targets_min": 2590
|
|
},
|
|
{
|
|
"epoch": 2.4565478574289146,
|
|
"grad_norm": 0.33285624439226186,
|
|
"learning_rate": 2.4104306843912687e-05,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015239531174302101,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4594.5,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 2.4645574689627554,
|
|
"grad_norm": 0.34165305378403,
|
|
"learning_rate": 2.3994900217791615e-05,
|
|
"loss": 0.0813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022765278816223145,
|
|
"step": 1540,
|
|
"valid_targets_mean": 7859.2,
|
|
"valid_targets_min": 4849
|
|
},
|
|
{
|
|
"epoch": 2.472567080496596,
|
|
"grad_norm": 0.3831575656101532,
|
|
"learning_rate": 2.3885368935457762e-05,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024117065593600273,
|
|
"step": 1545,
|
|
"valid_targets_mean": 7530.8,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 2.4805766920304366,
|
|
"grad_norm": 0.3918182121480168,
|
|
"learning_rate": 2.3775716414707355e-05,
|
|
"loss": 0.0911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0161636620759964,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4505.0,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 2.488586303564277,
|
|
"grad_norm": 0.3572659469216293,
|
|
"learning_rate": 2.36659460771197e-05,
|
|
"loss": 0.0784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014116305857896805,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4934.5,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 2.496595915098118,
|
|
"grad_norm": 0.3463791492757148,
|
|
"learning_rate": 2.3556061347950455e-05,
|
|
"loss": 0.0834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02446204051375389,
|
|
"step": 1560,
|
|
"valid_targets_mean": 6368.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 2.5046055266319582,
|
|
"grad_norm": 0.9092953176284908,
|
|
"learning_rate": 2.3446065656024734e-05,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0353982150554657,
|
|
"step": 1565,
|
|
"valid_targets_mean": 1458.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 2.512615138165799,
|
|
"grad_norm": 0.8973072105431493,
|
|
"learning_rate": 2.33359624336301e-05,
|
|
"loss": 0.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037419550120830536,
|
|
"step": 1570,
|
|
"valid_targets_mean": 1471.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 2.5206247496996395,
|
|
"grad_norm": 0.8356345651403605,
|
|
"learning_rate": 2.3225755116409497e-05,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0420101173222065,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2936.8,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 2.5286343612334803,
|
|
"grad_norm": 0.8819551869258159,
|
|
"learning_rate": 2.311544714325403e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028521310538053513,
|
|
"step": 1580,
|
|
"valid_targets_mean": 1157.2,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 2.5366439727673207,
|
|
"grad_norm": 0.6469377558136924,
|
|
"learning_rate": 2.300504195619563e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04535526782274246,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2150.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.5446535843011615,
|
|
"grad_norm": 0.8085152274217459,
|
|
"learning_rate": 2.2894543000299697e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04498184099793434,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2603.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 2.552663195835002,
|
|
"grad_norm": 0.7224503736521858,
|
|
"learning_rate": 2.2783953723557572e-05,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0366603322327137,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2382.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 2.5606728073688427,
|
|
"grad_norm": 1.3631072654930065,
|
|
"learning_rate": 2.2673277576778946e-05,
|
|
"loss": 0.1877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030237577855587006,
|
|
"step": 1600,
|
|
"valid_targets_mean": 1733.0,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 2.568682418902683,
|
|
"grad_norm": 0.8332996858302264,
|
|
"learning_rate": 2.2562518013484208e-05,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03147868067026138,
|
|
"step": 1605,
|
|
"valid_targets_mean": 1715.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 2.576692030436524,
|
|
"grad_norm": 0.7142151515443875,
|
|
"learning_rate": 2.245167848979664e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036833539605140686,
|
|
"step": 1610,
|
|
"valid_targets_mean": 2182.5,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 2.5847016419703643,
|
|
"grad_norm": 0.6879860634012305,
|
|
"learning_rate": 2.23407624643346e-05,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04658939689397812,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2525.8,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 2.592711253504205,
|
|
"grad_norm": 0.6210770752821418,
|
|
"learning_rate": 2.2229773398103606e-05,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029774608090519905,
|
|
"step": 1620,
|
|
"valid_targets_mean": 1936.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 2.6007208650380456,
|
|
"grad_norm": 0.6747451909279786,
|
|
"learning_rate": 2.2118714754388323e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024420931935310364,
|
|
"step": 1625,
|
|
"valid_targets_mean": 1548.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 2.6087304765718864,
|
|
"grad_norm": 0.815695921330268,
|
|
"learning_rate": 2.200758999864449e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029317732900381088,
|
|
"step": 1630,
|
|
"valid_targets_mean": 1888.8,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 2.616740088105727,
|
|
"grad_norm": 0.7905128568849215,
|
|
"learning_rate": 2.1896402598390818e-05,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03831934183835983,
|
|
"step": 1635,
|
|
"valid_targets_mean": 1614.0,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 2.6247496996395676,
|
|
"grad_norm": 0.8485711497740434,
|
|
"learning_rate": 2.178515602310074e-05,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031538669019937515,
|
|
"step": 1640,
|
|
"valid_targets_mean": 1651.2,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 2.632759311173408,
|
|
"grad_norm": 0.9592134307140445,
|
|
"learning_rate": 2.1673853744094193e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02765805274248123,
|
|
"step": 1645,
|
|
"valid_targets_mean": 1275.0,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 2.640768922707249,
|
|
"grad_norm": 0.7144985661330658,
|
|
"learning_rate": 2.1562499234429283e-05,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04128270968794823,
|
|
"step": 1650,
|
|
"valid_targets_mean": 2261.0,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 2.648778534241089,
|
|
"grad_norm": 0.8466880126913768,
|
|
"learning_rate": 2.1451095968793908e-05,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04242691397666931,
|
|
"step": 1655,
|
|
"valid_targets_mean": 1702.5,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.65678814577493,
|
|
"grad_norm": 0.7866319826222847,
|
|
"learning_rate": 2.1339647423397337e-05,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02598552405834198,
|
|
"step": 1660,
|
|
"valid_targets_mean": 2216.8,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 2.6647977573087704,
|
|
"grad_norm": 0.9171859402951109,
|
|
"learning_rate": 2.122815707586176e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024826187640428543,
|
|
"step": 1665,
|
|
"valid_targets_mean": 1605.5,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 2.6728073688426113,
|
|
"grad_norm": 0.6959204350034082,
|
|
"learning_rate": 2.111662840511373e-05,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0449749231338501,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2072.8,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 2.6808169803764517,
|
|
"grad_norm": 0.6631417582569736,
|
|
"learning_rate": 2.1005064891275638e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034550637006759644,
|
|
"step": 1675,
|
|
"valid_targets_mean": 1854.5,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 2.6888265919102925,
|
|
"grad_norm": 0.8085399840212727,
|
|
"learning_rate": 2.0893470015557126e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035040490329265594,
|
|
"step": 1680,
|
|
"valid_targets_mean": 1483.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 2.696836203444133,
|
|
"grad_norm": 0.7748379075175836,
|
|
"learning_rate": 2.078184726014643e-05,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04248945415019989,
|
|
"step": 1685,
|
|
"valid_targets_mean": 2120.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 2.7048458149779737,
|
|
"grad_norm": 0.7118032612773244,
|
|
"learning_rate": 2.0670200108101754e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06219960004091263,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2185.5,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 2.712855426511814,
|
|
"grad_norm": 0.8287851262774912,
|
|
"learning_rate": 2.0558532043242557e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021735573187470436,
|
|
"step": 1695,
|
|
"valid_targets_mean": 1273.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 2.720865038045655,
|
|
"grad_norm": 0.8791496243461001,
|
|
"learning_rate": 2.0446846550040863e-05,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032637402415275574,
|
|
"step": 1700,
|
|
"valid_targets_mean": 1165.2,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 2.7288746495794953,
|
|
"grad_norm": 0.7686286497468529,
|
|
"learning_rate": 2.033514711351253e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02368607185781002,
|
|
"step": 1705,
|
|
"valid_targets_mean": 1887.8,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 2.736884261113336,
|
|
"grad_norm": 0.7946852131024437,
|
|
"learning_rate": 2.022343721910851e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035813625901937485,
|
|
"step": 1710,
|
|
"valid_targets_mean": 1764.0,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 2.7448938726471765,
|
|
"grad_norm": 0.7793283037319778,
|
|
"learning_rate": 2.0111720352606054e-05,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02754664048552513,
|
|
"step": 1715,
|
|
"valid_targets_mean": 1403.2,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 2.7529034841810174,
|
|
"grad_norm": 0.5987029158761116,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03888958320021629,
|
|
"step": 1720,
|
|
"valid_targets_mean": 6643.5,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 2.7609130957148578,
|
|
"grad_norm": 0.3756362109440174,
|
|
"learning_rate": 1.988827964739395e-05,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038006775081157684,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5136.0,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 2.7689227072486986,
|
|
"grad_norm": 0.5483781997764232,
|
|
"learning_rate": 1.9776562780891494e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04832994192838669,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5650.8,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 2.776932318782539,
|
|
"grad_norm": 0.6050797605674297,
|
|
"learning_rate": 1.966485288648747e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035941701382398605,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4949.2,
|
|
"valid_targets_min": 4191
|
|
},
|
|
{
|
|
"epoch": 2.78494193031638,
|
|
"grad_norm": 0.5582529033690404,
|
|
"learning_rate": 1.9553153449959144e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029457952827215195,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3846.0,
|
|
"valid_targets_min": 2321
|
|
},
|
|
{
|
|
"epoch": 2.79295154185022,
|
|
"grad_norm": 0.5093998836328193,
|
|
"learning_rate": 1.9441467956757453e-05,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026404164731502533,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4102.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 2.800961153384061,
|
|
"grad_norm": 0.4355404665199724,
|
|
"learning_rate": 1.9329799891898256e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.065243199467659,
|
|
"step": 1750,
|
|
"valid_targets_mean": 7856.2,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 2.8089707649179014,
|
|
"grad_norm": 0.43708140898602593,
|
|
"learning_rate": 1.9218152739853576e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03540034964680672,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5124.0,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 2.816980376451742,
|
|
"grad_norm": 0.3795728459208723,
|
|
"learning_rate": 1.9106529984442884e-05,
|
|
"loss": 0.2049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04286336898803711,
|
|
"step": 1760,
|
|
"valid_targets_mean": 6862.0,
|
|
"valid_targets_min": 3586
|
|
},
|
|
{
|
|
"epoch": 2.8249899879855827,
|
|
"grad_norm": 0.43705659934061725,
|
|
"learning_rate": 1.8994935108724366e-05,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059942036867141724,
|
|
"step": 1765,
|
|
"valid_targets_mean": 7773.0,
|
|
"valid_targets_min": 5273
|
|
},
|
|
{
|
|
"epoch": 2.8329995995194235,
|
|
"grad_norm": 0.6490107049955897,
|
|
"learning_rate": 1.8883371594886276e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057290829718112946,
|
|
"step": 1770,
|
|
"valid_targets_mean": 7539.5,
|
|
"valid_targets_min": 4721
|
|
},
|
|
{
|
|
"epoch": 2.841009211053264,
|
|
"grad_norm": 0.4306989651119894,
|
|
"learning_rate": 1.877184292413824e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03317209333181381,
|
|
"step": 1775,
|
|
"valid_targets_mean": 5395.0,
|
|
"valid_targets_min": 4460
|
|
},
|
|
{
|
|
"epoch": 2.8490188225871043,
|
|
"grad_norm": 0.34802493226633163,
|
|
"learning_rate": 1.8660352576602663e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039249248802661896,
|
|
"step": 1780,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 4366
|
|
},
|
|
{
|
|
"epoch": 2.857028434120945,
|
|
"grad_norm": 0.4058374601073005,
|
|
"learning_rate": 1.8548904031206102e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044224813580513,
|
|
"step": 1785,
|
|
"valid_targets_mean": 6880.8,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 2.865038045654786,
|
|
"grad_norm": 0.3806197065418136,
|
|
"learning_rate": 1.843750076557072e-05,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032120805233716965,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 2.8730476571886263,
|
|
"grad_norm": 0.41283915664899745,
|
|
"learning_rate": 1.832614625590581e-05,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022641388699412346,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4575.0,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 2.8810572687224667,
|
|
"grad_norm": 0.4273371499350396,
|
|
"learning_rate": 1.8214843976899264e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033698879182338715,
|
|
"step": 1800,
|
|
"valid_targets_mean": 6189.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 2.8890668802563075,
|
|
"grad_norm": 0.545196593516779,
|
|
"learning_rate": 1.810359740160919e-05,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023359842598438263,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4232.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 2.8970764917901484,
|
|
"grad_norm": 0.5220700759231465,
|
|
"learning_rate": 1.7992410001355515e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025488127022981644,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4608.8,
|
|
"valid_targets_min": 3264
|
|
},
|
|
{
|
|
"epoch": 2.9050861033239888,
|
|
"grad_norm": 0.5731303752380601,
|
|
"learning_rate": 1.788128524561168e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04095638543367386,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5691.5,
|
|
"valid_targets_min": 4828
|
|
},
|
|
{
|
|
"epoch": 2.913095714857829,
|
|
"grad_norm": 0.4462408937770736,
|
|
"learning_rate": 1.7770226601896397e-05,
|
|
"loss": 0.1619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04294648766517639,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5483.0,
|
|
"valid_targets_min": 4351
|
|
},
|
|
{
|
|
"epoch": 2.92110532639167,
|
|
"grad_norm": 0.5177703862363623,
|
|
"learning_rate": 1.7659237535665404e-05,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022184578701853752,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3804.2,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 2.929114937925511,
|
|
"grad_norm": 0.8562723825261513,
|
|
"learning_rate": 1.754832151020337e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02520735189318657,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3795.5,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 2.937124549459351,
|
|
"grad_norm": 0.37960535398930373,
|
|
"learning_rate": 1.74374819865158e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0427187979221344,
|
|
"step": 1835,
|
|
"valid_targets_mean": 6354.0,
|
|
"valid_targets_min": 4890
|
|
},
|
|
{
|
|
"epoch": 2.9451341609931916,
|
|
"grad_norm": 0.3443679752976815,
|
|
"learning_rate": 1.7326722423221057e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034914370626211166,
|
|
"step": 1840,
|
|
"valid_targets_mean": 6546.8,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 2.9531437725270324,
|
|
"grad_norm": 0.4069178184371745,
|
|
"learning_rate": 1.7216046276442438e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0259261317551136,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4817.8,
|
|
"valid_targets_min": 3153
|
|
},
|
|
{
|
|
"epoch": 2.9611533840608733,
|
|
"grad_norm": 0.5167548560607208,
|
|
"learning_rate": 1.7105456999700306e-05,
|
|
"loss": 0.1061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01622290536761284,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2771.0,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 2.9691629955947136,
|
|
"grad_norm": 0.4832144873513158,
|
|
"learning_rate": 1.6994958043804374e-05,
|
|
"loss": 0.1002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020583849400281906,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4317.0,
|
|
"valid_targets_min": 3829
|
|
},
|
|
{
|
|
"epoch": 2.977172607128554,
|
|
"grad_norm": 0.43402024639588116,
|
|
"learning_rate": 1.6884552856745972e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03871969133615494,
|
|
"step": 1860,
|
|
"valid_targets_mean": 6788.8,
|
|
"valid_targets_min": 5244
|
|
},
|
|
{
|
|
"epoch": 2.985182218662395,
|
|
"grad_norm": 0.4027322414850749,
|
|
"learning_rate": 1.6774244883590503e-05,
|
|
"loss": 0.1567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04548288881778717,
|
|
"step": 1865,
|
|
"valid_targets_mean": 7649.8,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 2.9931918301962357,
|
|
"grad_norm": 0.33119693035714604,
|
|
"learning_rate": 1.6664037566369905e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030794765800237656,
|
|
"step": 1870,
|
|
"valid_targets_mean": 6744.8,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.443782382817241,
|
|
"learning_rate": 1.6553934343975273e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1093645691871643,
|
|
"step": 1875,
|
|
"valid_targets_mean": 6467.2,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 3.0080096115338404,
|
|
"grad_norm": 0.5837898748474536,
|
|
"learning_rate": 1.644393865204955e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053182609379291534,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4098.5,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 3.016019223067681,
|
|
"grad_norm": 0.5858814090201864,
|
|
"learning_rate": 1.6334053922880304e-05,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03044111654162407,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2511.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 3.0240288346015216,
|
|
"grad_norm": 0.663960396624996,
|
|
"learning_rate": 1.622428358529265e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021786952391266823,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2126.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 3.0320384461353624,
|
|
"grad_norm": 0.49716984115966456,
|
|
"learning_rate": 1.611463106454224e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019637729972600937,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2022.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 3.040048057669203,
|
|
"grad_norm": 0.724883251490939,
|
|
"learning_rate": 1.6005099782208392e-05,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03603637218475342,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3344.0,
|
|
"valid_targets_min": 2137
|
|
},
|
|
{
|
|
"epoch": 3.0480576692030437,
|
|
"grad_norm": 0.5913160521304119,
|
|
"learning_rate": 1.5895693156087317e-05,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039571892470121384,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3821.8,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 3.056067280736884,
|
|
"grad_norm": 0.5742599167700511,
|
|
"learning_rate": 1.578641460008548e-05,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03202173858880997,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2872.0,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 3.064076892270725,
|
|
"grad_norm": 0.5731368764061298,
|
|
"learning_rate": 1.5677267524113054e-05,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04484371095895767,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 3.0720865038045653,
|
|
"grad_norm": 0.5881661749302238,
|
|
"learning_rate": 1.5568255333977547e-05,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04331899434328079,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4791.2,
|
|
"valid_targets_min": 4097
|
|
},
|
|
{
|
|
"epoch": 3.080096115338406,
|
|
"grad_norm": 0.5275584346832931,
|
|
"learning_rate": 1.5459381431277506e-05,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032179027795791626,
|
|
"step": 1925,
|
|
"valid_targets_mean": 5747.0,
|
|
"valid_targets_min": 2284
|
|
},
|
|
{
|
|
"epoch": 3.0881057268722465,
|
|
"grad_norm": 0.49075183449481535,
|
|
"learning_rate": 1.5350649213296373e-05,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041602835059165955,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3851.0,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 3.0961153384060873,
|
|
"grad_norm": 0.5366552004813917,
|
|
"learning_rate": 1.5242062072896483e-05,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03879409283399582,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3773.2,
|
|
"valid_targets_min": 3192
|
|
},
|
|
{
|
|
"epoch": 3.1041249499399277,
|
|
"grad_norm": 0.5905474660713247,
|
|
"learning_rate": 1.5133623398413209e-05,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03518740087747574,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3683.5,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 3.1121345614737685,
|
|
"grad_norm": 0.5273571641067659,
|
|
"learning_rate": 1.50253365735492e-05,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01810375228524208,
|
|
"step": 1945,
|
|
"valid_targets_mean": 1953.5,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 3.120144173007609,
|
|
"grad_norm": 0.5507555457447985,
|
|
"learning_rate": 1.4917204977268833e-05,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03124232590198517,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3450.8,
|
|
"valid_targets_min": 1888
|
|
},
|
|
{
|
|
"epoch": 3.1281537845414498,
|
|
"grad_norm": 0.46837503005960773,
|
|
"learning_rate": 1.4809231983692733e-05,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031337711960077286,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3919.8,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 3.13616339607529,
|
|
"grad_norm": 0.46646082917475723,
|
|
"learning_rate": 1.4701420961992533e-05,
|
|
"loss": 0.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034783318638801575,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2428.2,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 3.144173007609131,
|
|
"grad_norm": 0.6457109459565669,
|
|
"learning_rate": 1.459377527628571e-05,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027997445315122604,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3008.2,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.1521826191429714,
|
|
"grad_norm": 0.5570296313077966,
|
|
"learning_rate": 1.4486298285530634e-05,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02478872984647751,
|
|
"step": 1970,
|
|
"valid_targets_mean": 2879.5,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 3.160192230676812,
|
|
"grad_norm": 0.5664398542775647,
|
|
"learning_rate": 1.4378993343421736e-05,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01747770980000496,
|
|
"step": 1975,
|
|
"valid_targets_mean": 2469.2,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 3.1682018422106526,
|
|
"grad_norm": 0.5049258899431966,
|
|
"learning_rate": 1.4271863798284877e-05,
|
|
"loss": 0.1098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026987329125404358,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3259.2,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 3.1762114537444934,
|
|
"grad_norm": 0.5855052840855303,
|
|
"learning_rate": 1.4164912992972846e-05,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018977167084813118,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2490.2,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 3.184221065278334,
|
|
"grad_norm": 0.5212733918305963,
|
|
"learning_rate": 1.4058144264761087e-05,
|
|
"loss": 0.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0280669666826725,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4032.2,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 3.1922306768121747,
|
|
"grad_norm": 0.5179555941116122,
|
|
"learning_rate": 1.3951560945243517e-05,
|
|
"loss": 0.1003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029785599559545517,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3729.0,
|
|
"valid_targets_min": 2157
|
|
},
|
|
{
|
|
"epoch": 3.200240288346015,
|
|
"grad_norm": 0.7142188966569968,
|
|
"learning_rate": 1.3845166360228597e-05,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032647505402565,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 3.208249899879856,
|
|
"grad_norm": 0.473500509129013,
|
|
"learning_rate": 1.3738963829635559e-05,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024235211312770844,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3178.5,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 3.2162595114136963,
|
|
"grad_norm": 0.5124893761833413,
|
|
"learning_rate": 1.3632956667390784e-05,
|
|
"loss": 0.1057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019067689776420593,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2994.8,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 3.224269122947537,
|
|
"grad_norm": 0.4264538029399881,
|
|
"learning_rate": 1.3527148181324425e-05,
|
|
"loss": 0.1064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030452841892838478,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4503.5,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 3.2322787344813775,
|
|
"grad_norm": 0.5558415042499253,
|
|
"learning_rate": 1.3421541673067168e-05,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023069536313414574,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2806.2,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 3.2402883460152183,
|
|
"grad_norm": 0.5561491988932697,
|
|
"learning_rate": 1.3316140437947207e-05,
|
|
"loss": 0.1003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027036240324378014,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4892.8,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 3.2482979575490587,
|
|
"grad_norm": 0.5577852695508807,
|
|
"learning_rate": 1.321094776488745e-05,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043761737644672394,
|
|
"step": 2030,
|
|
"valid_targets_mean": 6856.2,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 3.2563075690828995,
|
|
"grad_norm": 0.32575721610423974,
|
|
"learning_rate": 1.3105966936302856e-05,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01677459478378296,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5445.0,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 3.26431718061674,
|
|
"grad_norm": 0.357841148903157,
|
|
"learning_rate": 1.3001201227998023e-05,
|
|
"loss": 0.0679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02109489217400551,
|
|
"step": 2040,
|
|
"valid_targets_mean": 6826.8,
|
|
"valid_targets_min": 4640
|
|
},
|
|
{
|
|
"epoch": 3.2723267921505808,
|
|
"grad_norm": 0.35245516721729675,
|
|
"learning_rate": 1.2896653909064964e-05,
|
|
"loss": 0.0627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015493746846914291,
|
|
"step": 2045,
|
|
"valid_targets_mean": 7376.2,
|
|
"valid_targets_min": 6135
|
|
},
|
|
{
|
|
"epoch": 3.280336403684421,
|
|
"grad_norm": 0.2685783704833888,
|
|
"learning_rate": 1.2792328241781124e-05,
|
|
"loss": 0.0584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015125373378396034,
|
|
"step": 2050,
|
|
"valid_targets_mean": 6002.2,
|
|
"valid_targets_min": 3847
|
|
},
|
|
{
|
|
"epoch": 3.288346015218262,
|
|
"grad_norm": 0.3100287754653156,
|
|
"learning_rate": 1.2688227481507546e-05,
|
|
"loss": 0.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014190098270773888,
|
|
"step": 2055,
|
|
"valid_targets_mean": 7146.2,
|
|
"valid_targets_min": 5036
|
|
},
|
|
{
|
|
"epoch": 3.2963556267521024,
|
|
"grad_norm": 0.3428112825972525,
|
|
"learning_rate": 1.258435487658733e-05,
|
|
"loss": 0.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022796286270022392,
|
|
"step": 2060,
|
|
"valid_targets_mean": 8201.5,
|
|
"valid_targets_min": 6022
|
|
},
|
|
{
|
|
"epoch": 3.304365238285943,
|
|
"grad_norm": 0.3181157869046504,
|
|
"learning_rate": 1.2480713668244243e-05,
|
|
"loss": 0.0618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01672467216849327,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 3.3123748498197836,
|
|
"grad_norm": 0.3116152430617948,
|
|
"learning_rate": 1.2377307090481586e-05,
|
|
"loss": 0.0527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011895786970853806,
|
|
"step": 2070,
|
|
"valid_targets_mean": 6249.8,
|
|
"valid_targets_min": 3601
|
|
},
|
|
{
|
|
"epoch": 3.3203844613536244,
|
|
"grad_norm": 0.2915293436180664,
|
|
"learning_rate": 1.2274138369981298e-05,
|
|
"loss": 0.0516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01540968008339405,
|
|
"step": 2075,
|
|
"valid_targets_mean": 7850.5,
|
|
"valid_targets_min": 3289
|
|
},
|
|
{
|
|
"epoch": 3.328394072887465,
|
|
"grad_norm": 0.36226301002681105,
|
|
"learning_rate": 1.2171210726003256e-05,
|
|
"loss": 0.0598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01674043759703636,
|
|
"step": 2080,
|
|
"valid_targets_mean": 6368.5,
|
|
"valid_targets_min": 4071
|
|
},
|
|
{
|
|
"epoch": 3.3364036844213056,
|
|
"grad_norm": 0.30870131672030404,
|
|
"learning_rate": 1.2068527370284815e-05,
|
|
"loss": 0.0544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012327834032475948,
|
|
"step": 2085,
|
|
"valid_targets_mean": 7169.2,
|
|
"valid_targets_min": 5138
|
|
},
|
|
{
|
|
"epoch": 3.344413295955146,
|
|
"grad_norm": 0.3157850750351719,
|
|
"learning_rate": 1.1966091506940616e-05,
|
|
"loss": 0.0499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01468539796769619,
|
|
"step": 2090,
|
|
"valid_targets_mean": 6420.8,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 3.352422907488987,
|
|
"grad_norm": 0.35979195178467144,
|
|
"learning_rate": 1.1863906332362569e-05,
|
|
"loss": 0.0538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013666454702615738,
|
|
"step": 2095,
|
|
"valid_targets_mean": 6447.0,
|
|
"valid_targets_min": 5443
|
|
},
|
|
{
|
|
"epoch": 3.3604325190228272,
|
|
"grad_norm": 0.3819631856124961,
|
|
"learning_rate": 1.176197503512015e-05,
|
|
"loss": 0.0496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011048472486436367,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4346.2,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 3.368442130556668,
|
|
"grad_norm": 0.3034965961960689,
|
|
"learning_rate": 1.1660300795860877e-05,
|
|
"loss": 0.0537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019051872193813324,
|
|
"step": 2105,
|
|
"valid_targets_mean": 6363.5,
|
|
"valid_targets_min": 4955
|
|
},
|
|
{
|
|
"epoch": 3.3764517420905085,
|
|
"grad_norm": 0.28793547356161675,
|
|
"learning_rate": 1.1558886787211071e-05,
|
|
"loss": 0.0572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009753275662660599,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3673.2,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 3.3844613536243493,
|
|
"grad_norm": 0.30220814334562185,
|
|
"learning_rate": 1.1457736173676883e-05,
|
|
"loss": 0.0459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.008747074753046036,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4228.0,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 3.3924709651581897,
|
|
"grad_norm": 0.32440442896220606,
|
|
"learning_rate": 1.1356852111545493e-05,
|
|
"loss": 0.0442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011686144396662712,
|
|
"step": 2120,
|
|
"valid_targets_mean": 5465.5,
|
|
"valid_targets_min": 3720
|
|
},
|
|
{
|
|
"epoch": 3.4004805766920305,
|
|
"grad_norm": 0.311713316531329,
|
|
"learning_rate": 1.1256237748786675e-05,
|
|
"loss": 0.046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010932056233286858,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4442.2,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 3.408490188225871,
|
|
"grad_norm": 0.25654930080106925,
|
|
"learning_rate": 1.1155896224954543e-05,
|
|
"loss": 0.0421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0121063943952322,
|
|
"step": 2130,
|
|
"valid_targets_mean": 6434.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 3.4164997997597117,
|
|
"grad_norm": 0.36816717432440704,
|
|
"learning_rate": 1.1055830671089578e-05,
|
|
"loss": 0.051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014987494796514511,
|
|
"step": 2135,
|
|
"valid_targets_mean": 6182.2,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 3.424509411293552,
|
|
"grad_norm": 0.47423940286584954,
|
|
"learning_rate": 1.0956044209620966e-05,
|
|
"loss": 0.0509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0060920557007193565,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4245.0,
|
|
"valid_targets_min": 3545
|
|
},
|
|
{
|
|
"epoch": 3.432519022827393,
|
|
"grad_norm": 0.3169333158529673,
|
|
"learning_rate": 1.0856539954269121e-05,
|
|
"loss": 0.0421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01345783844590187,
|
|
"step": 2145,
|
|
"valid_targets_mean": 6278.5,
|
|
"valid_targets_min": 3918
|
|
},
|
|
{
|
|
"epoch": 3.4405286343612334,
|
|
"grad_norm": 0.2957668407926322,
|
|
"learning_rate": 1.0757321009948543e-05,
|
|
"loss": 0.0417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013319984078407288,
|
|
"step": 2150,
|
|
"valid_targets_mean": 6698.0,
|
|
"valid_targets_min": 5070
|
|
},
|
|
{
|
|
"epoch": 3.448538245895074,
|
|
"grad_norm": 0.3425640868573807,
|
|
"learning_rate": 1.0658390472670938e-05,
|
|
"loss": 0.0455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010332060977816582,
|
|
"step": 2155,
|
|
"valid_targets_mean": 5043.5,
|
|
"valid_targets_min": 2590
|
|
},
|
|
{
|
|
"epoch": 3.4565478574289146,
|
|
"grad_norm": 0.2718524560617526,
|
|
"learning_rate": 1.0559751429448597e-05,
|
|
"loss": 0.038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.007161797024309635,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4594.5,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 3.4645574689627554,
|
|
"grad_norm": 0.3068164866130068,
|
|
"learning_rate": 1.0461406958198101e-05,
|
|
"loss": 0.0391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011574627831578255,
|
|
"step": 2165,
|
|
"valid_targets_mean": 7859.2,
|
|
"valid_targets_min": 4849
|
|
},
|
|
{
|
|
"epoch": 3.472567080496596,
|
|
"grad_norm": 0.3213368587911677,
|
|
"learning_rate": 1.0363360127644235e-05,
|
|
"loss": 0.0387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011268961243331432,
|
|
"step": 2170,
|
|
"valid_targets_mean": 7530.8,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 3.4805766920304366,
|
|
"grad_norm": 0.3279231247280875,
|
|
"learning_rate": 1.0265613997224255e-05,
|
|
"loss": 0.043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.00789633858948946,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4505.0,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 3.488586303564277,
|
|
"grad_norm": 0.30561045076813315,
|
|
"learning_rate": 1.0168171616992422e-05,
|
|
"loss": 0.035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0071163480170071125,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4934.5,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 3.496595915098118,
|
|
"grad_norm": 0.3040188069233306,
|
|
"learning_rate": 1.007103602752483e-05,
|
|
"loss": 0.0377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011935720220208168,
|
|
"step": 2185,
|
|
"valid_targets_mean": 6368.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 3.5046055266319582,
|
|
"grad_norm": 0.7580800987967724,
|
|
"learning_rate": 9.974210259824505e-06,
|
|
"loss": 0.0625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015211760997772217,
|
|
"step": 2190,
|
|
"valid_targets_mean": 1458.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.512615138165799,
|
|
"grad_norm": 0.6938484769285029,
|
|
"learning_rate": 9.877697335226872e-06,
|
|
"loss": 0.0655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014714337885379791,
|
|
"step": 2195,
|
|
"valid_targets_mean": 1471.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 3.5206247496996395,
|
|
"grad_norm": 0.6029863231629417,
|
|
"learning_rate": 9.781500265305448e-06,
|
|
"loss": 0.0706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01874360628426075,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2936.8,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 3.5286343612334803,
|
|
"grad_norm": 0.6471044882374141,
|
|
"learning_rate": 9.685622051777856e-06,
|
|
"loss": 0.0654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014414064586162567,
|
|
"step": 2205,
|
|
"valid_targets_mean": 1157.2,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 3.5366439727673207,
|
|
"grad_norm": 0.5864138107259204,
|
|
"learning_rate": 9.590065686412182e-06,
|
|
"loss": 0.0764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.020656820386648178,
|
|
"step": 2210,
|
|
"valid_targets_mean": 2150.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 3.5446535843011615,
|
|
"grad_norm": 0.5707696581953461,
|
|
"learning_rate": 9.494834150933616e-06,
|
|
"loss": 0.0636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018849622458219528,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2603.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.552663195835002,
|
|
"grad_norm": 0.5237234401933294,
|
|
"learning_rate": 9.399930416931404e-06,
|
|
"loss": 0.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01728007197380066,
|
|
"step": 2220,
|
|
"valid_targets_mean": 2382.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 3.5606728073688427,
|
|
"grad_norm": 0.8275854633090242,
|
|
"learning_rate": 9.30535744576615e-06,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01150110550224781,
|
|
"step": 2225,
|
|
"valid_targets_mean": 1733.0,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 3.568682418902683,
|
|
"grad_norm": 0.6996173366002233,
|
|
"learning_rate": 9.211118188477362e-06,
|
|
"loss": 0.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012131967581808567,
|
|
"step": 2230,
|
|
"valid_targets_mean": 1715.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 3.576692030436524,
|
|
"grad_norm": 0.5746076276243122,
|
|
"learning_rate": 9.117215585691408e-06,
|
|
"loss": 0.0572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01349081564694643,
|
|
"step": 2235,
|
|
"valid_targets_mean": 2182.5,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 3.5847016419703643,
|
|
"grad_norm": 0.48293795600221306,
|
|
"learning_rate": 9.023652567529744e-06,
|
|
"loss": 0.0551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019143477082252502,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2525.8,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 3.592711253504205,
|
|
"grad_norm": 0.5115406803849402,
|
|
"learning_rate": 8.930432053517465e-06,
|
|
"loss": 0.0651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010252771899104118,
|
|
"step": 2245,
|
|
"valid_targets_mean": 1936.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 3.6007208650380456,
|
|
"grad_norm": 0.6132007385016043,
|
|
"learning_rate": 8.837556952492264e-06,
|
|
"loss": 0.0565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.00839340128004551,
|
|
"step": 2250,
|
|
"valid_targets_mean": 1548.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 3.6087304765718864,
|
|
"grad_norm": 0.6725235542678418,
|
|
"learning_rate": 8.745030162513582e-06,
|
|
"loss": 0.0568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01267075166106224,
|
|
"step": 2255,
|
|
"valid_targets_mean": 1888.8,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 3.616740088105727,
|
|
"grad_norm": 0.6389222930519749,
|
|
"learning_rate": 8.652854570772236e-06,
|
|
"loss": 0.062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013893316499888897,
|
|
"step": 2260,
|
|
"valid_targets_mean": 1614.0,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 3.6247496996395676,
|
|
"grad_norm": 0.5818135166959325,
|
|
"learning_rate": 8.561033053500312e-06,
|
|
"loss": 0.0532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012762725353240967,
|
|
"step": 2265,
|
|
"valid_targets_mean": 1651.2,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 3.632759311173408,
|
|
"grad_norm": 0.6745064174105169,
|
|
"learning_rate": 8.46956847588141e-06,
|
|
"loss": 0.0653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010742013342678547,
|
|
"step": 2270,
|
|
"valid_targets_mean": 1275.0,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 3.640768922707249,
|
|
"grad_norm": 0.5921901784136538,
|
|
"learning_rate": 8.378463691961237e-06,
|
|
"loss": 0.0457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016796845942735672,
|
|
"step": 2275,
|
|
"valid_targets_mean": 2261.0,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 3.648778534241089,
|
|
"grad_norm": 0.6328897788733934,
|
|
"learning_rate": 8.287721544558574e-06,
|
|
"loss": 0.0484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015148041769862175,
|
|
"step": 2280,
|
|
"valid_targets_mean": 1702.5,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 3.65678814577493,
|
|
"grad_norm": 0.6359586863086371,
|
|
"learning_rate": 8.197344865176548e-06,
|
|
"loss": 0.052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.008199378848075867,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2216.8,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 3.6647977573087704,
|
|
"grad_norm": 0.6306613482978004,
|
|
"learning_rate": 8.10733647391427e-06,
|
|
"loss": 0.0511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.007926307618618011,
|
|
"step": 2290,
|
|
"valid_targets_mean": 1605.5,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 3.6728073688426113,
|
|
"grad_norm": 0.5581848003618292,
|
|
"learning_rate": 8.017699179378849e-06,
|
|
"loss": 0.045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015513489954173565,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2072.8,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 3.6808169803764517,
|
|
"grad_norm": 0.5587854919620766,
|
|
"learning_rate": 7.928435778597763e-06,
|
|
"loss": 0.0472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009433910250663757,
|
|
"step": 2300,
|
|
"valid_targets_mean": 1854.5,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 3.6888265919102925,
|
|
"grad_norm": 0.5400227459192101,
|
|
"learning_rate": 7.839549056931557e-06,
|
|
"loss": 0.0419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010621829889714718,
|
|
"step": 2305,
|
|
"valid_targets_mean": 1483.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 3.696836203444133,
|
|
"grad_norm": 0.5814636798704783,
|
|
"learning_rate": 7.751041787986965e-06,
|
|
"loss": 0.0453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014290489256381989,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2120.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 3.7048458149779737,
|
|
"grad_norm": 0.55109226449842,
|
|
"learning_rate": 7.662916733530317e-06,
|
|
"loss": 0.0463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021787574514746666,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2185.5,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 3.712855426511814,
|
|
"grad_norm": 0.6298710134391526,
|
|
"learning_rate": 7.575176643401394e-06,
|
|
"loss": 0.0466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0066481707617640495,
|
|
"step": 2320,
|
|
"valid_targets_mean": 1273.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 3.720865038045655,
|
|
"grad_norm": 0.6160900765954831,
|
|
"learning_rate": 7.487824255427616e-06,
|
|
"loss": 0.0473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009602631442248821,
|
|
"step": 2325,
|
|
"valid_targets_mean": 1165.2,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 3.7288746495794953,
|
|
"grad_norm": 0.5550215027363392,
|
|
"learning_rate": 7.400862295338595e-06,
|
|
"loss": 0.0492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.008349218405783176,
|
|
"step": 2330,
|
|
"valid_targets_mean": 1887.8,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 3.736884261113336,
|
|
"grad_norm": 0.5608075519999484,
|
|
"learning_rate": 7.314293476681122e-06,
|
|
"loss": 0.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012244230136275291,
|
|
"step": 2335,
|
|
"valid_targets_mean": 1764.0,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 3.7448938726471765,
|
|
"grad_norm": 0.5342509238709633,
|
|
"learning_rate": 7.228120500734443e-06,
|
|
"loss": 0.042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006746052298694849,
|
|
"step": 2340,
|
|
"valid_targets_mean": 1403.2,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 3.7529034841810174,
|
|
"grad_norm": 0.5736934312451866,
|
|
"learning_rate": 7.1423460564259995e-06,
|
|
"loss": 0.0601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024807695299386978,
|
|
"step": 2345,
|
|
"valid_targets_mean": 6643.5,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 3.7609130957148578,
|
|
"grad_norm": 0.3976100741560625,
|
|
"learning_rate": 7.056972820247516e-06,
|
|
"loss": 0.0784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021875973790884018,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5136.0,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 3.7689227072486986,
|
|
"grad_norm": 0.5751360279803786,
|
|
"learning_rate": 6.97200345617149e-06,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028168369084596634,
|
|
"step": 2355,
|
|
"valid_targets_mean": 5650.8,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 3.776932318782539,
|
|
"grad_norm": 0.5421550950600535,
|
|
"learning_rate": 6.887440615568044e-06,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023393627256155014,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4949.2,
|
|
"valid_targets_min": 4191
|
|
},
|
|
{
|
|
"epoch": 3.78494193031638,
|
|
"grad_norm": 0.4491658253333411,
|
|
"learning_rate": 6.803286937122233e-06,
|
|
"loss": 0.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018921280279755592,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3846.0,
|
|
"valid_targets_min": 2321
|
|
},
|
|
{
|
|
"epoch": 3.79295154185022,
|
|
"grad_norm": 0.35429092000874757,
|
|
"learning_rate": 6.719545046751674e-06,
|
|
"loss": 0.0831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015906432643532753,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4102.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 3.800961153384061,
|
|
"grad_norm": 0.4020543848404522,
|
|
"learning_rate": 6.636217557524605e-06,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03531377762556076,
|
|
"step": 2375,
|
|
"valid_targets_mean": 7856.2,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 3.8089707649179014,
|
|
"grad_norm": 0.34484935457476934,
|
|
"learning_rate": 6.55330706957837e-06,
|
|
"loss": 0.0812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022231310606002808,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5124.0,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 3.816980376451742,
|
|
"grad_norm": 0.5129021378997595,
|
|
"learning_rate": 6.4708161700382655e-06,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025359731167554855,
|
|
"step": 2385,
|
|
"valid_targets_mean": 6862.0,
|
|
"valid_targets_min": 3586
|
|
},
|
|
{
|
|
"epoch": 3.8249899879855827,
|
|
"grad_norm": 0.41837884601224123,
|
|
"learning_rate": 6.388747432936819e-06,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03131610527634621,
|
|
"step": 2390,
|
|
"valid_targets_mean": 7773.0,
|
|
"valid_targets_min": 5273
|
|
},
|
|
{
|
|
"epoch": 3.8329995995194235,
|
|
"grad_norm": 0.44064567620787626,
|
|
"learning_rate": 6.3071034191334915e-06,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02980436012148857,
|
|
"step": 2395,
|
|
"valid_targets_mean": 7539.5,
|
|
"valid_targets_min": 4721
|
|
},
|
|
{
|
|
"epoch": 3.841009211053264,
|
|
"grad_norm": 0.4186964990158793,
|
|
"learning_rate": 6.22588667623472e-06,
|
|
"loss": 0.0906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019276263192296028,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5395.0,
|
|
"valid_targets_min": 4460
|
|
},
|
|
{
|
|
"epoch": 3.8490188225871043,
|
|
"grad_norm": 0.3806461440054923,
|
|
"learning_rate": 6.145099738514466e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024702083319425583,
|
|
"step": 2405,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 4366
|
|
},
|
|
{
|
|
"epoch": 3.857028434120945,
|
|
"grad_norm": 0.3925708154940076,
|
|
"learning_rate": 6.064745126835112e-06,
|
|
"loss": 0.0875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023383330553770065,
|
|
"step": 2410,
|
|
"valid_targets_mean": 6880.8,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 3.865038045654786,
|
|
"grad_norm": 0.3409224568955925,
|
|
"learning_rate": 5.984825348568812e-06,
|
|
"loss": 0.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01937369629740715,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 3.8730476571886263,
|
|
"grad_norm": 0.392337083778957,
|
|
"learning_rate": 5.905342897519262e-06,
|
|
"loss": 0.0692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011381832882761955,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4575.0,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 3.8810572687224667,
|
|
"grad_norm": 0.4133167575935718,
|
|
"learning_rate": 5.826300253843851e-06,
|
|
"loss": 0.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0190584659576416,
|
|
"step": 2425,
|
|
"valid_targets_mean": 6189.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 3.8890668802563075,
|
|
"grad_norm": 0.517004742802757,
|
|
"learning_rate": 5.7476998839763035e-06,
|
|
"loss": 0.111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011651026085019112,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4232.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 3.8970764917901484,
|
|
"grad_norm": 1.8291805373918621,
|
|
"learning_rate": 5.669544240549698e-06,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016657385975122452,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4608.8,
|
|
"valid_targets_min": 3264
|
|
},
|
|
{
|
|
"epoch": 3.9050861033239888,
|
|
"grad_norm": 0.5989095231610289,
|
|
"learning_rate": 5.591835762319946e-06,
|
|
"loss": 0.1001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022755850106477737,
|
|
"step": 2440,
|
|
"valid_targets_mean": 5691.5,
|
|
"valid_targets_min": 4828
|
|
},
|
|
{
|
|
"epoch": 3.913095714857829,
|
|
"grad_norm": 0.3626396873413653,
|
|
"learning_rate": 5.514576874089683e-06,
|
|
"loss": 0.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021042577922344208,
|
|
"step": 2445,
|
|
"valid_targets_mean": 5483.0,
|
|
"valid_targets_min": 4351
|
|
},
|
|
{
|
|
"epoch": 3.92110532639167,
|
|
"grad_norm": 0.4013687468745221,
|
|
"learning_rate": 5.437769986632622e-06,
|
|
"loss": 0.0703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010984890162944794,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3804.2,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 3.929114937925511,
|
|
"grad_norm": 0.670786940477875,
|
|
"learning_rate": 5.361417496618315e-06,
|
|
"loss": 0.1554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013671481981873512,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3795.5,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 3.937124549459351,
|
|
"grad_norm": 0.42731937905140654,
|
|
"learning_rate": 5.285521786537368e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024802636355161667,
|
|
"step": 2460,
|
|
"valid_targets_mean": 6354.0,
|
|
"valid_targets_min": 4890
|
|
},
|
|
{
|
|
"epoch": 3.9451341609931916,
|
|
"grad_norm": 0.34015329343285766,
|
|
"learning_rate": 5.2100852246270975e-06,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0189439095556736,
|
|
"step": 2465,
|
|
"valid_targets_mean": 6546.8,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 3.9531437725270324,
|
|
"grad_norm": 0.380189399202825,
|
|
"learning_rate": 5.135110164797637e-06,
|
|
"loss": 0.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015444258227944374,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4817.8,
|
|
"valid_targets_min": 3153
|
|
},
|
|
{
|
|
"epoch": 3.9611533840608733,
|
|
"grad_norm": 0.3736782039075832,
|
|
"learning_rate": 5.060598946558484e-06,
|
|
"loss": 0.0578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009200219064950943,
|
|
"step": 2475,
|
|
"valid_targets_mean": 2771.0,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 3.9691629955947136,
|
|
"grad_norm": 0.39167385396466126,
|
|
"learning_rate": 4.986553894945512e-06,
|
|
"loss": 0.0501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009968343190848827,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4317.0,
|
|
"valid_targets_min": 3829
|
|
},
|
|
{
|
|
"epoch": 3.977172607128554,
|
|
"grad_norm": 0.47382543877041583,
|
|
"learning_rate": 4.912977320448391e-06,
|
|
"loss": 0.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02160932868719101,
|
|
"step": 2485,
|
|
"valid_targets_mean": 6788.8,
|
|
"valid_targets_min": 5244
|
|
},
|
|
{
|
|
"epoch": 3.985182218662395,
|
|
"grad_norm": 0.5254691100699673,
|
|
"learning_rate": 4.839871518938513e-06,
|
|
"loss": 0.0914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02825193665921688,
|
|
"step": 2490,
|
|
"valid_targets_mean": 7649.8,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 3.9931918301962357,
|
|
"grad_norm": 0.37466517639487923,
|
|
"learning_rate": 4.767238771597347e-06,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016600189730525017,
|
|
"step": 2495,
|
|
"valid_targets_mean": 6744.8,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.6596351782108862,
|
|
"learning_rate": 4.695081344845254e-06,
|
|
"loss": 0.0705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04466036334633827,
|
|
"step": 2500,
|
|
"valid_targets_mean": 6467.2,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 4.008009611533841,
|
|
"grad_norm": 0.5261575442420726,
|
|
"learning_rate": 4.623401490270778e-06,
|
|
"loss": 0.088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02538900636136532,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4098.5,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 4.016019223067681,
|
|
"grad_norm": 0.5273401951666762,
|
|
"learning_rate": 4.552201444560373e-06,
|
|
"loss": 0.0807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016584068536758423,
|
|
"step": 2510,
|
|
"valid_targets_mean": 2511.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 4.024028834601522,
|
|
"grad_norm": 0.500898352369778,
|
|
"learning_rate": 4.481483429428615e-06,
|
|
"loss": 0.073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.00987254735082388,
|
|
"step": 2515,
|
|
"valid_targets_mean": 2126.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 4.032038446135362,
|
|
"grad_norm": 0.4129807119089104,
|
|
"learning_rate": 4.4112496515488765e-06,
|
|
"loss": 0.0695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.00929091963917017,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2022.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 4.040048057669203,
|
|
"grad_norm": 0.530051225069077,
|
|
"learning_rate": 4.341502302484472e-06,
|
|
"loss": 0.0652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015852056443691254,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3344.0,
|
|
"valid_targets_min": 2137
|
|
},
|
|
{
|
|
"epoch": 4.048057669203043,
|
|
"grad_norm": 0.4731689741771552,
|
|
"learning_rate": 4.272243558620264e-06,
|
|
"loss": 0.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019559800624847412,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3821.8,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 4.056067280736884,
|
|
"grad_norm": 0.46852238858877476,
|
|
"learning_rate": 4.203475581094771e-06,
|
|
"loss": 0.0548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01456384640187025,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2872.0,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 4.064076892270725,
|
|
"grad_norm": 0.5125062927844847,
|
|
"learning_rate": 4.135200515732716e-06,
|
|
"loss": 0.059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019327469170093536,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 4.072086503804566,
|
|
"grad_norm": 0.4594966553018079,
|
|
"learning_rate": 4.067420492978065e-06,
|
|
"loss": 0.0586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.017938848584890366,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4791.2,
|
|
"valid_targets_min": 4097
|
|
},
|
|
{
|
|
"epoch": 4.080096115338406,
|
|
"grad_norm": 0.48606745046662464,
|
|
"learning_rate": 4.000137627827554e-06,
|
|
"loss": 0.0625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016958706080913544,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5747.0,
|
|
"valid_targets_min": 2284
|
|
},
|
|
{
|
|
"epoch": 4.0881057268722465,
|
|
"grad_norm": 0.4241178256427371,
|
|
"learning_rate": 3.9333540197647035e-06,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.018895983695983887,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3851.0,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 4.096115338406087,
|
|
"grad_norm": 0.4225142526319239,
|
|
"learning_rate": 3.867071752694282e-06,
|
|
"loss": 0.0541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01742047630250454,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3773.2,
|
|
"valid_targets_min": 3192
|
|
},
|
|
{
|
|
"epoch": 4.104124949939928,
|
|
"grad_norm": 0.40854496040313687,
|
|
"learning_rate": 3.8012928948773243e-06,
|
|
"loss": 0.049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016253113746643066,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3683.5,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 4.112134561473768,
|
|
"grad_norm": 0.39708485193059273,
|
|
"learning_rate": 3.7360194988665364e-06,
|
|
"loss": 0.0488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006625662557780743,
|
|
"step": 2570,
|
|
"valid_targets_mean": 1953.5,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 4.120144173007609,
|
|
"grad_norm": 0.4170942312619489,
|
|
"learning_rate": 3.6712536014422885e-06,
|
|
"loss": 0.0494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01199355348944664,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3450.8,
|
|
"valid_targets_min": 1888
|
|
},
|
|
{
|
|
"epoch": 4.12815378454145,
|
|
"grad_norm": 0.3923289330532135,
|
|
"learning_rate": 3.606997223549049e-06,
|
|
"loss": 0.0451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011378319934010506,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3919.8,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 4.136163396075291,
|
|
"grad_norm": 0.4409059812937508,
|
|
"learning_rate": 3.543252370232313e-06,
|
|
"loss": 0.0463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.016186460852622986,
|
|
"step": 2585,
|
|
"valid_targets_mean": 2428.2,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 4.1441730076091305,
|
|
"grad_norm": 0.4314629037286106,
|
|
"learning_rate": 3.4800210305760662e-06,
|
|
"loss": 0.0458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011463895440101624,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3008.2,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 4.152182619142971,
|
|
"grad_norm": 0.4365938860248358,
|
|
"learning_rate": 3.4173051776406817e-06,
|
|
"loss": 0.0456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009113586507737637,
|
|
"step": 2595,
|
|
"valid_targets_mean": 2879.5,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 4.160192230676812,
|
|
"grad_norm": 0.38712706743603437,
|
|
"learning_rate": 3.3551067684013706e-06,
|
|
"loss": 0.0493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006580232176929712,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2469.2,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 4.168201842210653,
|
|
"grad_norm": 0.3840427960944813,
|
|
"learning_rate": 3.2934277436871187e-06,
|
|
"loss": 0.0418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009975997731089592,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3259.2,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 4.176211453744493,
|
|
"grad_norm": 0.3862749708830179,
|
|
"learning_rate": 3.232270028120121e-06,
|
|
"loss": 0.0454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.008361486718058586,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2490.2,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 4.184221065278334,
|
|
"grad_norm": 0.3877900210910342,
|
|
"learning_rate": 3.1716355300557256e-06,
|
|
"loss": 0.0413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012127812951803207,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4032.2,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 4.192230676812175,
|
|
"grad_norm": 0.37403920491128656,
|
|
"learning_rate": 3.111526141522896e-06,
|
|
"loss": 0.0379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010507754981517792,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3729.0,
|
|
"valid_targets_min": 2157
|
|
},
|
|
{
|
|
"epoch": 4.2002402883460155,
|
|
"grad_norm": 0.489026253852053,
|
|
"learning_rate": 3.0519437381651507e-06,
|
|
"loss": 0.0582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015390513464808464,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3270.8,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 4.208249899879855,
|
|
"grad_norm": 0.49774941702587183,
|
|
"learning_rate": 2.992890179182062e-06,
|
|
"loss": 0.0575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012589462101459503,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3178.5,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 4.216259511413696,
|
|
"grad_norm": 0.42941443284475755,
|
|
"learning_rate": 2.93436730727122e-06,
|
|
"loss": 0.0507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.008904894813895226,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2994.8,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 4.224269122947537,
|
|
"grad_norm": 0.40368433938574116,
|
|
"learning_rate": 2.8763769485707447e-06,
|
|
"loss": 0.0514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013715188950300217,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4503.5,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 4.232278734481378,
|
|
"grad_norm": 0.3880292101599968,
|
|
"learning_rate": 2.818920912602294e-06,
|
|
"loss": 0.0413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009501033462584019,
|
|
"step": 2645,
|
|
"valid_targets_mean": 2806.2,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 4.240288346015218,
|
|
"grad_norm": 0.3534466748069463,
|
|
"learning_rate": 2.762000992214626e-06,
|
|
"loss": 0.0421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011742590926587582,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4892.8,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 4.248297957549059,
|
|
"grad_norm": 0.4092897921911983,
|
|
"learning_rate": 2.7056189635276162e-06,
|
|
"loss": 0.0365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.017392607405781746,
|
|
"step": 2655,
|
|
"valid_targets_mean": 6856.2,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 4.2563075690828995,
|
|
"grad_norm": 0.30724012863941247,
|
|
"learning_rate": 2.6497765858768643e-06,
|
|
"loss": 0.0365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.008890224620699883,
|
|
"step": 2660,
|
|
"valid_targets_mean": 5445.0,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 4.26431718061674,
|
|
"grad_norm": 0.32862116655493734,
|
|
"learning_rate": 2.594475601758786e-06,
|
|
"loss": 0.0369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011442323215305805,
|
|
"step": 2665,
|
|
"valid_targets_mean": 6826.8,
|
|
"valid_targets_min": 4640
|
|
},
|
|
{
|
|
"epoch": 4.27232679215058,
|
|
"grad_norm": 0.26674719849904693,
|
|
"learning_rate": 2.539717736776237e-06,
|
|
"loss": 0.0322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.008357984945178032,
|
|
"step": 2670,
|
|
"valid_targets_mean": 7376.2,
|
|
"valid_targets_min": 6135
|
|
},
|
|
{
|
|
"epoch": 4.280336403684421,
|
|
"grad_norm": 0.23976035100606502,
|
|
"learning_rate": 2.4855046995846844e-06,
|
|
"loss": 0.029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0077203139662742615,
|
|
"step": 2675,
|
|
"valid_targets_mean": 6002.2,
|
|
"valid_targets_min": 3847
|
|
},
|
|
{
|
|
"epoch": 4.288346015218262,
|
|
"grad_norm": 0.22532694629701736,
|
|
"learning_rate": 2.431838181838868e-06,
|
|
"loss": 0.0287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0067066531628370285,
|
|
"step": 2680,
|
|
"valid_targets_mean": 7146.2,
|
|
"valid_targets_min": 5036
|
|
},
|
|
{
|
|
"epoch": 4.296355626752103,
|
|
"grad_norm": 0.2578935564952035,
|
|
"learning_rate": 2.3787198581400285e-06,
|
|
"loss": 0.0303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010819928720593452,
|
|
"step": 2685,
|
|
"valid_targets_mean": 8201.5,
|
|
"valid_targets_min": 6022
|
|
},
|
|
{
|
|
"epoch": 4.304365238285943,
|
|
"grad_norm": 0.23890209248473898,
|
|
"learning_rate": 2.3261513859836437e-06,
|
|
"loss": 0.0302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.007468602154403925,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 4.312374849819784,
|
|
"grad_norm": 0.22335630174328372,
|
|
"learning_rate": 2.27413440570772e-06,
|
|
"loss": 0.0246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005689718760550022,
|
|
"step": 2695,
|
|
"valid_targets_mean": 6249.8,
|
|
"valid_targets_min": 3601
|
|
},
|
|
{
|
|
"epoch": 4.320384461353624,
|
|
"grad_norm": 0.22915517688005416,
|
|
"learning_rate": 2.222670540441596e-06,
|
|
"loss": 0.0234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006231151521205902,
|
|
"step": 2700,
|
|
"valid_targets_mean": 7850.5,
|
|
"valid_targets_min": 3289
|
|
},
|
|
{
|
|
"epoch": 4.328394072887465,
|
|
"grad_norm": 0.3480139223152131,
|
|
"learning_rate": 2.17176139605531e-06,
|
|
"loss": 0.0276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.00873448047786951,
|
|
"step": 2705,
|
|
"valid_targets_mean": 6368.5,
|
|
"valid_targets_min": 4071
|
|
},
|
|
{
|
|
"epoch": 4.336403684421305,
|
|
"grad_norm": 0.2308369283012566,
|
|
"learning_rate": 2.121408561109466e-06,
|
|
"loss": 0.0243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005020568147301674,
|
|
"step": 2710,
|
|
"valid_targets_mean": 7169.2,
|
|
"valid_targets_min": 5138
|
|
},
|
|
{
|
|
"epoch": 4.344413295955146,
|
|
"grad_norm": 0.24205622320850861,
|
|
"learning_rate": 2.071613606805696e-06,
|
|
"loss": 0.0233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0065345196053385735,
|
|
"step": 2715,
|
|
"valid_targets_mean": 6420.8,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 4.352422907488987,
|
|
"grad_norm": 0.25050803020987944,
|
|
"learning_rate": 2.0223780869376018e-06,
|
|
"loss": 0.0242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0059038992039859295,
|
|
"step": 2720,
|
|
"valid_targets_mean": 6447.0,
|
|
"valid_targets_min": 5443
|
|
},
|
|
{
|
|
"epoch": 4.360432519022828,
|
|
"grad_norm": 0.2303769461388871,
|
|
"learning_rate": 1.9737035378422907e-06,
|
|
"loss": 0.0217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005152651574462652,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4346.2,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 4.368442130556668,
|
|
"grad_norm": 0.25966266071203103,
|
|
"learning_rate": 1.925591478352424e-06,
|
|
"loss": 0.0242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.008485857397317886,
|
|
"step": 2730,
|
|
"valid_targets_mean": 6363.5,
|
|
"valid_targets_min": 4955
|
|
},
|
|
{
|
|
"epoch": 4.3764517420905085,
|
|
"grad_norm": 0.2160938856900943,
|
|
"learning_rate": 1.8780434097488443e-06,
|
|
"loss": 0.0285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.004602254368364811,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3673.2,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 4.384461353624349,
|
|
"grad_norm": 0.21800178414560759,
|
|
"learning_rate": 1.831060815713699e-06,
|
|
"loss": 0.019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.004092586226761341,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4228.0,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 4.39247096515819,
|
|
"grad_norm": 0.2342908515396402,
|
|
"learning_rate": 1.7846451622841643e-06,
|
|
"loss": 0.0178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.00421184953302145,
|
|
"step": 2745,
|
|
"valid_targets_mean": 5465.5,
|
|
"valid_targets_min": 3720
|
|
},
|
|
{
|
|
"epoch": 4.40048057669203,
|
|
"grad_norm": 0.22005427949719655,
|
|
"learning_rate": 1.7387978978066988e-06,
|
|
"loss": 0.0194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0045888046734035015,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4442.2,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 4.408490188225871,
|
|
"grad_norm": 0.22292167944705465,
|
|
"learning_rate": 1.6935204528918347e-06,
|
|
"loss": 0.0173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005383871495723724,
|
|
"step": 2755,
|
|
"valid_targets_mean": 6434.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 4.416499799759712,
|
|
"grad_norm": 0.26165777649863947,
|
|
"learning_rate": 1.6488142403695651e-06,
|
|
"loss": 0.0213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0062151080928742886,
|
|
"step": 2760,
|
|
"valid_targets_mean": 6182.2,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 4.424509411293553,
|
|
"grad_norm": 0.22205126769160863,
|
|
"learning_rate": 1.6046806552452254e-06,
|
|
"loss": 0.0218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0024244938977062702,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4245.0,
|
|
"valid_targets_min": 3545
|
|
},
|
|
{
|
|
"epoch": 4.4325190228273925,
|
|
"grad_norm": 0.21730767265512088,
|
|
"learning_rate": 1.5611210746559868e-06,
|
|
"loss": 0.0171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0051185861229896545,
|
|
"step": 2770,
|
|
"valid_targets_mean": 6278.5,
|
|
"valid_targets_min": 3918
|
|
},
|
|
{
|
|
"epoch": 4.440528634361233,
|
|
"grad_norm": 0.21832833325881706,
|
|
"learning_rate": 1.5181368578278744e-06,
|
|
"loss": 0.0174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005742904730141163,
|
|
"step": 2775,
|
|
"valid_targets_mean": 6698.0,
|
|
"valid_targets_min": 5070
|
|
},
|
|
{
|
|
"epoch": 4.448538245895074,
|
|
"grad_norm": 0.19678985609125352,
|
|
"learning_rate": 1.4757293460333566e-06,
|
|
"loss": 0.0175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0036108577623963356,
|
|
"step": 2780,
|
|
"valid_targets_mean": 5043.5,
|
|
"valid_targets_min": 2590
|
|
},
|
|
{
|
|
"epoch": 4.456547857428915,
|
|
"grad_norm": 0.1725816212942764,
|
|
"learning_rate": 1.4338998625494905e-06,
|
|
"loss": 0.0147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.002651121001690626,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4594.5,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 4.464557468962755,
|
|
"grad_norm": 0.20314575659662054,
|
|
"learning_rate": 1.3926497126166405e-06,
|
|
"loss": 0.0152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.004732338711619377,
|
|
"step": 2790,
|
|
"valid_targets_mean": 7859.2,
|
|
"valid_targets_min": 4849
|
|
},
|
|
{
|
|
"epoch": 4.472567080496596,
|
|
"grad_norm": 0.2040036994218858,
|
|
"learning_rate": 1.3519801833977298e-06,
|
|
"loss": 0.0148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.004215016961097717,
|
|
"step": 2795,
|
|
"valid_targets_mean": 7530.8,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 4.480576692030437,
|
|
"grad_norm": 0.2404382519698876,
|
|
"learning_rate": 1.3118925439381003e-06,
|
|
"loss": 0.0167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0031050911638885736,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4505.0,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 4.4885863035642775,
|
|
"grad_norm": 0.16553506441831442,
|
|
"learning_rate": 1.2723880451258918e-06,
|
|
"loss": 0.0124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.002553843427449465,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4934.5,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 4.496595915098117,
|
|
"grad_norm": 0.20081404940791364,
|
|
"learning_rate": 1.2334679196530219e-06,
|
|
"loss": 0.0131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.004381764680147171,
|
|
"step": 2810,
|
|
"valid_targets_mean": 6368.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 4.504605526631958,
|
|
"grad_norm": 0.4819883375473916,
|
|
"learning_rate": 1.1951333819767163e-06,
|
|
"loss": 0.022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005449524149298668,
|
|
"step": 2815,
|
|
"valid_targets_mean": 1458.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 4.512615138165799,
|
|
"grad_norm": 0.5223374827709039,
|
|
"learning_rate": 1.157385628281622e-06,
|
|
"loss": 0.0231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005884670652449131,
|
|
"step": 2820,
|
|
"valid_targets_mean": 1471.5,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 4.52062474969964,
|
|
"grad_norm": 0.5313050068108364,
|
|
"learning_rate": 1.1202258364424633e-06,
|
|
"loss": 0.0271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006905954331159592,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2936.8,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 4.52863436123348,
|
|
"grad_norm": 0.43017056432687895,
|
|
"learning_rate": 1.0836551659873073e-06,
|
|
"loss": 0.023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005521543323993683,
|
|
"step": 2830,
|
|
"valid_targets_mean": 1157.2,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 4.536643972767321,
|
|
"grad_norm": 0.45137041805906397,
|
|
"learning_rate": 1.0476747580613723e-06,
|
|
"loss": 0.0277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006572369486093521,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2150.8,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 4.5446535843011615,
|
|
"grad_norm": 0.41920022498724685,
|
|
"learning_rate": 1.012285735391416e-06,
|
|
"loss": 0.0203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0071044061332941055,
|
|
"step": 2840,
|
|
"valid_targets_mean": 2603.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 4.552663195835002,
|
|
"grad_norm": 0.4277733887406432,
|
|
"learning_rate": 9.774892022507166e-07,
|
|
"loss": 0.0204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.007875761948525906,
|
|
"step": 2845,
|
|
"valid_targets_mean": 2382.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 4.560672807368842,
|
|
"grad_norm": 0.5291850621024743,
|
|
"learning_rate": 9.432862444245994e-07,
|
|
"loss": 0.0258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0037110093981027603,
|
|
"step": 2850,
|
|
"valid_targets_mean": 1733.0,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 4.568682418902683,
|
|
"grad_norm": 0.40144427473521,
|
|
"learning_rate": 9.096779291765667e-07,
|
|
"loss": 0.0376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.004997552372515202,
|
|
"step": 2855,
|
|
"valid_targets_mean": 1715.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 4.576692030436524,
|
|
"grad_norm": 0.37729754902757434,
|
|
"learning_rate": 8.766653052149831e-07,
|
|
"loss": 0.0193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.004043888300657272,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2182.5,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 4.584701641970365,
|
|
"grad_norm": 0.40512124213573425,
|
|
"learning_rate": 8.442494026603709e-07,
|
|
"loss": 0.0174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009138538502156734,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2525.8,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 4.592711253504205,
|
|
"grad_norm": 0.30591584827894536,
|
|
"learning_rate": 8.124312330132423e-07,
|
|
"loss": 0.023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0028275391086935997,
|
|
"step": 2870,
|
|
"valid_targets_mean": 1936.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 4.600720865038046,
|
|
"grad_norm": 0.33165070145188275,
|
|
"learning_rate": 7.812117891225667e-07,
|
|
"loss": 0.0178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.002544086892157793,
|
|
"step": 2875,
|
|
"valid_targets_mean": 1548.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 4.608730476571886,
|
|
"grad_norm": 0.3397804542537085,
|
|
"learning_rate": 7.505920451547544e-07,
|
|
"loss": 0.018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.004221238195896149,
|
|
"step": 2880,
|
|
"valid_targets_mean": 1888.8,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 4.616740088105727,
|
|
"grad_norm": 0.4086525588090419,
|
|
"learning_rate": 7.205729565632947e-07,
|
|
"loss": 0.0205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0046452805399894714,
|
|
"step": 2885,
|
|
"valid_targets_mean": 1614.0,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 4.624749699639567,
|
|
"grad_norm": 0.3882986456558545,
|
|
"learning_rate": 6.911554600589121e-07,
|
|
"loss": 0.0179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0040503935888409615,
|
|
"step": 2890,
|
|
"valid_targets_mean": 1651.2,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 4.632759311173408,
|
|
"grad_norm": 0.4020919937030956,
|
|
"learning_rate": 6.62340473580354e-07,
|
|
"loss": 0.0222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0039815474301576614,
|
|
"step": 2895,
|
|
"valid_targets_mean": 1275.0,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 4.640768922707249,
|
|
"grad_norm": 0.36780147360518844,
|
|
"learning_rate": 6.341288962657422e-07,
|
|
"loss": 0.0137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005725189112126827,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2261.0,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 4.64877853424109,
|
|
"grad_norm": 0.3969386892152545,
|
|
"learning_rate": 6.06521608424524e-07,
|
|
"loss": 0.0153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.005156328901648521,
|
|
"step": 2905,
|
|
"valid_targets_mean": 1702.5,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 4.65678814577493,
|
|
"grad_norm": 0.31441356287635236,
|
|
"learning_rate": 5.795194715099905e-07,
|
|
"loss": 0.016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0018683484522625804,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2216.8,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 4.66479775730877,
|
|
"grad_norm": 0.37087816859697303,
|
|
"learning_rate": 5.531233280924042e-07,
|
|
"loss": 0.0156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0023959206882864237,
|
|
"step": 2915,
|
|
"valid_targets_mean": 1605.5,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 4.672807368842611,
|
|
"grad_norm": 0.35988199455615155,
|
|
"learning_rate": 5.273340018327044e-07,
|
|
"loss": 0.0139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0044454727321863174,
|
|
"step": 2920,
|
|
"valid_targets_mean": 2072.8,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 4.680816980376452,
|
|
"grad_norm": 0.31634781297093684,
|
|
"learning_rate": 5.02152297456806e-07,
|
|
"loss": 0.0139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0030531948432326317,
|
|
"step": 2925,
|
|
"valid_targets_mean": 1854.5,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 4.688826591910292,
|
|
"grad_norm": 0.34005711859992244,
|
|
"learning_rate": 4.775790007304993e-07,
|
|
"loss": 0.0122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0020766272209584713,
|
|
"step": 2930,
|
|
"valid_targets_mean": 1483.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 4.696836203444133,
|
|
"grad_norm": 0.3722703949830525,
|
|
"learning_rate": 4.5361487843490924e-07,
|
|
"loss": 0.0124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0038124434649944305,
|
|
"step": 2935,
|
|
"valid_targets_mean": 2120.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 4.704845814977974,
|
|
"grad_norm": 0.4632329548321045,
|
|
"learning_rate": 4.3026067834258667e-07,
|
|
"loss": 0.0135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006385087501257658,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2185.5,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 4.7128554265118145,
|
|
"grad_norm": 0.4546379071864116,
|
|
"learning_rate": 4.0751712919417484e-07,
|
|
"loss": 0.0123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0017161949072033167,
|
|
"step": 2945,
|
|
"valid_targets_mean": 1273.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 4.7208650380456545,
|
|
"grad_norm": 0.4057474805077139,
|
|
"learning_rate": 3.853849406756549e-07,
|
|
"loss": 0.0129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.002400461118668318,
|
|
"step": 2950,
|
|
"valid_targets_mean": 1165.2,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 4.728874649579495,
|
|
"grad_norm": 0.3174977482773058,
|
|
"learning_rate": 3.6386480339621886e-07,
|
|
"loss": 0.0137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.002654407639056444,
|
|
"step": 2955,
|
|
"valid_targets_mean": 1887.8,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 4.736884261113336,
|
|
"grad_norm": 0.31216733827899745,
|
|
"learning_rate": 3.4295738886670925e-07,
|
|
"loss": 0.012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0036342234816402197,
|
|
"step": 2960,
|
|
"valid_targets_mean": 1764.0,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 4.744893872647177,
|
|
"grad_norm": 0.2229966821647088,
|
|
"learning_rate": 3.226633494786668e-07,
|
|
"loss": 0.0101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0010336769046261907,
|
|
"step": 2965,
|
|
"valid_targets_mean": 1403.2,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 4.752903484181017,
|
|
"grad_norm": 0.45208498186379675,
|
|
"learning_rate": 3.0298331848398033e-07,
|
|
"loss": 0.0282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01599767431616783,
|
|
"step": 2970,
|
|
"valid_targets_mean": 6643.5,
|
|
"valid_targets_min": 5946
|
|
},
|
|
{
|
|
"epoch": 4.760913095714858,
|
|
"grad_norm": 0.481847558698925,
|
|
"learning_rate": 2.839179099751133e-07,
|
|
"loss": 0.0512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.012532619759440422,
|
|
"step": 2975,
|
|
"valid_targets_mean": 5136.0,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 4.768922707248699,
|
|
"grad_norm": 0.6888827941410374,
|
|
"learning_rate": 2.654677188659549e-07,
|
|
"loss": 0.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019851699471473694,
|
|
"step": 2980,
|
|
"valid_targets_mean": 5650.8,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 4.776932318782539,
|
|
"grad_norm": 0.7267080215435993,
|
|
"learning_rate": 2.476333208732462e-07,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.017861615866422653,
|
|
"step": 2985,
|
|
"valid_targets_mean": 4949.2,
|
|
"valid_targets_min": 4191
|
|
},
|
|
{
|
|
"epoch": 4.784941930316379,
|
|
"grad_norm": 0.6200515579809707,
|
|
"learning_rate": 2.3041527249863193e-07,
|
|
"loss": 0.0815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014388665556907654,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3846.0,
|
|
"valid_targets_min": 2321
|
|
},
|
|
{
|
|
"epoch": 4.79295154185022,
|
|
"grad_norm": 0.5344420628118597,
|
|
"learning_rate": 2.1381411101127013e-07,
|
|
"loss": 0.0592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011457081884145737,
|
|
"step": 2995,
|
|
"valid_targets_mean": 4102.8,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 4.800961153384061,
|
|
"grad_norm": 0.44893963575142803,
|
|
"learning_rate": 1.9783035443108999e-07,
|
|
"loss": 0.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022778000682592392,
|
|
"step": 3000,
|
|
"valid_targets_mean": 7856.2,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 4.808970764917902,
|
|
"grad_norm": 0.529928181368045,
|
|
"learning_rate": 1.8246450151261362e-07,
|
|
"loss": 0.0552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01595860719680786,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5124.0,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 4.816980376451742,
|
|
"grad_norm": 0.6131367385149193,
|
|
"learning_rate": 1.6771703172940635e-07,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0183606818318367,
|
|
"step": 3010,
|
|
"valid_targets_mean": 6862.0,
|
|
"valid_targets_min": 3586
|
|
},
|
|
{
|
|
"epoch": 4.824989987985583,
|
|
"grad_norm": 0.4805073860442967,
|
|
"learning_rate": 1.5358840525909967e-07,
|
|
"loss": 0.0764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01910022459924221,
|
|
"step": 3015,
|
|
"valid_targets_mean": 7773.0,
|
|
"valid_targets_min": 5273
|
|
},
|
|
{
|
|
"epoch": 4.8329995995194235,
|
|
"grad_norm": 0.4982226792737129,
|
|
"learning_rate": 1.4007906296904072e-07,
|
|
"loss": 0.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.017956791445612907,
|
|
"step": 3020,
|
|
"valid_targets_mean": 7539.5,
|
|
"valid_targets_min": 4721
|
|
},
|
|
{
|
|
"epoch": 4.841009211053263,
|
|
"grad_norm": 0.40272952121486455,
|
|
"learning_rate": 1.2718942640254084e-07,
|
|
"loss": 0.0618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013858837075531483,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5395.0,
|
|
"valid_targets_min": 4460
|
|
},
|
|
{
|
|
"epoch": 4.849018822587104,
|
|
"grad_norm": 0.4111522439246591,
|
|
"learning_rate": 1.1491989776570623e-07,
|
|
"loss": 0.0828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01820860244333744,
|
|
"step": 3030,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 4366
|
|
},
|
|
{
|
|
"epoch": 4.857028434120945,
|
|
"grad_norm": 0.3851344116463836,
|
|
"learning_rate": 1.0327085991490127e-07,
|
|
"loss": 0.0593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.015348225831985474,
|
|
"step": 3035,
|
|
"valid_targets_mean": 6880.8,
|
|
"valid_targets_min": 3749
|
|
},
|
|
{
|
|
"epoch": 4.865038045654786,
|
|
"grad_norm": 0.35920962302030307,
|
|
"learning_rate": 9.22426763447981e-08,
|
|
"loss": 0.0623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014026781544089317,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 4.873047657188627,
|
|
"grad_norm": 0.38739669920225117,
|
|
"learning_rate": 8.183569117703461e-08,
|
|
"loss": 0.048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.007748588919639587,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4575.0,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 4.881057268722467,
|
|
"grad_norm": 0.4096661066551452,
|
|
"learning_rate": 7.205022914946957e-08,
|
|
"loss": 0.0599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013611424714326859,
|
|
"step": 3050,
|
|
"valid_targets_mean": 6189.2,
|
|
"valid_targets_min": 4692
|
|
},
|
|
{
|
|
"epoch": 4.8890668802563075,
|
|
"grad_norm": 0.43631484073209664,
|
|
"learning_rate": 6.288659560606203e-08,
|
|
"loss": 0.0803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.007561445236206055,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4232.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 4.897076491790148,
|
|
"grad_norm": 0.5600226294274417,
|
|
"learning_rate": 5.4345076487332114e-08,
|
|
"loss": 0.104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013704903423786163,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4608.8,
|
|
"valid_targets_min": 3264
|
|
},
|
|
{
|
|
"epoch": 4.905086103323988,
|
|
"grad_norm": 0.4599628050143849,
|
|
"learning_rate": 4.642593832144382e-08,
|
|
"loss": 0.069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014675215817987919,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5691.5,
|
|
"valid_targets_min": 4828
|
|
},
|
|
{
|
|
"epoch": 4.913095714857829,
|
|
"grad_norm": 0.3977110414863037,
|
|
"learning_rate": 3.912942821589161e-08,
|
|
"loss": 0.0634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013072891160845757,
|
|
"step": 3070,
|
|
"valid_targets_mean": 5483.0,
|
|
"valid_targets_min": 4351
|
|
},
|
|
{
|
|
"epoch": 4.92110532639167,
|
|
"grad_norm": 0.36103582977810245,
|
|
"learning_rate": 3.2455773849779935e-08,
|
|
"loss": 0.0483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.007494109217077494,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3804.2,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 4.929114937925511,
|
|
"grad_norm": 0.49024951135582845,
|
|
"learning_rate": 2.6405183466731154e-08,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009179000742733479,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3795.5,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 4.937124549459352,
|
|
"grad_norm": 0.3297715393171128,
|
|
"learning_rate": 2.0977845868375145e-08,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.014823229983448982,
|
|
"step": 3085,
|
|
"valid_targets_mean": 6354.0,
|
|
"valid_targets_min": 4890
|
|
},
|
|
{
|
|
"epoch": 4.945134160993192,
|
|
"grad_norm": 0.31705191572960606,
|
|
"learning_rate": 1.6173930408467376e-08,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.011466408148407936,
|
|
"step": 3090,
|
|
"valid_targets_mean": 6546.8,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 4.953143772527032,
|
|
"grad_norm": 0.40726605143515165,
|
|
"learning_rate": 1.199358698759978e-08,
|
|
"loss": 0.0491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.010653853416442871,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4817.8,
|
|
"valid_targets_min": 3153
|
|
},
|
|
{
|
|
"epoch": 4.961153384060873,
|
|
"grad_norm": 0.3030065264695645,
|
|
"learning_rate": 8.436946048522298e-09,
|
|
"loss": 0.0386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006467252969741821,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2771.0,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 4.969162995594713,
|
|
"grad_norm": 0.3043698990618487,
|
|
"learning_rate": 5.504118572081662e-09,
|
|
"loss": 0.0327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.006383640691637993,
|
|
"step": 3105,
|
|
"valid_targets_mean": 4317.0,
|
|
"valid_targets_min": 3829
|
|
},
|
|
{
|
|
"epoch": 4.977172607128554,
|
|
"grad_norm": 0.36102852778853645,
|
|
"learning_rate": 3.1951960737419686e-09,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.013967076316475868,
|
|
"step": 3110,
|
|
"valid_targets_mean": 6788.8,
|
|
"valid_targets_min": 5244
|
|
},
|
|
{
|
|
"epoch": 4.985182218662395,
|
|
"grad_norm": 0.41034655022078576,
|
|
"learning_rate": 1.5102506007447227e-09,
|
|
"loss": 0.0609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01863592490553856,
|
|
"step": 3115,
|
|
"valid_targets_mean": 7649.8,
|
|
"valid_targets_min": 6890
|
|
},
|
|
{
|
|
"epoch": 4.993191830196236,
|
|
"grad_norm": 0.2828432242862505,
|
|
"learning_rate": 4.493347298528683e-10,
|
|
"loss": 0.0437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.009837220422923565,
|
|
"step": 3120,
|
|
"valid_targets_mean": 6744.8,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.3892103766980359,
|
|
"learning_rate": 1.248156571209691e-11,
|
|
"loss": 0.0448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01754429191350937,
|
|
"step": 3125,
|
|
"valid_targets_mean": 6467.2,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01754429191350937,
|
|
"step": 3125,
|
|
"total_flos": 1.586502852660953e+18,
|
|
"train_loss": 0.19733358554124833,
|
|
"train_runtime": 95981.0062,
|
|
"train_samples_per_second": 0.52,
|
|
"train_steps_per_second": 0.033,
|
|
"valid_targets_mean": 6467.2,
|
|
"valid_targets_min": 3371
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3125,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 750,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.586502852660953e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|