Model: laion/nemosci-tasrep-a1mfc-dev1-maxeps-swes-r2eg__Qwen3-8B Source: Original Platform
8045 lines
224 KiB
JSON
8045 lines
224 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3635,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.006883891693437357,
|
|
"grad_norm": 13.694241855634743,
|
|
"learning_rate": 4.395604395604396e-07,
|
|
"loss": 0.9672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3309265673160553,
|
|
"step": 5,
|
|
"valid_targets_mean": 8170.6,
|
|
"valid_targets_min": 3077
|
|
},
|
|
{
|
|
"epoch": 0.013767783386874713,
|
|
"grad_norm": 12.578564323146997,
|
|
"learning_rate": 9.890109890109891e-07,
|
|
"loss": 0.9668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30831611156463623,
|
|
"step": 10,
|
|
"valid_targets_mean": 6980.7,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 0.02065167508031207,
|
|
"grad_norm": 7.7944206304684025,
|
|
"learning_rate": 1.5384615384615387e-06,
|
|
"loss": 0.9211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686140537261963,
|
|
"step": 15,
|
|
"valid_targets_mean": 6148.2,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 0.027535566773749427,
|
|
"grad_norm": 3.7615150954296,
|
|
"learning_rate": 2.0879120879120883e-06,
|
|
"loss": 0.8559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865496277809143,
|
|
"step": 20,
|
|
"valid_targets_mean": 7165.8,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 0.03441945846718678,
|
|
"grad_norm": 1.9967996468850564,
|
|
"learning_rate": 2.6373626373626375e-06,
|
|
"loss": 0.7873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609645128250122,
|
|
"step": 25,
|
|
"valid_targets_mean": 7202.3,
|
|
"valid_targets_min": 2907
|
|
},
|
|
{
|
|
"epoch": 0.04130335016062414,
|
|
"grad_norm": 1.642494044026072,
|
|
"learning_rate": 3.1868131868131867e-06,
|
|
"loss": 0.7615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2548447251319885,
|
|
"step": 30,
|
|
"valid_targets_mean": 7477.5,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 0.048187241854061494,
|
|
"grad_norm": 1.1331298421000486,
|
|
"learning_rate": 3.7362637362637367e-06,
|
|
"loss": 0.7274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24324637651443481,
|
|
"step": 35,
|
|
"valid_targets_mean": 7314.0,
|
|
"valid_targets_min": 2883
|
|
},
|
|
{
|
|
"epoch": 0.05507113354749885,
|
|
"grad_norm": 0.8610066611189371,
|
|
"learning_rate": 4.2857142857142855e-06,
|
|
"loss": 0.6803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21853984892368317,
|
|
"step": 40,
|
|
"valid_targets_mean": 7126.6,
|
|
"valid_targets_min": 2597
|
|
},
|
|
{
|
|
"epoch": 0.06195502524093621,
|
|
"grad_norm": 0.6199513616997635,
|
|
"learning_rate": 4.8351648351648355e-06,
|
|
"loss": 0.6548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20629478991031647,
|
|
"step": 45,
|
|
"valid_targets_mean": 7369.8,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 0.06883891693437356,
|
|
"grad_norm": 0.5292903199674134,
|
|
"learning_rate": 5.384615384615385e-06,
|
|
"loss": 0.6199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19665127992630005,
|
|
"step": 50,
|
|
"valid_targets_mean": 7356.4,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 0.07572280862781092,
|
|
"grad_norm": 0.43990945332342174,
|
|
"learning_rate": 5.934065934065935e-06,
|
|
"loss": 0.5721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17901910841464996,
|
|
"step": 55,
|
|
"valid_targets_mean": 8719.0,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.08260670032124828,
|
|
"grad_norm": 0.34345855694621463,
|
|
"learning_rate": 6.483516483516485e-06,
|
|
"loss": 0.5352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18726885318756104,
|
|
"step": 60,
|
|
"valid_targets_mean": 9029.5,
|
|
"valid_targets_min": 4655
|
|
},
|
|
{
|
|
"epoch": 0.08949059201468564,
|
|
"grad_norm": 0.2745587801279678,
|
|
"learning_rate": 7.032967032967034e-06,
|
|
"loss": 0.509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17130973935127258,
|
|
"step": 65,
|
|
"valid_targets_mean": 10019.8,
|
|
"valid_targets_min": 4996
|
|
},
|
|
{
|
|
"epoch": 0.09637448370812299,
|
|
"grad_norm": 0.24899014086502314,
|
|
"learning_rate": 7.582417582417583e-06,
|
|
"loss": 0.4905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16219963133335114,
|
|
"step": 70,
|
|
"valid_targets_mean": 9413.2,
|
|
"valid_targets_min": 3337
|
|
},
|
|
{
|
|
"epoch": 0.10325837540156035,
|
|
"grad_norm": 0.6632172104478877,
|
|
"learning_rate": 8.131868131868132e-06,
|
|
"loss": 0.4843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16867445409297943,
|
|
"step": 75,
|
|
"valid_targets_mean": 10127.1,
|
|
"valid_targets_min": 3360
|
|
},
|
|
{
|
|
"epoch": 0.1101422670949977,
|
|
"grad_norm": 0.22112536714345443,
|
|
"learning_rate": 8.681318681318681e-06,
|
|
"loss": 0.4647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16406437754631042,
|
|
"step": 80,
|
|
"valid_targets_mean": 9319.1,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.11702615878843506,
|
|
"grad_norm": 0.23254063354964463,
|
|
"learning_rate": 9.230769230769232e-06,
|
|
"loss": 0.4668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15404294431209564,
|
|
"step": 85,
|
|
"valid_targets_mean": 9109.9,
|
|
"valid_targets_min": 4379
|
|
},
|
|
{
|
|
"epoch": 0.12391005048187242,
|
|
"grad_norm": 0.2289262735187177,
|
|
"learning_rate": 9.780219780219781e-06,
|
|
"loss": 0.4513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14573627710342407,
|
|
"step": 90,
|
|
"valid_targets_mean": 9313.2,
|
|
"valid_targets_min": 3452
|
|
},
|
|
{
|
|
"epoch": 0.13079394217530976,
|
|
"grad_norm": 0.2069573644523245,
|
|
"learning_rate": 1.0329670329670332e-05,
|
|
"loss": 0.444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14479273557662964,
|
|
"step": 95,
|
|
"valid_targets_mean": 8640.9,
|
|
"valid_targets_min": 383
|
|
},
|
|
{
|
|
"epoch": 0.13767783386874713,
|
|
"grad_norm": 0.2358199789841859,
|
|
"learning_rate": 1.087912087912088e-05,
|
|
"loss": 0.4416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14525240659713745,
|
|
"step": 100,
|
|
"valid_targets_mean": 9029.9,
|
|
"valid_targets_min": 4552
|
|
},
|
|
{
|
|
"epoch": 0.1445617255621845,
|
|
"grad_norm": 0.21577235061513939,
|
|
"learning_rate": 1.1428571428571429e-05,
|
|
"loss": 0.4334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14647001028060913,
|
|
"step": 105,
|
|
"valid_targets_mean": 10390.2,
|
|
"valid_targets_min": 4833
|
|
},
|
|
{
|
|
"epoch": 0.15144561725562183,
|
|
"grad_norm": 0.23169177022891713,
|
|
"learning_rate": 1.1978021978021978e-05,
|
|
"loss": 0.424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14595040678977966,
|
|
"step": 110,
|
|
"valid_targets_mean": 9771.6,
|
|
"valid_targets_min": 3641
|
|
},
|
|
{
|
|
"epoch": 0.1583295089490592,
|
|
"grad_norm": 0.23315620907648432,
|
|
"learning_rate": 1.2527472527472529e-05,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13074077665805817,
|
|
"step": 115,
|
|
"valid_targets_mean": 9316.0,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 0.16521340064249657,
|
|
"grad_norm": 0.23578755278571784,
|
|
"learning_rate": 1.3076923076923078e-05,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14109788835048676,
|
|
"step": 120,
|
|
"valid_targets_mean": 9838.7,
|
|
"valid_targets_min": 4324
|
|
},
|
|
{
|
|
"epoch": 0.1720972923359339,
|
|
"grad_norm": 0.244154827729474,
|
|
"learning_rate": 1.3626373626373627e-05,
|
|
"loss": 0.4272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14524628221988678,
|
|
"step": 125,
|
|
"valid_targets_mean": 9693.1,
|
|
"valid_targets_min": 3811
|
|
},
|
|
{
|
|
"epoch": 0.17898118402937127,
|
|
"grad_norm": 0.2089268624041823,
|
|
"learning_rate": 1.4175824175824178e-05,
|
|
"loss": 0.4122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12458304315805435,
|
|
"step": 130,
|
|
"valid_targets_mean": 9686.9,
|
|
"valid_targets_min": 3338
|
|
},
|
|
{
|
|
"epoch": 0.18586507572280864,
|
|
"grad_norm": 0.2328880543137963,
|
|
"learning_rate": 1.4725274725274727e-05,
|
|
"loss": 0.4021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12566791474819183,
|
|
"step": 135,
|
|
"valid_targets_mean": 9590.6,
|
|
"valid_targets_min": 4785
|
|
},
|
|
{
|
|
"epoch": 0.19274896741624598,
|
|
"grad_norm": 0.22383231353766703,
|
|
"learning_rate": 1.5274725274725277e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14299173653125763,
|
|
"step": 140,
|
|
"valid_targets_mean": 10718.9,
|
|
"valid_targets_min": 4892
|
|
},
|
|
{
|
|
"epoch": 0.19963285910968334,
|
|
"grad_norm": 0.2464207171177229,
|
|
"learning_rate": 1.5824175824175826e-05,
|
|
"loss": 0.4077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1544559895992279,
|
|
"step": 145,
|
|
"valid_targets_mean": 10753.2,
|
|
"valid_targets_min": 3836
|
|
},
|
|
{
|
|
"epoch": 0.2065167508031207,
|
|
"grad_norm": 0.22729169591824225,
|
|
"learning_rate": 1.6373626373626375e-05,
|
|
"loss": 0.4011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11852603405714035,
|
|
"step": 150,
|
|
"valid_targets_mean": 8877.9,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 0.21340064249655805,
|
|
"grad_norm": 0.22989523354603325,
|
|
"learning_rate": 1.6923076923076924e-05,
|
|
"loss": 0.4017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1433342844247818,
|
|
"step": 155,
|
|
"valid_targets_mean": 11518.0,
|
|
"valid_targets_min": 4504
|
|
},
|
|
{
|
|
"epoch": 0.2202845341899954,
|
|
"grad_norm": 0.2669020061521446,
|
|
"learning_rate": 1.7472527472527473e-05,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14459729194641113,
|
|
"step": 160,
|
|
"valid_targets_mean": 10316.1,
|
|
"valid_targets_min": 4842
|
|
},
|
|
{
|
|
"epoch": 0.22716842588343278,
|
|
"grad_norm": 0.23145661525974606,
|
|
"learning_rate": 1.8021978021978023e-05,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13346941769123077,
|
|
"step": 165,
|
|
"valid_targets_mean": 10630.5,
|
|
"valid_targets_min": 5085
|
|
},
|
|
{
|
|
"epoch": 0.23405231757687012,
|
|
"grad_norm": 0.23112572970307219,
|
|
"learning_rate": 1.8571428571428575e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14197933673858643,
|
|
"step": 170,
|
|
"valid_targets_mean": 10322.6,
|
|
"valid_targets_min": 4928
|
|
},
|
|
{
|
|
"epoch": 0.24093620927030748,
|
|
"grad_norm": 0.2707224338675443,
|
|
"learning_rate": 1.9120879120879124e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13661183416843414,
|
|
"step": 175,
|
|
"valid_targets_mean": 10297.2,
|
|
"valid_targets_min": 3888
|
|
},
|
|
{
|
|
"epoch": 0.24782010096374485,
|
|
"grad_norm": 0.2622838211701269,
|
|
"learning_rate": 1.967032967032967e-05,
|
|
"loss": 0.3882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12619677186012268,
|
|
"step": 180,
|
|
"valid_targets_mean": 8726.7,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 0.2547039926571822,
|
|
"grad_norm": 0.3018730334828126,
|
|
"learning_rate": 2.021978021978022e-05,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11439290642738342,
|
|
"step": 185,
|
|
"valid_targets_mean": 8473.2,
|
|
"valid_targets_min": 3703
|
|
},
|
|
{
|
|
"epoch": 0.2615878843506195,
|
|
"grad_norm": 0.27854199506530175,
|
|
"learning_rate": 2.0769230769230772e-05,
|
|
"loss": 0.3852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296503096818924,
|
|
"step": 190,
|
|
"valid_targets_mean": 10562.8,
|
|
"valid_targets_min": 4059
|
|
},
|
|
{
|
|
"epoch": 0.2684717760440569,
|
|
"grad_norm": 0.32965118396403853,
|
|
"learning_rate": 2.131868131868132e-05,
|
|
"loss": 0.4062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13311529159545898,
|
|
"step": 195,
|
|
"valid_targets_mean": 5732.4,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 0.27535566773749426,
|
|
"grad_norm": 0.34563256253130675,
|
|
"learning_rate": 2.1868131868131867e-05,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13766756653785706,
|
|
"step": 200,
|
|
"valid_targets_mean": 5909.7,
|
|
"valid_targets_min": 322
|
|
},
|
|
{
|
|
"epoch": 0.2822395594309316,
|
|
"grad_norm": 3.800335015027523,
|
|
"learning_rate": 2.241758241758242e-05,
|
|
"loss": 0.6701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3517041504383087,
|
|
"step": 205,
|
|
"valid_targets_mean": 3622.3,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 0.289123451124369,
|
|
"grad_norm": 1.2361781661013793,
|
|
"learning_rate": 2.296703296703297e-05,
|
|
"loss": 0.9022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3068923056125641,
|
|
"step": 210,
|
|
"valid_targets_mean": 4234.8,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 0.29600734281780633,
|
|
"grad_norm": 0.5360103186756147,
|
|
"learning_rate": 2.3516483516483518e-05,
|
|
"loss": 0.7856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1845906674861908,
|
|
"step": 215,
|
|
"valid_targets_mean": 3496.8,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 0.30289123451124367,
|
|
"grad_norm": 0.4462165113821834,
|
|
"learning_rate": 2.4065934065934067e-05,
|
|
"loss": 0.8366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27402248978614807,
|
|
"step": 220,
|
|
"valid_targets_mean": 4790.7,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 0.30977512620468106,
|
|
"grad_norm": 0.4075124579712277,
|
|
"learning_rate": 2.461538461538462e-05,
|
|
"loss": 0.7601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29145288467407227,
|
|
"step": 225,
|
|
"valid_targets_mean": 4529.3,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 0.3166590178981184,
|
|
"grad_norm": 0.37093297756645077,
|
|
"learning_rate": 2.5164835164835165e-05,
|
|
"loss": 0.7176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21884280443191528,
|
|
"step": 230,
|
|
"valid_targets_mean": 3307.1,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 0.32354290959155574,
|
|
"grad_norm": 0.3496546260867017,
|
|
"learning_rate": 2.5714285714285718e-05,
|
|
"loss": 0.7248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23322607576847076,
|
|
"step": 235,
|
|
"valid_targets_mean": 4017.8,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 0.33042680128499313,
|
|
"grad_norm": 0.3187272125501439,
|
|
"learning_rate": 2.6263736263736267e-05,
|
|
"loss": 0.6923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14147228002548218,
|
|
"step": 240,
|
|
"valid_targets_mean": 1564.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.33731069297843047,
|
|
"grad_norm": 0.28962426263990193,
|
|
"learning_rate": 2.6813186813186813e-05,
|
|
"loss": 0.7099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24177516996860504,
|
|
"step": 245,
|
|
"valid_targets_mean": 4694.7,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 0.3441945846718678,
|
|
"grad_norm": 0.35220902165497164,
|
|
"learning_rate": 2.7362637362637365e-05,
|
|
"loss": 0.6842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731242775917053,
|
|
"step": 250,
|
|
"valid_targets_mean": 4543.5,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.3510784763653052,
|
|
"grad_norm": 0.3316735178709283,
|
|
"learning_rate": 2.7912087912087915e-05,
|
|
"loss": 0.6671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2385597825050354,
|
|
"step": 255,
|
|
"valid_targets_mean": 3509.4,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 0.35796236805874254,
|
|
"grad_norm": 0.30655273710875297,
|
|
"learning_rate": 2.8461538461538464e-05,
|
|
"loss": 0.6768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1650443971157074,
|
|
"step": 260,
|
|
"valid_targets_mean": 3599.4,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 0.3648462597521799,
|
|
"grad_norm": 0.34083054525967005,
|
|
"learning_rate": 2.9010989010989013e-05,
|
|
"loss": 0.639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11122392863035202,
|
|
"step": 265,
|
|
"valid_targets_mean": 980.5,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 0.3717301514456173,
|
|
"grad_norm": 0.3124276244867637,
|
|
"learning_rate": 2.9560439560439565e-05,
|
|
"loss": 0.6749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24359172582626343,
|
|
"step": 270,
|
|
"valid_targets_mean": 4849.9,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 0.3786140431390546,
|
|
"grad_norm": 0.3181564926526228,
|
|
"learning_rate": 3.010989010989011e-05,
|
|
"loss": 0.6522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2207922339439392,
|
|
"step": 275,
|
|
"valid_targets_mean": 4380.0,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 0.38549793483249195,
|
|
"grad_norm": 0.37007524888913634,
|
|
"learning_rate": 3.065934065934067e-05,
|
|
"loss": 0.6879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25520047545433044,
|
|
"step": 280,
|
|
"valid_targets_mean": 3904.7,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 0.39238182652592934,
|
|
"grad_norm": 0.30196455505898595,
|
|
"learning_rate": 3.120879120879121e-05,
|
|
"loss": 0.6619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1992959976196289,
|
|
"step": 285,
|
|
"valid_targets_mean": 3962.2,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 0.3992657182193667,
|
|
"grad_norm": 0.37081909199931196,
|
|
"learning_rate": 3.175824175824176e-05,
|
|
"loss": 0.6366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13232207298278809,
|
|
"step": 290,
|
|
"valid_targets_mean": 1099.3,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 0.406149609912804,
|
|
"grad_norm": 0.3104836906355742,
|
|
"learning_rate": 3.230769230769231e-05,
|
|
"loss": 0.6609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24800434708595276,
|
|
"step": 295,
|
|
"valid_targets_mean": 5050.1,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 0.4130335016062414,
|
|
"grad_norm": 0.28643747421830573,
|
|
"learning_rate": 3.285714285714286e-05,
|
|
"loss": 0.6472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2316061109304428,
|
|
"step": 300,
|
|
"valid_targets_mean": 5151.1,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 0.41991739329967875,
|
|
"grad_norm": 0.32511728531116485,
|
|
"learning_rate": 3.340659340659341e-05,
|
|
"loss": 0.4525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09048983454704285,
|
|
"step": 305,
|
|
"valid_targets_mean": 5515.5,
|
|
"valid_targets_min": 1936
|
|
},
|
|
{
|
|
"epoch": 0.4268012849931161,
|
|
"grad_norm": 0.30988433634655266,
|
|
"learning_rate": 3.3956043956043956e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07446835190057755,
|
|
"step": 310,
|
|
"valid_targets_mean": 5127.6,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 0.4336851766865535,
|
|
"grad_norm": 0.2617016938161096,
|
|
"learning_rate": 3.450549450549451e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06975876539945602,
|
|
"step": 315,
|
|
"valid_targets_mean": 5021.8,
|
|
"valid_targets_min": 2808
|
|
},
|
|
{
|
|
"epoch": 0.4405690683799908,
|
|
"grad_norm": 0.2685638299614147,
|
|
"learning_rate": 3.505494505494506e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07339228689670563,
|
|
"step": 320,
|
|
"valid_targets_mean": 5327.4,
|
|
"valid_targets_min": 2771
|
|
},
|
|
{
|
|
"epoch": 0.44745296007342816,
|
|
"grad_norm": 0.2621136266965172,
|
|
"learning_rate": 3.56043956043956e-05,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08050457388162613,
|
|
"step": 325,
|
|
"valid_targets_mean": 5568.0,
|
|
"valid_targets_min": 3219
|
|
},
|
|
{
|
|
"epoch": 0.45433685176686556,
|
|
"grad_norm": 0.28262871610816387,
|
|
"learning_rate": 3.615384615384616e-05,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0704975575208664,
|
|
"step": 330,
|
|
"valid_targets_mean": 5279.6,
|
|
"valid_targets_min": 3339
|
|
},
|
|
{
|
|
"epoch": 0.4612207434603029,
|
|
"grad_norm": 0.23957182507594812,
|
|
"learning_rate": 3.67032967032967e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06967966258525848,
|
|
"step": 335,
|
|
"valid_targets_mean": 5462.8,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 0.46810463515374023,
|
|
"grad_norm": 0.2590213889061684,
|
|
"learning_rate": 3.725274725274726e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06722544878721237,
|
|
"step": 340,
|
|
"valid_targets_mean": 5447.9,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 0.47498852684717763,
|
|
"grad_norm": 0.2786922148808639,
|
|
"learning_rate": 3.7802197802197807e-05,
|
|
"loss": 0.2075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07029011845588684,
|
|
"step": 345,
|
|
"valid_targets_mean": 5365.3,
|
|
"valid_targets_min": 2902
|
|
},
|
|
{
|
|
"epoch": 0.48187241854061497,
|
|
"grad_norm": 0.24349634807812348,
|
|
"learning_rate": 3.8351648351648356e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.063286691904068,
|
|
"step": 350,
|
|
"valid_targets_mean": 4987.0,
|
|
"valid_targets_min": 3287
|
|
},
|
|
{
|
|
"epoch": 0.4887563102340523,
|
|
"grad_norm": 0.22666899467248464,
|
|
"learning_rate": 3.8901098901098905e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05999894440174103,
|
|
"step": 355,
|
|
"valid_targets_mean": 5047.8,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 0.4956402019274897,
|
|
"grad_norm": 0.2326604668633739,
|
|
"learning_rate": 3.9450549450549454e-05,
|
|
"loss": 0.1993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07216238975524902,
|
|
"step": 360,
|
|
"valid_targets_mean": 5323.9,
|
|
"valid_targets_min": 2401
|
|
},
|
|
{
|
|
"epoch": 0.502524093620927,
|
|
"grad_norm": 0.23872575419905823,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06324415653944016,
|
|
"step": 365,
|
|
"valid_targets_mean": 5421.0,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 0.5094079853143644,
|
|
"grad_norm": 0.24034930967082238,
|
|
"learning_rate": 3.999976939016174e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059262461960315704,
|
|
"step": 370,
|
|
"valid_targets_mean": 5157.3,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 0.5162918770078018,
|
|
"grad_norm": 0.2633330384697458,
|
|
"learning_rate": 3.999907756596503e-05,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06677208840847015,
|
|
"step": 375,
|
|
"valid_targets_mean": 5285.9,
|
|
"valid_targets_min": 2725
|
|
},
|
|
{
|
|
"epoch": 0.523175768701239,
|
|
"grad_norm": 0.20733514452513443,
|
|
"learning_rate": 3.999792454336403e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06367021799087524,
|
|
"step": 380,
|
|
"valid_targets_mean": 4471.1,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 0.5300596603946764,
|
|
"grad_norm": 0.22923343428156906,
|
|
"learning_rate": 3.999631034894857e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06512193381786346,
|
|
"step": 385,
|
|
"valid_targets_mean": 5323.2,
|
|
"valid_targets_min": 3113
|
|
},
|
|
{
|
|
"epoch": 0.5369435520881138,
|
|
"grad_norm": 0.21779536364748414,
|
|
"learning_rate": 3.999423501994356e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06609879434108734,
|
|
"step": 390,
|
|
"valid_targets_mean": 5507.8,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 0.5438274437815511,
|
|
"grad_norm": 0.24992019658429662,
|
|
"learning_rate": 3.999169860420813e-05,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06085608899593353,
|
|
"step": 395,
|
|
"valid_targets_mean": 5065.1,
|
|
"valid_targets_min": 2739
|
|
},
|
|
{
|
|
"epoch": 0.5507113354749885,
|
|
"grad_norm": 0.23256672963489214,
|
|
"learning_rate": 3.9988701160234525e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0652182325720787,
|
|
"step": 400,
|
|
"valid_targets_mean": 5267.3,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 0.5575952271684259,
|
|
"grad_norm": 0.20980766039558524,
|
|
"learning_rate": 3.998524275714675e-05,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06631162762641907,
|
|
"step": 405,
|
|
"valid_targets_mean": 5350.7,
|
|
"valid_targets_min": 2809
|
|
},
|
|
{
|
|
"epoch": 0.5644791188618632,
|
|
"grad_norm": 0.47517037709324333,
|
|
"learning_rate": 3.998132347469898e-05,
|
|
"loss": 0.5104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19077736139297485,
|
|
"step": 410,
|
|
"valid_targets_mean": 4092.8,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 0.5713630105553006,
|
|
"grad_norm": 0.33682213693847374,
|
|
"learning_rate": 3.997694340327373e-05,
|
|
"loss": 0.5536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18831773102283478,
|
|
"step": 415,
|
|
"valid_targets_mean": 3959.6,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 0.578246902248738,
|
|
"grad_norm": 0.35226360811084995,
|
|
"learning_rate": 3.9972102643879754e-05,
|
|
"loss": 0.5528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17390920221805573,
|
|
"step": 420,
|
|
"valid_targets_mean": 3500.9,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 0.5851307939421753,
|
|
"grad_norm": 0.3482865590559308,
|
|
"learning_rate": 3.996680130814972e-05,
|
|
"loss": 0.5426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17027497291564941,
|
|
"step": 425,
|
|
"valid_targets_mean": 3293.8,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 0.5920146856356127,
|
|
"grad_norm": 0.2975124836077972,
|
|
"learning_rate": 3.996103951833766e-05,
|
|
"loss": 0.5358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1929185539484024,
|
|
"step": 430,
|
|
"valid_targets_mean": 4374.8,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 0.59889857732905,
|
|
"grad_norm": 0.3333015107750898,
|
|
"learning_rate": 3.995481740731609e-05,
|
|
"loss": 0.5545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15541371703147888,
|
|
"step": 435,
|
|
"valid_targets_mean": 2742.0,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 0.6057824690224873,
|
|
"grad_norm": 0.36064760501351867,
|
|
"learning_rate": 3.994813511857304e-05,
|
|
"loss": 0.528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17974039912223816,
|
|
"step": 440,
|
|
"valid_targets_mean": 2944.6,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 0.6126663607159247,
|
|
"grad_norm": 0.33429331281353,
|
|
"learning_rate": 3.994099280620865e-05,
|
|
"loss": 0.4993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10126718133687973,
|
|
"step": 445,
|
|
"valid_targets_mean": 1390.7,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 0.6195502524093621,
|
|
"grad_norm": 0.2999415809286727,
|
|
"learning_rate": 3.9933390634931674e-05,
|
|
"loss": 0.5597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20320364832878113,
|
|
"step": 450,
|
|
"valid_targets_mean": 5282.3,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 0.6264341441027994,
|
|
"grad_norm": 0.3059815328895148,
|
|
"learning_rate": 3.992532878005565e-05,
|
|
"loss": 0.5185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21273663640022278,
|
|
"step": 455,
|
|
"valid_targets_mean": 5102.7,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.6333180357962368,
|
|
"grad_norm": 0.30974645010152174,
|
|
"learning_rate": 3.9916807427494895e-05,
|
|
"loss": 0.541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17771270871162415,
|
|
"step": 460,
|
|
"valid_targets_mean": 4471.8,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 0.6402019274896742,
|
|
"grad_norm": 0.3374419937482823,
|
|
"learning_rate": 3.9907826773760175e-05,
|
|
"loss": 0.5126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18608638644218445,
|
|
"step": 465,
|
|
"valid_targets_mean": 3015.3,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 0.6470858191831115,
|
|
"grad_norm": 0.3305201729547266,
|
|
"learning_rate": 3.98983870259542e-05,
|
|
"loss": 0.5091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16256271302700043,
|
|
"step": 470,
|
|
"valid_targets_mean": 4168.5,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 0.6539697108765489,
|
|
"grad_norm": 0.314999512758721,
|
|
"learning_rate": 3.988848840176685e-05,
|
|
"loss": 0.5552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19044813513755798,
|
|
"step": 475,
|
|
"valid_targets_mean": 4139.7,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 0.6608536025699863,
|
|
"grad_norm": 0.3304218478626873,
|
|
"learning_rate": 3.987813112947013e-05,
|
|
"loss": 0.5305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1605474352836609,
|
|
"step": 480,
|
|
"valid_targets_mean": 3524.7,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.6677374942634235,
|
|
"grad_norm": 0.5399772854729523,
|
|
"learning_rate": 3.986731544791293e-05,
|
|
"loss": 0.5285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13693946599960327,
|
|
"step": 485,
|
|
"valid_targets_mean": 974.1,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 0.6746213859568609,
|
|
"grad_norm": 0.29344221922807473,
|
|
"learning_rate": 3.9856041606515514e-05,
|
|
"loss": 0.5146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15577596426010132,
|
|
"step": 490,
|
|
"valid_targets_mean": 4034.9,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 0.6815052776502983,
|
|
"grad_norm": 0.30625888942494645,
|
|
"learning_rate": 3.984430986526375e-05,
|
|
"loss": 0.5226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18413081765174866,
|
|
"step": 495,
|
|
"valid_targets_mean": 4597.8,
|
|
"valid_targets_min": 1386
|
|
},
|
|
{
|
|
"epoch": 0.6883891693437356,
|
|
"grad_norm": 0.35972444033240863,
|
|
"learning_rate": 3.983212049470313e-05,
|
|
"loss": 0.5065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14991965889930725,
|
|
"step": 500,
|
|
"valid_targets_mean": 3344.0,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 0.695273061037173,
|
|
"grad_norm": 0.2963430462822694,
|
|
"learning_rate": 3.981947377593254e-05,
|
|
"loss": 0.5326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25120216608047485,
|
|
"step": 505,
|
|
"valid_targets_mean": 4781.9,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 0.7021569527306104,
|
|
"grad_norm": 0.2728001352410558,
|
|
"learning_rate": 3.9806370000597745e-05,
|
|
"loss": 0.5083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14410412311553955,
|
|
"step": 510,
|
|
"valid_targets_mean": 4052.2,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 0.7090408444240477,
|
|
"grad_norm": 0.29626178481350945,
|
|
"learning_rate": 3.9792809470884705e-05,
|
|
"loss": 0.4415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09617213904857635,
|
|
"step": 515,
|
|
"valid_targets_mean": 5513.2,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 0.7159247361174851,
|
|
"grad_norm": 0.28371396859472753,
|
|
"learning_rate": 3.977879249951258e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1328997015953064,
|
|
"step": 520,
|
|
"valid_targets_mean": 6674.3,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 0.7228086278109225,
|
|
"grad_norm": 0.260103490841822,
|
|
"learning_rate": 3.976431940972651e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11287006735801697,
|
|
"step": 525,
|
|
"valid_targets_mean": 5004.6,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 0.7296925195043598,
|
|
"grad_norm": 0.2368718325327304,
|
|
"learning_rate": 3.974939053529019e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11172248423099518,
|
|
"step": 530,
|
|
"valid_targets_mean": 6059.3,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 0.7365764111977972,
|
|
"grad_norm": 0.2320176040482457,
|
|
"learning_rate": 3.9734006220478156e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055815745145082474,
|
|
"step": 535,
|
|
"valid_targets_mean": 1604.8,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.7434603028912345,
|
|
"grad_norm": 0.23911921052474808,
|
|
"learning_rate": 3.971816682006783e-05,
|
|
"loss": 0.3444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09215737134218216,
|
|
"step": 540,
|
|
"valid_targets_mean": 6672.8,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.7503441945846718,
|
|
"grad_norm": 0.23373511921257778,
|
|
"learning_rate": 3.9701872699331384e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10297545790672302,
|
|
"step": 545,
|
|
"valid_targets_mean": 5849.8,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 0.7572280862781092,
|
|
"grad_norm": 0.2224554796838409,
|
|
"learning_rate": 3.9685124234027264e-05,
|
|
"loss": 0.3309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11439158767461777,
|
|
"step": 550,
|
|
"valid_targets_mean": 6783.3,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 0.7641119779715466,
|
|
"grad_norm": 0.20688218598394778,
|
|
"learning_rate": 3.966792181039156e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09738504886627197,
|
|
"step": 555,
|
|
"valid_targets_mean": 6440.8,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 0.7709958696649839,
|
|
"grad_norm": 0.2515540099960665,
|
|
"learning_rate": 3.965026582512908e-05,
|
|
"loss": 0.3374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17061451077461243,
|
|
"step": 560,
|
|
"valid_targets_mean": 8714.9,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 0.7778797613584213,
|
|
"grad_norm": 0.24202712344703942,
|
|
"learning_rate": 3.963215668540423e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08065859973430634,
|
|
"step": 565,
|
|
"valid_targets_mean": 6242.6,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 0.7847636530518587,
|
|
"grad_norm": 0.20728694668286363,
|
|
"learning_rate": 3.9613594808831565e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332765817642212,
|
|
"step": 570,
|
|
"valid_targets_mean": 7600.2,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 0.791647544745296,
|
|
"grad_norm": 0.22011068416498333,
|
|
"learning_rate": 3.959458062346624e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07994946092367172,
|
|
"step": 575,
|
|
"valid_targets_mean": 6031.7,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 0.7985314364387334,
|
|
"grad_norm": 0.20100710625863627,
|
|
"learning_rate": 3.957511456779407e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09593126177787781,
|
|
"step": 580,
|
|
"valid_targets_mean": 6480.3,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 0.8054153281321708,
|
|
"grad_norm": 0.204267562824806,
|
|
"learning_rate": 3.9555197090721446e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08679480850696564,
|
|
"step": 585,
|
|
"valid_targets_mean": 6095.1,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 0.812299219825608,
|
|
"grad_norm": 0.20762239543542987,
|
|
"learning_rate": 3.9534828651564987e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09902271628379822,
|
|
"step": 590,
|
|
"valid_targets_mean": 6487.8,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 0.8191831115190454,
|
|
"grad_norm": 0.24753637813892274,
|
|
"learning_rate": 3.951400972004094e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08057793974876404,
|
|
"step": 595,
|
|
"valid_targets_mean": 1157.9,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 0.8260670032124828,
|
|
"grad_norm": 0.21936291447133946,
|
|
"learning_rate": 3.949274077625435e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10192103683948517,
|
|
"step": 600,
|
|
"valid_targets_mean": 6671.6,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 0.8329508949059201,
|
|
"grad_norm": 0.22563637490059102,
|
|
"learning_rate": 3.947102231068798e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12188886851072311,
|
|
"step": 605,
|
|
"valid_targets_mean": 7020.9,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 0.8398347865993575,
|
|
"grad_norm": 0.39524725243572917,
|
|
"learning_rate": 3.944885482419102e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10121281445026398,
|
|
"step": 610,
|
|
"valid_targets_mean": 3855.2,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 0.8467186782927949,
|
|
"grad_norm": 0.2991875521643564,
|
|
"learning_rate": 3.942623882796751e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10876717418432236,
|
|
"step": 615,
|
|
"valid_targets_mean": 4457.2,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 0.8536025699862322,
|
|
"grad_norm": 0.2875325088708275,
|
|
"learning_rate": 3.940317484356459e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09426983445882797,
|
|
"step": 620,
|
|
"valid_targets_mean": 3784.5,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 0.8604864616796696,
|
|
"grad_norm": 0.26938998781457324,
|
|
"learning_rate": 3.93796634028604e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09109891951084137,
|
|
"step": 625,
|
|
"valid_targets_mean": 3845.3,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 0.867370353373107,
|
|
"grad_norm": 0.2668306298041638,
|
|
"learning_rate": 3.935570504805192e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10715287178754807,
|
|
"step": 630,
|
|
"valid_targets_mean": 4422.9,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 0.8742542450665443,
|
|
"grad_norm": 0.2619060097050138,
|
|
"learning_rate": 3.933130033164238e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09461759775876999,
|
|
"step": 635,
|
|
"valid_targets_mean": 4114.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 0.8811381367599816,
|
|
"grad_norm": 0.29246965248688367,
|
|
"learning_rate": 3.930644981642854e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0922466367483139,
|
|
"step": 640,
|
|
"valid_targets_mean": 3997.7,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 0.888022028453419,
|
|
"grad_norm": 0.27956681063543165,
|
|
"learning_rate": 3.928115407548774e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07921630144119263,
|
|
"step": 645,
|
|
"valid_targets_mean": 3248.9,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 0.8949059201468563,
|
|
"grad_norm": 0.3100537797560849,
|
|
"learning_rate": 3.925541369216465e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09321758151054382,
|
|
"step": 650,
|
|
"valid_targets_mean": 4329.2,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 0.9017898118402937,
|
|
"grad_norm": 0.23952771554338054,
|
|
"learning_rate": 3.9229229260057824e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08680932223796844,
|
|
"step": 655,
|
|
"valid_targets_mean": 3712.8,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 0.9086737035337311,
|
|
"grad_norm": 0.2475904156940587,
|
|
"learning_rate": 3.920260138300604e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07667960226535797,
|
|
"step": 660,
|
|
"valid_targets_mean": 3789.1,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 0.9155575952271684,
|
|
"grad_norm": 0.26171304487950436,
|
|
"learning_rate": 3.917553067507433e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09974247217178345,
|
|
"step": 665,
|
|
"valid_targets_mean": 3968.0,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 0.9224414869206058,
|
|
"grad_norm": 0.2404358549592192,
|
|
"learning_rate": 3.914801776053986e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07945016771554947,
|
|
"step": 670,
|
|
"valid_targets_mean": 3647.0,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 0.9293253786140432,
|
|
"grad_norm": 0.2460827868496975,
|
|
"learning_rate": 3.91200632738775e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07959331572055817,
|
|
"step": 675,
|
|
"valid_targets_mean": 3824.6,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.9362092703074805,
|
|
"grad_norm": 0.24390454380801813,
|
|
"learning_rate": 3.9091667859745225e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08360792696475983,
|
|
"step": 680,
|
|
"valid_targets_mean": 3890.2,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 0.9430931620009179,
|
|
"grad_norm": 0.4337940381759169,
|
|
"learning_rate": 3.9062832172969206e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11606889218091965,
|
|
"step": 685,
|
|
"valid_targets_mean": 7327.0,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 0.9499770536943553,
|
|
"grad_norm": 0.30889787404915514,
|
|
"learning_rate": 3.903355687852876e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1101982593536377,
|
|
"step": 690,
|
|
"valid_targets_mean": 7614.4,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 0.9568609453877925,
|
|
"grad_norm": 0.2542265522759153,
|
|
"learning_rate": 3.900384265154098e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09973853081464767,
|
|
"step": 695,
|
|
"valid_targets_mean": 7250.6,
|
|
"valid_targets_min": 2163
|
|
},
|
|
{
|
|
"epoch": 0.9637448370812299,
|
|
"grad_norm": 0.2901157227890436,
|
|
"learning_rate": 3.897369017724517e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10261671245098114,
|
|
"step": 700,
|
|
"valid_targets_mean": 6762.3,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.9706287287746673,
|
|
"grad_norm": 0.25100553909772455,
|
|
"learning_rate": 3.894310015098705e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11131678521633148,
|
|
"step": 705,
|
|
"valid_targets_mean": 6911.7,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 0.9775126204681046,
|
|
"grad_norm": 0.25326241165531344,
|
|
"learning_rate": 3.8912073278202716e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10062243789434433,
|
|
"step": 710,
|
|
"valid_targets_mean": 7406.8,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 0.984396512161542,
|
|
"grad_norm": 0.24214203836987996,
|
|
"learning_rate": 3.88806102744024e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09995930641889572,
|
|
"step": 715,
|
|
"valid_targets_mean": 7208.4,
|
|
"valid_targets_min": 3982
|
|
},
|
|
{
|
|
"epoch": 0.9912804038549794,
|
|
"grad_norm": 0.20629658879201448,
|
|
"learning_rate": 3.8848711865153906e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08350709080696106,
|
|
"step": 720,
|
|
"valid_targets_mean": 6352.4,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 0.9981642955484167,
|
|
"grad_norm": 0.22921521139927106,
|
|
"learning_rate": 3.881637878606594e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09085282683372498,
|
|
"step": 725,
|
|
"valid_targets_mean": 6267.2,
|
|
"valid_targets_min": 102
|
|
},
|
|
{
|
|
"epoch": 1.0041303350160624,
|
|
"grad_norm": 1.2507481131485862,
|
|
"learning_rate": 3.878361178277111e-05,
|
|
"loss": 0.5635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2309202402830124,
|
|
"step": 730,
|
|
"valid_targets_mean": 7854.0,
|
|
"valid_targets_min": 2676
|
|
},
|
|
{
|
|
"epoch": 1.0110142267094997,
|
|
"grad_norm": 0.7360689628527337,
|
|
"learning_rate": 3.875041161090875e-05,
|
|
"loss": 0.564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16059252619743347,
|
|
"step": 735,
|
|
"valid_targets_mean": 6740.2,
|
|
"valid_targets_min": 3017
|
|
},
|
|
{
|
|
"epoch": 1.0178981184029372,
|
|
"grad_norm": 0.49931300255816746,
|
|
"learning_rate": 3.8716779036107494e-05,
|
|
"loss": 0.5041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1491689383983612,
|
|
"step": 740,
|
|
"valid_targets_mean": 7294.2,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 1.0247820100963745,
|
|
"grad_norm": 0.3505316965796473,
|
|
"learning_rate": 3.8682714833967606e-05,
|
|
"loss": 0.4711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16744652390480042,
|
|
"step": 745,
|
|
"valid_targets_mean": 7212.2,
|
|
"valid_targets_min": 2554
|
|
},
|
|
{
|
|
"epoch": 1.0316659017898118,
|
|
"grad_norm": 0.29590178804551,
|
|
"learning_rate": 3.8648219790043086e-05,
|
|
"loss": 0.4479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15151597559452057,
|
|
"step": 750,
|
|
"valid_targets_mean": 7016.0,
|
|
"valid_targets_min": 2429
|
|
},
|
|
{
|
|
"epoch": 1.0385497934832493,
|
|
"grad_norm": 0.263662987853797,
|
|
"learning_rate": 3.86132946998236e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13648268580436707,
|
|
"step": 755,
|
|
"valid_targets_mean": 7897.7,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 1.0454336851766866,
|
|
"grad_norm": 0.27437828717361396,
|
|
"learning_rate": 3.8577940368716076e-05,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15187282860279083,
|
|
"step": 760,
|
|
"valid_targets_mean": 7362.4,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 1.0523175768701238,
|
|
"grad_norm": 0.27325212819692263,
|
|
"learning_rate": 3.8542157612026184e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13746723532676697,
|
|
"step": 765,
|
|
"valid_targets_mean": 6896.6,
|
|
"valid_targets_min": 2522
|
|
},
|
|
{
|
|
"epoch": 1.0592014685635613,
|
|
"grad_norm": 0.23473196308314714,
|
|
"learning_rate": 3.850594725493949e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1299147754907608,
|
|
"step": 770,
|
|
"valid_targets_mean": 7470.7,
|
|
"valid_targets_min": 2790
|
|
},
|
|
{
|
|
"epoch": 1.0660853602569986,
|
|
"grad_norm": 0.28555087309472654,
|
|
"learning_rate": 3.846931013250246e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14030131697654724,
|
|
"step": 775,
|
|
"valid_targets_mean": 8266.8,
|
|
"valid_targets_min": 3307
|
|
},
|
|
{
|
|
"epoch": 1.072969251950436,
|
|
"grad_norm": 0.23621951887818643,
|
|
"learning_rate": 3.8432247089603167e-05,
|
|
"loss": 0.4042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11525264382362366,
|
|
"step": 780,
|
|
"valid_targets_mean": 8286.0,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 1.0798531436438734,
|
|
"grad_norm": 0.21450593319657246,
|
|
"learning_rate": 3.8394758980951864e-05,
|
|
"loss": 0.3895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13908235728740692,
|
|
"step": 785,
|
|
"valid_targets_mean": 9267.8,
|
|
"valid_targets_min": 5215
|
|
},
|
|
{
|
|
"epoch": 1.0867370353373107,
|
|
"grad_norm": 0.23844000544208796,
|
|
"learning_rate": 3.83568466710612e-05,
|
|
"loss": 0.3801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12820349633693695,
|
|
"step": 790,
|
|
"valid_targets_mean": 9206.4,
|
|
"valid_targets_min": 3552
|
|
},
|
|
{
|
|
"epoch": 1.093620927030748,
|
|
"grad_norm": 0.2103809566833362,
|
|
"learning_rate": 3.8318511034226344e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13335391879081726,
|
|
"step": 795,
|
|
"valid_targets_mean": 9805.0,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 1.1005048187241855,
|
|
"grad_norm": 0.23124182975495058,
|
|
"learning_rate": 3.8279752954504814e-05,
|
|
"loss": 0.3748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1326015442609787,
|
|
"step": 800,
|
|
"valid_targets_mean": 9971.8,
|
|
"valid_targets_min": 3896
|
|
},
|
|
{
|
|
"epoch": 1.1073887104176228,
|
|
"grad_norm": 0.23438818276332576,
|
|
"learning_rate": 3.824057332569604e-05,
|
|
"loss": 0.3691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12420172989368439,
|
|
"step": 805,
|
|
"valid_targets_mean": 9100.1,
|
|
"valid_targets_min": 3598
|
|
},
|
|
{
|
|
"epoch": 1.11427260211106,
|
|
"grad_norm": 0.24214472926654473,
|
|
"learning_rate": 3.820097305132081e-05,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12364979088306427,
|
|
"step": 810,
|
|
"valid_targets_mean": 9095.8,
|
|
"valid_targets_min": 3870
|
|
},
|
|
{
|
|
"epoch": 1.1211564938044976,
|
|
"grad_norm": 0.2218011971147791,
|
|
"learning_rate": 3.8160953044600426e-05,
|
|
"loss": 0.3703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11890929937362671,
|
|
"step": 815,
|
|
"valid_targets_mean": 8719.1,
|
|
"valid_targets_min": 3803
|
|
},
|
|
{
|
|
"epoch": 1.1280403854979348,
|
|
"grad_norm": 0.2563084709321499,
|
|
"learning_rate": 3.81205142284356e-05,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12726369500160217,
|
|
"step": 820,
|
|
"valid_targets_mean": 9952.8,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 1.1349242771913721,
|
|
"grad_norm": 0.22881759734160312,
|
|
"learning_rate": 3.807965753538522e-05,
|
|
"loss": 0.3648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12239524722099304,
|
|
"step": 825,
|
|
"valid_targets_mean": 9790.2,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 1.1418081688848096,
|
|
"grad_norm": 0.20713165564079833,
|
|
"learning_rate": 3.8038383907644834e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11979646980762482,
|
|
"step": 830,
|
|
"valid_targets_mean": 9145.0,
|
|
"valid_targets_min": 3036
|
|
},
|
|
{
|
|
"epoch": 1.148692060578247,
|
|
"grad_norm": 0.22499311166594924,
|
|
"learning_rate": 3.7996694297024895e-05,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1096620187163353,
|
|
"step": 835,
|
|
"valid_targets_mean": 9393.3,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.1555759522716842,
|
|
"grad_norm": 0.21590056824128998,
|
|
"learning_rate": 3.795458966492884e-05,
|
|
"loss": 0.3581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10937801003456116,
|
|
"step": 840,
|
|
"valid_targets_mean": 8753.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.1624598439651217,
|
|
"grad_norm": 0.22742334578308168,
|
|
"learning_rate": 3.7912070982330906e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1197114810347557,
|
|
"step": 845,
|
|
"valid_targets_mean": 9578.5,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 1.169343735658559,
|
|
"grad_norm": 0.2224738441211927,
|
|
"learning_rate": 3.786913922975375e-05,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11998347193002701,
|
|
"step": 850,
|
|
"valid_targets_mean": 9520.8,
|
|
"valid_targets_min": 4270
|
|
},
|
|
{
|
|
"epoch": 1.1762276273519963,
|
|
"grad_norm": 0.22538381825701656,
|
|
"learning_rate": 3.782579539724582e-05,
|
|
"loss": 0.3621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12625733017921448,
|
|
"step": 855,
|
|
"valid_targets_mean": 10044.1,
|
|
"valid_targets_min": 5057
|
|
},
|
|
{
|
|
"epoch": 1.1831115190454338,
|
|
"grad_norm": 0.21800136414016363,
|
|
"learning_rate": 3.7782040484358547e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1133735179901123,
|
|
"step": 860,
|
|
"valid_targets_mean": 9430.1,
|
|
"valid_targets_min": 4478
|
|
},
|
|
{
|
|
"epoch": 1.189995410738871,
|
|
"grad_norm": 0.21734230529243184,
|
|
"learning_rate": 3.773787550012325e-05,
|
|
"loss": 0.3506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11655744165182114,
|
|
"step": 865,
|
|
"valid_targets_mean": 9906.8,
|
|
"valid_targets_min": 4602
|
|
},
|
|
{
|
|
"epoch": 1.1968793024323083,
|
|
"grad_norm": 0.2263363296427952,
|
|
"learning_rate": 3.7693301463027935e-05,
|
|
"loss": 0.3507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12009304761886597,
|
|
"step": 870,
|
|
"valid_targets_mean": 9816.9,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.2037631941257458,
|
|
"grad_norm": 0.23210893100009392,
|
|
"learning_rate": 3.764831940099374e-05,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11706417053937912,
|
|
"step": 875,
|
|
"valid_targets_mean": 9615.7,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 1.2106470858191831,
|
|
"grad_norm": 0.1985963493513162,
|
|
"learning_rate": 3.760293035135127e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11027579009532928,
|
|
"step": 880,
|
|
"valid_targets_mean": 9669.4,
|
|
"valid_targets_min": 4373
|
|
},
|
|
{
|
|
"epoch": 1.2175309775126204,
|
|
"grad_norm": 0.2247655964238049,
|
|
"learning_rate": 3.755713536081667e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12357079982757568,
|
|
"step": 885,
|
|
"valid_targets_mean": 10030.3,
|
|
"valid_targets_min": 4760
|
|
},
|
|
{
|
|
"epoch": 1.224414869206058,
|
|
"grad_norm": 0.2459045345695429,
|
|
"learning_rate": 3.751093548546748e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11696697026491165,
|
|
"step": 890,
|
|
"valid_targets_mean": 9881.2,
|
|
"valid_targets_min": 4785
|
|
},
|
|
{
|
|
"epoch": 1.2312987608994952,
|
|
"grad_norm": 0.22797315282104857,
|
|
"learning_rate": 3.7464331790718265e-05,
|
|
"loss": 0.3492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11382002383470535,
|
|
"step": 895,
|
|
"valid_targets_mean": 9669.7,
|
|
"valid_targets_min": 3932
|
|
},
|
|
{
|
|
"epoch": 1.2381826525929325,
|
|
"grad_norm": 0.19552476356000423,
|
|
"learning_rate": 3.7417325351296086e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11515460908412933,
|
|
"step": 900,
|
|
"valid_targets_mean": 9425.6,
|
|
"valid_targets_min": 3063
|
|
},
|
|
{
|
|
"epoch": 1.24506654428637,
|
|
"grad_norm": 0.21705318540183,
|
|
"learning_rate": 3.736991725121567e-05,
|
|
"loss": 0.3454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1254062056541443,
|
|
"step": 905,
|
|
"valid_targets_mean": 9942.2,
|
|
"valid_targets_min": 3468
|
|
},
|
|
{
|
|
"epoch": 1.2519504359798073,
|
|
"grad_norm": 0.21696447157131418,
|
|
"learning_rate": 3.732210858375447e-05,
|
|
"loss": 0.3399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11086012423038483,
|
|
"step": 910,
|
|
"valid_targets_mean": 9693.8,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 1.2588343276732445,
|
|
"grad_norm": 0.2169988687602966,
|
|
"learning_rate": 3.727390045142737e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12142230570316315,
|
|
"step": 915,
|
|
"valid_targets_mean": 10011.0,
|
|
"valid_targets_min": 3774
|
|
},
|
|
{
|
|
"epoch": 1.2657182193666818,
|
|
"grad_norm": 0.22548894074150583,
|
|
"learning_rate": 3.722529396596133e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08062661439180374,
|
|
"step": 920,
|
|
"valid_targets_mean": 5209.4,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 1.2726021110601193,
|
|
"grad_norm": 0.2845110154034618,
|
|
"learning_rate": 3.717629024826974e-05,
|
|
"loss": 0.3733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12345914542675018,
|
|
"step": 925,
|
|
"valid_targets_mean": 6386.3,
|
|
"valid_targets_min": 3796
|
|
},
|
|
{
|
|
"epoch": 1.2794860027535566,
|
|
"grad_norm": 0.2968424819324445,
|
|
"learning_rate": 3.712689042842654e-05,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14066240191459656,
|
|
"step": 930,
|
|
"valid_targets_mean": 6407.3,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 1.286369894446994,
|
|
"grad_norm": 0.6809334906683846,
|
|
"learning_rate": 3.707709564564017e-05,
|
|
"loss": 0.785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22839263081550598,
|
|
"step": 935,
|
|
"valid_targets_mean": 4497.1,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 1.2932537861404314,
|
|
"grad_norm": 0.44144670158983224,
|
|
"learning_rate": 3.70269070482273e-05,
|
|
"loss": 0.6665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18578793108463287,
|
|
"step": 940,
|
|
"valid_targets_mean": 4428.4,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 1.3001376778338687,
|
|
"grad_norm": 0.41426028869608567,
|
|
"learning_rate": 3.697632579358638e-05,
|
|
"loss": 0.6768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24046120047569275,
|
|
"step": 945,
|
|
"valid_targets_mean": 3730.1,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 1.307021569527306,
|
|
"grad_norm": 0.29429169560205576,
|
|
"learning_rate": 3.69253530481709e-05,
|
|
"loss": 0.629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2377484142780304,
|
|
"step": 950,
|
|
"valid_targets_mean": 4497.8,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 1.3139054612207435,
|
|
"grad_norm": 0.32861368080088155,
|
|
"learning_rate": 3.687398998746252e-05,
|
|
"loss": 0.6108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17878547310829163,
|
|
"step": 955,
|
|
"valid_targets_mean": 2116.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 1.3207893529141808,
|
|
"grad_norm": 0.26314724951089413,
|
|
"learning_rate": 3.6822237795943954e-05,
|
|
"loss": 0.606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1861293911933899,
|
|
"step": 960,
|
|
"valid_targets_mean": 4756.1,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 1.327673244607618,
|
|
"grad_norm": 0.2615679937645198,
|
|
"learning_rate": 3.6770097667071644e-05,
|
|
"loss": 0.596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18951809406280518,
|
|
"step": 965,
|
|
"valid_targets_mean": 4760.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.3345571363010555,
|
|
"grad_norm": 0.26095009111284634,
|
|
"learning_rate": 3.671757080324826e-05,
|
|
"loss": 0.6175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2091958373785019,
|
|
"step": 970,
|
|
"valid_targets_mean": 3936.6,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 1.3414410279944928,
|
|
"grad_norm": 0.2776471727246407,
|
|
"learning_rate": 3.666465841579497e-05,
|
|
"loss": 0.5951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21168652176856995,
|
|
"step": 975,
|
|
"valid_targets_mean": 4049.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 1.3483249196879301,
|
|
"grad_norm": 0.33267792860891676,
|
|
"learning_rate": 3.661136172492347e-05,
|
|
"loss": 0.5772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22148503363132477,
|
|
"step": 980,
|
|
"valid_targets_mean": 3266.9,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 1.3552088113813676,
|
|
"grad_norm": 0.25102430709893414,
|
|
"learning_rate": 3.6557681959707905e-05,
|
|
"loss": 0.5882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19643747806549072,
|
|
"step": 985,
|
|
"valid_targets_mean": 4822.6,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 1.362092703074805,
|
|
"grad_norm": 0.26862571049180023,
|
|
"learning_rate": 3.6503620358056455e-05,
|
|
"loss": 0.5854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17380109429359436,
|
|
"step": 990,
|
|
"valid_targets_mean": 3932.0,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 1.3689765947682422,
|
|
"grad_norm": 0.287984411639415,
|
|
"learning_rate": 3.6449178166682846e-05,
|
|
"loss": 0.571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19724878668785095,
|
|
"step": 995,
|
|
"valid_targets_mean": 3117.2,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 1.3758604864616797,
|
|
"grad_norm": 0.2796683062977205,
|
|
"learning_rate": 3.639435664107758e-05,
|
|
"loss": 0.598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16841533780097961,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3402.1,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 1.382744378155117,
|
|
"grad_norm": 0.4239174740835374,
|
|
"learning_rate": 3.633915704547897e-05,
|
|
"loss": 0.594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718241810798645,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3427.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 1.3896282698485543,
|
|
"grad_norm": 0.2647039858767095,
|
|
"learning_rate": 3.628358065284399e-05,
|
|
"loss": 0.5874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17966550588607788,
|
|
"step": 1010,
|
|
"valid_targets_mean": 4640.0,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 1.3965121615419918,
|
|
"grad_norm": 0.2678172631650953,
|
|
"learning_rate": 3.6227628744818935e-05,
|
|
"loss": 0.5892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18576546013355255,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3996.0,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 1.403396053235429,
|
|
"grad_norm": 0.317908066233358,
|
|
"learning_rate": 3.617130261170985e-05,
|
|
"loss": 0.5805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17968283593654633,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3636.5,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 1.4102799449288663,
|
|
"grad_norm": 0.28706768200325383,
|
|
"learning_rate": 3.611460355245279e-05,
|
|
"loss": 0.6015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19896183907985687,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3391.8,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 1.4171638366223038,
|
|
"grad_norm": 0.4445920863658933,
|
|
"learning_rate": 3.6057532874583825e-05,
|
|
"loss": 0.5255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340232938528061,
|
|
"step": 1030,
|
|
"valid_targets_mean": 5537.1,
|
|
"valid_targets_min": 2564
|
|
},
|
|
{
|
|
"epoch": 1.4240477283157411,
|
|
"grad_norm": 0.28330961789345677,
|
|
"learning_rate": 3.600009189420895e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06687448918819427,
|
|
"step": 1035,
|
|
"valid_targets_mean": 5478.3,
|
|
"valid_targets_min": 2226
|
|
},
|
|
{
|
|
"epoch": 1.4309316200091784,
|
|
"grad_norm": 0.22020183707384766,
|
|
"learning_rate": 3.594228193597369e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052432358264923096,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4935.5,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 1.437815511702616,
|
|
"grad_norm": 0.25073378136582336,
|
|
"learning_rate": 3.588410433303253e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06154193356633186,
|
|
"step": 1045,
|
|
"valid_targets_mean": 5529.9,
|
|
"valid_targets_min": 3140
|
|
},
|
|
{
|
|
"epoch": 1.4446994033960532,
|
|
"grad_norm": 0.20180073452824873,
|
|
"learning_rate": 3.582556042701825e-05,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05435199290513992,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4935.2,
|
|
"valid_targets_min": 2984
|
|
},
|
|
{
|
|
"epoch": 1.4515832950894905,
|
|
"grad_norm": 0.23113035959905193,
|
|
"learning_rate": 3.5766651568010926e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05378912016749382,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4934.8,
|
|
"valid_targets_min": 2648
|
|
},
|
|
{
|
|
"epoch": 1.458467186782928,
|
|
"grad_norm": 0.24237558785107005,
|
|
"learning_rate": 3.5707379114506785e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05625016614794731,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5136.8,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 1.4653510784763653,
|
|
"grad_norm": 0.2162706048508137,
|
|
"learning_rate": 3.564774443338692e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05345233902335167,
|
|
"step": 1065,
|
|
"valid_targets_mean": 5029.4,
|
|
"valid_targets_min": 2963
|
|
},
|
|
{
|
|
"epoch": 1.4722349701698025,
|
|
"grad_norm": 0.22973501526812276,
|
|
"learning_rate": 3.558774889988577e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05721583217382431,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5930.7,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 1.47911886186324,
|
|
"grad_norm": 0.20364094812823397,
|
|
"learning_rate": 3.552739389755934e-05,
|
|
"loss": 0.1673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052951518446207047,
|
|
"step": 1075,
|
|
"valid_targets_mean": 5439.3,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 1.4860027535566773,
|
|
"grad_norm": 0.1986389709515152,
|
|
"learning_rate": 3.546668081825337e-05,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052483733743429184,
|
|
"step": 1080,
|
|
"valid_targets_mean": 5153.3,
|
|
"valid_targets_min": 2730
|
|
},
|
|
{
|
|
"epoch": 1.4928866452501146,
|
|
"grad_norm": 0.24482610765610374,
|
|
"learning_rate": 3.54056110620712e-05,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06315732002258301,
|
|
"step": 1085,
|
|
"valid_targets_mean": 5870.0,
|
|
"valid_targets_min": 3065
|
|
},
|
|
{
|
|
"epoch": 1.4997705369435521,
|
|
"grad_norm": 0.20159556415367041,
|
|
"learning_rate": 3.534418603734149e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058553457260131836,
|
|
"step": 1090,
|
|
"valid_targets_mean": 5658.6,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 1.5066544286369894,
|
|
"grad_norm": 0.19181232874888832,
|
|
"learning_rate": 3.5282407160585736e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052791528403759,
|
|
"step": 1095,
|
|
"valid_targets_mean": 5224.7,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 1.5135383203304267,
|
|
"grad_norm": 0.1976739120005175,
|
|
"learning_rate": 3.522027585648562e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0571698397397995,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5735.1,
|
|
"valid_targets_min": 3533
|
|
},
|
|
{
|
|
"epoch": 1.5204222120238642,
|
|
"grad_norm": 0.21306112608703381,
|
|
"learning_rate": 3.515779355785015e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052656762301921844,
|
|
"step": 1105,
|
|
"valid_targets_mean": 5265.8,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 1.5273061037173015,
|
|
"grad_norm": 0.22603016387768135,
|
|
"learning_rate": 3.50949617055826e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06061364710330963,
|
|
"step": 1110,
|
|
"valid_targets_mean": 5609.0,
|
|
"valid_targets_min": 3609
|
|
},
|
|
{
|
|
"epoch": 1.5341899954107387,
|
|
"grad_norm": 0.1980792666536461,
|
|
"learning_rate": 3.5031781748647286e-05,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05426044017076492,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5324.1,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 1.5410738871041763,
|
|
"grad_norm": 0.23644084395489454,
|
|
"learning_rate": 3.496825514403618e-05,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05629802495241165,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4990.3,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.5479577787976135,
|
|
"grad_norm": 0.22755681790453738,
|
|
"learning_rate": 3.490438335673529e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05739838257431984,
|
|
"step": 1125,
|
|
"valid_targets_mean": 5599.9,
|
|
"valid_targets_min": 2657
|
|
},
|
|
{
|
|
"epoch": 1.5548416704910508,
|
|
"grad_norm": 0.19425963994466555,
|
|
"learning_rate": 3.484016785969087e-05,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055991411209106445,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5342.1,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 1.5617255621844883,
|
|
"grad_norm": 0.5078671708098624,
|
|
"learning_rate": 3.4775610133775444e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19734632968902588,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3803.8,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 1.5686094538779256,
|
|
"grad_norm": 0.34030876024881695,
|
|
"learning_rate": 3.47107116677537e-05,
|
|
"loss": 0.5021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1680268496274948,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4280.1,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 1.575493345571363,
|
|
"grad_norm": 0.3386827436095329,
|
|
"learning_rate": 3.464547395824811e-05,
|
|
"loss": 0.4634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324179768562317,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4561.8,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 1.5823772372648004,
|
|
"grad_norm": 0.2926014105305259,
|
|
"learning_rate": 3.457989850970444e-05,
|
|
"loss": 0.513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17301608622074127,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3689.7,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 1.5892611289582377,
|
|
"grad_norm": 0.27196678108600447,
|
|
"learning_rate": 3.451398683435704e-05,
|
|
"loss": 0.4809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14032022655010223,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3426.4,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 1.596145020651675,
|
|
"grad_norm": 0.3130005942988459,
|
|
"learning_rate": 3.4447740452193995e-05,
|
|
"loss": 0.4868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20944523811340332,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3986.3,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 1.6030289123451125,
|
|
"grad_norm": 0.2717471489428658,
|
|
"learning_rate": 3.438116089092205e-05,
|
|
"loss": 0.4682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.139183908700943,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3593.2,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 1.6099128040385497,
|
|
"grad_norm": 0.27909638857698826,
|
|
"learning_rate": 3.43142496859314e-05,
|
|
"loss": 0.4608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13917654752731323,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3480.5,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 1.616796695731987,
|
|
"grad_norm": 0.26768993562882953,
|
|
"learning_rate": 3.4247008380260244e-05,
|
|
"loss": 0.488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17577114701271057,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3933.4,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 1.6236805874254245,
|
|
"grad_norm": 0.2932889829762393,
|
|
"learning_rate": 3.4179438524559255e-05,
|
|
"loss": 0.4728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15570342540740967,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3254.5,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 1.6305644791188618,
|
|
"grad_norm": 0.24071768152710976,
|
|
"learning_rate": 3.411154167705578e-05,
|
|
"loss": 0.4511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1142294779419899,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3184.5,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 1.637448370812299,
|
|
"grad_norm": 0.2752558490458923,
|
|
"learning_rate": 3.404331940351793e-05,
|
|
"loss": 0.4907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14964590966701508,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4686.7,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 1.6443322625057366,
|
|
"grad_norm": 0.2949052887253943,
|
|
"learning_rate": 3.397477327721844e-05,
|
|
"loss": 0.4596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17306478321552277,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4532.6,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.651216154199174,
|
|
"grad_norm": 0.28952244979890357,
|
|
"learning_rate": 3.390590487889842e-05,
|
|
"loss": 0.4909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15265388786792755,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3717.2,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 1.6581000458926112,
|
|
"grad_norm": 0.3121644616582937,
|
|
"learning_rate": 3.38367157967309e-05,
|
|
"loss": 0.47,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1337219774723053,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3432.2,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 1.6649839375860487,
|
|
"grad_norm": 0.25739831721805007,
|
|
"learning_rate": 3.3767207626284184e-05,
|
|
"loss": 0.4744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17433468997478485,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4030.4,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 1.671867829279486,
|
|
"grad_norm": 0.2998480779773187,
|
|
"learning_rate": 3.369738197048505e-05,
|
|
"loss": 0.4813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13764141499996185,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3379.8,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 1.6787517209729232,
|
|
"grad_norm": 0.3600517795584768,
|
|
"learning_rate": 3.362724043958184e-05,
|
|
"loss": 0.4755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14118696749210358,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2594.7,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 1.6856356126663607,
|
|
"grad_norm": 0.3194827365553974,
|
|
"learning_rate": 3.355678465110725e-05,
|
|
"loss": 0.4506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08822810649871826,
|
|
"step": 1225,
|
|
"valid_targets_mean": 972.6,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 1.692519504359798,
|
|
"grad_norm": 0.2769855539856663,
|
|
"learning_rate": 3.348601622984107e-05,
|
|
"loss": 0.4816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1904619336128235,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5099.9,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 1.6994033960532353,
|
|
"grad_norm": 0.2923413221945997,
|
|
"learning_rate": 3.3414936807772736e-05,
|
|
"loss": 0.4764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18333196640014648,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5051.9,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 1.7062872877466728,
|
|
"grad_norm": 0.39463221323145375,
|
|
"learning_rate": 3.334354802406364e-05,
|
|
"loss": 0.4142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11668369174003601,
|
|
"step": 1240,
|
|
"valid_targets_mean": 6354.4,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 1.71317117944011,
|
|
"grad_norm": 0.29121380337364977,
|
|
"learning_rate": 3.327185152500937e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09022815525531769,
|
|
"step": 1245,
|
|
"valid_targets_mean": 6446.1,
|
|
"valid_targets_min": 139
|
|
},
|
|
{
|
|
"epoch": 1.7200550711335474,
|
|
"grad_norm": 0.2108193199124782,
|
|
"learning_rate": 3.3199848964001744e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0786634087562561,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3949.5,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 1.726938962826985,
|
|
"grad_norm": 0.2164063397768852,
|
|
"learning_rate": 3.312754200149065e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10437431931495667,
|
|
"step": 1255,
|
|
"valid_targets_mean": 6445.5,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.7338228545204222,
|
|
"grad_norm": 0.20918460158950558,
|
|
"learning_rate": 3.305493230494576e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09302932769060135,
|
|
"step": 1260,
|
|
"valid_targets_mean": 6484.1,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.7407067462138595,
|
|
"grad_norm": 0.20429734603153177,
|
|
"learning_rate": 3.298202154881814e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09457381814718246,
|
|
"step": 1265,
|
|
"valid_targets_mean": 7440.5,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 1.747590637907297,
|
|
"grad_norm": 0.18234677362943436,
|
|
"learning_rate": 3.2908811414501545e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10430276393890381,
|
|
"step": 1270,
|
|
"valid_targets_mean": 7078.0,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.7544745296007342,
|
|
"grad_norm": 0.24974357174847586,
|
|
"learning_rate": 3.283530359029369e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09898485243320465,
|
|
"step": 1275,
|
|
"valid_targets_mean": 6463.1,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 1.7613584212941715,
|
|
"grad_norm": 0.19176634782115026,
|
|
"learning_rate": 3.276149977135735e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07478626072406769,
|
|
"step": 1280,
|
|
"valid_targets_mean": 6061.8,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 1.768242312987609,
|
|
"grad_norm": 0.1999902463647874,
|
|
"learning_rate": 3.268740165968116e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11764705926179886,
|
|
"step": 1285,
|
|
"valid_targets_mean": 6472.4,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 1.7751262046810463,
|
|
"grad_norm": 0.1789066747451937,
|
|
"learning_rate": 3.261301096404051e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07152275741100311,
|
|
"step": 1290,
|
|
"valid_targets_mean": 7596.1,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 1.7820100963744836,
|
|
"grad_norm": 0.1824655024569692,
|
|
"learning_rate": 3.2538329399958006e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08981531858444214,
|
|
"step": 1295,
|
|
"valid_targets_mean": 7278.7,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 1.788893988067921,
|
|
"grad_norm": 0.23888882746848344,
|
|
"learning_rate": 3.2463358689664e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10454994440078735,
|
|
"step": 1300,
|
|
"valid_targets_mean": 6472.9,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.7957778797613584,
|
|
"grad_norm": 0.18924585287250698,
|
|
"learning_rate": 3.238810056205682e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09392829984426498,
|
|
"step": 1305,
|
|
"valid_targets_mean": 6411.4,
|
|
"valid_targets_min": 176
|
|
},
|
|
{
|
|
"epoch": 1.8026617714547957,
|
|
"grad_norm": 0.1958691238588584,
|
|
"learning_rate": 3.2312556752662946e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10470084100961685,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5064.2,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 1.8095456631482332,
|
|
"grad_norm": 0.2170787439967381,
|
|
"learning_rate": 3.223672900359693e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11556200683116913,
|
|
"step": 1315,
|
|
"valid_targets_mean": 6593.2,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 1.8164295548416705,
|
|
"grad_norm": 0.1867201765991889,
|
|
"learning_rate": 3.2160619063521274e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07859818637371063,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5822.4,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 1.8233134465351077,
|
|
"grad_norm": 0.18138072725490167,
|
|
"learning_rate": 3.2084228687606076e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12460223585367203,
|
|
"step": 1325,
|
|
"valid_targets_mean": 8131.2,
|
|
"valid_targets_min": 270
|
|
},
|
|
{
|
|
"epoch": 1.8301973382285452,
|
|
"grad_norm": 0.19754129821945798,
|
|
"learning_rate": 3.200755963748856e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0860404372215271,
|
|
"step": 1330,
|
|
"valid_targets_mean": 6484.8,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 1.8370812299219825,
|
|
"grad_norm": 0.4698750684367156,
|
|
"learning_rate": 3.193061368123244e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11438726633787155,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3920.8,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 1.8439651216154198,
|
|
"grad_norm": 0.2603830415640527,
|
|
"learning_rate": 3.185339259328718e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08097126334905624,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4168.3,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 1.8508490133088573,
|
|
"grad_norm": 0.26754326662182265,
|
|
"learning_rate": 3.1775898154447035e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07933999598026276,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3803.8,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 1.8577329050022946,
|
|
"grad_norm": 0.2627395021404963,
|
|
"learning_rate": 3.169813215181e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0736798420548439,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4049.7,
|
|
"valid_targets_min": 2535
|
|
},
|
|
{
|
|
"epoch": 1.8646167966957319,
|
|
"grad_norm": 0.26403469391586964,
|
|
"learning_rate": 3.162009637873662e-05,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07726424932479858,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3835.0,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 1.8715006883891694,
|
|
"grad_norm": 0.2252698294361399,
|
|
"learning_rate": 3.1541792634808586e-05,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07776709645986557,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3791.8,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 1.8783845800826067,
|
|
"grad_norm": 0.21611929490789147,
|
|
"learning_rate": 3.146322272578726e-05,
|
|
"loss": 0.2209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07243067771196365,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3614.4,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 1.885268471776044,
|
|
"grad_norm": 0.23385487665271673,
|
|
"learning_rate": 3.138438846357208e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06762323528528214,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3766.9,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 1.8921523634694815,
|
|
"grad_norm": 0.26904239709962535,
|
|
"learning_rate": 3.1305291666158645e-05,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06977090984582901,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3492.3,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 1.8990362551629187,
|
|
"grad_norm": 0.25067490435632334,
|
|
"learning_rate": 3.1225934157596946e-05,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07045575231313705,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3381.6,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 1.905920146856356,
|
|
"grad_norm": 0.2627943908553539,
|
|
"learning_rate": 3.1146317767949205e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07210519909858704,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3726.1,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 1.9128040385497935,
|
|
"grad_norm": 0.24210474794590595,
|
|
"learning_rate": 3.106644433324768e-05,
|
|
"loss": 0.2147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05783864110708237,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3206.2,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 1.9196879302432308,
|
|
"grad_norm": 0.2788195303094242,
|
|
"learning_rate": 3.098631569545238e-05,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06336452066898346,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3425.1,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 1.926571821936668,
|
|
"grad_norm": 0.22849195239295697,
|
|
"learning_rate": 3.090593370240852e-05,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07397540658712387,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4109.9,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 1.9334557136301056,
|
|
"grad_norm": 0.2409752826569829,
|
|
"learning_rate": 3.082530020780392e-05,
|
|
"loss": 0.2169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06807292997837067,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3850.9,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 1.9403396053235429,
|
|
"grad_norm": 0.45451934745934763,
|
|
"learning_rate": 3.074441707112632e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09514793753623962,
|
|
"step": 1410,
|
|
"valid_targets_mean": 6487.8,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 1.9472234970169802,
|
|
"grad_norm": 0.31985575958658746,
|
|
"learning_rate": 3.0663286157620414e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0950220376253128,
|
|
"step": 1415,
|
|
"valid_targets_mean": 7570.3,
|
|
"valid_targets_min": 1928
|
|
},
|
|
{
|
|
"epoch": 1.9541073887104177,
|
|
"grad_norm": 0.28719145252156947,
|
|
"learning_rate": 3.058190933824489e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08216243982315063,
|
|
"step": 1420,
|
|
"valid_targets_mean": 6536.0,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 1.960991280403855,
|
|
"grad_norm": 0.23128236961277554,
|
|
"learning_rate": 3.0500288489629263e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08936893939971924,
|
|
"step": 1425,
|
|
"valid_targets_mean": 7177.7,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 1.9678751720972922,
|
|
"grad_norm": 0.24609523942492573,
|
|
"learning_rate": 3.0418425494030596e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07563286274671555,
|
|
"step": 1430,
|
|
"valid_targets_mean": 6110.9,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 1.9747590637907297,
|
|
"grad_norm": 0.24658402476334376,
|
|
"learning_rate": 3.0336322239290118e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08098824322223663,
|
|
"step": 1435,
|
|
"valid_targets_mean": 6657.7,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 1.981642955484167,
|
|
"grad_norm": 0.22318614135355666,
|
|
"learning_rate": 3.0253980618789654e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07510203123092651,
|
|
"step": 1440,
|
|
"valid_targets_mean": 6613.5,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.9885268471776043,
|
|
"grad_norm": 0.21078425849583504,
|
|
"learning_rate": 3.0171402531407982e-05,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0857941061258316,
|
|
"step": 1445,
|
|
"valid_targets_mean": 7165.2,
|
|
"valid_targets_min": 2065
|
|
},
|
|
{
|
|
"epoch": 1.9954107388710418,
|
|
"grad_norm": 0.22410382081565297,
|
|
"learning_rate": 3.008858988147704e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08800241351127625,
|
|
"step": 1450,
|
|
"valid_targets_mean": 6591.6,
|
|
"valid_targets_min": 155
|
|
},
|
|
{
|
|
"epoch": 2.0013767783386873,
|
|
"grad_norm": 2.6466904749858258,
|
|
"learning_rate": 3.0005544578738005e-05,
|
|
"loss": 0.3707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23792487382888794,
|
|
"step": 1455,
|
|
"valid_targets_mean": 7264.4,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 2.008260670032125,
|
|
"grad_norm": 0.9648943337750195,
|
|
"learning_rate": 2.9922268538297267e-05,
|
|
"loss": 0.5503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20204994082450867,
|
|
"step": 1460,
|
|
"valid_targets_mean": 9100.2,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 2.0151445617255623,
|
|
"grad_norm": 0.8297593468986739,
|
|
"learning_rate": 2.9838763680582247e-05,
|
|
"loss": 0.4629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14032147824764252,
|
|
"step": 1465,
|
|
"valid_targets_mean": 6959.6,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 2.0220284534189994,
|
|
"grad_norm": 0.3948957029510476,
|
|
"learning_rate": 2.9755031931297113e-05,
|
|
"loss": 0.4265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425497829914093,
|
|
"step": 1470,
|
|
"valid_targets_mean": 7188.0,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 2.028912345112437,
|
|
"grad_norm": 0.2843596306589016,
|
|
"learning_rate": 2.9671075221378386e-05,
|
|
"loss": 0.3967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14745484292507172,
|
|
"step": 1475,
|
|
"valid_targets_mean": 6959.9,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 2.0357962368058744,
|
|
"grad_norm": 0.7483106688731904,
|
|
"learning_rate": 2.9586895486950387e-05,
|
|
"loss": 0.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14458361268043518,
|
|
"step": 1480,
|
|
"valid_targets_mean": 7818.4,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 2.0426801284993115,
|
|
"grad_norm": 0.24953497116367615,
|
|
"learning_rate": 2.950249466928062e-05,
|
|
"loss": 0.3858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1309637874364853,
|
|
"step": 1485,
|
|
"valid_targets_mean": 7926.4,
|
|
"valid_targets_min": 3527
|
|
},
|
|
{
|
|
"epoch": 2.049564020192749,
|
|
"grad_norm": 0.23725116507324068,
|
|
"learning_rate": 2.9417874714734977e-05,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11365851759910583,
|
|
"step": 1490,
|
|
"valid_targets_mean": 6605.4,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 2.0564479118861865,
|
|
"grad_norm": 0.3158203272689995,
|
|
"learning_rate": 2.9333037574732847e-05,
|
|
"loss": 0.3706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1223248615860939,
|
|
"step": 1495,
|
|
"valid_targets_mean": 6947.1,
|
|
"valid_targets_min": 2244
|
|
},
|
|
{
|
|
"epoch": 2.0633318035796235,
|
|
"grad_norm": 0.25112193214684,
|
|
"learning_rate": 2.9247985205702166e-05,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12720884382724762,
|
|
"step": 1500,
|
|
"valid_targets_mean": 7027.9,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 2.070215695273061,
|
|
"grad_norm": 0.24375670298938254,
|
|
"learning_rate": 2.9162719569034216e-05,
|
|
"loss": 0.369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14586299657821655,
|
|
"step": 1505,
|
|
"valid_targets_mean": 8959.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 2.0770995869664985,
|
|
"grad_norm": 0.21230058397434262,
|
|
"learning_rate": 2.9077242631038487e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11099869012832642,
|
|
"step": 1510,
|
|
"valid_targets_mean": 9482.4,
|
|
"valid_targets_min": 5095
|
|
},
|
|
{
|
|
"epoch": 2.0839834786599356,
|
|
"grad_norm": 0.20509730804130927,
|
|
"learning_rate": 2.8991556362897248e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10739248991012573,
|
|
"step": 1515,
|
|
"valid_targets_mean": 8913.7,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 2.090867370353373,
|
|
"grad_norm": 0.22226379394756782,
|
|
"learning_rate": 2.890566274062015e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11422709375619888,
|
|
"step": 1520,
|
|
"valid_targets_mean": 8780.3,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 2.0977512620468106,
|
|
"grad_norm": 0.20071008297351936,
|
|
"learning_rate": 2.8819563744998626e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1113002598285675,
|
|
"step": 1525,
|
|
"valid_targets_mean": 9303.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.1046351537402477,
|
|
"grad_norm": 0.20192807188862597,
|
|
"learning_rate": 2.8733261361560223e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10916690528392792,
|
|
"step": 1530,
|
|
"valid_targets_mean": 9819.0,
|
|
"valid_targets_min": 5584
|
|
},
|
|
{
|
|
"epoch": 2.111519045433685,
|
|
"grad_norm": 0.19824569692117233,
|
|
"learning_rate": 2.864675758052281e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11355476081371307,
|
|
"step": 1535,
|
|
"valid_targets_mean": 9100.7,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 2.1184029371271227,
|
|
"grad_norm": 0.24406167259488418,
|
|
"learning_rate": 2.8560054396748673e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11132253706455231,
|
|
"step": 1540,
|
|
"valid_targets_mean": 9212.3,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 2.1252868288205597,
|
|
"grad_norm": 0.20727883416494522,
|
|
"learning_rate": 2.8473153809698546e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.098292276263237,
|
|
"step": 1545,
|
|
"valid_targets_mean": 8648.9,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 2.1321707205139973,
|
|
"grad_norm": 0.26662728617842596,
|
|
"learning_rate": 2.8386057823385446e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12168661504983902,
|
|
"step": 1550,
|
|
"valid_targets_mean": 10183.4,
|
|
"valid_targets_min": 3886
|
|
},
|
|
{
|
|
"epoch": 2.1390546122074348,
|
|
"grad_norm": 0.2278362271965239,
|
|
"learning_rate": 2.829876844632852e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10494972765445709,
|
|
"step": 1555,
|
|
"valid_targets_mean": 9594.6,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 2.145938503900872,
|
|
"grad_norm": 0.21689494772378176,
|
|
"learning_rate": 2.821128769150667e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1015724167227745,
|
|
"step": 1560,
|
|
"valid_targets_mean": 8928.9,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 2.1528223955943093,
|
|
"grad_norm": 0.21647054727360174,
|
|
"learning_rate": 2.8123617576312167e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11096221953630447,
|
|
"step": 1565,
|
|
"valid_targets_mean": 9772.7,
|
|
"valid_targets_min": 3574
|
|
},
|
|
{
|
|
"epoch": 2.159706287287747,
|
|
"grad_norm": 0.22433319070414948,
|
|
"learning_rate": 2.8035760122504126e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1108248233795166,
|
|
"step": 1570,
|
|
"valid_targets_mean": 10406.6,
|
|
"valid_targets_min": 5877
|
|
},
|
|
{
|
|
"epoch": 2.166590178981184,
|
|
"grad_norm": 0.20015161510933208,
|
|
"learning_rate": 2.7947717356161867e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11052754521369934,
|
|
"step": 1575,
|
|
"valid_targets_mean": 9535.4,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 2.1734740706746214,
|
|
"grad_norm": 0.2158786430865961,
|
|
"learning_rate": 2.78594913076382e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10300415754318237,
|
|
"step": 1580,
|
|
"valid_targets_mean": 9136.9,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 2.180357962368059,
|
|
"grad_norm": 0.18903953041106164,
|
|
"learning_rate": 2.7771084011512603e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09355498105287552,
|
|
"step": 1585,
|
|
"valid_targets_mean": 9388.7,
|
|
"valid_targets_min": 3282
|
|
},
|
|
{
|
|
"epoch": 2.187241854061496,
|
|
"grad_norm": 0.2308426422061156,
|
|
"learning_rate": 2.76824975065443e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10470138490200043,
|
|
"step": 1590,
|
|
"valid_targets_mean": 9729.2,
|
|
"valid_targets_min": 3231
|
|
},
|
|
{
|
|
"epoch": 2.1941257457549335,
|
|
"grad_norm": 0.21713575495158471,
|
|
"learning_rate": 2.7593733835625246e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12001024186611176,
|
|
"step": 1595,
|
|
"valid_targets_mean": 10332.3,
|
|
"valid_targets_min": 4344
|
|
},
|
|
{
|
|
"epoch": 2.201009637448371,
|
|
"grad_norm": 0.18564762498854304,
|
|
"learning_rate": 2.750479504573303e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11725566536188126,
|
|
"step": 1600,
|
|
"valid_targets_mean": 11408.0,
|
|
"valid_targets_min": 5726
|
|
},
|
|
{
|
|
"epoch": 2.207893529141808,
|
|
"grad_norm": 0.20551407108267855,
|
|
"learning_rate": 2.7415683187883647e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11440514028072357,
|
|
"step": 1605,
|
|
"valid_targets_mean": 10250.5,
|
|
"valid_targets_min": 2607
|
|
},
|
|
{
|
|
"epoch": 2.2147774208352455,
|
|
"grad_norm": 0.19505075166019809,
|
|
"learning_rate": 2.7326400317084202e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10536989569664001,
|
|
"step": 1610,
|
|
"valid_targets_mean": 9131.8,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 2.221661312528683,
|
|
"grad_norm": 0.19443056596986627,
|
|
"learning_rate": 2.7236948492285535e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11025330424308777,
|
|
"step": 1615,
|
|
"valid_targets_mean": 10329.2,
|
|
"valid_targets_min": 5291
|
|
},
|
|
{
|
|
"epoch": 2.22854520422212,
|
|
"grad_norm": 0.2110434913158342,
|
|
"learning_rate": 2.7147329776334742e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1029968410730362,
|
|
"step": 1620,
|
|
"valid_targets_mean": 8999.2,
|
|
"valid_targets_min": 2381
|
|
},
|
|
{
|
|
"epoch": 2.2354290959155576,
|
|
"grad_norm": 0.1987583998355825,
|
|
"learning_rate": 2.7057546235927565e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11397416889667511,
|
|
"step": 1625,
|
|
"valid_targets_mean": 9227.1,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 2.242312987608995,
|
|
"grad_norm": 0.20249691334408507,
|
|
"learning_rate": 2.696759994156079e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10142004489898682,
|
|
"step": 1630,
|
|
"valid_targets_mean": 9084.0,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 2.249196879302432,
|
|
"grad_norm": 0.19109001564744665,
|
|
"learning_rate": 2.6877492967484447e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09147472679615021,
|
|
"step": 1635,
|
|
"valid_targets_mean": 9274.7,
|
|
"valid_targets_min": 4259
|
|
},
|
|
{
|
|
"epoch": 2.2560807709958697,
|
|
"grad_norm": 0.1973586040294035,
|
|
"learning_rate": 2.6787227391654025e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09772805124521255,
|
|
"step": 1640,
|
|
"valid_targets_mean": 8931.9,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 2.262964662689307,
|
|
"grad_norm": 0.22153726074279795,
|
|
"learning_rate": 2.6696805295682487e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13094830513000488,
|
|
"step": 1645,
|
|
"valid_targets_mean": 11522.8,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 2.2698485543827442,
|
|
"grad_norm": 0.25466761523016024,
|
|
"learning_rate": 2.660622876479234e-05,
|
|
"loss": 0.3332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11351979523897171,
|
|
"step": 1650,
|
|
"valid_targets_mean": 6154.2,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 2.2767324460761817,
|
|
"grad_norm": 0.2803967972978861,
|
|
"learning_rate": 2.6515499887767495e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11425979435443878,
|
|
"step": 1655,
|
|
"valid_targets_mean": 6070.9,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 2.2836163377696193,
|
|
"grad_norm": 1.6466553952943501,
|
|
"learning_rate": 2.642462075690511e-05,
|
|
"loss": 0.6025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20921450853347778,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3383.0,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 2.2905002294630563,
|
|
"grad_norm": 0.6287251863877925,
|
|
"learning_rate": 2.633359346796736e-05,
|
|
"loss": 0.6721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16572773456573486,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2937.2,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 2.297384121156494,
|
|
"grad_norm": 0.49928140036251006,
|
|
"learning_rate": 2.6242420120133086e-05,
|
|
"loss": 0.6071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290330171585083,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4071.6,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 2.3042680128499313,
|
|
"grad_norm": 0.38142909127873137,
|
|
"learning_rate": 2.615110281594938e-05,
|
|
"loss": 0.59,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19661152362823486,
|
|
"step": 1675,
|
|
"valid_targets_mean": 5239.0,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 2.3111519045433684,
|
|
"grad_norm": 0.3099380229416545,
|
|
"learning_rate": 2.6059643661283116e-05,
|
|
"loss": 0.5761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17346453666687012,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4039.2,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 2.318035796236806,
|
|
"grad_norm": 0.30542809625015865,
|
|
"learning_rate": 2.5968044765272394e-05,
|
|
"loss": 0.5542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1582844853401184,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3473.9,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 2.3249196879302434,
|
|
"grad_norm": 0.3051234978697733,
|
|
"learning_rate": 2.5876308240277857e-05,
|
|
"loss": 0.561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16743376851081848,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3420.7,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 2.3318035796236805,
|
|
"grad_norm": 0.3250560787056461,
|
|
"learning_rate": 2.578443620183403e-05,
|
|
"loss": 0.5461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24719908833503723,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3772.6,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 2.338687471317118,
|
|
"grad_norm": 0.2660151177567861,
|
|
"learning_rate": 2.5692430768600512e-05,
|
|
"loss": 0.54,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17158254981040955,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4168.5,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 2.3455713630105555,
|
|
"grad_norm": 0.25544944955250865,
|
|
"learning_rate": 2.5600294062313103e-05,
|
|
"loss": 0.5428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1272633671760559,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3252.4,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.3524552547039925,
|
|
"grad_norm": 0.26750917864344126,
|
|
"learning_rate": 2.55080282077349e-05,
|
|
"loss": 0.5336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2098875492811203,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4318.8,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 2.35933914639743,
|
|
"grad_norm": 0.26536069162137604,
|
|
"learning_rate": 2.5415635332607284e-05,
|
|
"loss": 0.5449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15961642563343048,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3735.0,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 2.3662230380908675,
|
|
"grad_norm": 0.27381817984219636,
|
|
"learning_rate": 2.5323117567600845e-05,
|
|
"loss": 0.5188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22347742319107056,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4098.8,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.3731069297843046,
|
|
"grad_norm": 0.2572874712994384,
|
|
"learning_rate": 2.523047704626628e-05,
|
|
"loss": 0.5298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16295018792152405,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4343.0,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 2.379990821477742,
|
|
"grad_norm": 0.2724707677172288,
|
|
"learning_rate": 2.513771590498514e-05,
|
|
"loss": 0.5431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16079822182655334,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3758.9,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 2.3868747131711796,
|
|
"grad_norm": 0.27991161433019873,
|
|
"learning_rate": 2.504483628292061e-05,
|
|
"loss": 0.5578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15567082166671753,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3194.6,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 2.3937586048646167,
|
|
"grad_norm": 0.2632081621333093,
|
|
"learning_rate": 2.4951840321968157e-05,
|
|
"loss": 0.5505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1653202325105667,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3790.3,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 2.400642496558054,
|
|
"grad_norm": 0.27239266163390957,
|
|
"learning_rate": 2.4858730166706125e-05,
|
|
"loss": 0.5337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22418425977230072,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4685.3,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 2.4075263882514917,
|
|
"grad_norm": 0.2587574171184569,
|
|
"learning_rate": 2.47655079643463e-05,
|
|
"loss": 0.5376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14405536651611328,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3750.0,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 2.4144102799449287,
|
|
"grad_norm": 0.2631506572474716,
|
|
"learning_rate": 2.467217586468438e-05,
|
|
"loss": 0.5434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17319154739379883,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4435.8,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.4212941716383662,
|
|
"grad_norm": 0.3163059805630689,
|
|
"learning_rate": 2.4578736020050423e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05564213544130325,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5312.0,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 2.4281780633318037,
|
|
"grad_norm": 0.28503436007188904,
|
|
"learning_rate": 2.448519058525915e-05,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05835745483636856,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5549.4,
|
|
"valid_targets_min": 3000
|
|
},
|
|
{
|
|
"epoch": 2.435061955025241,
|
|
"grad_norm": 0.26157242926928764,
|
|
"learning_rate": 2.4391541717560333e-05,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04818875715136528,
|
|
"step": 1770,
|
|
"valid_targets_mean": 5357.1,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 2.4419458467186783,
|
|
"grad_norm": 0.2023497066739585,
|
|
"learning_rate": 2.4297791576588993e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06049264594912529,
|
|
"step": 1775,
|
|
"valid_targets_mean": 5853.2,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 2.448829738412116,
|
|
"grad_norm": 0.20086856688434088,
|
|
"learning_rate": 2.4203942324315623e-05,
|
|
"loss": 0.1489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04810313135385513,
|
|
"step": 1780,
|
|
"valid_targets_mean": 5323.5,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 2.455713630105553,
|
|
"grad_norm": 0.17236295233420956,
|
|
"learning_rate": 2.4109996124996297e-05,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05059269070625305,
|
|
"step": 1785,
|
|
"valid_targets_mean": 5320.9,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 2.4625975217989904,
|
|
"grad_norm": 0.18260197196728628,
|
|
"learning_rate": 2.4015955145122807e-05,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04599791020154953,
|
|
"step": 1790,
|
|
"valid_targets_mean": 5294.3,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 2.469481413492428,
|
|
"grad_norm": 0.20512227541440275,
|
|
"learning_rate": 2.3921821553372668e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05129881948232651,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5738.7,
|
|
"valid_targets_min": 3508
|
|
},
|
|
{
|
|
"epoch": 2.476365305185865,
|
|
"grad_norm": 0.19867031260793397,
|
|
"learning_rate": 2.3827597520559114e-05,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04662464186549187,
|
|
"step": 1800,
|
|
"valid_targets_mean": 5052.5,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 2.4832491968793025,
|
|
"grad_norm": 0.18169951666122502,
|
|
"learning_rate": 2.3733285219581044e-05,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04784727469086647,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5108.5,
|
|
"valid_targets_min": 3236
|
|
},
|
|
{
|
|
"epoch": 2.49013308857274,
|
|
"grad_norm": 0.1804382758900024,
|
|
"learning_rate": 2.3638886825372905e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0475069135427475,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 2.497016980266177,
|
|
"grad_norm": 0.20004059160223273,
|
|
"learning_rate": 2.3544404514854546e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046164870262145996,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5513.7,
|
|
"valid_targets_min": 2867
|
|
},
|
|
{
|
|
"epoch": 2.5039008719596145,
|
|
"grad_norm": 0.219326819094649,
|
|
"learning_rate": 2.3449840466880982e-05,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04952413961291313,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5217.7,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 2.510784763653052,
|
|
"grad_norm": 0.20249255196335733,
|
|
"learning_rate": 2.3355196862192217e-05,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04487111419439316,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5169.1,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 2.517668655346489,
|
|
"grad_norm": 0.17956302100157867,
|
|
"learning_rate": 2.3260475883362875e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0479186587035656,
|
|
"step": 1830,
|
|
"valid_targets_mean": 5809.1,
|
|
"valid_targets_min": 2841
|
|
},
|
|
{
|
|
"epoch": 2.5245525470399266,
|
|
"grad_norm": 0.19201673260015767,
|
|
"learning_rate": 2.316567971475192e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05057358741760254,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5502.1,
|
|
"valid_targets_min": 2911
|
|
},
|
|
{
|
|
"epoch": 2.5314364387333637,
|
|
"grad_norm": 0.20471095297210237,
|
|
"learning_rate": 2.307081054245226e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0484449602663517,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5274.9,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 2.538320330426801,
|
|
"grad_norm": 0.18776851268143915,
|
|
"learning_rate": 2.2975870554240355e-05,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046514444053173065,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4966.8,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 2.5452042221202387,
|
|
"grad_norm": 0.2093548275865132,
|
|
"learning_rate": 2.2880861939525723e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05320793390274048,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5898.0,
|
|
"valid_targets_min": 3337
|
|
},
|
|
{
|
|
"epoch": 2.552088113813676,
|
|
"grad_norm": 0.21043193594542342,
|
|
"learning_rate": 2.2785786889300497e-05,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048174209892749786,
|
|
"step": 1855,
|
|
"valid_targets_mean": 5214.2,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 2.5589720055071132,
|
|
"grad_norm": 0.2562077072298455,
|
|
"learning_rate": 2.2690647596088874e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09924779087305069,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3851.8,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 2.5658558972005507,
|
|
"grad_norm": 0.2663250390364335,
|
|
"learning_rate": 2.2595446253896554e-05,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1439560055732727,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4596.2,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 2.572739788893988,
|
|
"grad_norm": 0.2688731135078293,
|
|
"learning_rate": 2.250018505816015e-05,
|
|
"loss": 0.4627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430397629737854,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4087.0,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 2.5796236805874253,
|
|
"grad_norm": 0.31694294040387333,
|
|
"learning_rate": 2.2404866205696557e-05,
|
|
"loss": 0.452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1368146389722824,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3146.8,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 2.586507572280863,
|
|
"grad_norm": 0.2939674070397036,
|
|
"learning_rate": 2.2309491894652285e-05,
|
|
"loss": 0.4504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16308170557022095,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4243.4,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 2.5933914639743003,
|
|
"grad_norm": 0.2277866326879019,
|
|
"learning_rate": 2.2214064324452785e-05,
|
|
"loss": 0.4171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11347027122974396,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4787.6,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 2.6002753556677374,
|
|
"grad_norm": 0.2719922355168564,
|
|
"learning_rate": 2.2118585695751712e-05,
|
|
"loss": 0.4561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19940060377120972,
|
|
"step": 1890,
|
|
"valid_targets_mean": 4654.7,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 2.607159247361175,
|
|
"grad_norm": 0.31742422458381,
|
|
"learning_rate": 2.202305821038017e-05,
|
|
"loss": 0.423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1434064507484436,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3702.7,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 2.614043139054612,
|
|
"grad_norm": 0.30176842670334525,
|
|
"learning_rate": 2.1927484071295965e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1468219757080078,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3575.6,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 2.6209270307480494,
|
|
"grad_norm": 0.24794596280960404,
|
|
"learning_rate": 2.1831865482532753e-05,
|
|
"loss": 0.4435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12187361717224121,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3883.8,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 2.627810922441487,
|
|
"grad_norm": 0.2852241331128035,
|
|
"learning_rate": 2.173620464914929e-05,
|
|
"loss": 0.4286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12532787024974823,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4064.0,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 2.6346948141349245,
|
|
"grad_norm": 0.31724825994333544,
|
|
"learning_rate": 2.1640503777178484e-05,
|
|
"loss": 0.4433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480903923511505,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3096.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 2.6415787058283615,
|
|
"grad_norm": 0.26842526190523636,
|
|
"learning_rate": 2.154476507357661e-05,
|
|
"loss": 0.4216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14191970229148865,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3573.1,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 2.648462597521799,
|
|
"grad_norm": 0.24591428355457226,
|
|
"learning_rate": 2.1448990746172353e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1298634558916092,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4696.2,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 2.655346489215236,
|
|
"grad_norm": 0.2586924114853833,
|
|
"learning_rate": 2.1353183003615944e-05,
|
|
"loss": 0.4675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18989674746990204,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4314.5,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 2.6622303809086736,
|
|
"grad_norm": 0.2824215367703712,
|
|
"learning_rate": 2.1257344055328163e-05,
|
|
"loss": 0.4233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14473338425159454,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3551.0,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 2.669114272602111,
|
|
"grad_norm": 0.281092812644092,
|
|
"learning_rate": 2.1161476111449466e-05,
|
|
"loss": 0.4524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14296843111515045,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3728.1,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 2.6759981642955486,
|
|
"grad_norm": 0.2633497596047535,
|
|
"learning_rate": 2.106558138278894e-05,
|
|
"loss": 0.4234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11898775398731232,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3462.3,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 2.6828820559889857,
|
|
"grad_norm": 0.2627345443724075,
|
|
"learning_rate": 2.0969662080773387e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13881132006645203,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4587.7,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 2.689765947682423,
|
|
"grad_norm": 0.26816569681245367,
|
|
"learning_rate": 2.0873720417396265e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12991484999656677,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3638.8,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 2.6966498393758602,
|
|
"grad_norm": 0.25549945448822214,
|
|
"learning_rate": 2.0777758605166733e-05,
|
|
"loss": 0.4463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14429476857185364,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4151.9,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 2.7035337310692977,
|
|
"grad_norm": 0.2661070040391692,
|
|
"learning_rate": 2.0681778857058584e-05,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0850117951631546,
|
|
"step": 1965,
|
|
"valid_targets_mean": 1979.0,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 2.7104176227627352,
|
|
"grad_norm": 0.20166001158860308,
|
|
"learning_rate": 2.0585783386459242e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09919463843107224,
|
|
"step": 1970,
|
|
"valid_targets_mean": 6913.1,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 2.7173015144561727,
|
|
"grad_norm": 0.21527235088365207,
|
|
"learning_rate": 2.0489774407118695e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09187182784080505,
|
|
"step": 1975,
|
|
"valid_targets_mean": 6177.3,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 2.72418540614961,
|
|
"grad_norm": 0.19810833897696425,
|
|
"learning_rate": 2.039375413309847e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0892031192779541,
|
|
"step": 1980,
|
|
"valid_targets_mean": 6737.9,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 2.7310692978430473,
|
|
"grad_norm": 0.19012384443935784,
|
|
"learning_rate": 2.0297724778720553e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09897127747535706,
|
|
"step": 1985,
|
|
"valid_targets_mean": 7625.8,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.7379531895364844,
|
|
"grad_norm": 0.2465631293054896,
|
|
"learning_rate": 2.0201688558516324e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13522624969482422,
|
|
"step": 1990,
|
|
"valid_targets_mean": 6605.6,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 2.744837081229922,
|
|
"grad_norm": 0.19684510076736875,
|
|
"learning_rate": 2.0105647687175507e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08646862208843231,
|
|
"step": 1995,
|
|
"valid_targets_mean": 6378.9,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 2.7517209729233594,
|
|
"grad_norm": 0.1893668038549979,
|
|
"learning_rate": 2.000960437949509e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07368569076061249,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5468.6,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 2.758604864616797,
|
|
"grad_norm": 0.16937118963601522,
|
|
"learning_rate": 1.991356085032823e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08165471255779266,
|
|
"step": 2005,
|
|
"valid_targets_mean": 6290.5,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 2.765488756310234,
|
|
"grad_norm": 0.17417345586661112,
|
|
"learning_rate": 1.9817519314533203e-05,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05768326669931412,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4845.6,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 2.7723726480036714,
|
|
"grad_norm": 0.18330526531977226,
|
|
"learning_rate": 1.9721481986922314e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08753426373004913,
|
|
"step": 2015,
|
|
"valid_targets_mean": 6736.4,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 2.7792565396971085,
|
|
"grad_norm": 0.174366139502485,
|
|
"learning_rate": 1.9625451082210815e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09649921953678131,
|
|
"step": 2020,
|
|
"valid_targets_mean": 7333.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 2.786140431390546,
|
|
"grad_norm": 0.23761842585783033,
|
|
"learning_rate": 1.9529428814965855e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052613697946071625,
|
|
"step": 2025,
|
|
"valid_targets_mean": 1272.8,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 2.7930243230839835,
|
|
"grad_norm": 0.17619680289425318,
|
|
"learning_rate": 1.9433417399555385e-05,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08473365008831024,
|
|
"step": 2030,
|
|
"valid_targets_mean": 6596.2,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 2.799908214777421,
|
|
"grad_norm": 0.17424767345113856,
|
|
"learning_rate": 1.93374190500971e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06102645769715309,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5984.0,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 2.806792106470858,
|
|
"grad_norm": 0.19023496668990852,
|
|
"learning_rate": 1.924143598040738e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09187609702348709,
|
|
"step": 2040,
|
|
"valid_targets_mean": 6808.1,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 2.8136759981642956,
|
|
"grad_norm": 0.19414274218047128,
|
|
"learning_rate": 1.9145470403950246e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09320957958698273,
|
|
"step": 2045,
|
|
"valid_targets_mean": 6776.0,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 2.8205598898577326,
|
|
"grad_norm": 0.21805678499250258,
|
|
"learning_rate": 1.9049524533786306e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10997814685106277,
|
|
"step": 2050,
|
|
"valid_targets_mean": 7100.5,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 2.82744378155117,
|
|
"grad_norm": 0.1923498241545188,
|
|
"learning_rate": 1.8953600582521733e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06720973551273346,
|
|
"step": 2055,
|
|
"valid_targets_mean": 5628.6,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 2.8343276732446077,
|
|
"grad_norm": 0.1716392833907659,
|
|
"learning_rate": 1.8857700762257188e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09128379821777344,
|
|
"step": 2060,
|
|
"valid_targets_mean": 6859.2,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 2.841211564938045,
|
|
"grad_norm": 0.2612671468981132,
|
|
"learning_rate": 1.8761827284536894e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07055749744176865,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3765.6,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 2.8480954566314822,
|
|
"grad_norm": 0.26011044179693166,
|
|
"learning_rate": 1.8665982360297564e-05,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0651964545249939,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3591.2,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 2.8549793483249197,
|
|
"grad_norm": 0.2609222687498387,
|
|
"learning_rate": 1.8570168199817454e-05,
|
|
"loss": 0.1989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05844973772764206,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3575.5,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 2.861863240018357,
|
|
"grad_norm": 0.26962687587535233,
|
|
"learning_rate": 1.8474387012665357e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0727514773607254,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3438.5,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 2.8687471317117943,
|
|
"grad_norm": 0.2348996563907632,
|
|
"learning_rate": 1.8378641007649686e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058854710310697556,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3471.2,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 2.875631023405232,
|
|
"grad_norm": 0.24990418831063008,
|
|
"learning_rate": 1.8282932392767516e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06448241323232651,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3325.1,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 2.8825149150986693,
|
|
"grad_norm": 0.23030694879818017,
|
|
"learning_rate": 1.8187263375153664e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060683317482471466,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3658.6,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 2.8893988067921064,
|
|
"grad_norm": 0.23232648155318278,
|
|
"learning_rate": 1.809163616102981e-05,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06554745137691498,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3833.8,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 2.896282698485544,
|
|
"grad_norm": 0.25730041564114836,
|
|
"learning_rate": 1.799605295565357e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06680840253829956,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3820.5,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 2.903166590178981,
|
|
"grad_norm": 0.22753368510563451,
|
|
"learning_rate": 1.7900515963267708e-05,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06230275332927704,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3828.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 2.9100504818724184,
|
|
"grad_norm": 0.23769119372061534,
|
|
"learning_rate": 1.7805027387049266e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056608423590660095,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3212.1,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 2.916934373565856,
|
|
"grad_norm": 0.2446021340553814,
|
|
"learning_rate": 1.7709589429058756e-05,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059441857039928436,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3774.8,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 2.9238182652592934,
|
|
"grad_norm": 0.23890540039137606,
|
|
"learning_rate": 1.761420429018937e-05,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06359831988811493,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3427.3,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.9307021569527305,
|
|
"grad_norm": 0.23688083508729743,
|
|
"learning_rate": 1.7518874170116262e-05,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06328218430280685,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3810.5,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 2.937586048646168,
|
|
"grad_norm": 0.24155127909279164,
|
|
"learning_rate": 1.742360126724579e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06442797929048538,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3882.4,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 2.944469940339605,
|
|
"grad_norm": 0.3631819880378349,
|
|
"learning_rate": 1.732838777866483e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09788917005062103,
|
|
"step": 2140,
|
|
"valid_targets_mean": 6992.9,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 2.9513538320330426,
|
|
"grad_norm": 0.3172950606636833,
|
|
"learning_rate": 1.7233235900090094e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08133139461278915,
|
|
"step": 2145,
|
|
"valid_targets_mean": 6991.2,
|
|
"valid_targets_min": 301
|
|
},
|
|
{
|
|
"epoch": 2.95823772372648,
|
|
"grad_norm": 0.25813777821116024,
|
|
"learning_rate": 1.7138147825817516e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08661668747663498,
|
|
"step": 2150,
|
|
"valid_targets_mean": 7084.7,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 2.9651216154199176,
|
|
"grad_norm": 0.2575657738469496,
|
|
"learning_rate": 1.7043125748671637e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.087073914706707,
|
|
"step": 2155,
|
|
"valid_targets_mean": 7077.1,
|
|
"valid_targets_min": 3319
|
|
},
|
|
{
|
|
"epoch": 2.9720055071133546,
|
|
"grad_norm": 0.2259993012650762,
|
|
"learning_rate": 1.6948171859955054e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08060729503631592,
|
|
"step": 2160,
|
|
"valid_targets_mean": 6901.2,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 2.978889398806792,
|
|
"grad_norm": 0.21460575181010538,
|
|
"learning_rate": 1.6853288349397842e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07245530188083649,
|
|
"step": 2165,
|
|
"valid_targets_mean": 6402.1,
|
|
"valid_targets_min": 331
|
|
},
|
|
{
|
|
"epoch": 2.985773290500229,
|
|
"grad_norm": 0.26274959327700337,
|
|
"learning_rate": 1.675847740510712e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07267634570598602,
|
|
"step": 2170,
|
|
"valid_targets_mean": 6595.1,
|
|
"valid_targets_min": 111
|
|
},
|
|
{
|
|
"epoch": 2.9926571821936667,
|
|
"grad_norm": 0.20507915627157383,
|
|
"learning_rate": 1.666374121351652e-05,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07876935601234436,
|
|
"step": 2175,
|
|
"valid_targets_mean": 6902.3,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 2.9995410738871042,
|
|
"grad_norm": 0.2067208575198668,
|
|
"learning_rate": 1.6569081959335843e-05,
|
|
"loss": 0.2189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0786517783999443,
|
|
"step": 2180,
|
|
"valid_targets_mean": 7013.4,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 3.0055071133547497,
|
|
"grad_norm": 1.47143463512981,
|
|
"learning_rate": 1.6474501825500617e-05,
|
|
"loss": 0.5678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17598947882652283,
|
|
"step": 2185,
|
|
"valid_targets_mean": 6954.8,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 3.0123910050481872,
|
|
"grad_norm": 0.8296001361456135,
|
|
"learning_rate": 1.6380002993121768e-05,
|
|
"loss": 0.4782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16915714740753174,
|
|
"step": 2190,
|
|
"valid_targets_mean": 8220.0,
|
|
"valid_targets_min": 2842
|
|
},
|
|
{
|
|
"epoch": 3.0192748967416247,
|
|
"grad_norm": 0.5343323686375032,
|
|
"learning_rate": 1.6285587641435347e-05,
|
|
"loss": 0.4252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12750446796417236,
|
|
"step": 2195,
|
|
"valid_targets_mean": 7166.5,
|
|
"valid_targets_min": 2501
|
|
},
|
|
{
|
|
"epoch": 3.026158788435062,
|
|
"grad_norm": 0.3998460344540367,
|
|
"learning_rate": 1.6191257947752254e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12517595291137695,
|
|
"step": 2200,
|
|
"valid_targets_mean": 6987.4,
|
|
"valid_targets_min": 1709
|
|
},
|
|
{
|
|
"epoch": 3.0330426801284993,
|
|
"grad_norm": 0.2685805910516251,
|
|
"learning_rate": 1.609701608740803e-05,
|
|
"loss": 0.3665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12631681561470032,
|
|
"step": 2205,
|
|
"valid_targets_mean": 7609.8,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 3.039926571821937,
|
|
"grad_norm": 0.24262874044823118,
|
|
"learning_rate": 1.6002864233712684e-05,
|
|
"loss": 0.3682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11692699790000916,
|
|
"step": 2210,
|
|
"valid_targets_mean": 7067.3,
|
|
"valid_targets_min": 2449
|
|
},
|
|
{
|
|
"epoch": 3.046810463515374,
|
|
"grad_norm": 0.23285762855561765,
|
|
"learning_rate": 1.5908804557900595e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10911443829536438,
|
|
"step": 2215,
|
|
"valid_targets_mean": 7091.4,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 3.0536943552088114,
|
|
"grad_norm": 0.20546076130954244,
|
|
"learning_rate": 1.581483922908043e-05,
|
|
"loss": 0.3451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1265745759010315,
|
|
"step": 2220,
|
|
"valid_targets_mean": 7742.7,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 3.060578246902249,
|
|
"grad_norm": 0.22268477828232294,
|
|
"learning_rate": 1.572097041418512e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311422437429428,
|
|
"step": 2225,
|
|
"valid_targets_mean": 8509.0,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 3.067462138595686,
|
|
"grad_norm": 0.22540249838886647,
|
|
"learning_rate": 1.562720027792188e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11340901255607605,
|
|
"step": 2230,
|
|
"valid_targets_mean": 6931.7,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 3.0743460302891235,
|
|
"grad_norm": 0.19718354585484277,
|
|
"learning_rate": 1.55335309827223e-05,
|
|
"loss": 0.3357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10545062273740768,
|
|
"step": 2235,
|
|
"valid_targets_mean": 9712.0,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 3.081229921982561,
|
|
"grad_norm": 0.18725111683861656,
|
|
"learning_rate": 1.5439964688692497e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1086466908454895,
|
|
"step": 2240,
|
|
"valid_targets_mean": 9876.8,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 3.088113813675998,
|
|
"grad_norm": 0.1845673546242831,
|
|
"learning_rate": 1.534650355356325e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10268598794937134,
|
|
"step": 2245,
|
|
"valid_targets_mean": 9530.9,
|
|
"valid_targets_min": 5556
|
|
},
|
|
{
|
|
"epoch": 3.0949977053694355,
|
|
"grad_norm": 0.19522776849842052,
|
|
"learning_rate": 1.5253149732640305e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10937610268592834,
|
|
"step": 2250,
|
|
"valid_targets_mean": 9594.7,
|
|
"valid_targets_min": 4603
|
|
},
|
|
{
|
|
"epoch": 3.101881597062873,
|
|
"grad_norm": 0.18222363325054514,
|
|
"learning_rate": 1.515990537875459e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10362711548805237,
|
|
"step": 2255,
|
|
"valid_targets_mean": 9466.0,
|
|
"valid_targets_min": 6389
|
|
},
|
|
{
|
|
"epoch": 3.10876548875631,
|
|
"grad_norm": 0.18401641053290826,
|
|
"learning_rate": 1.5066772642212657e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.102295882999897,
|
|
"step": 2260,
|
|
"valid_targets_mean": 9956.4,
|
|
"valid_targets_min": 4744
|
|
},
|
|
{
|
|
"epoch": 3.1156493804497476,
|
|
"grad_norm": 0.18509759714005736,
|
|
"learning_rate": 1.4973753670747033e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10919614136219025,
|
|
"step": 2265,
|
|
"valid_targets_mean": 9960.5,
|
|
"valid_targets_min": 6172
|
|
},
|
|
{
|
|
"epoch": 3.122533272143185,
|
|
"grad_norm": 0.19550761601783884,
|
|
"learning_rate": 1.4880850609466726e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10317480564117432,
|
|
"step": 2270,
|
|
"valid_targets_mean": 9848.8,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.129417163836622,
|
|
"grad_norm": 0.18779537300856247,
|
|
"learning_rate": 1.478806560080771e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10043558478355408,
|
|
"step": 2275,
|
|
"valid_targets_mean": 9407.0,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 3.1363010555300597,
|
|
"grad_norm": 0.195833294224063,
|
|
"learning_rate": 1.469540078448358e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10033436119556427,
|
|
"step": 2280,
|
|
"valid_targets_mean": 9471.5,
|
|
"valid_targets_min": 4297
|
|
},
|
|
{
|
|
"epoch": 3.143184947223497,
|
|
"grad_norm": 0.19526343486199585,
|
|
"learning_rate": 1.4602858297436164e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10044103860855103,
|
|
"step": 2285,
|
|
"valid_targets_mean": 8577.6,
|
|
"valid_targets_min": 3933
|
|
},
|
|
{
|
|
"epoch": 3.1500688389169342,
|
|
"grad_norm": 0.19666326160932227,
|
|
"learning_rate": 1.451044027378627e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1077999621629715,
|
|
"step": 2290,
|
|
"valid_targets_mean": 9960.7,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 3.1569527306103717,
|
|
"grad_norm": 0.1827729428333937,
|
|
"learning_rate": 1.441814884478443e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09793984144926071,
|
|
"step": 2295,
|
|
"valid_targets_mean": 9948.3,
|
|
"valid_targets_min": 3344
|
|
},
|
|
{
|
|
"epoch": 3.1638366223038092,
|
|
"grad_norm": 0.18754922066894553,
|
|
"learning_rate": 1.4325986138761803e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0920855700969696,
|
|
"step": 2300,
|
|
"valid_targets_mean": 9342.2,
|
|
"valid_targets_min": 3715
|
|
},
|
|
{
|
|
"epoch": 3.1707205139972463,
|
|
"grad_norm": 0.17968909828736596,
|
|
"learning_rate": 1.4233954281081057e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10528083145618439,
|
|
"step": 2305,
|
|
"valid_targets_mean": 9508.4,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.177604405690684,
|
|
"grad_norm": 0.17574266851428197,
|
|
"learning_rate": 1.4142055394087386e-05,
|
|
"loss": 0.3072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09850088506937027,
|
|
"step": 2310,
|
|
"valid_targets_mean": 9982.4,
|
|
"valid_targets_min": 3744
|
|
},
|
|
{
|
|
"epoch": 3.1844882973841213,
|
|
"grad_norm": 0.18508025791924973,
|
|
"learning_rate": 1.4050291597059529e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10291430354118347,
|
|
"step": 2315,
|
|
"valid_targets_mean": 9832.3,
|
|
"valid_targets_min": 5100
|
|
},
|
|
{
|
|
"epoch": 3.1913721890775584,
|
|
"grad_norm": 0.1792881281784299,
|
|
"learning_rate": 1.3958665006160921e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10465849936008453,
|
|
"step": 2320,
|
|
"valid_targets_mean": 10835.8,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 3.198256080770996,
|
|
"grad_norm": 0.20216865453090171,
|
|
"learning_rate": 1.3867177734390895e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09900674223899841,
|
|
"step": 2325,
|
|
"valid_targets_mean": 10256.2,
|
|
"valid_targets_min": 5379
|
|
},
|
|
{
|
|
"epoch": 3.2051399724644334,
|
|
"grad_norm": 0.1945776010645318,
|
|
"learning_rate": 1.3775831891535947e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08786694705486298,
|
|
"step": 2330,
|
|
"valid_targets_mean": 9251.9,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 3.2120238641578704,
|
|
"grad_norm": 0.24172818869279109,
|
|
"learning_rate": 1.368462958412109e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10653791576623917,
|
|
"step": 2335,
|
|
"valid_targets_mean": 11007.1,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 3.218907755851308,
|
|
"grad_norm": 0.1898036433710197,
|
|
"learning_rate": 1.3593572915361243e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10172994434833527,
|
|
"step": 2340,
|
|
"valid_targets_mean": 9657.8,
|
|
"valid_targets_min": 2733
|
|
},
|
|
{
|
|
"epoch": 3.2257916475447455,
|
|
"grad_norm": 0.19332251447397075,
|
|
"learning_rate": 1.3502663985112788e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12016477435827255,
|
|
"step": 2345,
|
|
"valid_targets_mean": 10753.2,
|
|
"valid_targets_min": 5581
|
|
},
|
|
{
|
|
"epoch": 3.2326755392381825,
|
|
"grad_norm": 0.21510799857850824,
|
|
"learning_rate": 1.3411904889825084e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10003630071878433,
|
|
"step": 2350,
|
|
"valid_targets_mean": 9586.8,
|
|
"valid_targets_min": 3841
|
|
},
|
|
{
|
|
"epoch": 3.23955943093162,
|
|
"grad_norm": 0.18385705814635578,
|
|
"learning_rate": 1.3321297722492167e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09207296371459961,
|
|
"step": 2355,
|
|
"valid_targets_mean": 9004.5,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.2464433226250575,
|
|
"grad_norm": 0.187556404915227,
|
|
"learning_rate": 1.323084457260445e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08947405964136124,
|
|
"step": 2360,
|
|
"valid_targets_mean": 9294.9,
|
|
"valid_targets_min": 4130
|
|
},
|
|
{
|
|
"epoch": 3.2533272143184946,
|
|
"grad_norm": 0.2005331130468604,
|
|
"learning_rate": 1.3140547526100562e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1010352075099945,
|
|
"step": 2365,
|
|
"valid_targets_mean": 10179.0,
|
|
"valid_targets_min": 4267
|
|
},
|
|
{
|
|
"epoch": 3.260211106011932,
|
|
"grad_norm": 0.1968597743990302,
|
|
"learning_rate": 1.3050408665319237e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08902589976787567,
|
|
"step": 2370,
|
|
"valid_targets_mean": 8532.7,
|
|
"valid_targets_min": 417
|
|
},
|
|
{
|
|
"epoch": 3.2670949977053696,
|
|
"grad_norm": 0.25214344550494583,
|
|
"learning_rate": 1.2960430068951288e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11159469187259674,
|
|
"step": 2375,
|
|
"valid_targets_mean": 6747.5,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 3.2739788893988067,
|
|
"grad_norm": 0.25887393760787125,
|
|
"learning_rate": 1.2870613811991657e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10041764378547668,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5423.5,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 3.280862781092244,
|
|
"grad_norm": 2.2198067200851077,
|
|
"learning_rate": 1.2780961965691603e-05,
|
|
"loss": 0.4073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686154842376709,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3791.8,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 3.2877466727856817,
|
|
"grad_norm": 0.9924225603619974,
|
|
"learning_rate": 1.2691476597510898e-05,
|
|
"loss": 0.6873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2092369794845581,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4159.7,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.2946305644791187,
|
|
"grad_norm": 0.5742567377325489,
|
|
"learning_rate": 1.2602159771070178e-05,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18377545475959778,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3654.0,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 3.3015144561725562,
|
|
"grad_norm": 0.5576756466527528,
|
|
"learning_rate": 1.2513013546103335e-05,
|
|
"loss": 0.5911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16704589128494263,
|
|
"step": 2400,
|
|
"valid_targets_mean": 2945.1,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 3.3083983478659937,
|
|
"grad_norm": 0.3074568996613724,
|
|
"learning_rate": 1.2424039978410005e-05,
|
|
"loss": 0.5589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1972390115261078,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4179.1,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 3.315282239559431,
|
|
"grad_norm": 0.3032569694533608,
|
|
"learning_rate": 1.2335241119808203e-05,
|
|
"loss": 0.5219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1691218614578247,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4048.5,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 3.3221661312528683,
|
|
"grad_norm": 0.2765510565360534,
|
|
"learning_rate": 1.2246619018086973e-05,
|
|
"loss": 0.511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16525253653526306,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4272.8,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 3.329050022946306,
|
|
"grad_norm": 0.27395286901316585,
|
|
"learning_rate": 1.2158175716959178e-05,
|
|
"loss": 0.5239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2498304396867752,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4842.6,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 3.335933914639743,
|
|
"grad_norm": 0.29231799803753794,
|
|
"learning_rate": 1.2069913256014336e-05,
|
|
"loss": 0.5106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.200586199760437,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4255.8,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 3.3428178063331804,
|
|
"grad_norm": 0.27263355288284863,
|
|
"learning_rate": 1.1981833670671641e-05,
|
|
"loss": 0.5095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15810373425483704,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4050.4,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 3.349701698026618,
|
|
"grad_norm": 0.2836133175960233,
|
|
"learning_rate": 1.1893938992132983e-05,
|
|
"loss": 0.4842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15525907278060913,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3251.1,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.356585589720055,
|
|
"grad_norm": 0.26401709501959203,
|
|
"learning_rate": 1.1806231247336135e-05,
|
|
"loss": 0.4978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16985741257667542,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3961.1,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 3.3634694814134924,
|
|
"grad_norm": 0.23776953582310714,
|
|
"learning_rate": 1.1718712458907961e-05,
|
|
"loss": 0.4965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18274521827697754,
|
|
"step": 2445,
|
|
"valid_targets_mean": 5018.0,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 3.37035337310693,
|
|
"grad_norm": 0.2640952890965296,
|
|
"learning_rate": 1.1631384645117831e-05,
|
|
"loss": 0.496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1817808896303177,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4231.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 3.377237264800367,
|
|
"grad_norm": 0.28342473701431786,
|
|
"learning_rate": 1.154424981983106e-05,
|
|
"loss": 0.4972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1519908607006073,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3704.2,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 3.3841211564938045,
|
|
"grad_norm": 0.2951557771645841,
|
|
"learning_rate": 1.1457309992462434e-05,
|
|
"loss": 0.5127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16076089441776276,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3797.5,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 3.391005048187242,
|
|
"grad_norm": 0.2824279828368872,
|
|
"learning_rate": 1.1370567167929907e-05,
|
|
"loss": 0.5128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16737669706344604,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3499.0,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 3.397888939880679,
|
|
"grad_norm": 0.24191641839490738,
|
|
"learning_rate": 1.1284023346608345e-05,
|
|
"loss": 0.499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1770470142364502,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5092.9,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 3.4047728315741166,
|
|
"grad_norm": 0.25812009589666846,
|
|
"learning_rate": 1.1197680524283428e-05,
|
|
"loss": 0.5082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2065732181072235,
|
|
"step": 2475,
|
|
"valid_targets_mean": 4786.8,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 3.411656723267554,
|
|
"grad_norm": 0.2609107313315737,
|
|
"learning_rate": 1.111154069210557e-05,
|
|
"loss": 0.5111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.170506551861763,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4219.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 3.418540614960991,
|
|
"grad_norm": 0.4001514854422285,
|
|
"learning_rate": 1.1025605836544067e-05,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06078251823782921,
|
|
"step": 2485,
|
|
"valid_targets_mean": 5702.6,
|
|
"valid_targets_min": 2936
|
|
},
|
|
{
|
|
"epoch": 3.4254245066544287,
|
|
"grad_norm": 0.24598853218943317,
|
|
"learning_rate": 1.0939877939341206e-05,
|
|
"loss": 0.1557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05165702477097511,
|
|
"step": 2490,
|
|
"valid_targets_mean": 5383.9,
|
|
"valid_targets_min": 3193
|
|
},
|
|
{
|
|
"epoch": 3.432308398347866,
|
|
"grad_norm": 0.21842227235626044,
|
|
"learning_rate": 1.0854358977466664e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045272137969732285,
|
|
"step": 2495,
|
|
"valid_targets_mean": 5404.5,
|
|
"valid_targets_min": 3254
|
|
},
|
|
{
|
|
"epoch": 3.4391922900413032,
|
|
"grad_norm": 0.19248978802990788,
|
|
"learning_rate": 1.0769050923071813e-05,
|
|
"loss": 0.1404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04644084721803665,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5669.6,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 3.4460761817347407,
|
|
"grad_norm": 0.18077746801671077,
|
|
"learning_rate": 1.0683955743444348e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045123808085918427,
|
|
"step": 2505,
|
|
"valid_targets_mean": 5712.7,
|
|
"valid_targets_min": 2963
|
|
},
|
|
{
|
|
"epoch": 3.4529600734281782,
|
|
"grad_norm": 0.20884570432769242,
|
|
"learning_rate": 1.0599075400962793e-05,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042846180498600006,
|
|
"step": 2510,
|
|
"valid_targets_mean": 5044.9,
|
|
"valid_targets_min": 2759
|
|
},
|
|
{
|
|
"epoch": 3.4598439651216153,
|
|
"grad_norm": 0.18635172677850229,
|
|
"learning_rate": 1.0514411853051381e-05,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03951629251241684,
|
|
"step": 2515,
|
|
"valid_targets_mean": 5219.8,
|
|
"valid_targets_min": 2654
|
|
},
|
|
{
|
|
"epoch": 3.466727856815053,
|
|
"grad_norm": 0.16818273843917325,
|
|
"learning_rate": 1.0429967052134801e-05,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042340002954006195,
|
|
"step": 2520,
|
|
"valid_targets_mean": 5413.7,
|
|
"valid_targets_min": 3427
|
|
},
|
|
{
|
|
"epoch": 3.4736117485084903,
|
|
"grad_norm": 0.18228194282838256,
|
|
"learning_rate": 1.0345742945593269e-05,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0403037965297699,
|
|
"step": 2525,
|
|
"valid_targets_mean": 5462.3,
|
|
"valid_targets_min": 3458
|
|
},
|
|
{
|
|
"epoch": 3.4804956402019274,
|
|
"grad_norm": 0.17851242713475587,
|
|
"learning_rate": 1.026174147571751e-05,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044752124696969986,
|
|
"step": 2530,
|
|
"valid_targets_mean": 5651.2,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 3.487379531895365,
|
|
"grad_norm": 0.20049056795095394,
|
|
"learning_rate": 1.0177964579664085e-05,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03916207700967789,
|
|
"step": 2535,
|
|
"valid_targets_mean": 5431.2,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 3.4942634235888024,
|
|
"grad_norm": 0.18270835516537468,
|
|
"learning_rate": 1.0094414189410625e-05,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04059268906712532,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5013.1,
|
|
"valid_targets_min": 2521
|
|
},
|
|
{
|
|
"epoch": 3.5011473152822394,
|
|
"grad_norm": 0.20034351958133112,
|
|
"learning_rate": 1.0011092231711346e-05,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037493303418159485,
|
|
"step": 2545,
|
|
"valid_targets_mean": 5014.3,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 3.508031206975677,
|
|
"grad_norm": 0.17491191759515393,
|
|
"learning_rate": 9.928000628052552e-06,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0384521521627903,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5006.2,
|
|
"valid_targets_min": 2670
|
|
},
|
|
{
|
|
"epoch": 3.514915098669114,
|
|
"grad_norm": 0.18654968465114588,
|
|
"learning_rate": 9.845141294608378e-06,
|
|
"loss": 0.1254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04389902949333191,
|
|
"step": 2555,
|
|
"valid_targets_mean": 6038.5,
|
|
"valid_targets_min": 3631
|
|
},
|
|
{
|
|
"epoch": 3.5217989903625515,
|
|
"grad_norm": 0.18761525640500146,
|
|
"learning_rate": 9.762516142196563e-06,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045166172087192535,
|
|
"step": 2560,
|
|
"valid_targets_mean": 4937.4,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 3.528682882055989,
|
|
"grad_norm": 0.1849921246703477,
|
|
"learning_rate": 9.680127076234425e-06,
|
|
"loss": 0.1245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04124714434146881,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5467.6,
|
|
"valid_targets_min": 2990
|
|
},
|
|
{
|
|
"epoch": 3.5355667737494265,
|
|
"grad_norm": 0.18437113306824465,
|
|
"learning_rate": 9.597975996694872e-06,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04236382991075516,
|
|
"step": 2570,
|
|
"valid_targets_mean": 5111.8,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 3.5424506654428636,
|
|
"grad_norm": 0.18573334202464706,
|
|
"learning_rate": 9.516064798062625e-06,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04364463686943054,
|
|
"step": 2575,
|
|
"valid_targets_mean": 5763.9,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 3.549334557136301,
|
|
"grad_norm": 0.1915027801445681,
|
|
"learning_rate": 9.434395369290499e-06,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0378815159201622,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4922.0,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 3.556218448829738,
|
|
"grad_norm": 0.19162396288341896,
|
|
"learning_rate": 9.35296959375589e-06,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03863206505775452,
|
|
"step": 2585,
|
|
"valid_targets_mean": 5118.9,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 3.5631023405231756,
|
|
"grad_norm": 0.5191629748207883,
|
|
"learning_rate": 9.271789349217278e-06,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13411441445350647,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3042.4,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 3.569986232216613,
|
|
"grad_norm": 0.3085051395779566,
|
|
"learning_rate": 9.190856507770965e-06,
|
|
"loss": 0.4481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1711662858724594,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4385.5,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 3.5768701239100507,
|
|
"grad_norm": 0.4307024372042649,
|
|
"learning_rate": 9.11017293580791e-06,
|
|
"loss": 0.4122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13990041613578796,
|
|
"step": 2600,
|
|
"valid_targets_mean": 1813.8,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 3.5837540156034877,
|
|
"grad_norm": 0.2657723985016283,
|
|
"learning_rate": 9.02974049397066e-06,
|
|
"loss": 0.4207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12291470170021057,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3846.0,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 3.5906379072969252,
|
|
"grad_norm": 0.2615169033048777,
|
|
"learning_rate": 8.949561037110441e-06,
|
|
"loss": 0.4125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1354140192270279,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4735.8,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 3.5975217989903623,
|
|
"grad_norm": 0.28820899087310153,
|
|
"learning_rate": 8.869636414244432e-06,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12852361798286438,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3882.9,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 3.6044056906838,
|
|
"grad_norm": 0.2842744225252841,
|
|
"learning_rate": 8.789968468513057e-06,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16103368997573853,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4241.8,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 3.6112895823772373,
|
|
"grad_norm": 0.23028309888693424,
|
|
"learning_rate": 8.710559037137516e-06,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11136943101882935,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4934.8,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 3.618173474070675,
|
|
"grad_norm": 0.27031546941287016,
|
|
"learning_rate": 8.631409951377442e-06,
|
|
"loss": 0.4201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14648756384849548,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3635.7,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 3.625057365764112,
|
|
"grad_norm": 0.261440214703691,
|
|
"learning_rate": 8.552523036488605e-06,
|
|
"loss": 0.3955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11132633686065674,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3513.8,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 3.6319412574575494,
|
|
"grad_norm": 0.34750801613748267,
|
|
"learning_rate": 8.47390011168088e-06,
|
|
"loss": 0.4035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25909337401390076,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4413.0,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 3.6388251491509864,
|
|
"grad_norm": 0.25770762746792886,
|
|
"learning_rate": 8.395542990076256e-06,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10192108154296875,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3737.7,
|
|
"valid_targets_min": 1650
|
|
},
|
|
{
|
|
"epoch": 3.645709040844424,
|
|
"grad_norm": 0.2420881480617191,
|
|
"learning_rate": 8.317453478667061e-06,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10463223606348038,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3766.6,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 3.6525929325378614,
|
|
"grad_norm": 0.26662498322339967,
|
|
"learning_rate": 8.239633378274249e-06,
|
|
"loss": 0.4106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12207278609275818,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3043.5,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 3.659476824231299,
|
|
"grad_norm": 0.2508023395017142,
|
|
"learning_rate": 8.162084483505892e-06,
|
|
"loss": 0.4167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14572551846504211,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4615.1,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 3.666360715924736,
|
|
"grad_norm": 0.23556631300518252,
|
|
"learning_rate": 8.084808582715795e-06,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1286328136920929,
|
|
"step": 2665,
|
|
"valid_targets_mean": 5172.2,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 3.6732446076181735,
|
|
"grad_norm": 0.26361055666988087,
|
|
"learning_rate": 8.007807457962272e-06,
|
|
"loss": 0.414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15162985026836395,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4159.3,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 3.6801284993116106,
|
|
"grad_norm": 0.25764341984499367,
|
|
"learning_rate": 7.931082884966996e-06,
|
|
"loss": 0.402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14428415894508362,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4058.6,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 3.687012391005048,
|
|
"grad_norm": 0.2722663989969839,
|
|
"learning_rate": 7.854636633074128e-06,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17224925756454468,
|
|
"step": 2680,
|
|
"valid_targets_mean": 4401.0,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 3.6938962826984856,
|
|
"grad_norm": 0.25427623016618217,
|
|
"learning_rate": 7.778470465209417e-06,
|
|
"loss": 0.408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11766083538532257,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3505.7,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 3.700780174391923,
|
|
"grad_norm": 0.24889480025272662,
|
|
"learning_rate": 7.702586137839653e-06,
|
|
"loss": 0.4076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10833757370710373,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3860.3,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 3.70766406608536,
|
|
"grad_norm": 0.3707403029896572,
|
|
"learning_rate": 7.626985400932068e-06,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10806169360876083,
|
|
"step": 2695,
|
|
"valid_targets_mean": 8001.6,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 3.7145479577787976,
|
|
"grad_norm": 0.19669483141062702,
|
|
"learning_rate": 7.551669997914048e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07481403648853302,
|
|
"step": 2700,
|
|
"valid_targets_mean": 6021.0,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 3.7214318494722347,
|
|
"grad_norm": 0.23162368514613632,
|
|
"learning_rate": 7.47664166563286e-06,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11998644471168518,
|
|
"step": 2705,
|
|
"valid_targets_mean": 6741.6,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 3.728315741165672,
|
|
"grad_norm": 0.17228834530698153,
|
|
"learning_rate": 7.401902134315684e-06,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06548772752285004,
|
|
"step": 2710,
|
|
"valid_targets_mean": 5457.3,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 3.7351996328591097,
|
|
"grad_norm": 0.17408247867794982,
|
|
"learning_rate": 7.3274531275296265e-06,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08035491406917572,
|
|
"step": 2715,
|
|
"valid_targets_mean": 6283.3,
|
|
"valid_targets_min": 146
|
|
},
|
|
{
|
|
"epoch": 3.7420835245525472,
|
|
"grad_norm": 0.17974994899581664,
|
|
"learning_rate": 7.253296362142048e-06,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07769985496997833,
|
|
"step": 2720,
|
|
"valid_targets_mean": 6143.3,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 3.7489674162459843,
|
|
"grad_norm": 0.1675477236149244,
|
|
"learning_rate": 7.179433548280892e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06394631415605545,
|
|
"step": 2725,
|
|
"valid_targets_mean": 6087.1,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 3.755851307939422,
|
|
"grad_norm": 0.1733213132187292,
|
|
"learning_rate": 7.105866389295335e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07828719168901443,
|
|
"step": 2730,
|
|
"valid_targets_mean": 6520.0,
|
|
"valid_targets_min": 178
|
|
},
|
|
{
|
|
"epoch": 3.762735199632859,
|
|
"grad_norm": 0.1785269252204138,
|
|
"learning_rate": 7.032596581716422e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08230659365653992,
|
|
"step": 2735,
|
|
"valid_targets_mean": 6715.5,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 3.7696190913262964,
|
|
"grad_norm": 0.21946429665501535,
|
|
"learning_rate": 6.959625815218018e-06,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045567117631435394,
|
|
"step": 2740,
|
|
"valid_targets_mean": 1099.6,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 3.776502983019734,
|
|
"grad_norm": 0.17397920116832125,
|
|
"learning_rate": 6.8869557725777836e-06,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05601717159152031,
|
|
"step": 2745,
|
|
"valid_targets_mean": 5019.2,
|
|
"valid_targets_min": 247
|
|
},
|
|
{
|
|
"epoch": 3.7833868747131714,
|
|
"grad_norm": 0.16751761187770453,
|
|
"learning_rate": 6.814588129638393e-06,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09275075793266296,
|
|
"step": 2750,
|
|
"valid_targets_mean": 7409.7,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 3.7902707664066084,
|
|
"grad_norm": 0.17280417479859242,
|
|
"learning_rate": 6.742524555268886e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05152815580368042,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4764.1,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 3.797154658100046,
|
|
"grad_norm": 0.16496609026025888,
|
|
"learning_rate": 6.6707667113261956e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08169304579496384,
|
|
"step": 2760,
|
|
"valid_targets_mean": 6958.2,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 3.804038549793483,
|
|
"grad_norm": 0.19327664164399166,
|
|
"learning_rate": 6.599316252616796e-06,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13077764213085175,
|
|
"step": 2765,
|
|
"valid_targets_mean": 7221.4,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 3.8109224414869205,
|
|
"grad_norm": 0.17470660816395175,
|
|
"learning_rate": 6.528174826858553e-06,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09528367966413498,
|
|
"step": 2770,
|
|
"valid_targets_mean": 7585.1,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 3.817806333180358,
|
|
"grad_norm": 0.18482290478544883,
|
|
"learning_rate": 6.457344074642751e-06,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10286924988031387,
|
|
"step": 2775,
|
|
"valid_targets_mean": 6407.4,
|
|
"valid_targets_min": 201
|
|
},
|
|
{
|
|
"epoch": 3.8246902248737955,
|
|
"grad_norm": 0.1666475205795951,
|
|
"learning_rate": 6.386825629396209e-06,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06370923668146133,
|
|
"step": 2780,
|
|
"valid_targets_mean": 6268.8,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 3.8315741165672326,
|
|
"grad_norm": 0.1812248617883446,
|
|
"learning_rate": 6.316621117343657e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1033419668674469,
|
|
"step": 2785,
|
|
"valid_targets_mean": 6778.3,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 3.83845800826067,
|
|
"grad_norm": 0.3434779028623829,
|
|
"learning_rate": 6.246732157470205e-06,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057458698749542236,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3450.3,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 3.845341899954107,
|
|
"grad_norm": 0.2506659571430072,
|
|
"learning_rate": 6.177160361484038e-06,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05707380175590515,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3540.6,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 3.8522257916475446,
|
|
"grad_norm": 0.25559819516599763,
|
|
"learning_rate": 6.107907333779204e-06,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0660262405872345,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3877.7,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 3.859109683340982,
|
|
"grad_norm": 0.23529237449017507,
|
|
"learning_rate": 6.038974671398678e-06,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06375281512737274,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4024.9,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 3.8659935750344196,
|
|
"grad_norm": 0.2643384345756322,
|
|
"learning_rate": 5.970363963997443e-06,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0625511109828949,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3796.8,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 3.8728774667278567,
|
|
"grad_norm": 0.2630480400208193,
|
|
"learning_rate": 5.902076793805933e-06,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06277286261320114,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4011.0,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 3.879761358421294,
|
|
"grad_norm": 0.2836202603651868,
|
|
"learning_rate": 5.834114735593464e-06,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06746001541614532,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4130.9,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 3.8866452501147313,
|
|
"grad_norm": 0.2443667600066765,
|
|
"learning_rate": 5.766479356631971e-06,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05533027648925781,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3678.6,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.893529141808169,
|
|
"grad_norm": 0.23675573830153043,
|
|
"learning_rate": 5.69917221665983e-06,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05468790978193283,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3560.1,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 3.9004130335016063,
|
|
"grad_norm": 0.2334249200769521,
|
|
"learning_rate": 5.632194867845906e-06,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05852728337049484,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3700.8,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 3.907296925195044,
|
|
"grad_norm": 0.24410413934950573,
|
|
"learning_rate": 5.5655488547537525e-06,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05753038823604584,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3890.9,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 3.914180816888481,
|
|
"grad_norm": 0.24063324214163975,
|
|
"learning_rate": 5.499235714306013e-06,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05531303212046623,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3898.9,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 3.9210647085819184,
|
|
"grad_norm": 0.3744111376550879,
|
|
"learning_rate": 5.433256975748935e-06,
|
|
"loss": 0.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05198235064744949,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3726.0,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 3.9279486002753554,
|
|
"grad_norm": 0.23540898155652942,
|
|
"learning_rate": 5.367614160617147e-06,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062141187489032745,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3976.4,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 3.934832491968793,
|
|
"grad_norm": 0.25445169989275357,
|
|
"learning_rate": 5.3023087826985395e-06,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05715780705213547,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3581.7,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 3.9417163836622304,
|
|
"grad_norm": 0.6096258105448606,
|
|
"learning_rate": 5.237342347999386e-06,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08614245802164078,
|
|
"step": 2865,
|
|
"valid_targets_mean": 6912.2,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 3.948600275355668,
|
|
"grad_norm": 0.36201237541972986,
|
|
"learning_rate": 5.1727163547095835e-06,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0755876824259758,
|
|
"step": 2870,
|
|
"valid_targets_mean": 6841.5,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 3.955484167049105,
|
|
"grad_norm": 0.2473513142917225,
|
|
"learning_rate": 5.108432293168118e-06,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08236972987651825,
|
|
"step": 2875,
|
|
"valid_targets_mean": 7884.5,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 3.9623680587425425,
|
|
"grad_norm": 0.254514479375607,
|
|
"learning_rate": 5.044491645828684e-06,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07513093948364258,
|
|
"step": 2880,
|
|
"valid_targets_mean": 6715.9,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 3.9692519504359796,
|
|
"grad_norm": 0.24006908726730125,
|
|
"learning_rate": 4.980895887225529e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05822212994098663,
|
|
"step": 2885,
|
|
"valid_targets_mean": 5647.7,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 3.976135842129417,
|
|
"grad_norm": 0.2289050141214267,
|
|
"learning_rate": 4.917646483939405e-06,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.078819140791893,
|
|
"step": 2890,
|
|
"valid_targets_mean": 7380.5,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 3.9830197338228546,
|
|
"grad_norm": 0.216564526739044,
|
|
"learning_rate": 4.85474489456379e-06,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06552626937627792,
|
|
"step": 2895,
|
|
"valid_targets_mean": 6277.5,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 3.989903625516292,
|
|
"grad_norm": 0.1990891081128417,
|
|
"learning_rate": 4.792192569671202e-06,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06608976423740387,
|
|
"step": 2900,
|
|
"valid_targets_mean": 6117.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 3.996787517209729,
|
|
"grad_norm": 0.1960679039089432,
|
|
"learning_rate": 4.7299909517798034e-06,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06078875809907913,
|
|
"step": 2905,
|
|
"valid_targets_mean": 5683.7,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 4.002753556677375,
|
|
"grad_norm": 2.395952944498442,
|
|
"learning_rate": 4.668141475320096e-06,
|
|
"loss": 0.3927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16962723433971405,
|
|
"step": 2910,
|
|
"valid_targets_mean": 6370.5,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 4.009637448370812,
|
|
"grad_norm": 1.648670888994138,
|
|
"learning_rate": 4.6066455666018664e-06,
|
|
"loss": 0.5545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19499695301055908,
|
|
"step": 2915,
|
|
"valid_targets_mean": 7651.2,
|
|
"valid_targets_min": 2581
|
|
},
|
|
{
|
|
"epoch": 4.01652134006425,
|
|
"grad_norm": 0.9399053935552177,
|
|
"learning_rate": 4.5455046437812515e-06,
|
|
"loss": 0.4776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15764357149600983,
|
|
"step": 2920,
|
|
"valid_targets_mean": 7917.3,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 4.023405231757687,
|
|
"grad_norm": 0.6663207234555762,
|
|
"learning_rate": 4.4847201168280985e-06,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14368441700935364,
|
|
"step": 2925,
|
|
"valid_targets_mean": 7465.7,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 4.030289123451125,
|
|
"grad_norm": 0.5122623048638776,
|
|
"learning_rate": 4.42429338749339e-06,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11772125959396362,
|
|
"step": 2930,
|
|
"valid_targets_mean": 6986.5,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 4.037173015144562,
|
|
"grad_norm": 0.33779981131960823,
|
|
"learning_rate": 4.3642258492769685e-06,
|
|
"loss": 0.3676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10366019606590271,
|
|
"step": 2935,
|
|
"valid_targets_mean": 6226.8,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 4.044056906837999,
|
|
"grad_norm": 0.2703695467372586,
|
|
"learning_rate": 4.3045188873953506e-06,
|
|
"loss": 0.3527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10385061055421829,
|
|
"step": 2940,
|
|
"valid_targets_mean": 6504.0,
|
|
"valid_targets_min": 3466
|
|
},
|
|
{
|
|
"epoch": 4.050940798531436,
|
|
"grad_norm": 0.2448441853051352,
|
|
"learning_rate": 4.24517387874982e-06,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11456277221441269,
|
|
"step": 2945,
|
|
"valid_targets_mean": 7776.8,
|
|
"valid_targets_min": 2749
|
|
},
|
|
{
|
|
"epoch": 4.057824690224874,
|
|
"grad_norm": 0.21404234435367878,
|
|
"learning_rate": 4.186192191894656e-06,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09924231469631195,
|
|
"step": 2950,
|
|
"valid_targets_mean": 6354.8,
|
|
"valid_targets_min": 1941
|
|
},
|
|
{
|
|
"epoch": 4.064708581918311,
|
|
"grad_norm": 0.19782479723452737,
|
|
"learning_rate": 4.127575187005597e-06,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09340573847293854,
|
|
"step": 2955,
|
|
"valid_targets_mean": 6384.4,
|
|
"valid_targets_min": 2441
|
|
},
|
|
{
|
|
"epoch": 4.071592473611749,
|
|
"grad_norm": 0.19555802078655035,
|
|
"learning_rate": 4.069324215848438e-06,
|
|
"loss": 0.3356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11382173001766205,
|
|
"step": 2960,
|
|
"valid_targets_mean": 7465.9,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 4.078476365305185,
|
|
"grad_norm": 0.19204541810372228,
|
|
"learning_rate": 4.011440621747879e-06,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09361422061920166,
|
|
"step": 2965,
|
|
"valid_targets_mean": 8703.7,
|
|
"valid_targets_min": 4392
|
|
},
|
|
{
|
|
"epoch": 4.085360256998623,
|
|
"grad_norm": 0.17690580645771956,
|
|
"learning_rate": 3.953925739556556e-06,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09472465515136719,
|
|
"step": 2970,
|
|
"valid_targets_mean": 9525.9,
|
|
"valid_targets_min": 3807
|
|
},
|
|
{
|
|
"epoch": 4.09224414869206,
|
|
"grad_norm": 0.17538796102613904,
|
|
"learning_rate": 3.896780895624235e-06,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09817524254322052,
|
|
"step": 2975,
|
|
"valid_targets_mean": 8976.7,
|
|
"valid_targets_min": 2713
|
|
},
|
|
{
|
|
"epoch": 4.099128040385498,
|
|
"grad_norm": 0.16782987576999478,
|
|
"learning_rate": 3.840007407767235e-06,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.100134938955307,
|
|
"step": 2980,
|
|
"valid_targets_mean": 9808.0,
|
|
"valid_targets_min": 5333
|
|
},
|
|
{
|
|
"epoch": 4.1060119320789354,
|
|
"grad_norm": 0.16950335218205084,
|
|
"learning_rate": 3.7836065852380355e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09137974679470062,
|
|
"step": 2985,
|
|
"valid_targets_mean": 8583.8,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 4.112895823772373,
|
|
"grad_norm": 0.1699684602922145,
|
|
"learning_rate": 3.727579728695103e-06,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09598828107118607,
|
|
"step": 2990,
|
|
"valid_targets_mean": 8976.7,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 4.1197797154658105,
|
|
"grad_norm": 0.16269551372279134,
|
|
"learning_rate": 3.671928130172864e-06,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0977531224489212,
|
|
"step": 2995,
|
|
"valid_targets_mean": 8862.6,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 4.126663607159247,
|
|
"grad_norm": 0.16710826243141283,
|
|
"learning_rate": 3.6166530730519413e-06,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09829722344875336,
|
|
"step": 3000,
|
|
"valid_targets_mean": 9068.4,
|
|
"valid_targets_min": 532
|
|
},
|
|
{
|
|
"epoch": 4.133547498852685,
|
|
"grad_norm": 0.15910196663534398,
|
|
"learning_rate": 3.561755832029519e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10500204563140869,
|
|
"step": 3005,
|
|
"valid_targets_mean": 10089.5,
|
|
"valid_targets_min": 5419
|
|
},
|
|
{
|
|
"epoch": 4.140431390546122,
|
|
"grad_norm": 0.17704030770547838,
|
|
"learning_rate": 3.5072376730899937e-06,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09301409870386124,
|
|
"step": 3010,
|
|
"valid_targets_mean": 9003.8,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 4.14731528223956,
|
|
"grad_norm": 0.1710309935861551,
|
|
"learning_rate": 3.453099853475739e-06,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10229654610157013,
|
|
"step": 3015,
|
|
"valid_targets_mean": 10012.9,
|
|
"valid_targets_min": 5197
|
|
},
|
|
{
|
|
"epoch": 4.154199173932997,
|
|
"grad_norm": 0.16458596144589727,
|
|
"learning_rate": 3.3993436216581556e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09989041090011597,
|
|
"step": 3020,
|
|
"valid_targets_mean": 9826.7,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 4.161083065626434,
|
|
"grad_norm": 0.1621123955498612,
|
|
"learning_rate": 3.3459702173088114e-06,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08722086995840073,
|
|
"step": 3025,
|
|
"valid_targets_mean": 8842.1,
|
|
"valid_targets_min": 4239
|
|
},
|
|
{
|
|
"epoch": 4.167966957319871,
|
|
"grad_norm": 0.1623556063331001,
|
|
"learning_rate": 3.2929808712709364e-06,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08920854330062866,
|
|
"step": 3030,
|
|
"valid_targets_mean": 9048.3,
|
|
"valid_targets_min": 5085
|
|
},
|
|
{
|
|
"epoch": 4.174850849013309,
|
|
"grad_norm": 0.15731886013609803,
|
|
"learning_rate": 3.2403768055309782e-06,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09582354873418808,
|
|
"step": 3035,
|
|
"valid_targets_mean": 9484.0,
|
|
"valid_targets_min": 4512
|
|
},
|
|
{
|
|
"epoch": 4.181734740706746,
|
|
"grad_norm": 0.15101610768604282,
|
|
"learning_rate": 3.1881592331904486e-06,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09862025827169418,
|
|
"step": 3040,
|
|
"valid_targets_mean": 9844.3,
|
|
"valid_targets_min": 3863
|
|
},
|
|
{
|
|
"epoch": 4.188618632400184,
|
|
"grad_norm": 0.15623253593056888,
|
|
"learning_rate": 3.136329358437937e-06,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08751824498176575,
|
|
"step": 3045,
|
|
"valid_targets_mean": 9963.1,
|
|
"valid_targets_min": 5225
|
|
},
|
|
{
|
|
"epoch": 4.195502524093621,
|
|
"grad_norm": 0.16134010073160548,
|
|
"learning_rate": 3.084888376521349e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08784323185682297,
|
|
"step": 3050,
|
|
"valid_targets_mean": 9133.7,
|
|
"valid_targets_min": 2782
|
|
},
|
|
{
|
|
"epoch": 4.202386415787059,
|
|
"grad_norm": 0.16838974268135262,
|
|
"learning_rate": 3.033837473720329e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10435424745082855,
|
|
"step": 3055,
|
|
"valid_targets_mean": 9841.2,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 4.209270307480495,
|
|
"grad_norm": 0.16279332645708683,
|
|
"learning_rate": 2.9831778273189306e-06,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08671228587627411,
|
|
"step": 3060,
|
|
"valid_targets_mean": 9260.1,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 4.216154199173933,
|
|
"grad_norm": 0.1825984338791435,
|
|
"learning_rate": 2.9329106055784364e-06,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09681597352027893,
|
|
"step": 3065,
|
|
"valid_targets_mean": 9212.2,
|
|
"valid_targets_min": 3467
|
|
},
|
|
{
|
|
"epoch": 4.22303809086737,
|
|
"grad_norm": 0.15677839672709085,
|
|
"learning_rate": 2.883036967710431e-06,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09850604832172394,
|
|
"step": 3070,
|
|
"valid_targets_mean": 10565.9,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 4.229921982560808,
|
|
"grad_norm": 0.21256236658118818,
|
|
"learning_rate": 2.8335580638500703e-06,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09888806939125061,
|
|
"step": 3075,
|
|
"valid_targets_mean": 10235.8,
|
|
"valid_targets_min": 5141
|
|
},
|
|
{
|
|
"epoch": 4.236805874254245,
|
|
"grad_norm": 0.1652684573375055,
|
|
"learning_rate": 2.7844750350295635e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10100476443767548,
|
|
"step": 3080,
|
|
"valid_targets_mean": 10594.5,
|
|
"valid_targets_min": 4701
|
|
},
|
|
{
|
|
"epoch": 4.243689765947682,
|
|
"grad_norm": 0.1713255908842138,
|
|
"learning_rate": 2.7357890131518395e-06,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08998183906078339,
|
|
"step": 3085,
|
|
"valid_targets_mean": 9161.7,
|
|
"valid_targets_min": 3022
|
|
},
|
|
{
|
|
"epoch": 4.2505736576411195,
|
|
"grad_norm": 0.16553307695173833,
|
|
"learning_rate": 2.6875011209644617e-06,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09085310250520706,
|
|
"step": 3090,
|
|
"valid_targets_mean": 9593.7,
|
|
"valid_targets_min": 5120
|
|
},
|
|
{
|
|
"epoch": 4.257457549334557,
|
|
"grad_norm": 0.17160681639906475,
|
|
"learning_rate": 2.639612472033726e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08973085880279541,
|
|
"step": 3095,
|
|
"valid_targets_mean": 9896.2,
|
|
"valid_targets_min": 3759
|
|
},
|
|
{
|
|
"epoch": 4.2643414410279945,
|
|
"grad_norm": 0.16123604490750948,
|
|
"learning_rate": 2.592124170718999e-06,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09492038190364838,
|
|
"step": 3100,
|
|
"valid_targets_mean": 9676.5,
|
|
"valid_targets_min": 4171
|
|
},
|
|
{
|
|
"epoch": 4.271225332721432,
|
|
"grad_norm": 0.20844821277823775,
|
|
"learning_rate": 2.545037312147223e-06,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09212687611579895,
|
|
"step": 3105,
|
|
"valid_targets_mean": 6269.3,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 4.2781092244148695,
|
|
"grad_norm": 0.2188431410916334,
|
|
"learning_rate": 2.4983529821876885e-06,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09872142970561981,
|
|
"step": 3110,
|
|
"valid_targets_mean": 6197.9,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 4.284993116108307,
|
|
"grad_norm": 1.719226340993559,
|
|
"learning_rate": 2.4520722574269697e-06,
|
|
"loss": 0.6151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27690717577934265,
|
|
"step": 3115,
|
|
"valid_targets_mean": 5028.6,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 4.291877007801744,
|
|
"grad_norm": 1.4178092966572517,
|
|
"learning_rate": 2.4061962051441113e-06,
|
|
"loss": 0.6474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19200366735458374,
|
|
"step": 3120,
|
|
"valid_targets_mean": 3933.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 4.298760899495181,
|
|
"grad_norm": 0.987071906372295,
|
|
"learning_rate": 2.36072588328601e-06,
|
|
"loss": 0.6191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17249909043312073,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3912.4,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 4.305644791188619,
|
|
"grad_norm": 0.6333455736156541,
|
|
"learning_rate": 2.31566234044303e-06,
|
|
"loss": 0.5633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1948523372411728,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3427.5,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 4.312528682882056,
|
|
"grad_norm": 0.41971667810569413,
|
|
"learning_rate": 2.2710066158248025e-06,
|
|
"loss": 0.5424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1517712026834488,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4285.9,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 4.319412574575494,
|
|
"grad_norm": 0.3805804910200558,
|
|
"learning_rate": 2.226759739236262e-06,
|
|
"loss": 0.517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19778530299663544,
|
|
"step": 3140,
|
|
"valid_targets_mean": 4571.2,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 4.32629646626893,
|
|
"grad_norm": 0.3279172632768147,
|
|
"learning_rate": 2.1829227310539245e-06,
|
|
"loss": 0.5062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1429353952407837,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3360.0,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 4.333180357962368,
|
|
"grad_norm": 0.30812203887995343,
|
|
"learning_rate": 2.1394966022023246e-06,
|
|
"loss": 0.506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15199241042137146,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3748.8,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 4.340064249655805,
|
|
"grad_norm": 0.2681054596093381,
|
|
"learning_rate": 2.0964823541307157e-06,
|
|
"loss": 0.4878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21830233931541443,
|
|
"step": 3155,
|
|
"valid_targets_mean": 4315.9,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 4.346948141349243,
|
|
"grad_norm": 0.23204534962247023,
|
|
"learning_rate": 2.0538809787899728e-06,
|
|
"loss": 0.4748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1714772880077362,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5165.7,
|
|
"valid_targets_min": 1332
|
|
},
|
|
{
|
|
"epoch": 4.35383203304268,
|
|
"grad_norm": 0.25226657135645386,
|
|
"learning_rate": 2.0116934586097294e-06,
|
|
"loss": 0.4778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14213097095489502,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4124.3,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 4.360715924736118,
|
|
"grad_norm": 0.2715861692969419,
|
|
"learning_rate": 1.9699207664757057e-06,
|
|
"loss": 0.4836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16056758165359497,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4181.1,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 4.367599816429555,
|
|
"grad_norm": 0.2602540839903562,
|
|
"learning_rate": 1.928563865707278e-06,
|
|
"loss": 0.458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.137937992811203,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3683.8,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 4.374483708122992,
|
|
"grad_norm": 0.2559875333171663,
|
|
"learning_rate": 1.88762371003526e-06,
|
|
"loss": 0.4951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24103213846683502,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4964.7,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 4.381367599816429,
|
|
"grad_norm": 0.2349098552726344,
|
|
"learning_rate": 1.8471012435799317e-06,
|
|
"loss": 0.4678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16132110357284546,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4663.9,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 4.388251491509867,
|
|
"grad_norm": 0.2367676970278582,
|
|
"learning_rate": 1.8069974008292246e-06,
|
|
"loss": 0.5026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1740289032459259,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5404.8,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 4.395135383203304,
|
|
"grad_norm": 0.2414666913515072,
|
|
"learning_rate": 1.7673131066172233e-06,
|
|
"loss": 0.4813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14912189543247223,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4281.5,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 4.402019274896742,
|
|
"grad_norm": 0.2522485667461358,
|
|
"learning_rate": 1.7280492761027767e-06,
|
|
"loss": 0.4806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18337562680244446,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4570.4,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 4.408903166590179,
|
|
"grad_norm": 0.2513303316228368,
|
|
"learning_rate": 1.6892068147484563e-06,
|
|
"loss": 0.4986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18488086760044098,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4505.6,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 4.415787058283616,
|
|
"grad_norm": 0.22856795174321398,
|
|
"learning_rate": 1.650786618299629e-06,
|
|
"loss": 0.4708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13014987111091614,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4379.0,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 4.422670949977054,
|
|
"grad_norm": 0.4103495629302222,
|
|
"learning_rate": 1.6127895727638354e-06,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05053357779979706,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5018.2,
|
|
"valid_targets_min": 2523
|
|
},
|
|
{
|
|
"epoch": 4.429554841670491,
|
|
"grad_norm": 0.26594803976153025,
|
|
"learning_rate": 1.5752165543903108e-06,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04512697458267212,
|
|
"step": 3220,
|
|
"valid_targets_mean": 5262.3,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 4.436438733363929,
|
|
"grad_norm": 0.2128042462301709,
|
|
"learning_rate": 1.5380684296498372e-06,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03946739807724953,
|
|
"step": 3225,
|
|
"valid_targets_mean": 5112.8,
|
|
"valid_targets_min": 3095
|
|
},
|
|
{
|
|
"epoch": 4.443322625057366,
|
|
"grad_norm": 0.19728120260756227,
|
|
"learning_rate": 1.5013460552147119e-06,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03898797184228897,
|
|
"step": 3230,
|
|
"valid_targets_mean": 5352.1,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 4.450206516750804,
|
|
"grad_norm": 0.18576983317859747,
|
|
"learning_rate": 1.4650502779390285e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04268582910299301,
|
|
"step": 3235,
|
|
"valid_targets_mean": 5610.1,
|
|
"valid_targets_min": 3069
|
|
},
|
|
{
|
|
"epoch": 4.45709040844424,
|
|
"grad_norm": 0.1864794932574978,
|
|
"learning_rate": 1.4291819348391034e-06,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040051594376564026,
|
|
"step": 3240,
|
|
"valid_targets_mean": 5206.3,
|
|
"valid_targets_min": 3294
|
|
},
|
|
{
|
|
"epoch": 4.463974300137678,
|
|
"grad_norm": 0.17482555966140143,
|
|
"learning_rate": 1.3937418530742287e-06,
|
|
"loss": 0.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042181774973869324,
|
|
"step": 3245,
|
|
"valid_targets_mean": 5573.9,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 4.470858191831115,
|
|
"grad_norm": 0.18070571117324402,
|
|
"learning_rate": 1.3587308499275475e-06,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043698303401470184,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5469.3,
|
|
"valid_targets_min": 3506
|
|
},
|
|
{
|
|
"epoch": 4.477742083524553,
|
|
"grad_norm": 0.1643633780132126,
|
|
"learning_rate": 1.3241497327872476e-06,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040100231766700745,
|
|
"step": 3255,
|
|
"valid_targets_mean": 5436.2,
|
|
"valid_targets_min": 2886
|
|
},
|
|
{
|
|
"epoch": 4.48462597521799,
|
|
"grad_norm": 0.17207536132396473,
|
|
"learning_rate": 1.2899992991279086e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044177521020174026,
|
|
"step": 3260,
|
|
"valid_targets_mean": 5540.5,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 4.491509866911427,
|
|
"grad_norm": 0.16766746009169853,
|
|
"learning_rate": 1.2562803364921261e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041511788964271545,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5831.1,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 4.498393758604864,
|
|
"grad_norm": 0.17030163346260857,
|
|
"learning_rate": 1.2229936224723483e-06,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03760635852813721,
|
|
"step": 3270,
|
|
"valid_targets_mean": 5347.5,
|
|
"valid_targets_min": 2417
|
|
},
|
|
{
|
|
"epoch": 4.505277650298302,
|
|
"grad_norm": 0.17085424627308554,
|
|
"learning_rate": 1.190139924692959e-06,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03664751350879669,
|
|
"step": 3275,
|
|
"valid_targets_mean": 5258.5,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 4.512161541991739,
|
|
"grad_norm": 0.17522494699655267,
|
|
"learning_rate": 1.157720000792546e-06,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037477195262908936,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5099.8,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 4.519045433685177,
|
|
"grad_norm": 0.17379587981619676,
|
|
"learning_rate": 1.125734598406447e-06,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04031280428171158,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5578.4,
|
|
"valid_targets_min": 2798
|
|
},
|
|
{
|
|
"epoch": 4.525929325378614,
|
|
"grad_norm": 0.1695852085710305,
|
|
"learning_rate": 1.0941844551495162e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03369366377592087,
|
|
"step": 3290,
|
|
"valid_targets_mean": 5036.6,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 4.532813217072052,
|
|
"grad_norm": 0.1786118550797064,
|
|
"learning_rate": 1.0630702985990914e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03811822086572647,
|
|
"step": 3295,
|
|
"valid_targets_mean": 5270.8,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 4.5396971087654885,
|
|
"grad_norm": 0.16549826571880374,
|
|
"learning_rate": 1.0323928462782363e-06,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03811001777648926,
|
|
"step": 3300,
|
|
"valid_targets_mean": 5462.3,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 4.546581000458926,
|
|
"grad_norm": 0.16459854028246523,
|
|
"learning_rate": 1.0021528056391782e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034841179847717285,
|
|
"step": 3305,
|
|
"valid_targets_mean": 5422.2,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 4.5534648921523635,
|
|
"grad_norm": 0.17263786107418352,
|
|
"learning_rate": 9.723508740470123e-07,
|
|
"loss": 0.1115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037016794085502625,
|
|
"step": 3310,
|
|
"valid_targets_mean": 5195.3,
|
|
"valid_targets_min": 3110
|
|
},
|
|
{
|
|
"epoch": 4.560348783845801,
|
|
"grad_norm": 0.6697689536803294,
|
|
"learning_rate": 9.429877387635944e-07,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16726571321487427,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3959.7,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 4.5672326755392385,
|
|
"grad_norm": 0.5264114313853077,
|
|
"learning_rate": 9.140640769317177e-07,
|
|
"loss": 0.4262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12650127708911896,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3057.9,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 4.574116567232675,
|
|
"grad_norm": 0.392961284141338,
|
|
"learning_rate": 8.855805555594777e-07,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11679504811763763,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4226.2,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 4.581000458926113,
|
|
"grad_norm": 0.3220633125591813,
|
|
"learning_rate": 8.575378315048999e-07,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1164650171995163,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3537.5,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 4.58788435061955,
|
|
"grad_norm": 0.2836347059693572,
|
|
"learning_rate": 8.299365514607883e-07,
|
|
"loss": 0.4073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11003822088241577,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3058.2,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 4.594768242312988,
|
|
"grad_norm": 0.3923630777914454,
|
|
"learning_rate": 8.027773519398207e-07,
|
|
"loss": 0.3835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12148011475801468,
|
|
"step": 3340,
|
|
"valid_targets_mean": 1120.0,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 4.601652134006425,
|
|
"grad_norm": 0.23198120955974,
|
|
"learning_rate": 7.760608592598528e-07,
|
|
"loss": 0.3943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13927549123764038,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4731.8,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 4.608536025699863,
|
|
"grad_norm": 0.2175553487141594,
|
|
"learning_rate": 7.497876895294931e-07,
|
|
"loss": 0.3707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311253309249878,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4816.6,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 4.6154199173933,
|
|
"grad_norm": 0.2523032181271898,
|
|
"learning_rate": 7.239584486338814e-07,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12201976776123047,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3374.7,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 4.622303809086737,
|
|
"grad_norm": 0.26995208568046913,
|
|
"learning_rate": 6.985737322207287e-07,
|
|
"loss": 0.3859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13418743014335632,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3214.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 4.629187700780174,
|
|
"grad_norm": 0.23355543900054457,
|
|
"learning_rate": 6.736341256865642e-07,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14379887282848358,
|
|
"step": 3365,
|
|
"valid_targets_mean": 5321.0,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 4.636071592473612,
|
|
"grad_norm": 0.2535456585399873,
|
|
"learning_rate": 6.491402041632544e-07,
|
|
"loss": 0.3888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11921147257089615,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3536.7,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 4.642955484167049,
|
|
"grad_norm": 0.2559436028551804,
|
|
"learning_rate": 6.250925325047252e-07,
|
|
"loss": 0.3744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09919558465480804,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2772.1,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 4.649839375860487,
|
|
"grad_norm": 0.42012443586849063,
|
|
"learning_rate": 6.014916652739455e-07,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22293120622634888,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3487.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 4.656723267553923,
|
|
"grad_norm": 0.2264938566466451,
|
|
"learning_rate": 5.783381467301352e-07,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11772520840167999,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4307.9,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 4.663607159247361,
|
|
"grad_norm": 0.23133653644845747,
|
|
"learning_rate": 5.556325108162064e-07,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14393877983093262,
|
|
"step": 3390,
|
|
"valid_targets_mean": 4449.8,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 4.670491050940798,
|
|
"grad_norm": 0.24444169804629398,
|
|
"learning_rate": 5.333752811464599e-07,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10904049873352051,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3510.9,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 4.677374942634236,
|
|
"grad_norm": 0.2482052812902979,
|
|
"learning_rate": 5.115669709945148e-07,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13949604332447052,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4310.5,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 4.684258834327673,
|
|
"grad_norm": 0.22143585652156172,
|
|
"learning_rate": 4.90208083281456e-07,
|
|
"loss": 0.3763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11191027611494064,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4566.1,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 4.691142726021111,
|
|
"grad_norm": 0.28442610184717354,
|
|
"learning_rate": 4.692991105642519e-07,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.159225732088089,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3325.2,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 4.698026617714548,
|
|
"grad_norm": 0.24884829886272836,
|
|
"learning_rate": 4.488405350243752e-07,
|
|
"loss": 0.3955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12980762124061584,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3990.3,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 4.704910509407985,
|
|
"grad_norm": 0.3883368549041478,
|
|
"learning_rate": 4.288328284567156e-07,
|
|
"loss": 0.359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11890369653701782,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5897.4,
|
|
"valid_targets_min": 347
|
|
},
|
|
{
|
|
"epoch": 4.711794401101423,
|
|
"grad_norm": 0.37340217595337594,
|
|
"learning_rate": 4.092764522586601e-07,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07990404963493347,
|
|
"step": 3425,
|
|
"valid_targets_mean": 6455.6,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 4.71867829279486,
|
|
"grad_norm": 0.3565500859984899,
|
|
"learning_rate": 3.901718574194968e-07,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08166106045246124,
|
|
"step": 3430,
|
|
"valid_targets_mean": 6676.2,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 4.725562184488298,
|
|
"grad_norm": 0.313004986327095,
|
|
"learning_rate": 3.715194845099657e-07,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1039724126458168,
|
|
"step": 3435,
|
|
"valid_targets_mean": 6722.4,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 4.732446076181735,
|
|
"grad_norm": 0.2809167735422918,
|
|
"learning_rate": 3.533197636721442e-07,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08939207345247269,
|
|
"step": 3440,
|
|
"valid_targets_mean": 7329.8,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 4.739329967875172,
|
|
"grad_norm": 0.2368355625717822,
|
|
"learning_rate": 3.3557311460949537e-07,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06595554202795029,
|
|
"step": 3445,
|
|
"valid_targets_mean": 6100.7,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 4.746213859568609,
|
|
"grad_norm": 0.24153571397417448,
|
|
"learning_rate": 3.182799465772135e-07,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05422850325703621,
|
|
"step": 3450,
|
|
"valid_targets_mean": 5562.0,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.753097751262047,
|
|
"grad_norm": 0.2271226101292465,
|
|
"learning_rate": 3.014406583727603e-07,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05296333134174347,
|
|
"step": 3455,
|
|
"valid_targets_mean": 1929.8,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 4.759981642955484,
|
|
"grad_norm": 0.21840229847924544,
|
|
"learning_rate": 2.8505563832669e-07,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0530148483812809,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5253.4,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.766865534648922,
|
|
"grad_norm": 0.21527749452897255,
|
|
"learning_rate": 2.691252642936859e-07,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06063324958086014,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5247.3,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 4.773749426342359,
|
|
"grad_norm": 0.19041316495333657,
|
|
"learning_rate": 2.5364990364384887e-07,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0600147545337677,
|
|
"step": 3470,
|
|
"valid_targets_mean": 5607.3,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 4.780633318035797,
|
|
"grad_norm": 0.1842851141579877,
|
|
"learning_rate": 2.386299132542136e-07,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09336844086647034,
|
|
"step": 3475,
|
|
"valid_targets_mean": 7863.1,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 4.787517209729233,
|
|
"grad_norm": 0.20898692218205595,
|
|
"learning_rate": 2.240656395005414e-07,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13251468539237976,
|
|
"step": 3480,
|
|
"valid_targets_mean": 7620.9,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 4.794401101422671,
|
|
"grad_norm": 0.18482544111086033,
|
|
"learning_rate": 2.0995741824931138e-07,
|
|
"loss": 0.2246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06816096603870392,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5906.8,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 4.801284993116108,
|
|
"grad_norm": 0.20241099039229132,
|
|
"learning_rate": 1.9630557484998647e-07,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0711783766746521,
|
|
"step": 3490,
|
|
"valid_targets_mean": 5153.4,
|
|
"valid_targets_min": 221
|
|
},
|
|
{
|
|
"epoch": 4.808168884809546,
|
|
"grad_norm": 0.18774049657388894,
|
|
"learning_rate": 1.8311042412750613e-07,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08386833965778351,
|
|
"step": 3495,
|
|
"valid_targets_mean": 7219.4,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 4.815052776502983,
|
|
"grad_norm": 0.1814257677961071,
|
|
"learning_rate": 1.7037227037502546e-07,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06124308705329895,
|
|
"step": 3500,
|
|
"valid_targets_mean": 5985.7,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 4.82193666819642,
|
|
"grad_norm": 0.17570467156043065,
|
|
"learning_rate": 1.5809140734690753e-07,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0560992956161499,
|
|
"step": 3505,
|
|
"valid_targets_mean": 5758.3,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 4.8288205598898575,
|
|
"grad_norm": 0.16687189317724344,
|
|
"learning_rate": 1.4626811825192878e-07,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04680527746677399,
|
|
"step": 3510,
|
|
"valid_targets_mean": 5599.4,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 4.835704451583295,
|
|
"grad_norm": 0.21506554619197757,
|
|
"learning_rate": 1.3490267574677528e-07,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04162397235631943,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2148.5,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 4.8425883432767325,
|
|
"grad_norm": 0.3635867246852746,
|
|
"learning_rate": 1.2399534192972797e-07,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0672360360622406,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4059.2,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 4.84947223497017,
|
|
"grad_norm": 0.3310539168438136,
|
|
"learning_rate": 1.1354636833463827e-07,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06172456964850426,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3782.6,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 4.8563561266636075,
|
|
"grad_norm": 0.33237941211528377,
|
|
"learning_rate": 1.0355599592511534e-07,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05413079261779785,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3465.6,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 4.863240018357045,
|
|
"grad_norm": 0.3094248279076241,
|
|
"learning_rate": 9.402445508897685e-08,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05988271161913872,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3961.8,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 4.870123910050482,
|
|
"grad_norm": 0.29242550385557875,
|
|
"learning_rate": 8.495196563293118e-08,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05963732674717903,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3737.8,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.877007801743919,
|
|
"grad_norm": 0.28190238416129615,
|
|
"learning_rate": 7.633873677751036e-08,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0610860139131546,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3892.3,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 4.883891693437357,
|
|
"grad_norm": 0.264213197144755,
|
|
"learning_rate": 6.818496715224721e-08,
|
|
"loss": 0.1711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056852683424949646,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3785.3,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 4.890775585130794,
|
|
"grad_norm": 0.28203213789656356,
|
|
"learning_rate": 6.049084479109013e-08,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06058082729578018,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3731.7,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 4.897659476824232,
|
|
"grad_norm": 0.27415518234666697,
|
|
"learning_rate": 5.325654712807105e-08,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06047704070806503,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3858.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 4.904543368517668,
|
|
"grad_norm": 0.26692167270191103,
|
|
"learning_rate": 4.648224099321086e-08,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05759282037615776,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3910.5,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 4.911427260211106,
|
|
"grad_norm": 0.2613036512368632,
|
|
"learning_rate": 4.0168082608673666e-08,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05626670643687248,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3696.9,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 4.918311151904543,
|
|
"grad_norm": 0.2732742564108753,
|
|
"learning_rate": 3.431421758516518e-08,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0582839772105217,
|
|
"step": 3575,
|
|
"valid_targets_mean": 3753.9,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 4.925195043597981,
|
|
"grad_norm": 0.25102179097888444,
|
|
"learning_rate": 2.8920780918570978e-08,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05331850051879883,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3556.3,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 4.932078935291418,
|
|
"grad_norm": 0.29257791826753676,
|
|
"learning_rate": 2.398789698684789e-08,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059953607618808746,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4006.8,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 4.938962826984856,
|
|
"grad_norm": 0.524215825875638,
|
|
"learning_rate": 1.9515679547148504e-08,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10526755452156067,
|
|
"step": 3590,
|
|
"valid_targets_mean": 6701.7,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 4.945846718678293,
|
|
"grad_norm": 0.5722645906369648,
|
|
"learning_rate": 1.550423173321214e-08,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09603773057460785,
|
|
"step": 3595,
|
|
"valid_targets_mean": 7118.6,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 4.95273061037173,
|
|
"grad_norm": 0.5749651710529045,
|
|
"learning_rate": 1.1953646052969004e-08,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07218396663665771,
|
|
"step": 3600,
|
|
"valid_targets_mean": 6525.2,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 4.959614502065167,
|
|
"grad_norm": 0.5773146128652207,
|
|
"learning_rate": 8.864004386419655e-09,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08016687631607056,
|
|
"step": 3605,
|
|
"valid_targets_mean": 6868.0,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 4.966498393758605,
|
|
"grad_norm": 0.5465195501823726,
|
|
"learning_rate": 6.235377983738744e-09,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0723617821931839,
|
|
"step": 3610,
|
|
"valid_targets_mean": 5525.6,
|
|
"valid_targets_min": 398
|
|
},
|
|
{
|
|
"epoch": 4.973382285452042,
|
|
"grad_norm": 0.5787672219727664,
|
|
"learning_rate": 4.067827463638541e-09,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07634909451007843,
|
|
"step": 3615,
|
|
"valid_targets_mean": 6691.8,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 4.98026617714548,
|
|
"grad_norm": 0.5163551559182284,
|
|
"learning_rate": 2.3614028119656186e-09,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0796852856874466,
|
|
"step": 3620,
|
|
"valid_targets_mean": 7097.7,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 4.9871500688389165,
|
|
"grad_norm": 0.5453881277330466,
|
|
"learning_rate": 1.116143380552881e-09,
|
|
"loss": 0.2197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0733230710029602,
|
|
"step": 3625,
|
|
"valid_targets_mean": 6452.2,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 4.994033960532354,
|
|
"grad_norm": 0.55973181421581,
|
|
"learning_rate": 3.320778863069585e-10,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07475747168064117,
|
|
"step": 3630,
|
|
"valid_targets_mean": 6448.4,
|
|
"valid_targets_min": 117
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.9253128603774515,
|
|
"learning_rate": 9.224410548736018e-12,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3323420286178589,
|
|
"step": 3635,
|
|
"valid_targets_mean": 6423.8,
|
|
"valid_targets_min": 192
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3323420286178589,
|
|
"step": 3635,
|
|
"total_flos": 1.1450092629186839e+19,
|
|
"train_loss": 0.14989610923533592,
|
|
"train_runtime": 39124.5476,
|
|
"train_samples_per_second": 8.909,
|
|
"train_steps_per_second": 0.093,
|
|
"valid_targets_mean": 6423.8,
|
|
"valid_targets_min": 192
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3635,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 5,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.1450092629186839e+19,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|