9563 lines
265 KiB
JSON
9563 lines
265 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4326,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008090614886731391,
|
|
"grad_norm": 27.459321741406143,
|
|
"learning_rate": 3.695150115473441e-07,
|
|
"loss": 0.9065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9109030961990356,
|
|
"step": 5,
|
|
"valid_targets_mean": 3808.2,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 0.016181229773462782,
|
|
"grad_norm": 28.139176144208395,
|
|
"learning_rate": 8.314087759815243e-07,
|
|
"loss": 0.896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9023652672767639,
|
|
"step": 10,
|
|
"valid_targets_mean": 3801.5,
|
|
"valid_targets_min": 1847
|
|
},
|
|
{
|
|
"epoch": 0.024271844660194174,
|
|
"grad_norm": 25.93354792492918,
|
|
"learning_rate": 1.2933025404157046e-06,
|
|
"loss": 0.8805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8771004676818848,
|
|
"step": 15,
|
|
"valid_targets_mean": 3371.9,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 0.032362459546925564,
|
|
"grad_norm": 17.810433239456163,
|
|
"learning_rate": 1.7551963048498846e-06,
|
|
"loss": 0.7848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7335680723190308,
|
|
"step": 20,
|
|
"valid_targets_mean": 3780.7,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 0.040453074433656956,
|
|
"grad_norm": 11.26352797874236,
|
|
"learning_rate": 2.217090069284065e-06,
|
|
"loss": 0.7078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.70474773645401,
|
|
"step": 25,
|
|
"valid_targets_mean": 3891.1,
|
|
"valid_targets_min": 1957
|
|
},
|
|
{
|
|
"epoch": 0.04854368932038835,
|
|
"grad_norm": 5.655520832965064,
|
|
"learning_rate": 2.678983833718245e-06,
|
|
"loss": 0.6187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5794852375984192,
|
|
"step": 30,
|
|
"valid_targets_mean": 3575.2,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 0.05663430420711974,
|
|
"grad_norm": 3.1011900466301516,
|
|
"learning_rate": 3.1408775981524254e-06,
|
|
"loss": 0.5928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.568791925907135,
|
|
"step": 35,
|
|
"valid_targets_mean": 3848.1,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.06472491909385113,
|
|
"grad_norm": 1.8794850275863664,
|
|
"learning_rate": 3.6027713625866056e-06,
|
|
"loss": 0.5507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5347862839698792,
|
|
"step": 40,
|
|
"valid_targets_mean": 3735.5,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 0.07281553398058252,
|
|
"grad_norm": 1.6933093485589523,
|
|
"learning_rate": 4.064665127020786e-06,
|
|
"loss": 0.5209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5246684551239014,
|
|
"step": 45,
|
|
"valid_targets_mean": 3173.0,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 0.08090614886731391,
|
|
"grad_norm": 1.3451241482911473,
|
|
"learning_rate": 4.526558891454966e-06,
|
|
"loss": 0.5153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5409998893737793,
|
|
"step": 50,
|
|
"valid_targets_mean": 3681.6,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 0.0889967637540453,
|
|
"grad_norm": 0.9951670347910555,
|
|
"learning_rate": 4.988452655889146e-06,
|
|
"loss": 0.4808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49074310064315796,
|
|
"step": 55,
|
|
"valid_targets_mean": 4001.4,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 0.0970873786407767,
|
|
"grad_norm": 0.9366809742158559,
|
|
"learning_rate": 5.450346420323326e-06,
|
|
"loss": 0.4683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4610210657119751,
|
|
"step": 60,
|
|
"valid_targets_mean": 3441.2,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 0.10517799352750809,
|
|
"grad_norm": 0.8147959249499319,
|
|
"learning_rate": 5.912240184757506e-06,
|
|
"loss": 0.444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43035197257995605,
|
|
"step": 65,
|
|
"valid_targets_mean": 3823.9,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 0.11326860841423948,
|
|
"grad_norm": 0.7676476543138736,
|
|
"learning_rate": 6.374133949191687e-06,
|
|
"loss": 0.4545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4499027132987976,
|
|
"step": 70,
|
|
"valid_targets_mean": 3687.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.12135922330097088,
|
|
"grad_norm": 0.7061949661440534,
|
|
"learning_rate": 6.836027713625867e-06,
|
|
"loss": 0.4295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4421675205230713,
|
|
"step": 75,
|
|
"valid_targets_mean": 4411.3,
|
|
"valid_targets_min": 2975
|
|
},
|
|
{
|
|
"epoch": 0.12944983818770225,
|
|
"grad_norm": 0.6513122781968413,
|
|
"learning_rate": 7.297921478060047e-06,
|
|
"loss": 0.4279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43355122208595276,
|
|
"step": 80,
|
|
"valid_targets_mean": 4360.9,
|
|
"valid_targets_min": 2906
|
|
},
|
|
{
|
|
"epoch": 0.13754045307443366,
|
|
"grad_norm": 1.0234423222827205,
|
|
"learning_rate": 7.759815242494227e-06,
|
|
"loss": 0.4077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4059848189353943,
|
|
"step": 85,
|
|
"valid_targets_mean": 3950.6,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 0.14563106796116504,
|
|
"grad_norm": 0.6393770994585668,
|
|
"learning_rate": 8.221709006928407e-06,
|
|
"loss": 0.404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4126480221748352,
|
|
"step": 90,
|
|
"valid_targets_mean": 3699.2,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 0.15372168284789645,
|
|
"grad_norm": 0.6009758856614545,
|
|
"learning_rate": 8.683602771362586e-06,
|
|
"loss": 0.3888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37331610918045044,
|
|
"step": 95,
|
|
"valid_targets_mean": 4251.8,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 0.16181229773462782,
|
|
"grad_norm": 0.6411212665967604,
|
|
"learning_rate": 9.145496535796767e-06,
|
|
"loss": 0.3838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37150484323501587,
|
|
"step": 100,
|
|
"valid_targets_mean": 3675.9,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.16990291262135923,
|
|
"grad_norm": 0.6094648920085912,
|
|
"learning_rate": 9.607390300230948e-06,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3679890036582947,
|
|
"step": 105,
|
|
"valid_targets_mean": 4052.5,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.1779935275080906,
|
|
"grad_norm": 0.6714933928137066,
|
|
"learning_rate": 1.0069284064665128e-05,
|
|
"loss": 0.3778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39726799726486206,
|
|
"step": 110,
|
|
"valid_targets_mean": 3751.6,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 0.18608414239482202,
|
|
"grad_norm": 0.6885921268773545,
|
|
"learning_rate": 1.0531177829099309e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36161985993385315,
|
|
"step": 115,
|
|
"valid_targets_mean": 3398.1,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 0.1941747572815534,
|
|
"grad_norm": 0.6786362718946853,
|
|
"learning_rate": 1.0993071593533488e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3428874611854553,
|
|
"step": 120,
|
|
"valid_targets_mean": 2924.1,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 0.2022653721682848,
|
|
"grad_norm": 0.5847017454839042,
|
|
"learning_rate": 1.1454965357967669e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38618525862693787,
|
|
"step": 125,
|
|
"valid_targets_mean": 4299.1,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 0.21035598705501618,
|
|
"grad_norm": 0.6176988756110413,
|
|
"learning_rate": 1.1916859122401848e-05,
|
|
"loss": 0.3454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35746678709983826,
|
|
"step": 130,
|
|
"valid_targets_mean": 3582.0,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 0.21844660194174756,
|
|
"grad_norm": 0.6868618523160048,
|
|
"learning_rate": 1.237875288683603e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34397560358047485,
|
|
"step": 135,
|
|
"valid_targets_mean": 3697.5,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 0.22653721682847897,
|
|
"grad_norm": 0.5948797006890421,
|
|
"learning_rate": 1.284064665127021e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32718050479888916,
|
|
"step": 140,
|
|
"valid_targets_mean": 3896.3,
|
|
"valid_targets_min": 2104
|
|
},
|
|
{
|
|
"epoch": 0.23462783171521034,
|
|
"grad_norm": 0.5529849234577984,
|
|
"learning_rate": 1.3302540415704388e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35815680027008057,
|
|
"step": 145,
|
|
"valid_targets_mean": 4403.2,
|
|
"valid_targets_min": 2975
|
|
},
|
|
{
|
|
"epoch": 0.24271844660194175,
|
|
"grad_norm": 0.7558788212006664,
|
|
"learning_rate": 1.3764434180138568e-05,
|
|
"loss": 0.3451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34972795844078064,
|
|
"step": 150,
|
|
"valid_targets_mean": 3551.3,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 0.25080906148867316,
|
|
"grad_norm": 0.556839697232855,
|
|
"learning_rate": 1.4226327944572749e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31378552317619324,
|
|
"step": 155,
|
|
"valid_targets_mean": 3750.6,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 0.2588996763754045,
|
|
"grad_norm": 0.6341317824410588,
|
|
"learning_rate": 1.468822170900693e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34858211874961853,
|
|
"step": 160,
|
|
"valid_targets_mean": 3940.8,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 0.2669902912621359,
|
|
"grad_norm": 0.5957011932092419,
|
|
"learning_rate": 1.5150115473441109e-05,
|
|
"loss": 0.3434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32572871446609497,
|
|
"step": 165,
|
|
"valid_targets_mean": 3897.5,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 0.2750809061488673,
|
|
"grad_norm": 0.6068605911979195,
|
|
"learning_rate": 1.561200923787529e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32489800453186035,
|
|
"step": 170,
|
|
"valid_targets_mean": 4304.5,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 0.28317152103559873,
|
|
"grad_norm": 0.6959522324597113,
|
|
"learning_rate": 1.607390300230947e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3342089056968689,
|
|
"step": 175,
|
|
"valid_targets_mean": 3460.9,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 0.2912621359223301,
|
|
"grad_norm": 0.6993033966832811,
|
|
"learning_rate": 1.653579676674365e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3438107371330261,
|
|
"step": 180,
|
|
"valid_targets_mean": 3719.4,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 0.2993527508090615,
|
|
"grad_norm": 0.6054454798783228,
|
|
"learning_rate": 1.699769053117783e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3246925473213196,
|
|
"step": 185,
|
|
"valid_targets_mean": 3983.6,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 0.3074433656957929,
|
|
"grad_norm": 0.6024947392151053,
|
|
"learning_rate": 1.7459584295612013e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34920066595077515,
|
|
"step": 190,
|
|
"valid_targets_mean": 4179.0,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 0.3155339805825243,
|
|
"grad_norm": 0.5881601386489697,
|
|
"learning_rate": 1.792147806004619e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30888259410858154,
|
|
"step": 195,
|
|
"valid_targets_mean": 3780.8,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 0.32362459546925565,
|
|
"grad_norm": 0.6804460855209646,
|
|
"learning_rate": 1.838337182448037e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3233288526535034,
|
|
"step": 200,
|
|
"valid_targets_mean": 3176.1,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 0.33171521035598706,
|
|
"grad_norm": 0.641730901873238,
|
|
"learning_rate": 1.8845265588914552e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3117061257362366,
|
|
"step": 205,
|
|
"valid_targets_mean": 3649.1,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 0.33980582524271846,
|
|
"grad_norm": 0.627739058134076,
|
|
"learning_rate": 1.9307159353348733e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3217557370662689,
|
|
"step": 210,
|
|
"valid_targets_mean": 3684.2,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 0.3478964401294498,
|
|
"grad_norm": 0.6743803896335775,
|
|
"learning_rate": 1.976905311778291e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.363000750541687,
|
|
"step": 215,
|
|
"valid_targets_mean": 3696.2,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 0.3559870550161812,
|
|
"grad_norm": 0.6503092166966725,
|
|
"learning_rate": 2.0230946882217092e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128376305103302,
|
|
"step": 220,
|
|
"valid_targets_mean": 3296.0,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 0.3640776699029126,
|
|
"grad_norm": 0.6460762248227256,
|
|
"learning_rate": 2.0692840646651273e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31550925970077515,
|
|
"step": 225,
|
|
"valid_targets_mean": 3500.8,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.37216828478964403,
|
|
"grad_norm": 0.6602639443938337,
|
|
"learning_rate": 2.115473441108545e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31277865171432495,
|
|
"step": 230,
|
|
"valid_targets_mean": 2942.6,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 0.3802588996763754,
|
|
"grad_norm": 0.6503964007795302,
|
|
"learning_rate": 2.1616628175519635e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27533742785453796,
|
|
"step": 235,
|
|
"valid_targets_mean": 3215.6,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 0.3883495145631068,
|
|
"grad_norm": 0.6456014703726483,
|
|
"learning_rate": 2.2078521939953813e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29455316066741943,
|
|
"step": 240,
|
|
"valid_targets_mean": 3503.1,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 0.3964401294498382,
|
|
"grad_norm": 0.6050471441367182,
|
|
"learning_rate": 2.2540415704387994e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3035230338573456,
|
|
"step": 245,
|
|
"valid_targets_mean": 3800.3,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 0.4045307443365696,
|
|
"grad_norm": 0.5739221439490244,
|
|
"learning_rate": 2.3002309468822172e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3035668730735779,
|
|
"step": 250,
|
|
"valid_targets_mean": 4172.5,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 0.41262135922330095,
|
|
"grad_norm": 0.6573809007717452,
|
|
"learning_rate": 2.346420323325635e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33286434412002563,
|
|
"step": 255,
|
|
"valid_targets_mean": 3657.9,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 0.42071197411003236,
|
|
"grad_norm": 0.621214006304506,
|
|
"learning_rate": 2.3926096997690534e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27294284105300903,
|
|
"step": 260,
|
|
"valid_targets_mean": 3487.0,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 0.42880258899676377,
|
|
"grad_norm": 0.6194570084768386,
|
|
"learning_rate": 2.438799076212471e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28968545794487,
|
|
"step": 265,
|
|
"valid_targets_mean": 4048.9,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 0.4368932038834951,
|
|
"grad_norm": 0.6123508382890656,
|
|
"learning_rate": 2.4849884526558893e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817320227622986,
|
|
"step": 270,
|
|
"valid_targets_mean": 3786.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 0.4449838187702265,
|
|
"grad_norm": 0.6349448992907608,
|
|
"learning_rate": 2.5311778290993074e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887231409549713,
|
|
"step": 275,
|
|
"valid_targets_mean": 3590.3,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 0.45307443365695793,
|
|
"grad_norm": 0.6041395356484386,
|
|
"learning_rate": 2.5773672055427255e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30666863918304443,
|
|
"step": 280,
|
|
"valid_targets_mean": 3870.8,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.46116504854368934,
|
|
"grad_norm": 0.6423139569561586,
|
|
"learning_rate": 2.6235565819861432e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30878114700317383,
|
|
"step": 285,
|
|
"valid_targets_mean": 3677.9,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.4692556634304207,
|
|
"grad_norm": 0.5950734204281998,
|
|
"learning_rate": 2.6697459584295617e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27899283170700073,
|
|
"step": 290,
|
|
"valid_targets_mean": 3649.2,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 0.4773462783171521,
|
|
"grad_norm": 0.6370745562989614,
|
|
"learning_rate": 2.7159353348729794e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29412591457366943,
|
|
"step": 295,
|
|
"valid_targets_mean": 3545.8,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 0.4854368932038835,
|
|
"grad_norm": 1.92659801088735,
|
|
"learning_rate": 2.7621247113163975e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25594255328178406,
|
|
"step": 300,
|
|
"valid_targets_mean": 3757.3,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 0.4935275080906149,
|
|
"grad_norm": 0.6375789687289607,
|
|
"learning_rate": 2.8083140877598153e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897154986858368,
|
|
"step": 305,
|
|
"valid_targets_mean": 3368.8,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 0.5016181229773463,
|
|
"grad_norm": 0.6781242545223203,
|
|
"learning_rate": 2.8545034642032338e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30849897861480713,
|
|
"step": 310,
|
|
"valid_targets_mean": 3861.2,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 0.5097087378640777,
|
|
"grad_norm": 0.5620898167135456,
|
|
"learning_rate": 2.9006928406466515e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29005423188209534,
|
|
"step": 315,
|
|
"valid_targets_mean": 3949.1,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 0.517799352750809,
|
|
"grad_norm": 0.6011594540412086,
|
|
"learning_rate": 2.9468822170900696e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24850499629974365,
|
|
"step": 320,
|
|
"valid_targets_mean": 3461.3,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 0.5258899676375405,
|
|
"grad_norm": 0.5667335774154261,
|
|
"learning_rate": 2.9930715935334874e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28869470953941345,
|
|
"step": 325,
|
|
"valid_targets_mean": 3888.8,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 0.5339805825242718,
|
|
"grad_norm": 0.6018602985123674,
|
|
"learning_rate": 3.0392609699769055e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25187310576438904,
|
|
"step": 330,
|
|
"valid_targets_mean": 3572.2,
|
|
"valid_targets_min": 416
|
|
},
|
|
{
|
|
"epoch": 0.5420711974110033,
|
|
"grad_norm": 0.5530170018702698,
|
|
"learning_rate": 3.0854503464203236e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3016529679298401,
|
|
"step": 335,
|
|
"valid_targets_mean": 4167.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.5501618122977346,
|
|
"grad_norm": 0.5683688168773894,
|
|
"learning_rate": 3.131639722863742e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2862555980682373,
|
|
"step": 340,
|
|
"valid_targets_mean": 4065.1,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 0.558252427184466,
|
|
"grad_norm": 0.6422148617718519,
|
|
"learning_rate": 3.17782909930716e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575664222240448,
|
|
"step": 345,
|
|
"valid_targets_mean": 3347.6,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 0.5663430420711975,
|
|
"grad_norm": 0.6028555328769979,
|
|
"learning_rate": 3.224018475750577e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789534628391266,
|
|
"step": 350,
|
|
"valid_targets_mean": 3205.9,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 0.5744336569579288,
|
|
"grad_norm": 0.703646722775362,
|
|
"learning_rate": 3.270207852193996e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29907727241516113,
|
|
"step": 355,
|
|
"valid_targets_mean": 3719.6,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 0.5825242718446602,
|
|
"grad_norm": 0.686644747611541,
|
|
"learning_rate": 3.3163972286374135e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848784625530243,
|
|
"step": 360,
|
|
"valid_targets_mean": 3759.4,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 0.5906148867313916,
|
|
"grad_norm": 0.6324843088148043,
|
|
"learning_rate": 3.3625866050808316e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3301808834075928,
|
|
"step": 365,
|
|
"valid_targets_mean": 3985.0,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.598705501618123,
|
|
"grad_norm": 0.6657889896899425,
|
|
"learning_rate": 3.40877598152425e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2617691159248352,
|
|
"step": 370,
|
|
"valid_targets_mean": 3649.1,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 0.6067961165048543,
|
|
"grad_norm": 0.7044977758301637,
|
|
"learning_rate": 3.454965357967668e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28061506152153015,
|
|
"step": 375,
|
|
"valid_targets_mean": 3758.7,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 0.6148867313915858,
|
|
"grad_norm": 0.5793210298303975,
|
|
"learning_rate": 3.501154734411086e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28702807426452637,
|
|
"step": 380,
|
|
"valid_targets_mean": 3827.2,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.6229773462783171,
|
|
"grad_norm": 0.6428205927219847,
|
|
"learning_rate": 3.547344110854504e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2545207738876343,
|
|
"step": 385,
|
|
"valid_targets_mean": 3273.9,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 0.6310679611650486,
|
|
"grad_norm": 0.591271300160177,
|
|
"learning_rate": 3.5935334872979214e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28500238060951233,
|
|
"step": 390,
|
|
"valid_targets_mean": 3836.4,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 0.63915857605178,
|
|
"grad_norm": 0.5988205344788338,
|
|
"learning_rate": 3.63972286374134e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3076947331428528,
|
|
"step": 395,
|
|
"valid_targets_mean": 3890.5,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 0.6472491909385113,
|
|
"grad_norm": 0.5508624611045605,
|
|
"learning_rate": 3.6859122401847576e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27103567123413086,
|
|
"step": 400,
|
|
"valid_targets_mean": 3915.4,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.6553398058252428,
|
|
"grad_norm": 0.5985040648059813,
|
|
"learning_rate": 3.732101616628176e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661280333995819,
|
|
"step": 405,
|
|
"valid_targets_mean": 3904.9,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 0.6634304207119741,
|
|
"grad_norm": 0.5990129213673812,
|
|
"learning_rate": 3.778290993071594e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26289594173431396,
|
|
"step": 410,
|
|
"valid_targets_mean": 3605.1,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 0.6715210355987055,
|
|
"grad_norm": 0.6190982488306185,
|
|
"learning_rate": 3.824480369515012e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31816816329956055,
|
|
"step": 415,
|
|
"valid_targets_mean": 3736.1,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 0.6796116504854369,
|
|
"grad_norm": 0.5654861961445968,
|
|
"learning_rate": 3.87066974595843e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29399573802948,
|
|
"step": 420,
|
|
"valid_targets_mean": 4115.8,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 0.6877022653721683,
|
|
"grad_norm": 0.6733368871549665,
|
|
"learning_rate": 3.9168591224018475e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269350528717041,
|
|
"step": 425,
|
|
"valid_targets_mean": 3661.5,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 0.6957928802588996,
|
|
"grad_norm": 0.5881509692853332,
|
|
"learning_rate": 3.963048498845266e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111186623573303,
|
|
"step": 430,
|
|
"valid_targets_mean": 3747.9,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 0.7038834951456311,
|
|
"grad_norm": 0.6653833282495153,
|
|
"learning_rate": 3.999999348775225e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2823415696620941,
|
|
"step": 435,
|
|
"valid_targets_mean": 3997.1,
|
|
"valid_targets_min": 2126
|
|
},
|
|
{
|
|
"epoch": 0.7119741100323624,
|
|
"grad_norm": 0.5650668386347658,
|
|
"learning_rate": 3.9999765559526296e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2572442591190338,
|
|
"step": 440,
|
|
"valid_targets_mean": 4013.0,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 0.7200647249190939,
|
|
"grad_norm": 0.5389309645168817,
|
|
"learning_rate": 3.999921202315374e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28636130690574646,
|
|
"step": 445,
|
|
"valid_targets_mean": 4386.1,
|
|
"valid_targets_min": 2956
|
|
},
|
|
{
|
|
"epoch": 0.7281553398058253,
|
|
"grad_norm": 0.6194102937241955,
|
|
"learning_rate": 3.9998332887646504e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838453948497772,
|
|
"step": 450,
|
|
"valid_targets_mean": 3617.1,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 0.7362459546925566,
|
|
"grad_norm": 0.5816499043205773,
|
|
"learning_rate": 3.999712816731743e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25679758191108704,
|
|
"step": 455,
|
|
"valid_targets_mean": 3520.3,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 0.7443365695792881,
|
|
"grad_norm": 0.579241211012032,
|
|
"learning_rate": 3.999559788178009e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29625752568244934,
|
|
"step": 460,
|
|
"valid_targets_mean": 3625.7,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 0.7524271844660194,
|
|
"grad_norm": 0.5909898166135259,
|
|
"learning_rate": 3.999374205594845e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26413339376449585,
|
|
"step": 465,
|
|
"valid_targets_mean": 3644.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 0.7605177993527508,
|
|
"grad_norm": 0.5693299452079705,
|
|
"learning_rate": 3.999156072003646e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2781625986099243,
|
|
"step": 470,
|
|
"valid_targets_mean": 3661.7,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 0.7686084142394822,
|
|
"grad_norm": 0.5919226456235103,
|
|
"learning_rate": 3.9989053909557576e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29603859782218933,
|
|
"step": 475,
|
|
"valid_targets_mean": 3722.9,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 0.7766990291262136,
|
|
"grad_norm": 0.5943887354321051,
|
|
"learning_rate": 3.998622166532417e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638964056968689,
|
|
"step": 480,
|
|
"valid_targets_mean": 4079.6,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 0.7847896440129449,
|
|
"grad_norm": 0.711842012876838,
|
|
"learning_rate": 3.998306403344688e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26152002811431885,
|
|
"step": 485,
|
|
"valid_targets_mean": 3510.2,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 0.7928802588996764,
|
|
"grad_norm": 0.5734682192660957,
|
|
"learning_rate": 3.997958106533383e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649049758911133,
|
|
"step": 490,
|
|
"valid_targets_mean": 3927.6,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 0.8009708737864077,
|
|
"grad_norm": 0.5361986202564702,
|
|
"learning_rate": 3.997577281768982e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2324935793876648,
|
|
"step": 495,
|
|
"valid_targets_mean": 3626.5,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 0.8090614886731392,
|
|
"grad_norm": 0.5122712699333561,
|
|
"learning_rate": 3.997163935251543e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27357301115989685,
|
|
"step": 500,
|
|
"valid_targets_mean": 4106.0,
|
|
"valid_targets_min": 2489
|
|
},
|
|
{
|
|
"epoch": 0.8171521035598706,
|
|
"grad_norm": 0.6066344337324161,
|
|
"learning_rate": 3.996718073710591e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27873921394348145,
|
|
"step": 505,
|
|
"valid_targets_mean": 3503.3,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 0.8252427184466019,
|
|
"grad_norm": 0.5284961700363339,
|
|
"learning_rate": 3.9962397044050206e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654161751270294,
|
|
"step": 510,
|
|
"valid_targets_mean": 3800.1,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.621823217819916,
|
|
"learning_rate": 3.99572883512297e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27369263768196106,
|
|
"step": 515,
|
|
"valid_targets_mean": 3488.2,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 0.8414239482200647,
|
|
"grad_norm": 0.5267599319786561,
|
|
"learning_rate": 3.9951854741816954e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2734227776527405,
|
|
"step": 520,
|
|
"valid_targets_mean": 3832.6,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.8495145631067961,
|
|
"grad_norm": 0.6257107494353524,
|
|
"learning_rate": 3.99460963042744e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28572967648506165,
|
|
"step": 525,
|
|
"valid_targets_mean": 3736.1,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 0.8576051779935275,
|
|
"grad_norm": 1.2714399983105327,
|
|
"learning_rate": 3.994001313235283e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.282131552696228,
|
|
"step": 530,
|
|
"valid_targets_mean": 3938.2,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 0.8656957928802589,
|
|
"grad_norm": 0.5806714978099429,
|
|
"learning_rate": 3.993360532508993e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508247494697571,
|
|
"step": 535,
|
|
"valid_targets_mean": 3894.8,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 0.8737864077669902,
|
|
"grad_norm": 0.5581090898685672,
|
|
"learning_rate": 3.9926872986808626e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24472543597221375,
|
|
"step": 540,
|
|
"valid_targets_mean": 3818.2,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 0.8818770226537217,
|
|
"grad_norm": 0.602578818433595,
|
|
"learning_rate": 3.991981622711542e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27875274419784546,
|
|
"step": 545,
|
|
"valid_targets_mean": 3362.0,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 0.889967637540453,
|
|
"grad_norm": 0.6000853415599751,
|
|
"learning_rate": 3.991243516089859e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28475719690322876,
|
|
"step": 550,
|
|
"valid_targets_mean": 3562.8,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.8980582524271845,
|
|
"grad_norm": 0.6045079118077683,
|
|
"learning_rate": 3.9904729908326295e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536866366863251,
|
|
"step": 555,
|
|
"valid_targets_mean": 3200.2,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 0.9061488673139159,
|
|
"grad_norm": 0.5385268237514557,
|
|
"learning_rate": 3.989670059484465e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25445741415023804,
|
|
"step": 560,
|
|
"valid_targets_mean": 3636.5,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 0.9142394822006472,
|
|
"grad_norm": 0.569538684495337,
|
|
"learning_rate": 3.98883473511757e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259962797164917,
|
|
"step": 565,
|
|
"valid_targets_mean": 3567.1,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 0.9223300970873787,
|
|
"grad_norm": 0.605779218415475,
|
|
"learning_rate": 3.987967031331523e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27796828746795654,
|
|
"step": 570,
|
|
"valid_targets_mean": 3790.8,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 0.93042071197411,
|
|
"grad_norm": 0.5218505117582313,
|
|
"learning_rate": 3.987066962253063e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26819664239883423,
|
|
"step": 575,
|
|
"valid_targets_mean": 4475.1,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 0.9385113268608414,
|
|
"grad_norm": 0.5475939469674147,
|
|
"learning_rate": 3.9861345425358506e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616320252418518,
|
|
"step": 580,
|
|
"valid_targets_mean": 3786.9,
|
|
"valid_targets_min": 1797
|
|
},
|
|
{
|
|
"epoch": 0.9466019417475728,
|
|
"grad_norm": 0.5842375761808629,
|
|
"learning_rate": 3.985169787360238e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2507282793521881,
|
|
"step": 585,
|
|
"valid_targets_mean": 3231.1,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 0.9546925566343042,
|
|
"grad_norm": 0.5858929733036307,
|
|
"learning_rate": 3.984172712433016e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26595237851142883,
|
|
"step": 590,
|
|
"valid_targets_mean": 3756.8,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 0.9627831715210357,
|
|
"grad_norm": 0.562817714467397,
|
|
"learning_rate": 3.983143333987161e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2702093720436096,
|
|
"step": 595,
|
|
"valid_targets_mean": 3892.1,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 0.970873786407767,
|
|
"grad_norm": 0.5674781126145693,
|
|
"learning_rate": 3.98208166878157e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24162045121192932,
|
|
"step": 600,
|
|
"valid_targets_mean": 3564.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.9789644012944984,
|
|
"grad_norm": 0.6062685894488582,
|
|
"learning_rate": 3.9809877341007865e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25364941358566284,
|
|
"step": 605,
|
|
"valid_targets_mean": 3195.0,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 0.9870550161812298,
|
|
"grad_norm": 0.5787698953603837,
|
|
"learning_rate": 3.979861547754723e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2575009763240814,
|
|
"step": 610,
|
|
"valid_targets_mean": 3920.9,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 0.9951456310679612,
|
|
"grad_norm": 0.5410984656955982,
|
|
"learning_rate": 3.9787031280783665e-05,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24462872743606567,
|
|
"step": 615,
|
|
"valid_targets_mean": 3699.8,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 1.0032362459546926,
|
|
"grad_norm": 0.5559196648009831,
|
|
"learning_rate": 3.977512493931482e-05,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22372806072235107,
|
|
"step": 620,
|
|
"valid_targets_mean": 3276.6,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 1.0113268608414239,
|
|
"grad_norm": 0.562287316511375,
|
|
"learning_rate": 3.976289664698305e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25618696212768555,
|
|
"step": 625,
|
|
"valid_targets_mean": 3868.9,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 1.0194174757281553,
|
|
"grad_norm": 0.5369177782487862,
|
|
"learning_rate": 3.9750346602872275e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2368406504392624,
|
|
"step": 630,
|
|
"valid_targets_mean": 3707.1,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 1.0275080906148868,
|
|
"grad_norm": 0.5804008703361665,
|
|
"learning_rate": 3.973747501130473e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25358450412750244,
|
|
"step": 635,
|
|
"valid_targets_mean": 4052.9,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 1.035598705501618,
|
|
"grad_norm": 0.605778216509064,
|
|
"learning_rate": 3.97242820818376e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552924156188965,
|
|
"step": 640,
|
|
"valid_targets_mean": 3504.3,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 1.0436893203883495,
|
|
"grad_norm": 0.5654932430709362,
|
|
"learning_rate": 3.9710768029259695e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2729843854904175,
|
|
"step": 645,
|
|
"valid_targets_mean": 3681.1,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 1.051779935275081,
|
|
"grad_norm": 0.7172757360142547,
|
|
"learning_rate": 3.9696933073587864e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27672040462493896,
|
|
"step": 650,
|
|
"valid_targets_mean": 3507.8,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 1.0598705501618122,
|
|
"grad_norm": 0.5607347532030501,
|
|
"learning_rate": 3.9682777440063455e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24472706019878387,
|
|
"step": 655,
|
|
"valid_targets_mean": 3917.8,
|
|
"valid_targets_min": 2470
|
|
},
|
|
{
|
|
"epoch": 1.0679611650485437,
|
|
"grad_norm": 0.5387121591142974,
|
|
"learning_rate": 3.9668301359148655e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31839385628700256,
|
|
"step": 660,
|
|
"valid_targets_mean": 4574.2,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 1.0760517799352751,
|
|
"grad_norm": 0.4738915321475537,
|
|
"learning_rate": 3.965350506652272e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22641263902187347,
|
|
"step": 665,
|
|
"valid_targets_mean": 4143.0,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 1.0841423948220066,
|
|
"grad_norm": 0.673765362529483,
|
|
"learning_rate": 3.963838880307815e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.253450870513916,
|
|
"step": 670,
|
|
"valid_targets_mean": 3418.4,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 1.0922330097087378,
|
|
"grad_norm": 0.592144514072726,
|
|
"learning_rate": 3.962295281491674e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22054757177829742,
|
|
"step": 675,
|
|
"valid_targets_mean": 3311.9,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 1.1003236245954693,
|
|
"grad_norm": 0.5462311520114211,
|
|
"learning_rate": 3.960719735334562e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25668200850486755,
|
|
"step": 680,
|
|
"valid_targets_mean": 4145.4,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 1.1084142394822007,
|
|
"grad_norm": 0.5596472621487413,
|
|
"learning_rate": 3.959112267487314e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2636863589286804,
|
|
"step": 685,
|
|
"valid_targets_mean": 3808.9,
|
|
"valid_targets_min": 2280
|
|
},
|
|
{
|
|
"epoch": 1.116504854368932,
|
|
"grad_norm": 0.5925165742724042,
|
|
"learning_rate": 3.957472904120467e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2322475016117096,
|
|
"step": 690,
|
|
"valid_targets_mean": 3328.8,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 1.1245954692556634,
|
|
"grad_norm": 0.7318667286756769,
|
|
"learning_rate": 3.955801671923837e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675679326057434,
|
|
"step": 695,
|
|
"valid_targets_mean": 3433.7,
|
|
"valid_targets_min": 630
|
|
},
|
|
{
|
|
"epoch": 1.132686084142395,
|
|
"grad_norm": 0.5473167172657988,
|
|
"learning_rate": 3.954098598106084e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26063698530197144,
|
|
"step": 700,
|
|
"valid_targets_mean": 3818.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 1.1407766990291262,
|
|
"grad_norm": 0.5634978237369443,
|
|
"learning_rate": 3.952363710394269e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25784575939178467,
|
|
"step": 705,
|
|
"valid_targets_mean": 3970.0,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 1.1488673139158576,
|
|
"grad_norm": 0.5655594345426586,
|
|
"learning_rate": 3.9505970370334015e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25309622287750244,
|
|
"step": 710,
|
|
"valid_targets_mean": 3924.2,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 1.156957928802589,
|
|
"grad_norm": 0.5075142775296616,
|
|
"learning_rate": 3.94879860678598e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22384929656982422,
|
|
"step": 715,
|
|
"valid_targets_mean": 3865.6,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 1.1650485436893203,
|
|
"grad_norm": 0.5601883688914359,
|
|
"learning_rate": 3.9469684489315256e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752339243888855,
|
|
"step": 720,
|
|
"valid_targets_mean": 4222.7,
|
|
"valid_targets_min": 2497
|
|
},
|
|
{
|
|
"epoch": 1.1731391585760518,
|
|
"grad_norm": 0.5134594549264883,
|
|
"learning_rate": 3.945106593266102e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594758868217468,
|
|
"step": 725,
|
|
"valid_targets_mean": 4206.9,
|
|
"valid_targets_min": 1984
|
|
},
|
|
{
|
|
"epoch": 1.1812297734627832,
|
|
"grad_norm": 0.5798021779182408,
|
|
"learning_rate": 3.943213070101834e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2584797143936157,
|
|
"step": 730,
|
|
"valid_targets_mean": 3489.6,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 1.1893203883495145,
|
|
"grad_norm": 0.5348245496682883,
|
|
"learning_rate": 3.941287910266411e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23523803055286407,
|
|
"step": 735,
|
|
"valid_targets_mean": 3916.9,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 1.197411003236246,
|
|
"grad_norm": 0.5847494410182235,
|
|
"learning_rate": 3.9393311451025865e-05,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26313185691833496,
|
|
"step": 740,
|
|
"valid_targets_mean": 3422.8,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 1.2055016181229774,
|
|
"grad_norm": 0.5715879469688606,
|
|
"learning_rate": 3.937342806467668e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24966466426849365,
|
|
"step": 745,
|
|
"valid_targets_mean": 3915.3,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 1.2135922330097086,
|
|
"grad_norm": 0.5433469085571423,
|
|
"learning_rate": 3.935322926732998e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.234421506524086,
|
|
"step": 750,
|
|
"valid_targets_mean": 4102.1,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 1.22168284789644,
|
|
"grad_norm": 0.5868454178512109,
|
|
"learning_rate": 3.933271538783427e-05,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27477163076400757,
|
|
"step": 755,
|
|
"valid_targets_mean": 3842.6,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 1.2297734627831716,
|
|
"grad_norm": 0.6436982098810824,
|
|
"learning_rate": 3.931188676016777e-05,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649001479148865,
|
|
"step": 760,
|
|
"valid_targets_mean": 3557.1,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 1.237864077669903,
|
|
"grad_norm": 0.6131515769175564,
|
|
"learning_rate": 3.9290743723433e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513100206851959,
|
|
"step": 765,
|
|
"valid_targets_mean": 3496.9,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.2459546925566343,
|
|
"grad_norm": 0.6865052077576037,
|
|
"learning_rate": 3.926928662185126e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2739570140838623,
|
|
"step": 770,
|
|
"valid_targets_mean": 3481.2,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 1.2540453074433657,
|
|
"grad_norm": 0.5799644871312117,
|
|
"learning_rate": 3.924751580475698e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2648698687553406,
|
|
"step": 775,
|
|
"valid_targets_mean": 3644.6,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 1.262135922330097,
|
|
"grad_norm": 0.5318310447138261,
|
|
"learning_rate": 3.922543162659209e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20989498496055603,
|
|
"step": 780,
|
|
"valid_targets_mean": 3411.7,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 1.2702265372168284,
|
|
"grad_norm": 0.5814600954030472,
|
|
"learning_rate": 3.9203034446900224e-05,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24911276996135712,
|
|
"step": 785,
|
|
"valid_targets_mean": 3388.5,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 1.27831715210356,
|
|
"grad_norm": 0.5206654650887855,
|
|
"learning_rate": 3.918032463032086e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2422664314508438,
|
|
"step": 790,
|
|
"valid_targets_mean": 3730.7,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 1.2864077669902914,
|
|
"grad_norm": 0.5964049859480364,
|
|
"learning_rate": 3.9157302546583406e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2198229730129242,
|
|
"step": 795,
|
|
"valid_targets_mean": 3327.1,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 1.2944983818770226,
|
|
"grad_norm": 0.6407156866162471,
|
|
"learning_rate": 3.913396857050115e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676980495452881,
|
|
"step": 800,
|
|
"valid_targets_mean": 3686.9,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 1.302588996763754,
|
|
"grad_norm": 0.5372612492899006,
|
|
"learning_rate": 3.911032308196518e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21625232696533203,
|
|
"step": 805,
|
|
"valid_targets_mean": 3276.7,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 1.3106796116504853,
|
|
"grad_norm": 0.500246673496057,
|
|
"learning_rate": 3.9086366465938194e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22332966327667236,
|
|
"step": 810,
|
|
"valid_targets_mean": 4221.2,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.3187702265372168,
|
|
"grad_norm": 0.5393069350727855,
|
|
"learning_rate": 3.906209911244823e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2488965392112732,
|
|
"step": 815,
|
|
"valid_targets_mean": 3860.0,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 1.3268608414239482,
|
|
"grad_norm": 0.558283313672063,
|
|
"learning_rate": 3.903752141658232e-05,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24360036849975586,
|
|
"step": 820,
|
|
"valid_targets_mean": 3291.6,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 1.3349514563106797,
|
|
"grad_norm": 0.527247168235714,
|
|
"learning_rate": 3.9012633778480054e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23953786492347717,
|
|
"step": 825,
|
|
"valid_targets_mean": 3723.9,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 1.343042071197411,
|
|
"grad_norm": 0.5365474363809069,
|
|
"learning_rate": 3.8987436603327064e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2417599856853485,
|
|
"step": 830,
|
|
"valid_targets_mean": 3770.5,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 1.3511326860841424,
|
|
"grad_norm": 0.627416131833293,
|
|
"learning_rate": 3.896193030134844e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21616961061954498,
|
|
"step": 835,
|
|
"valid_targets_mean": 3342.2,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 1.3592233009708738,
|
|
"grad_norm": 0.5201944735490818,
|
|
"learning_rate": 3.893611528780204e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24615398049354553,
|
|
"step": 840,
|
|
"valid_targets_mean": 4086.8,
|
|
"valid_targets_min": 2711
|
|
},
|
|
{
|
|
"epoch": 1.367313915857605,
|
|
"grad_norm": 0.49258839979449737,
|
|
"learning_rate": 3.890999198297171e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23036456108093262,
|
|
"step": 845,
|
|
"valid_targets_mean": 4122.5,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 1.3754045307443366,
|
|
"grad_norm": 0.5777476229346971,
|
|
"learning_rate": 3.888356081216049e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24178913235664368,
|
|
"step": 850,
|
|
"valid_targets_mean": 3572.0,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.383495145631068,
|
|
"grad_norm": 0.6128443853720431,
|
|
"learning_rate": 3.8856822205683646e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2308482676744461,
|
|
"step": 855,
|
|
"valid_targets_mean": 2943.4,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.3915857605177995,
|
|
"grad_norm": 0.5764652601903296,
|
|
"learning_rate": 3.882977659886169e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25092625617980957,
|
|
"step": 860,
|
|
"valid_targets_mean": 3776.6,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 1.3996763754045307,
|
|
"grad_norm": 0.5072175975075331,
|
|
"learning_rate": 3.8802424432013283e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2366875559091568,
|
|
"step": 865,
|
|
"valid_targets_mean": 4086.3,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 1.4077669902912622,
|
|
"grad_norm": 0.5559560078057603,
|
|
"learning_rate": 3.8774766150448055e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25183963775634766,
|
|
"step": 870,
|
|
"valid_targets_mean": 3595.1,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 1.4158576051779934,
|
|
"grad_norm": 0.5350029401989669,
|
|
"learning_rate": 3.874680220445938e-05,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22824493050575256,
|
|
"step": 875,
|
|
"valid_targets_mean": 3540.8,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 1.4239482200647249,
|
|
"grad_norm": 0.5447669496074694,
|
|
"learning_rate": 3.871853304931701e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23308587074279785,
|
|
"step": 880,
|
|
"valid_targets_mean": 3631.0,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.4320388349514563,
|
|
"grad_norm": 0.5411709921199677,
|
|
"learning_rate": 3.8689959145259725e-05,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25159168243408203,
|
|
"step": 885,
|
|
"valid_targets_mean": 4113.1,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 1.4401294498381878,
|
|
"grad_norm": 0.5729522085372347,
|
|
"learning_rate": 3.866108095748776e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24209068715572357,
|
|
"step": 890,
|
|
"valid_targets_mean": 3630.1,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 1.448220064724919,
|
|
"grad_norm": 0.9115507641863799,
|
|
"learning_rate": 3.863189895615529e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24850182235240936,
|
|
"step": 895,
|
|
"valid_targets_mean": 3811.0,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 1.4563106796116505,
|
|
"grad_norm": 0.5308936337618426,
|
|
"learning_rate": 3.8602413616362745e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650480270385742,
|
|
"step": 900,
|
|
"valid_targets_mean": 4117.1,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 1.4644012944983817,
|
|
"grad_norm": 0.589828834343643,
|
|
"learning_rate": 3.85726254181491e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25212791562080383,
|
|
"step": 905,
|
|
"valid_targets_mean": 3238.6,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 1.4724919093851132,
|
|
"grad_norm": 0.5452549763443945,
|
|
"learning_rate": 3.8542534846484055e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23172008991241455,
|
|
"step": 910,
|
|
"valid_targets_mean": 4015.4,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 1.4805825242718447,
|
|
"grad_norm": 0.488860349833199,
|
|
"learning_rate": 3.8512142391260085e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21231934428215027,
|
|
"step": 915,
|
|
"valid_targets_mean": 3872.8,
|
|
"valid_targets_min": 1818
|
|
},
|
|
{
|
|
"epoch": 1.4886731391585761,
|
|
"grad_norm": 0.5579335354336327,
|
|
"learning_rate": 3.848144854728457e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260228395462036,
|
|
"step": 920,
|
|
"valid_targets_mean": 3835.0,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 1.4967637540453074,
|
|
"grad_norm": 0.5381713237133415,
|
|
"learning_rate": 3.8450453814271636e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695598900318146,
|
|
"step": 925,
|
|
"valid_targets_mean": 3745.8,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 1.5048543689320388,
|
|
"grad_norm": 0.5004455221868256,
|
|
"learning_rate": 3.8419158696834075e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2213108390569687,
|
|
"step": 930,
|
|
"valid_targets_mean": 3773.2,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 1.51294498381877,
|
|
"grad_norm": 0.5946986799926913,
|
|
"learning_rate": 3.8387563704475124e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25130200386047363,
|
|
"step": 935,
|
|
"valid_targets_mean": 3319.4,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 1.5210355987055015,
|
|
"grad_norm": 0.554915642632707,
|
|
"learning_rate": 3.835566935158015e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2504206895828247,
|
|
"step": 940,
|
|
"valid_targets_mean": 4108.0,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 1.529126213592233,
|
|
"grad_norm": 0.6107974094646808,
|
|
"learning_rate": 3.8323476157408315e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2469642460346222,
|
|
"step": 945,
|
|
"valid_targets_mean": 3444.4,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 1.5372168284789645,
|
|
"grad_norm": 0.48014681884612614,
|
|
"learning_rate": 3.8290984646084056e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21410520374774933,
|
|
"step": 950,
|
|
"valid_targets_mean": 3711.6,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.545307443365696,
|
|
"grad_norm": 0.5852050304300088,
|
|
"learning_rate": 3.825819534658862e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2539462745189667,
|
|
"step": 955,
|
|
"valid_targets_mean": 3593.4,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.5533980582524272,
|
|
"grad_norm": 0.5324836100311932,
|
|
"learning_rate": 3.822510879275142e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2364928424358368,
|
|
"step": 960,
|
|
"valid_targets_mean": 4131.8,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.5614886731391586,
|
|
"grad_norm": 0.5478699189171505,
|
|
"learning_rate": 3.8191725523241346e-05,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546507716178894,
|
|
"step": 965,
|
|
"valid_targets_mean": 3874.2,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 1.5695792880258899,
|
|
"grad_norm": 0.5661577436107965,
|
|
"learning_rate": 3.8158046081557986e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24910283088684082,
|
|
"step": 970,
|
|
"valid_targets_mean": 3499.5,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 1.5776699029126213,
|
|
"grad_norm": 0.5514241647506224,
|
|
"learning_rate": 3.812407101602281e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2755923271179199,
|
|
"step": 975,
|
|
"valid_targets_mean": 4040.9,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 1.5857605177993528,
|
|
"grad_norm": 0.5112749027291317,
|
|
"learning_rate": 3.808980087977019e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22368744015693665,
|
|
"step": 980,
|
|
"valid_targets_mean": 3874.2,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 1.5938511326860842,
|
|
"grad_norm": 0.5289695797679198,
|
|
"learning_rate": 3.8055236230738456e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2390768676996231,
|
|
"step": 985,
|
|
"valid_targets_mean": 3974.6,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 1.6019417475728155,
|
|
"grad_norm": 0.5705928004093642,
|
|
"learning_rate": 3.8020377631660756e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246730476617813,
|
|
"step": 990,
|
|
"valid_targets_mean": 3101.2,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 1.610032362459547,
|
|
"grad_norm": 0.7386498116488778,
|
|
"learning_rate": 3.7985225650055956e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2365446835756302,
|
|
"step": 995,
|
|
"valid_targets_mean": 3654.5,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 1.6181229773462782,
|
|
"grad_norm": 0.5552219499272047,
|
|
"learning_rate": 3.794978085821933e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.251254141330719,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3474.9,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 1.6262135922330097,
|
|
"grad_norm": 0.5607678761522751,
|
|
"learning_rate": 3.7914043833213294e-05,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2405933141708374,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3489.9,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 1.6343042071197411,
|
|
"grad_norm": 0.5527801381980166,
|
|
"learning_rate": 3.787801515685799e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2157033085823059,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3482.9,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 1.6423948220064726,
|
|
"grad_norm": 0.5352962063119632,
|
|
"learning_rate": 3.7841695415721826e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24081295728683472,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4187.8,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 1.650485436893204,
|
|
"grad_norm": 0.5789057849731076,
|
|
"learning_rate": 3.78050852011119e-05,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25976818799972534,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3401.8,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 1.6585760517799353,
|
|
"grad_norm": 0.4954056092036578,
|
|
"learning_rate": 3.776818510906442e-05,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23031404614448547,
|
|
"step": 1025,
|
|
"valid_targets_mean": 4148.9,
|
|
"valid_targets_min": 2287
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.6286314409012811,
|
|
"learning_rate": 3.773099574033495e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23707111179828644,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3307.8,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 1.674757281553398,
|
|
"grad_norm": 0.6553707458396025,
|
|
"learning_rate": 3.7693517700388655e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23486413061618805,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4047.2,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 1.6828478964401294,
|
|
"grad_norm": 0.5934097873043159,
|
|
"learning_rate": 3.765575159939045e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771807610988617,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3773.2,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 1.690938511326861,
|
|
"grad_norm": 0.5378081205172018,
|
|
"learning_rate": 3.761769805219505e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22613410651683807,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3719.8,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 1.6990291262135924,
|
|
"grad_norm": 0.520359262056707,
|
|
"learning_rate": 3.757935767833696e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22133636474609375,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3763.7,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 1.7071197411003236,
|
|
"grad_norm": 0.5008194986466029,
|
|
"learning_rate": 3.7540731102020405e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22984489798545837,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3929.0,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 1.715210355987055,
|
|
"grad_norm": 0.5838525301390869,
|
|
"learning_rate": 3.7501818952109156e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22893069684505463,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3512.1,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 1.7233009708737863,
|
|
"grad_norm": 1.3176771422050513,
|
|
"learning_rate": 3.746262186211629e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24603205919265747,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3416.9,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 1.7313915857605178,
|
|
"grad_norm": 0.49404756439683184,
|
|
"learning_rate": 3.742314047019386e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22380509972572327,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3805.4,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 1.7394822006472492,
|
|
"grad_norm": 0.5236273776513967,
|
|
"learning_rate": 3.7383375419122565e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21787169575691223,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3729.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.7475728155339807,
|
|
"grad_norm": 0.5366758234665157,
|
|
"learning_rate": 3.734332735630121e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22697246074676514,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3726.1,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 1.755663430420712,
|
|
"grad_norm": 0.5880569909763975,
|
|
"learning_rate": 3.730299693373622e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24402067065238953,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3389.1,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 1.7637540453074434,
|
|
"grad_norm": 0.5732738836935737,
|
|
"learning_rate": 3.7262384808031004e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23770153522491455,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3941.0,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 1.7718446601941746,
|
|
"grad_norm": 0.5790818828962175,
|
|
"learning_rate": 3.722149164037525e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508291006088257,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3566.1,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 1.779935275080906,
|
|
"grad_norm": 0.5588093189627107,
|
|
"learning_rate": 3.718031809653419e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560824155807495,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4033.4,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 1.7880258899676376,
|
|
"grad_norm": 0.5358396313208293,
|
|
"learning_rate": 3.713886484683776e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2311561405658722,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3314.1,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 1.796116504854369,
|
|
"grad_norm": 0.5222906484183111,
|
|
"learning_rate": 3.7097132566169644e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244446873664856,
|
|
"step": 1110,
|
|
"valid_targets_mean": 3713.9,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.8042071197411005,
|
|
"grad_norm": 0.5719814501478453,
|
|
"learning_rate": 3.7055121933956344e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24464090168476105,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3385.4,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 1.8122977346278317,
|
|
"grad_norm": 0.49942174241821524,
|
|
"learning_rate": 3.7012833634156074e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21926362812519073,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3780.5,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 1.820388349514563,
|
|
"grad_norm": 0.5236595180717353,
|
|
"learning_rate": 3.6970268355247664e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23075798153877258,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4367.0,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 1.8284789644012944,
|
|
"grad_norm": 0.512201160936164,
|
|
"learning_rate": 3.6927426790219296e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22904011607170105,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3974.8,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 1.8365695792880259,
|
|
"grad_norm": 0.5421531914600399,
|
|
"learning_rate": 3.6884309636557294e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23919343948364258,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4035.8,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.8446601941747574,
|
|
"grad_norm": 0.5030012800087369,
|
|
"learning_rate": 3.6840917596234706e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21478207409381866,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3792.1,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 1.8527508090614888,
|
|
"grad_norm": 0.4942511653414622,
|
|
"learning_rate": 3.67972513756999e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22776880860328674,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4292.5,
|
|
"valid_targets_min": 3142
|
|
},
|
|
{
|
|
"epoch": 1.86084142394822,
|
|
"grad_norm": 0.5531908060139232,
|
|
"learning_rate": 3.675331168586507e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260960340499878,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3819.9,
|
|
"valid_targets_min": 2287
|
|
},
|
|
{
|
|
"epoch": 1.8689320388349513,
|
|
"grad_norm": 0.5470369953036037,
|
|
"learning_rate": 3.670909924209464e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2212294042110443,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3313.6,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 1.8770226537216828,
|
|
"grad_norm": 0.6663201618271906,
|
|
"learning_rate": 3.666461476419366e-05,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22908449172973633,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3906.2,
|
|
"valid_targets_min": 1558
|
|
},
|
|
{
|
|
"epoch": 1.8851132686084142,
|
|
"grad_norm": 0.5574954468183086,
|
|
"learning_rate": 3.6619858976396024e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2327789068222046,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3522.2,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 1.8932038834951457,
|
|
"grad_norm": 0.5620174972243343,
|
|
"learning_rate": 3.657483260735274e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24146509170532227,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3762.1,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 1.9012944983818771,
|
|
"grad_norm": 0.5361897984263154,
|
|
"learning_rate": 3.652953639012001e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23045086860656738,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3549.8,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.9093851132686084,
|
|
"grad_norm": 0.5229274187192743,
|
|
"learning_rate": 3.648397106214737e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23464813828468323,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3790.5,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 1.9174757281553398,
|
|
"grad_norm": 0.5543253937763715,
|
|
"learning_rate": 3.6438137365265605e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2228466123342514,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3626.7,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 1.925566343042071,
|
|
"grad_norm": 0.529511456822995,
|
|
"learning_rate": 3.639203604567471e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24794839322566986,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3834.1,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 1.9336569579288025,
|
|
"grad_norm": 0.531993228725407,
|
|
"learning_rate": 3.634566785393175e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25982001423835754,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4059.9,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.941747572815534,
|
|
"grad_norm": 0.5265102733273151,
|
|
"learning_rate": 3.629903354493863e-05,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2254936546087265,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3641.8,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 1.9498381877022655,
|
|
"grad_norm": 0.501456617132805,
|
|
"learning_rate": 3.625213387792979e-05,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2376459538936615,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4138.9,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 1.9579288025889967,
|
|
"grad_norm": 0.5764064629638148,
|
|
"learning_rate": 3.620496961645986e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591501772403717,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3676.3,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 1.9660194174757282,
|
|
"grad_norm": 0.5031251840065841,
|
|
"learning_rate": 3.615754152839122e-05,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22879642248153687,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4046.3,
|
|
"valid_targets_min": 2065
|
|
},
|
|
{
|
|
"epoch": 1.9741100323624594,
|
|
"grad_norm": 0.5059144405997464,
|
|
"learning_rate": 3.610985038588153e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19675277173519135,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3762.8,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 1.9822006472491909,
|
|
"grad_norm": 0.5147056567182494,
|
|
"learning_rate": 3.606189696537112e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22215335071086884,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4182.2,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 1.9902912621359223,
|
|
"grad_norm": 0.4962653381732891,
|
|
"learning_rate": 3.601368204757034e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21097412705421448,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4162.3,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 1.9983818770226538,
|
|
"grad_norm": 0.577252598785239,
|
|
"learning_rate": 3.596520641744691e-05,
|
|
"loss": 0.2244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23722471296787262,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3291.0,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 2.0064724919093853,
|
|
"grad_norm": 0.4993415945161253,
|
|
"learning_rate": 3.591647086421308e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21230442821979523,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3964.4,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 2.0145631067961167,
|
|
"grad_norm": 0.5966427219749822,
|
|
"learning_rate": 3.586747618131281e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23013052344322205,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3509.5,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 2.0226537216828477,
|
|
"grad_norm": 0.5074745492260596,
|
|
"learning_rate": 3.581822316640884e-05,
|
|
"loss": 0.1965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18717864155769348,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3663.1,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 2.030744336569579,
|
|
"grad_norm": 0.5173949946365176,
|
|
"learning_rate": 3.5768712621369724e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18645232915878296,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3951.8,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 2.0388349514563107,
|
|
"grad_norm": 0.5448045943609541,
|
|
"learning_rate": 3.571894535225674e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1799754500389099,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3515.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 2.046925566343042,
|
|
"grad_norm": 0.5746768972560183,
|
|
"learning_rate": 3.566892216931081e-05,
|
|
"loss": 0.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19511979818344116,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3279.9,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 2.0550161812297736,
|
|
"grad_norm": 0.5648116924397264,
|
|
"learning_rate": 3.561864388693926e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2113444060087204,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3652.9,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 2.063106796116505,
|
|
"grad_norm": 0.5516073010997726,
|
|
"learning_rate": 3.556811132370261e-05,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20847252011299133,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3552.1,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 2.071197411003236,
|
|
"grad_norm": 0.7203895860407499,
|
|
"learning_rate": 3.551732530230123e-05,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21450018882751465,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3142.6,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 2.0792880258899675,
|
|
"grad_norm": 0.8756711994922913,
|
|
"learning_rate": 3.54662866495619e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18415921926498413,
|
|
"step": 1285,
|
|
"valid_targets_mean": 3720.0,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 2.087378640776699,
|
|
"grad_norm": 0.5096759527620363,
|
|
"learning_rate": 3.5414996196424435e-05,
|
|
"loss": 0.2088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18555939197540283,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3764.2,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 2.0954692556634305,
|
|
"grad_norm": 0.5196043558651653,
|
|
"learning_rate": 3.536345477792809e-05,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21714207530021667,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4167.4,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.103559870550162,
|
|
"grad_norm": 0.5198619298874421,
|
|
"learning_rate": 3.5311663233197984e-05,
|
|
"loss": 0.1999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21272701025009155,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3971.4,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 2.1116504854368934,
|
|
"grad_norm": 0.5928562565695267,
|
|
"learning_rate": 3.525962240543144e-05,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22063596546649933,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3416.7,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 2.1197411003236244,
|
|
"grad_norm": 0.5216255221584896,
|
|
"learning_rate": 3.520733314188429e-05,
|
|
"loss": 0.2193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21724799275398254,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4127.8,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 2.127831715210356,
|
|
"grad_norm": 0.5474600583613157,
|
|
"learning_rate": 3.515479629385699e-05,
|
|
"loss": 0.2075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20810237526893616,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3830.8,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 2.1359223300970873,
|
|
"grad_norm": 0.48930392242915055,
|
|
"learning_rate": 3.5102012716680875e-05,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19523948431015015,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4223.8,
|
|
"valid_targets_min": 2806
|
|
},
|
|
{
|
|
"epoch": 2.144012944983819,
|
|
"grad_norm": 0.5866244137062765,
|
|
"learning_rate": 3.504898326970414e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.200786292552948,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3740.6,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 2.1521035598705502,
|
|
"grad_norm": 0.546507028724296,
|
|
"learning_rate": 3.499570881627791e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20447948575019836,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4271.7,
|
|
"valid_targets_min": 2185
|
|
},
|
|
{
|
|
"epoch": 2.1601941747572817,
|
|
"grad_norm": 0.5501111945463919,
|
|
"learning_rate": 3.494219022374215e-05,
|
|
"loss": 0.1963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17158368229866028,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3503.9,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 2.168284789644013,
|
|
"grad_norm": 0.5332981963079229,
|
|
"learning_rate": 3.4888428363411564e-05,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2065584659576416,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3692.7,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 2.176375404530744,
|
|
"grad_norm": 0.5227522173533274,
|
|
"learning_rate": 3.48344241105614e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17856690287590027,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3659.4,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 2.1844660194174756,
|
|
"grad_norm": 0.5249981570603643,
|
|
"learning_rate": 3.478017834441319e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2090633660554886,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3779.1,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 2.192556634304207,
|
|
"grad_norm": 0.5704929170229348,
|
|
"learning_rate": 3.472569194812045e-05,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20181623101234436,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3276.9,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 2.2006472491909386,
|
|
"grad_norm": 0.5588805782852425,
|
|
"learning_rate": 3.467096580875432e-05,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22426563501358032,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3875.8,
|
|
"valid_targets_min": 1872
|
|
},
|
|
{
|
|
"epoch": 2.20873786407767,
|
|
"grad_norm": 0.5509007102722255,
|
|
"learning_rate": 3.4616000817289076e-05,
|
|
"loss": 0.2017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19825254380702972,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3587.5,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 2.2168284789644015,
|
|
"grad_norm": 0.5222770840734019,
|
|
"learning_rate": 3.456079786858766e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18225571513175964,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3472.8,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 2.2249190938511325,
|
|
"grad_norm": 0.5335762760883815,
|
|
"learning_rate": 3.450535786138709e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2157287895679474,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4006.0,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 2.233009708737864,
|
|
"grad_norm": 0.5911174264119916,
|
|
"learning_rate": 3.4449681698283856e-05,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22825801372528076,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3319.5,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 2.2411003236245954,
|
|
"grad_norm": 0.5582155327413821,
|
|
"learning_rate": 3.4393770285719196e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18830609321594238,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3346.9,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 2.249190938511327,
|
|
"grad_norm": 0.5048820994486662,
|
|
"learning_rate": 3.433762453396434e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18742072582244873,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4147.5,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 2.2572815533980584,
|
|
"grad_norm": 0.5410154491099326,
|
|
"learning_rate": 3.4281245357105726e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20132315158843994,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3883.0,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 2.26537216828479,
|
|
"grad_norm": 0.5709057031867241,
|
|
"learning_rate": 3.4224633673030074e-05,
|
|
"loss": 0.2069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20349285006523132,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3687.4,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 2.273462783171521,
|
|
"grad_norm": 0.5195581585764436,
|
|
"learning_rate": 3.416779040340945e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18851682543754578,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3685.3,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 2.2815533980582523,
|
|
"grad_norm": 0.6363313435467606,
|
|
"learning_rate": 3.4110716473686305e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2130783498287201,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3862.9,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 2.2896440129449838,
|
|
"grad_norm": 0.5643587316981776,
|
|
"learning_rate": 3.405341281305835e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18532046675682068,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3771.4,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 2.2977346278317152,
|
|
"grad_norm": 0.6002935838348056,
|
|
"learning_rate": 3.3995880354463444e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19355767965316772,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3108.6,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 2.3058252427184467,
|
|
"grad_norm": 0.6594173942377517,
|
|
"learning_rate": 3.393812003456444e-05,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1892772614955902,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3523.7,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 2.313915857605178,
|
|
"grad_norm": 0.5392073312759366,
|
|
"learning_rate": 3.3880132793733896e-05,
|
|
"loss": 0.2049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17127923667430878,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3360.4,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 2.3220064724919096,
|
|
"grad_norm": 0.6041744216730695,
|
|
"learning_rate": 3.382191957603878e-05,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21991001069545746,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3557.8,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 2.3300970873786406,
|
|
"grad_norm": 0.5036421334771191,
|
|
"learning_rate": 3.376348132922509e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19798916578292847,
|
|
"step": 1440,
|
|
"valid_targets_mean": 4315.9,
|
|
"valid_targets_min": 3091
|
|
},
|
|
{
|
|
"epoch": 2.338187702265372,
|
|
"grad_norm": 0.5210163872616127,
|
|
"learning_rate": 3.3704819004702444e-05,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19083701074123383,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3990.8,
|
|
"valid_targets_min": 1812
|
|
},
|
|
{
|
|
"epoch": 2.3462783171521036,
|
|
"grad_norm": 0.5382923205946923,
|
|
"learning_rate": 3.364593355752857e-05,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22279268503189087,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4153.6,
|
|
"valid_targets_min": 2749
|
|
},
|
|
{
|
|
"epoch": 2.354368932038835,
|
|
"grad_norm": 0.5690802179863486,
|
|
"learning_rate": 3.358682594639379e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21023952960968018,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4108.8,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 2.3624595469255665,
|
|
"grad_norm": 0.5587521088638144,
|
|
"learning_rate": 3.3527497133605345e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21760553121566772,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4084.8,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 2.3705501618122975,
|
|
"grad_norm": 0.6327920191794262,
|
|
"learning_rate": 3.346794808507182e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19593237340450287,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3724.5,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 2.378640776699029,
|
|
"grad_norm": 0.5243331783837871,
|
|
"learning_rate": 3.340817977028732e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1890985071659088,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3856.2,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 2.3867313915857604,
|
|
"grad_norm": 0.6002411735204223,
|
|
"learning_rate": 3.3348193162315786e-05,
|
|
"loss": 0.2013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21150943636894226,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3088.4,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 2.394822006472492,
|
|
"grad_norm": 0.6149693570960357,
|
|
"learning_rate": 3.3287989237775064e-05,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20418787002563477,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3793.6,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 2.4029126213592233,
|
|
"grad_norm": 0.5452824529033008,
|
|
"learning_rate": 3.322756897682106e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19814196228981018,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3985.6,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 2.411003236245955,
|
|
"grad_norm": 0.5481539653642269,
|
|
"learning_rate": 3.3166933363131765e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18332578241825104,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3920.4,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.4190938511326863,
|
|
"grad_norm": 0.5389343062157606,
|
|
"learning_rate": 3.310608338389124e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19974827766418457,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4036.6,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 2.4271844660194173,
|
|
"grad_norm": 0.5611663097998205,
|
|
"learning_rate": 3.304502002977355e-05,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18663233518600464,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4147.1,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 2.4352750809061487,
|
|
"grad_norm": 0.5505208094874494,
|
|
"learning_rate": 3.2983744294926614e-05,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20405398309230804,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3872.2,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.44336569579288,
|
|
"grad_norm": 0.7012874764369187,
|
|
"learning_rate": 3.292225717695606e-05,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2327345907688141,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3637.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.4514563106796117,
|
|
"grad_norm": 1.1024977498248867,
|
|
"learning_rate": 3.286055967690894e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21150124073028564,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4220.2,
|
|
"valid_targets_min": 3011
|
|
},
|
|
{
|
|
"epoch": 2.459546925566343,
|
|
"grad_norm": 0.7097144552349174,
|
|
"learning_rate": 3.279865279925748e-05,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20087671279907227,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3596.6,
|
|
"valid_targets_min": 1667
|
|
},
|
|
{
|
|
"epoch": 2.4676375404530746,
|
|
"grad_norm": 0.6102242644893953,
|
|
"learning_rate": 3.273653755188265e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21528849005699158,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3797.2,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 2.475728155339806,
|
|
"grad_norm": 0.5996789804839444,
|
|
"learning_rate": 3.267421494605786e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2003488689661026,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3306.8,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 2.483818770226537,
|
|
"grad_norm": 0.5539684053989401,
|
|
"learning_rate": 3.261168599643239e-05,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19859477877616882,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3921.0,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 2.4919093851132685,
|
|
"grad_norm": 0.5800147569212741,
|
|
"learning_rate": 3.254895172101495e-05,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20788471400737762,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3650.2,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.5444327898653049,
|
|
"learning_rate": 3.248601314115709e-05,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19758197665214539,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3911.5,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 2.5080906148867315,
|
|
"grad_norm": 0.5585959818809851,
|
|
"learning_rate": 3.2422871281536504e-05,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20119929313659668,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3706.0,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 2.516181229773463,
|
|
"grad_norm": 0.5786946765486497,
|
|
"learning_rate": 3.235952717014046e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18431814014911652,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3852.7,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 2.524271844660194,
|
|
"grad_norm": 0.722163694671597,
|
|
"learning_rate": 3.229598183824897e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19687601923942566,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3535.8,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 2.5323624595469254,
|
|
"grad_norm": 0.5945221722717067,
|
|
"learning_rate": 3.223223632041806e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19220665097236633,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3312.9,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 2.540453074433657,
|
|
"grad_norm": 0.5463852945973736,
|
|
"learning_rate": 3.216829165446288e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20611058175563812,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3802.0,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 2.5485436893203883,
|
|
"grad_norm": 0.5992106420260478,
|
|
"learning_rate": 3.210414888144085e-05,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2046264111995697,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3380.8,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 2.55663430420712,
|
|
"grad_norm": 0.5425409151257213,
|
|
"learning_rate": 3.203980904563467e-05,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19060689210891724,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4033.8,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 2.5647249190938513,
|
|
"grad_norm": 0.5481719828708285,
|
|
"learning_rate": 3.1975273194535365e-05,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17384769022464752,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3420.6,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 2.5728155339805827,
|
|
"grad_norm": 0.5535876172620676,
|
|
"learning_rate": 3.191054237882519e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20850872993469238,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3588.6,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 2.5809061488673137,
|
|
"grad_norm": 0.5799991839791976,
|
|
"learning_rate": 3.1845617652360556e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.204398050904274,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3439.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 2.588996763754045,
|
|
"grad_norm": 0.5277347822812265,
|
|
"learning_rate": 3.178050007215483e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1802809089422226,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3597.8,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 2.5970873786407767,
|
|
"grad_norm": 0.5922385180939794,
|
|
"learning_rate": 3.171519069836118e-05,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20629601180553436,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3622.6,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 2.605177993527508,
|
|
"grad_norm": 0.5880468064461831,
|
|
"learning_rate": 3.1649690594255265e-05,
|
|
"loss": 0.2017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19900715351104736,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3396.1,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 2.6132686084142396,
|
|
"grad_norm": 0.5697903771803728,
|
|
"learning_rate": 3.1584000826217974e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18581387400627136,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3602.9,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 2.6213592233009706,
|
|
"grad_norm": 0.6018213495545038,
|
|
"learning_rate": 3.151812246371802e-05,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18213129043579102,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3693.2,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 2.6294498381877025,
|
|
"grad_norm": 0.5772009351308939,
|
|
"learning_rate": 3.145205657929454e-05,
|
|
"loss": 0.1989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20195090770721436,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3644.8,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 2.6375404530744335,
|
|
"grad_norm": 0.5878583193941402,
|
|
"learning_rate": 3.1385804248539665e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2117636501789093,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3500.8,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 2.645631067961165,
|
|
"grad_norm": 0.561998252403094,
|
|
"learning_rate": 3.131936655008097e-05,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21909379959106445,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4789.3,
|
|
"valid_targets_min": 2141
|
|
},
|
|
{
|
|
"epoch": 2.6537216828478964,
|
|
"grad_norm": 0.6250002374027832,
|
|
"learning_rate": 3.125274456556392e-05,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21586726605892181,
|
|
"step": 1640,
|
|
"valid_targets_mean": 3864.6,
|
|
"valid_targets_min": 441
|
|
},
|
|
{
|
|
"epoch": 2.661812297734628,
|
|
"grad_norm": 0.6877067793902955,
|
|
"learning_rate": 3.1185939379634274e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2070537805557251,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3693.6,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 2.6699029126213594,
|
|
"grad_norm": 0.5200491955654815,
|
|
"learning_rate": 3.111895207992042e-05,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1959010660648346,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4079.2,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 2.6779935275080904,
|
|
"grad_norm": 0.5326594070559258,
|
|
"learning_rate": 3.1051783757015686e-05,
|
|
"loss": 0.1937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18363815546035767,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4011.5,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.686084142394822,
|
|
"grad_norm": 0.5389554367064366,
|
|
"learning_rate": 3.098443550446051e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.209175705909729,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4220.7,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 2.6941747572815533,
|
|
"grad_norm": 0.5459696709484382,
|
|
"learning_rate": 3.0916908418724765e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18107455968856812,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3520.8,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 2.7022653721682848,
|
|
"grad_norm": 0.5798237314369729,
|
|
"learning_rate": 3.0849203599189776e-05,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20543158054351807,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3632.6,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 2.7103559870550162,
|
|
"grad_norm": 0.5316270837678921,
|
|
"learning_rate": 3.0781322148130514e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20163953304290771,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3576.5,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 2.7184466019417477,
|
|
"grad_norm": 0.4904568873109919,
|
|
"learning_rate": 3.071326517069761e-05,
|
|
"loss": 0.2015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1812264323234558,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4223.2,
|
|
"valid_targets_min": 2104
|
|
},
|
|
{
|
|
"epoch": 2.726537216828479,
|
|
"grad_norm": 0.5354083648536498,
|
|
"learning_rate": 3.064503377489936e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20075714588165283,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3889.9,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 2.73462783171521,
|
|
"grad_norm": 0.5820578665530104,
|
|
"learning_rate": 3.0576629071583704e-05,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22076207399368286,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3753.3,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 2.7427184466019416,
|
|
"grad_norm": 0.60823767625081,
|
|
"learning_rate": 3.0508052174420132e-05,
|
|
"loss": 0.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21139661967754364,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3182.8,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 2.750809061488673,
|
|
"grad_norm": 0.5396414265678104,
|
|
"learning_rate": 3.0439304199881543e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19884394109249115,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3960.8,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 2.7588996763754046,
|
|
"grad_norm": 0.6318198192740533,
|
|
"learning_rate": 3.03703862672261e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2142893671989441,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3710.0,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 2.766990291262136,
|
|
"grad_norm": 0.5118804639309135,
|
|
"learning_rate": 3.030129949847895e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19666945934295654,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4017.2,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 2.775080906148867,
|
|
"grad_norm": 0.5233752971594281,
|
|
"learning_rate": 3.023204501841403e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1879742294549942,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3730.5,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 2.783171521035599,
|
|
"grad_norm": 0.5545239643662999,
|
|
"learning_rate": 3.01626239545357e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21372893452644348,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3842.9,
|
|
"valid_targets_min": 615
|
|
},
|
|
{
|
|
"epoch": 2.79126213592233,
|
|
"grad_norm": 0.5296179785533861,
|
|
"learning_rate": 3.00930374370604e-05,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17748533189296722,
|
|
"step": 1725,
|
|
"valid_targets_mean": 3760.2,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 2.7993527508090614,
|
|
"grad_norm": 0.5915432116224574,
|
|
"learning_rate": 3.002328659889826e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1959744095802307,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3509.1,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 2.807443365695793,
|
|
"grad_norm": 0.5095307798896164,
|
|
"learning_rate": 2.995337257563466e-05,
|
|
"loss": 0.1905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18040308356285095,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4305.2,
|
|
"valid_targets_min": 3091
|
|
},
|
|
{
|
|
"epoch": 2.8155339805825244,
|
|
"grad_norm": 0.6272482204481657,
|
|
"learning_rate": 2.9883296505511704e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19337265193462372,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3533.1,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.823624595469256,
|
|
"grad_norm": 0.5315105215288927,
|
|
"learning_rate": 2.981305952940973e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18375645577907562,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4295.6,
|
|
"valid_targets_min": 2906
|
|
},
|
|
{
|
|
"epoch": 2.831715210355987,
|
|
"grad_norm": 0.5351373135111939,
|
|
"learning_rate": 2.9742662790828732e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18155202269554138,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3872.8,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 2.8398058252427183,
|
|
"grad_norm": 0.5882867567839128,
|
|
"learning_rate": 2.9672107435869727e-05,
|
|
"loss": 0.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20388536155223846,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3398.5,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 2.8478964401294498,
|
|
"grad_norm": 0.5166204472764317,
|
|
"learning_rate": 2.9601394613216086e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1891598403453827,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3952.1,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 2.855987055016181,
|
|
"grad_norm": 0.495203793195684,
|
|
"learning_rate": 2.953052547411487e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19284304976463318,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4468.9,
|
|
"valid_targets_min": 3669
|
|
},
|
|
{
|
|
"epoch": 2.8640776699029127,
|
|
"grad_norm": 0.5707255444123573,
|
|
"learning_rate": 2.9459501172358046e-05,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19373999536037445,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4088.8,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 2.872168284789644,
|
|
"grad_norm": 0.603965037928362,
|
|
"learning_rate": 2.9388322864263747e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2008977234363556,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3390.8,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 2.8802588996763756,
|
|
"grad_norm": 0.6153412854635089,
|
|
"learning_rate": 2.9316991708657382e-05,
|
|
"loss": 0.1937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19763140380382538,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3698.1,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 2.8883495145631066,
|
|
"grad_norm": 0.5740792099777532,
|
|
"learning_rate": 2.924550886685285e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21043045818805695,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3918.6,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 2.896440129449838,
|
|
"grad_norm": 0.5205108968750519,
|
|
"learning_rate": 2.917387550263357e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19361340999603271,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4294.6,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 2.9045307443365695,
|
|
"grad_norm": 0.5515212654195533,
|
|
"learning_rate": 2.9102092782233563e-05,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19037625193595886,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3688.1,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 2.912621359223301,
|
|
"grad_norm": 0.5966136509189564,
|
|
"learning_rate": 2.9030161874318455e-05,
|
|
"loss": 0.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21651627123355865,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3206.2,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 2.9207119741100325,
|
|
"grad_norm": 0.6501703618729953,
|
|
"learning_rate": 2.895808394996644e-05,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20761127769947052,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3975.1,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 2.9288025889967635,
|
|
"grad_norm": 0.5679104463871691,
|
|
"learning_rate": 2.8885860182649263e-05,
|
|
"loss": 0.2075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19638538360595703,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3561.3,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 2.9368932038834954,
|
|
"grad_norm": 0.6224037383738494,
|
|
"learning_rate": 2.881349174821305e-05,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22906172275543213,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3815.6,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.9449838187702264,
|
|
"grad_norm": 0.5687126118661141,
|
|
"learning_rate": 2.8740979824859194e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18152084946632385,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3180.0,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 2.953074433656958,
|
|
"grad_norm": 0.5633436924003058,
|
|
"learning_rate": 2.8668325593125192e-05,
|
|
"loss": 0.1853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19939512014389038,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3791.1,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 2.9611650485436893,
|
|
"grad_norm": 0.5634935052804328,
|
|
"learning_rate": 2.8595530235865397e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20533940196037292,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3936.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 2.969255663430421,
|
|
"grad_norm": 0.5169570794160626,
|
|
"learning_rate": 2.8522594938231755e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1793152093887329,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4222.0,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 2.9773462783171523,
|
|
"grad_norm": 0.5914192243747182,
|
|
"learning_rate": 2.8449520887654558e-05,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23565539717674255,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3811.9,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 2.9854368932038833,
|
|
"grad_norm": 0.5042664015367552,
|
|
"learning_rate": 2.8376309273823047e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2007623314857483,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4263.4,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 2.9935275080906147,
|
|
"grad_norm": 0.5391052395522601,
|
|
"learning_rate": 2.8302961288666094e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1861807405948639,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4023.2,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 3.001618122977346,
|
|
"grad_norm": 0.5919409123484987,
|
|
"learning_rate": 2.8229478126332766e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1760110855102539,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4402.6,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 3.0097087378640777,
|
|
"grad_norm": 0.6049776201529414,
|
|
"learning_rate": 2.815586098317291e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18497231602668762,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4040.8,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 3.017799352750809,
|
|
"grad_norm": 0.5632642899702008,
|
|
"learning_rate": 2.8082111057717655e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16407430171966553,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3755.1,
|
|
"valid_targets_min": 2032
|
|
},
|
|
{
|
|
"epoch": 3.0258899676375406,
|
|
"grad_norm": 0.5813140896256607,
|
|
"learning_rate": 2.8008229550659883e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17744824290275574,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3664.5,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 3.033980582524272,
|
|
"grad_norm": 0.5670865086302852,
|
|
"learning_rate": 2.793421766483474e-05,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.155634805560112,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3756.2,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 3.042071197411003,
|
|
"grad_norm": 0.5823101361167331,
|
|
"learning_rate": 2.7860076605199995e-05,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16646599769592285,
|
|
"step": 1880,
|
|
"valid_targets_mean": 3607.3,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 3.0501618122977345,
|
|
"grad_norm": 0.8908406522912229,
|
|
"learning_rate": 2.7785807578816448e-05,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17764373123645782,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3329.0,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 3.058252427184466,
|
|
"grad_norm": 0.66787286744631,
|
|
"learning_rate": 2.7711411794828274e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17671708762645721,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3682.6,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 3.0663430420711975,
|
|
"grad_norm": 1.713966990953561,
|
|
"learning_rate": 2.7636890464443333e-05,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1910078227519989,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3873.7,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 3.074433656957929,
|
|
"grad_norm": 0.5994989410802046,
|
|
"learning_rate": 2.756224480091347e-05,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17505040764808655,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3232.8,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 3.0825242718446604,
|
|
"grad_norm": 0.5795213439206806,
|
|
"learning_rate": 2.7487476019514726e-05,
|
|
"loss": 0.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17841529846191406,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3820.4,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 3.0906148867313914,
|
|
"grad_norm": 0.630803442604153,
|
|
"learning_rate": 2.74125853375276e-05,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1717372089624405,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3442.6,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 3.098705501618123,
|
|
"grad_norm": 0.5607447512719843,
|
|
"learning_rate": 2.7337573974217177e-05,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18259483575820923,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4125.4,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 3.1067961165048543,
|
|
"grad_norm": 0.5173894268282134,
|
|
"learning_rate": 2.726244315081334e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1611001193523407,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3982.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.114886731391586,
|
|
"grad_norm": 0.5917839297779477,
|
|
"learning_rate": 2.718719409049082e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15416556596755981,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4123.0,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 3.1229773462783172,
|
|
"grad_norm": 0.5712043686830873,
|
|
"learning_rate": 2.711182801834933e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15566003322601318,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3407.0,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 3.1310679611650487,
|
|
"grad_norm": 0.5813335365274586,
|
|
"learning_rate": 2.7036346161393617e-05,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19251751899719238,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3737.6,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 3.1391585760517797,
|
|
"grad_norm": 0.7481785553939617,
|
|
"learning_rate": 2.696074974851346e-05,
|
|
"loss": 0.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18153071403503418,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3754.9,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 3.147249190938511,
|
|
"grad_norm": 0.6364983099115371,
|
|
"learning_rate": 2.688504001046367e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17164380848407745,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3317.8,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 3.1553398058252426,
|
|
"grad_norm": 0.5736813602974871,
|
|
"learning_rate": 2.6809218179844085e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17162342369556427,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3369.4,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 3.163430420711974,
|
|
"grad_norm": 0.6226569121757463,
|
|
"learning_rate": 2.6733285491079453e-05,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19535747170448303,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4032.9,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 3.1715210355987056,
|
|
"grad_norm": 0.6020916586272426,
|
|
"learning_rate": 2.6657243180399373e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19260089099407196,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3971.1,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 3.179611650485437,
|
|
"grad_norm": 0.5228726712361733,
|
|
"learning_rate": 2.658109248581814e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16513675451278687,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4421.4,
|
|
"valid_targets_min": 2181
|
|
},
|
|
{
|
|
"epoch": 3.1877022653721685,
|
|
"grad_norm": 0.6756728849384417,
|
|
"learning_rate": 2.650483464711462e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19772417843341827,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3761.0,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 3.1957928802588995,
|
|
"grad_norm": 0.5713446845470945,
|
|
"learning_rate": 2.6428470905812047e-05,
|
|
"loss": 0.1676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17231231927871704,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3961.8,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 3.203883495145631,
|
|
"grad_norm": 0.5742649893192334,
|
|
"learning_rate": 2.6352002505157802e-05,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17857617139816284,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3959.9,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 3.2119741100323624,
|
|
"grad_norm": 0.5570549296210755,
|
|
"learning_rate": 2.6275430690103188e-05,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16560588777065277,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3895.8,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 3.220064724919094,
|
|
"grad_norm": 0.6784253073209046,
|
|
"learning_rate": 2.6198756707283153e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20054873824119568,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3521.2,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 3.2281553398058254,
|
|
"grad_norm": 0.5829948937510232,
|
|
"learning_rate": 2.612198180499601e-05,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702016144990921,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4062.2,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 3.236245954692557,
|
|
"grad_norm": 0.6454144281714788,
|
|
"learning_rate": 2.604510723318309e-05,
|
|
"loss": 0.1774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19506880640983582,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3623.4,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 3.244336569579288,
|
|
"grad_norm": 0.6449676332877781,
|
|
"learning_rate": 2.5968134243408405e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17631593346595764,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3297.8,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 3.2524271844660193,
|
|
"grad_norm": 0.5997971315950034,
|
|
"learning_rate": 2.5891064088838278e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17917688190937042,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3738.5,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 3.2605177993527508,
|
|
"grad_norm": 0.5492504542337715,
|
|
"learning_rate": 2.5813898024220912e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1532193124294281,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4056.2,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 3.2686084142394822,
|
|
"grad_norm": 0.6096358205706522,
|
|
"learning_rate": 2.573663730586601e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17237809300422668,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3330.9,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 3.2766990291262137,
|
|
"grad_norm": 0.5783520220955675,
|
|
"learning_rate": 2.5659283191624277e-05,
|
|
"loss": 0.182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1740003228187561,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4061.6,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 3.284789644012945,
|
|
"grad_norm": 0.6089773249005341,
|
|
"learning_rate": 2.5581836940866967e-05,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15423806011676788,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3516.4,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 3.292880258899676,
|
|
"grad_norm": 0.6155420560621121,
|
|
"learning_rate": 2.550429981446537e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1603637933731079,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3189.5,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 3.3009708737864076,
|
|
"grad_norm": 0.5640533692970745,
|
|
"learning_rate": 2.542667307477027e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16283290088176727,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3909.0,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 3.309061488673139,
|
|
"grad_norm": 0.705469954007443,
|
|
"learning_rate": 2.534895798559144e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684490144252777,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2735.7,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 3.3171521035598706,
|
|
"grad_norm": 0.5321890260336842,
|
|
"learning_rate": 2.527115581217702e-05,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15115907788276672,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3915.1,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 3.325242718446602,
|
|
"grad_norm": 0.6319090887888136,
|
|
"learning_rate": 2.5193267821192914e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1694541871547699,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3444.1,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.6041257377819086,
|
|
"learning_rate": 2.5115295280702222e-05,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.177719846367836,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3456.6,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 3.341423948220065,
|
|
"grad_norm": 0.6358966333325975,
|
|
"learning_rate": 2.5037239460144534e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17559629678726196,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3449.5,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.349514563106796,
|
|
"grad_norm": 0.5906966992314931,
|
|
"learning_rate": 2.4959101630315315e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19974157214164734,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4124.6,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 3.3576051779935274,
|
|
"grad_norm": 0.5645320929874551,
|
|
"learning_rate": 2.4880883063345157e-05,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15666401386260986,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3943.2,
|
|
"valid_targets_min": 2646
|
|
},
|
|
{
|
|
"epoch": 3.365695792880259,
|
|
"grad_norm": 0.640752643762248,
|
|
"learning_rate": 2.480258503267912e-05,
|
|
"loss": 0.1774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16370724141597748,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3595.3,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 3.3737864077669903,
|
|
"grad_norm": 0.6051795750412866,
|
|
"learning_rate": 2.472420881305599e-05,
|
|
"loss": 0.1652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16652750968933105,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3438.4,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 3.381877022653722,
|
|
"grad_norm": 0.56914949365543,
|
|
"learning_rate": 2.4645755680487497e-05,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15044154226779938,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3443.5,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 3.389967637540453,
|
|
"grad_norm": 0.6078709562725402,
|
|
"learning_rate": 2.4567226912237566e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18583205342292786,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3916.0,
|
|
"valid_targets_min": 2345
|
|
},
|
|
{
|
|
"epoch": 3.3980582524271843,
|
|
"grad_norm": 0.632101264354982,
|
|
"learning_rate": 2.4488623786801523e-05,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18830320239067078,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3117.6,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 3.4061488673139158,
|
|
"grad_norm": 0.5952149912286673,
|
|
"learning_rate": 2.4409947583885272e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15162335336208344,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3879.4,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 3.414239482200647,
|
|
"grad_norm": 0.5796451186001909,
|
|
"learning_rate": 2.4331199584384448e-05,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17198210954666138,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4103.7,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 3.4223300970873787,
|
|
"grad_norm": 0.5465540090675008,
|
|
"learning_rate": 2.425238107036359e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14355331659317017,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4076.2,
|
|
"valid_targets_min": 2289
|
|
},
|
|
{
|
|
"epoch": 3.43042071197411,
|
|
"grad_norm": 0.594717506650347,
|
|
"learning_rate": 2.4173493325035255e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18043139576911926,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3892.8,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 3.4385113268608416,
|
|
"grad_norm": 0.5419410813351541,
|
|
"learning_rate": 2.4094537632739126e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16672024130821228,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4480.9,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 3.4466019417475726,
|
|
"grad_norm": 0.6037829229049166,
|
|
"learning_rate": 2.40155152789211e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17883463203907013,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3816.6,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 3.454692556634304,
|
|
"grad_norm": 0.5647817759923721,
|
|
"learning_rate": 2.3936427550112375e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17558303475379944,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3933.2,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 3.4627831715210355,
|
|
"grad_norm": 0.6313512858911249,
|
|
"learning_rate": 2.3857275733908476e-05,
|
|
"loss": 0.1714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17078541219234467,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3346.6,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 3.470873786407767,
|
|
"grad_norm": 0.5891762304651471,
|
|
"learning_rate": 2.377806111894832e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15590104460716248,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3573.3,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 3.4789644012944985,
|
|
"grad_norm": 0.6482887954040247,
|
|
"learning_rate": 2.3698784994893214e-05,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16722315549850464,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3687.2,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 3.48705501618123,
|
|
"grad_norm": 0.6128863861838072,
|
|
"learning_rate": 2.3619448652405885e-05,
|
|
"loss": 0.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15946364402770996,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3502.8,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 3.4951456310679614,
|
|
"grad_norm": 0.5765896527856256,
|
|
"learning_rate": 2.3540053383129458e-05,
|
|
"loss": 0.1754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.188400000333786,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3874.4,
|
|
"valid_targets_min": 547
|
|
},
|
|
{
|
|
"epoch": 3.5032362459546924,
|
|
"grad_norm": 0.5859054394697829,
|
|
"learning_rate": 2.346060047966638e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1690550446510315,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4166.0,
|
|
"valid_targets_min": 1667
|
|
},
|
|
{
|
|
"epoch": 3.511326860841424,
|
|
"grad_norm": 0.7036463701667274,
|
|
"learning_rate": 2.3381091235557475e-05,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1957768201828003,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3727.4,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 3.5194174757281553,
|
|
"grad_norm": 0.5916107265381505,
|
|
"learning_rate": 2.330152694526077e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1751423478126526,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3770.8,
|
|
"valid_targets_min": 1953
|
|
},
|
|
{
|
|
"epoch": 3.527508090614887,
|
|
"grad_norm": 0.593608794686705,
|
|
"learning_rate": 2.322190890413053e-05,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18094240128993988,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4127.8,
|
|
"valid_targets_min": 2488
|
|
},
|
|
{
|
|
"epoch": 3.5355987055016183,
|
|
"grad_norm": 0.5654915385772826,
|
|
"learning_rate": 2.3142238408396076e-05,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14299830794334412,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3797.2,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 3.5436893203883493,
|
|
"grad_norm": 0.6289935817923777,
|
|
"learning_rate": 2.3062516755140733e-05,
|
|
"loss": 0.1793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17214451730251312,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3183.4,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.551779935275081,
|
|
"grad_norm": 0.597140353068917,
|
|
"learning_rate": 2.2982745242280716e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16503599286079407,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3889.6,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 3.559870550161812,
|
|
"grad_norm": 0.5994193806365925,
|
|
"learning_rate": 2.290292516854396e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18332546949386597,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3912.4,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 3.5679611650485437,
|
|
"grad_norm": 0.6642282116127255,
|
|
"learning_rate": 2.2823057833449013e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18105272948741913,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3320.1,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 3.576051779935275,
|
|
"grad_norm": 0.6015323897456493,
|
|
"learning_rate": 2.274314453728386e-05,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1692677140235901,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3614.8,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 3.5841423948220066,
|
|
"grad_norm": 0.6537271526172133,
|
|
"learning_rate": 2.2663186581084777e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20455987751483917,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3651.8,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 3.592233009708738,
|
|
"grad_norm": 0.6232084648287293,
|
|
"learning_rate": 2.258318526661511e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16232970356941223,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3327.6,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 3.600323624595469,
|
|
"grad_norm": 0.6296326256388624,
|
|
"learning_rate": 2.250314189634412e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19072610139846802,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4073.9,
|
|
"valid_targets_min": 2199
|
|
},
|
|
{
|
|
"epoch": 3.6084142394822005,
|
|
"grad_norm": 0.6449233967179974,
|
|
"learning_rate": 2.2423057773425745e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1651957631111145,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3022.1,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 3.616504854368932,
|
|
"grad_norm": 0.6559395560780564,
|
|
"learning_rate": 2.2342934201677422e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17878469824790955,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3622.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.6245954692556634,
|
|
"grad_norm": 0.6173129941768686,
|
|
"learning_rate": 2.2262772485558814e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18011218309402466,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3820.5,
|
|
"valid_targets_min": 1903
|
|
},
|
|
{
|
|
"epoch": 3.632686084142395,
|
|
"grad_norm": 0.5753461959712433,
|
|
"learning_rate": 2.218257393015062e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1866929829120636,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4092.4,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 3.6407766990291264,
|
|
"grad_norm": 0.5728173710154111,
|
|
"learning_rate": 2.2102339841133285e-05,
|
|
"loss": 0.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1651284098625183,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3805.8,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 3.648867313915858,
|
|
"grad_norm": 0.5916078253429511,
|
|
"learning_rate": 2.2022071524765786e-05,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.149724543094635,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3961.8,
|
|
"valid_targets_min": 2621
|
|
},
|
|
{
|
|
"epoch": 3.656957928802589,
|
|
"grad_norm": 0.6744997056371559,
|
|
"learning_rate": 2.1941770287864315e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19032660126686096,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3410.2,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 3.6650485436893203,
|
|
"grad_norm": 0.6195720257754521,
|
|
"learning_rate": 2.1861437437781045e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1680455058813095,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3784.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 3.6731391585760518,
|
|
"grad_norm": 0.672887046130233,
|
|
"learning_rate": 2.1781074282382834e-05,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819503754377365,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3372.5,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 3.6812297734627832,
|
|
"grad_norm": 0.6776193321599489,
|
|
"learning_rate": 2.170068213002992e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18788284063339233,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3483.5,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 3.6893203883495147,
|
|
"grad_norm": 0.6052000663341688,
|
|
"learning_rate": 2.1620262289554628e-05,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1812586486339569,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3708.8,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 3.6974110032362457,
|
|
"grad_norm": 0.661641976363746,
|
|
"learning_rate": 2.1539816070240062e-05,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19451472163200378,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3528.8,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 3.705501618122977,
|
|
"grad_norm": 0.5930807874788253,
|
|
"learning_rate": 2.1459344781798807e-05,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15635108947753906,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4005.1,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 3.7135922330097086,
|
|
"grad_norm": 0.6112149905774165,
|
|
"learning_rate": 2.1378849734351565e-05,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.162587970495224,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3795.4,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 3.72168284789644,
|
|
"grad_norm": 0.6057359622968203,
|
|
"learning_rate": 2.1298332238405856e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17883029580116272,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4006.3,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 3.7297734627831716,
|
|
"grad_norm": 0.6144431941266508,
|
|
"learning_rate": 2.1217793604834687e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17724230885505676,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4024.4,
|
|
"valid_targets_min": 2274
|
|
},
|
|
{
|
|
"epoch": 3.737864077669903,
|
|
"grad_norm": 0.6057468889256321,
|
|
"learning_rate": 2.1137235144855177e-05,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15354929864406586,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3366.8,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 3.7459546925566345,
|
|
"grad_norm": 0.6053175336946324,
|
|
"learning_rate": 2.1056658170007247e-05,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17170631885528564,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3616.9,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 3.7540453074433655,
|
|
"grad_norm": 0.6213868430128134,
|
|
"learning_rate": 2.0976063992132252e-05,
|
|
"loss": 0.162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1570182740688324,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3374.6,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 3.762135922330097,
|
|
"grad_norm": 0.7286560834537692,
|
|
"learning_rate": 2.0895453923351613e-05,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20686554908752441,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3858.3,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 3.7702265372168284,
|
|
"grad_norm": 0.6560663109735195,
|
|
"learning_rate": 2.0814829276045465e-05,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18524110317230225,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3464.4,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 3.77831715210356,
|
|
"grad_norm": 0.6254622237386656,
|
|
"learning_rate": 2.0734191362831294e-05,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16618870198726654,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3355.0,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 3.7864077669902914,
|
|
"grad_norm": 0.5906343788820818,
|
|
"learning_rate": 2.0653541496542566e-05,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17420311272144318,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4128.1,
|
|
"valid_targets_min": 3301
|
|
},
|
|
{
|
|
"epoch": 3.794498381877023,
|
|
"grad_norm": 0.6495213062678279,
|
|
"learning_rate": 2.0572880990207343e-05,
|
|
"loss": 0.1757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1777752935886383,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3703.5,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 3.8025889967637543,
|
|
"grad_norm": 0.6281322531518733,
|
|
"learning_rate": 2.049221115702692e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1519239991903305,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3610.2,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 3.8106796116504853,
|
|
"grad_norm": 0.6161485822405318,
|
|
"learning_rate": 2.0411533310354433e-05,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15030154585838318,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4126.1,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 3.8187702265372168,
|
|
"grad_norm": 0.5757629774093376,
|
|
"learning_rate": 2.0330848763673484e-05,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15009431540966034,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3805.3,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 3.8268608414239482,
|
|
"grad_norm": 0.5666043552752347,
|
|
"learning_rate": 2.0250158830576764e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14444783329963684,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4227.1,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.8349514563106797,
|
|
"grad_norm": 0.6061569302969273,
|
|
"learning_rate": 2.0169464824744636e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1677263081073761,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3836.5,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 3.843042071197411,
|
|
"grad_norm": 0.6299856336138258,
|
|
"learning_rate": 2.0088768059923795e-05,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18110263347625732,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3476.6,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 3.851132686084142,
|
|
"grad_norm": 0.6644952355917083,
|
|
"learning_rate": 2.0008069849905847e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18871355056762695,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3636.7,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 3.8592233009708736,
|
|
"grad_norm": 0.6650438404888883,
|
|
"learning_rate": 1.9927371508505915e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1898743063211441,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3361.4,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 3.867313915857605,
|
|
"grad_norm": 0.6480640499016951,
|
|
"learning_rate": 1.984667434954126e-05,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17341741919517517,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3357.9,
|
|
"valid_targets_min": 432
|
|
},
|
|
{
|
|
"epoch": 3.8754045307443366,
|
|
"grad_norm": 0.6704594696673299,
|
|
"learning_rate": 1.9765979686809912e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16611242294311523,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3253.0,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 3.883495145631068,
|
|
"grad_norm": 0.5941524325792283,
|
|
"learning_rate": 1.968528883406925e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16716042160987854,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4301.2,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 3.8915857605177995,
|
|
"grad_norm": 0.6006935814066899,
|
|
"learning_rate": 1.9604603105014616e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16704756021499634,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3946.9,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 3.899676375404531,
|
|
"grad_norm": 0.6642684578913485,
|
|
"learning_rate": 1.9523923813257957e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161843404173851,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3472.9,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 3.907766990291262,
|
|
"grad_norm": 0.5724065210606161,
|
|
"learning_rate": 1.9443252272306384e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17966113984584808,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4337.2,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 3.9158576051779934,
|
|
"grad_norm": 0.6194183255977415,
|
|
"learning_rate": 1.9362589795540852e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16092973947525024,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3506.6,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 3.923948220064725,
|
|
"grad_norm": 0.6219635655220017,
|
|
"learning_rate": 1.9281937696194723e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17510530352592468,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3865.4,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 3.9320388349514563,
|
|
"grad_norm": 0.6231486842433605,
|
|
"learning_rate": 1.9201297287332428e-05,
|
|
"loss": 0.1645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16570451855659485,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3851.4,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 3.940129449838188,
|
|
"grad_norm": 0.7102714071978097,
|
|
"learning_rate": 1.912066988182806e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856686770915985,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3550.0,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 3.948220064724919,
|
|
"grad_norm": 0.6300878458902386,
|
|
"learning_rate": 1.9040056792344005e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1716354787349701,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3878.1,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 3.9563106796116507,
|
|
"grad_norm": 0.5406998255380289,
|
|
"learning_rate": 1.8959459331309576e-05,
|
|
"loss": 0.165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1490856409072876,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4219.6,
|
|
"valid_targets_min": 2389
|
|
},
|
|
{
|
|
"epoch": 3.9644012944983817,
|
|
"grad_norm": 0.6750562788965354,
|
|
"learning_rate": 1.8878878810899653e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16222772002220154,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3240.6,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 3.972491909385113,
|
|
"grad_norm": 0.6453126907074695,
|
|
"learning_rate": 1.8798316543013317e-05,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18792478740215302,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3656.6,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 3.9805825242718447,
|
|
"grad_norm": 0.6466575183901547,
|
|
"learning_rate": 1.8717773839252477e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17349807918071747,
|
|
"step": 2460,
|
|
"valid_targets_mean": 4207.3,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 3.988673139158576,
|
|
"grad_norm": 0.5677694171684091,
|
|
"learning_rate": 1.8637252010900515e-05,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14839619398117065,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3616.2,
|
|
"valid_targets_min": 369
|
|
},
|
|
{
|
|
"epoch": 3.9967637540453076,
|
|
"grad_norm": 0.5817112797352827,
|
|
"learning_rate": 1.8556752368900972e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15644389390945435,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4212.1,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 4.004854368932039,
|
|
"grad_norm": 0.5838872954552842,
|
|
"learning_rate": 1.8476276223836177e-05,
|
|
"loss": 0.1516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1359456181526184,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3580.9,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 4.0129449838187705,
|
|
"grad_norm": 0.6181440361500631,
|
|
"learning_rate": 1.8395824885905898e-05,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15484167635440826,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4019.4,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 4.0210355987055015,
|
|
"grad_norm": 0.6311832020407332,
|
|
"learning_rate": 1.8315399664906062e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1441861242055893,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3870.4,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 4.029126213592233,
|
|
"grad_norm": 0.6857583489532473,
|
|
"learning_rate": 1.823500187020735e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1345384567975998,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3765.9,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 4.0372168284789645,
|
|
"grad_norm": 0.7113640655797298,
|
|
"learning_rate": 1.815463281073396e-05,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1583227813243866,
|
|
"step": 2495,
|
|
"valid_targets_mean": 3249.9,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 4.0453074433656955,
|
|
"grad_norm": 0.662581106462447,
|
|
"learning_rate": 1.8074293794942262e-05,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1287870854139328,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3816.8,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 4.053398058252427,
|
|
"grad_norm": 0.5793919248054356,
|
|
"learning_rate": 1.7993986130799477e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330222338438034,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4338.5,
|
|
"valid_targets_min": 2365
|
|
},
|
|
{
|
|
"epoch": 4.061488673139158,
|
|
"grad_norm": 0.6078390592693177,
|
|
"learning_rate": 1.7913711125762435e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425902545452118,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4566.3,
|
|
"valid_targets_min": 2815
|
|
},
|
|
{
|
|
"epoch": 4.06957928802589,
|
|
"grad_norm": 0.8150168677382256,
|
|
"learning_rate": 1.7833470086756214e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14174199104309082,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3431.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 4.077669902912621,
|
|
"grad_norm": 0.670471952566526,
|
|
"learning_rate": 1.7753264320152934e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15361058712005615,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3481.4,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 4.085760517799352,
|
|
"grad_norm": 0.6021110394444014,
|
|
"learning_rate": 1.7673095131750454e-05,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14771947264671326,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3895.3,
|
|
"valid_targets_min": 370
|
|
},
|
|
{
|
|
"epoch": 4.093851132686084,
|
|
"grad_norm": 0.6313198974033982,
|
|
"learning_rate": 1.759296382675112e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13901178538799286,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3643.3,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 4.101941747572815,
|
|
"grad_norm": 1.115383670240578,
|
|
"learning_rate": 1.7512871709740515e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14427712559700012,
|
|
"step": 2535,
|
|
"valid_targets_mean": 3357.8,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 4.110032362459547,
|
|
"grad_norm": 0.5945728257795431,
|
|
"learning_rate": 1.743282008466619e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13450026512145996,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3983.9,
|
|
"valid_targets_min": 2594
|
|
},
|
|
{
|
|
"epoch": 4.118122977346278,
|
|
"grad_norm": 0.6485166896234807,
|
|
"learning_rate": 1.7352810254816498e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662842035293579,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3923.2,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 4.12621359223301,
|
|
"grad_norm": 0.6534762301883935,
|
|
"learning_rate": 1.727284352279934e-05,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13402123749256134,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3921.8,
|
|
"valid_targets_min": 2214
|
|
},
|
|
{
|
|
"epoch": 4.134304207119741,
|
|
"grad_norm": 0.6786977646467098,
|
|
"learning_rate": 1.7192921190520936e-05,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14459103345870972,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4039.2,
|
|
"valid_targets_min": 1643
|
|
},
|
|
{
|
|
"epoch": 4.142394822006472,
|
|
"grad_norm": 0.6573301423442405,
|
|
"learning_rate": 1.7113044559164657e-05,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15165367722511292,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3763.1,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.150485436893204,
|
|
"grad_norm": 0.6621324780494194,
|
|
"learning_rate": 1.7033214929169847e-05,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14901897311210632,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3928.9,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 4.158576051779935,
|
|
"grad_norm": 0.649660750750346,
|
|
"learning_rate": 1.695343360021064e-05,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14463084936141968,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4004.2,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.6870184255662538,
|
|
"learning_rate": 1.6873701871174782e-05,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14344152808189392,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3691.4,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 4.174757281553398,
|
|
"grad_norm": 0.6505329392557849,
|
|
"learning_rate": 1.6794021040142534e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1347620189189911,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3929.4,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.18284789644013,
|
|
"grad_norm": 0.7350776506050503,
|
|
"learning_rate": 1.6714392404365467e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15007461607456207,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4382.9,
|
|
"valid_targets_min": 2220
|
|
},
|
|
{
|
|
"epoch": 4.190938511326861,
|
|
"grad_norm": 0.754225049943721,
|
|
"learning_rate": 1.6634817260245417e-05,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16796696186065674,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3107.8,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 4.199029126213592,
|
|
"grad_norm": 0.651272931082461,
|
|
"learning_rate": 1.655529690331332e-05,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1237691193819046,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3806.5,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 4.207119741100324,
|
|
"grad_norm": 0.6508181443890371,
|
|
"learning_rate": 1.6475832628208165e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13523957133293152,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3647.2,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 4.215210355987055,
|
|
"grad_norm": 0.7227632839629261,
|
|
"learning_rate": 1.6396425728655874e-05,
|
|
"loss": 0.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1514357030391693,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3626.4,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 4.223300970873787,
|
|
"grad_norm": 0.6640700426495993,
|
|
"learning_rate": 1.6317077497448278e-05,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1584387570619583,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3992.6,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 4.231391585760518,
|
|
"grad_norm": 0.8556463930160811,
|
|
"learning_rate": 1.6237789226422033e-05,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14512553811073303,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3118.4,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 4.239482200647249,
|
|
"grad_norm": 0.7077309518646648,
|
|
"learning_rate": 1.6158562206437634e-05,
|
|
"loss": 0.1549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14674773812294006,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3484.4,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 4.247572815533981,
|
|
"grad_norm": 0.6862643174407498,
|
|
"learning_rate": 1.6079397727358345e-05,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17743253707885742,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3890.9,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 4.255663430420712,
|
|
"grad_norm": 0.6813524473623369,
|
|
"learning_rate": 1.6000297078029256e-05,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15772217512130737,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4023.1,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 4.263754045307444,
|
|
"grad_norm": 0.6563813142401669,
|
|
"learning_rate": 1.5921261546256236e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12581901252269745,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3186.6,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 4.271844660194175,
|
|
"grad_norm": 0.679061048016753,
|
|
"learning_rate": 1.5842292418785026e-05,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13736608624458313,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3609.4,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.2799352750809065,
|
|
"grad_norm": 0.6955635366220325,
|
|
"learning_rate": 1.5763390981280258e-05,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13078239560127258,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3810.5,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 4.288025889967638,
|
|
"grad_norm": 0.6710216957754729,
|
|
"learning_rate": 1.568455851830453e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1374095380306244,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3627.3,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 4.296116504854369,
|
|
"grad_norm": 0.6781973790355561,
|
|
"learning_rate": 1.5605796313297502e-05,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14508819580078125,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3500.6,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 4.3042071197411005,
|
|
"grad_norm": 0.7376450559475305,
|
|
"learning_rate": 1.552710564855498e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1402195245027542,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3175.8,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 4.3122977346278315,
|
|
"grad_norm": 0.7192468794992135,
|
|
"learning_rate": 1.5448487805208047e-05,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14955146610736847,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3211.4,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 4.320388349514563,
|
|
"grad_norm": 0.6228075319720846,
|
|
"learning_rate": 1.5369944063202228e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1207929328083992,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3558.4,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 4.328478964401294,
|
|
"grad_norm": 0.7485342397517419,
|
|
"learning_rate": 1.529147570127663e-05,
|
|
"loss": 0.1536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14952194690704346,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3660.6,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 4.336569579288026,
|
|
"grad_norm": 0.6931771759253382,
|
|
"learning_rate": 1.5213083996943124e-05,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1318070888519287,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3454.6,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 4.344660194174757,
|
|
"grad_norm": 0.6979626156230478,
|
|
"learning_rate": 1.5134770226465533e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.146072655916214,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3518.3,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 4.352750809061488,
|
|
"grad_norm": 0.6557240448616157,
|
|
"learning_rate": 1.5056535664838894e-05,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14116227626800537,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4056.6,
|
|
"valid_targets_min": 2699
|
|
},
|
|
{
|
|
"epoch": 4.36084142394822,
|
|
"grad_norm": 0.6417394256829899,
|
|
"learning_rate": 1.4978381585768676e-05,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16907694935798645,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4150.7,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 4.368932038834951,
|
|
"grad_norm": 0.6447473771244111,
|
|
"learning_rate": 1.490030926165002e-05,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12808853387832642,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3274.6,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 4.377022653721683,
|
|
"grad_norm": 0.6135227436036308,
|
|
"learning_rate": 1.4822319963547083e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15349863469600677,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3956.3,
|
|
"valid_targets_min": 2449
|
|
},
|
|
{
|
|
"epoch": 4.385113268608414,
|
|
"grad_norm": 0.6632921549893784,
|
|
"learning_rate": 1.4744414961172267e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.152324840426445,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3791.8,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 4.393203883495145,
|
|
"grad_norm": 0.6832544029610803,
|
|
"learning_rate": 1.4666595522865628e-05,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16460612416267395,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3845.2,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 4.401294498381877,
|
|
"grad_norm": 0.6833501669663028,
|
|
"learning_rate": 1.4588862915574158e-05,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16949523985385895,
|
|
"step": 2720,
|
|
"valid_targets_mean": 4021.8,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 4.409385113268608,
|
|
"grad_norm": 0.6096949346655066,
|
|
"learning_rate": 1.4511218404831208e-05,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12424943596124649,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3627.6,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 4.41747572815534,
|
|
"grad_norm": 0.6522445618690355,
|
|
"learning_rate": 1.4433663254735868e-05,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1399524211883545,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3913.4,
|
|
"valid_targets_min": 2373
|
|
},
|
|
{
|
|
"epoch": 4.425566343042071,
|
|
"grad_norm": 0.6078107797038349,
|
|
"learning_rate": 1.4356198727932357e-05,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14214907586574554,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4400.7,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 4.433656957928803,
|
|
"grad_norm": 0.6322546695687536,
|
|
"learning_rate": 1.427882608558951e-05,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1225961297750473,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3752.8,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 4.441747572815534,
|
|
"grad_norm": 0.6617047906893644,
|
|
"learning_rate": 1.420154658738023e-05,
|
|
"loss": 0.1489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15037743747234344,
|
|
"step": 2745,
|
|
"valid_targets_mean": 3884.7,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 4.449838187702265,
|
|
"grad_norm": 0.6450489483141357,
|
|
"learning_rate": 1.4124361491460979e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14448890089988708,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4195.3,
|
|
"valid_targets_min": 2233
|
|
},
|
|
{
|
|
"epoch": 4.457928802588997,
|
|
"grad_norm": 0.7196615177429064,
|
|
"learning_rate": 1.4047272054451288e-05,
|
|
"loss": 0.1549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15937447547912598,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3453.7,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 4.466019417475728,
|
|
"grad_norm": 0.7185337546342929,
|
|
"learning_rate": 1.397027953141329e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11660395562648773,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3775.2,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 4.47411003236246,
|
|
"grad_norm": 0.689052637867341,
|
|
"learning_rate": 1.3893385175831326e-05,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15225495398044586,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3532.5,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 4.482200647249191,
|
|
"grad_norm": 0.6084241558336754,
|
|
"learning_rate": 1.3816590239591502e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12444262951612473,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3905.1,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 4.490291262135923,
|
|
"grad_norm": 0.7634877881621217,
|
|
"learning_rate": 1.3739895972961312e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16124451160430908,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3246.4,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 4.498381877022654,
|
|
"grad_norm": 0.7282789159692459,
|
|
"learning_rate": 1.3663303624569303e-05,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1560203731060028,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3731.1,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 4.506472491909385,
|
|
"grad_norm": 0.6105558503523129,
|
|
"learning_rate": 1.3586814441384707e-05,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12341571599245071,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4048.4,
|
|
"valid_targets_min": 2250
|
|
},
|
|
{
|
|
"epoch": 4.514563106796117,
|
|
"grad_norm": 0.6571177564002022,
|
|
"learning_rate": 1.3510429668697188e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1218075305223465,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3747.4,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 4.522653721682848,
|
|
"grad_norm": 0.6801848412658954,
|
|
"learning_rate": 1.3434150550096534e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16177475452423096,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4049.3,
|
|
"valid_targets_min": 1667
|
|
},
|
|
{
|
|
"epoch": 4.53074433656958,
|
|
"grad_norm": 0.6207342193834591,
|
|
"learning_rate": 1.3357978327452425e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13945342600345612,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4553.1,
|
|
"valid_targets_min": 2577
|
|
},
|
|
{
|
|
"epoch": 4.538834951456311,
|
|
"grad_norm": 0.6573784092270141,
|
|
"learning_rate": 1.3281914240894218e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442652940750122,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4044.2,
|
|
"valid_targets_min": 1903
|
|
},
|
|
{
|
|
"epoch": 4.546925566343042,
|
|
"grad_norm": 0.7805486249949233,
|
|
"learning_rate": 1.320595952879073e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15102678537368774,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3677.2,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 4.555016181229774,
|
|
"grad_norm": 0.67466501003837,
|
|
"learning_rate": 1.3130115427730106e-05,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14177349209785461,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3778.1,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 4.563106796116505,
|
|
"grad_norm": 0.6530112142615018,
|
|
"learning_rate": 1.3054383172499688e-05,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437305212020874,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4198.1,
|
|
"valid_targets_min": 2700
|
|
},
|
|
{
|
|
"epoch": 4.5711974110032365,
|
|
"grad_norm": 0.677754173384933,
|
|
"learning_rate": 1.2978763996065888e-05,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430257260799408,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3589.8,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 4.5792880258899675,
|
|
"grad_norm": 0.748140170284871,
|
|
"learning_rate": 1.2903259129554138e-05,
|
|
"loss": 0.1381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16187545657157898,
|
|
"step": 2830,
|
|
"valid_targets_mean": 4454.9,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 4.5873786407766985,
|
|
"grad_norm": 0.725070674975314,
|
|
"learning_rate": 1.2827869802228816e-05,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16014769673347473,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3145.8,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 4.5954692556634305,
|
|
"grad_norm": 0.6695155511796875,
|
|
"learning_rate": 1.2752597241473273e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13507728278636932,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3233.1,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 4.6035598705501615,
|
|
"grad_norm": 0.6874745243656388,
|
|
"learning_rate": 1.2677442672769832e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1317557394504547,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4060.1,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 4.611650485436893,
|
|
"grad_norm": 0.6614289983469602,
|
|
"learning_rate": 1.2602407319679822e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1529705822467804,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3698.4,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 4.619741100323624,
|
|
"grad_norm": 0.6751540658943308,
|
|
"learning_rate": 1.2527492403823664e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1433483511209488,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3522.2,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 4.627831715210356,
|
|
"grad_norm": 0.7103834955161041,
|
|
"learning_rate": 1.2452699144861012e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555805206298828,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4060.1,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 4.635922330097087,
|
|
"grad_norm": 0.7622609109059741,
|
|
"learning_rate": 1.237802876047086e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385229527950287,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3845.6,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 4.644012944983819,
|
|
"grad_norm": 0.7214538094938855,
|
|
"learning_rate": 1.2303482466331727e-05,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15360170602798462,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3656.2,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 4.65210355987055,
|
|
"grad_norm": 0.6916396910551235,
|
|
"learning_rate": 1.2229061476101883e-05,
|
|
"loss": 0.1376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1525598168373108,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3600.1,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 4.660194174757281,
|
|
"grad_norm": 0.6729985151314064,
|
|
"learning_rate": 1.2154767001399551e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14135336875915527,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3632.1,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 4.668284789644013,
|
|
"grad_norm": 0.6962655582465614,
|
|
"learning_rate": 1.2080600251783233e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1368042379617691,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3786.1,
|
|
"valid_targets_min": 388
|
|
},
|
|
{
|
|
"epoch": 4.676375404530744,
|
|
"grad_norm": 0.7090725837062865,
|
|
"learning_rate": 1.2006562434731968e-05,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14932994544506073,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3464.5,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 4.684466019417476,
|
|
"grad_norm": 0.6985018150678612,
|
|
"learning_rate": 1.193265475562571e-05,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1416228860616684,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3852.8,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 4.692556634304207,
|
|
"grad_norm": 0.6608970430018517,
|
|
"learning_rate": 1.1858878417725683e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13789209723472595,
|
|
"step": 2900,
|
|
"valid_targets_mean": 3671.8,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 4.700647249190938,
|
|
"grad_norm": 0.6626344499235478,
|
|
"learning_rate": 1.1785234622154797e-05,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16734716296195984,
|
|
"step": 2905,
|
|
"valid_targets_mean": 3912.6,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 4.70873786407767,
|
|
"grad_norm": 0.729467490979312,
|
|
"learning_rate": 1.1711724567878095e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.141088604927063,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3118.9,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 4.716828478964401,
|
|
"grad_norm": 0.6271378285039474,
|
|
"learning_rate": 1.1638349451683237e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12910325825214386,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3909.6,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 4.724919093851133,
|
|
"grad_norm": 0.7315823978800721,
|
|
"learning_rate": 1.1565110468160996e-05,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16693362593650818,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3973.8,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 4.733009708737864,
|
|
"grad_norm": 0.6961422251454465,
|
|
"learning_rate": 1.1492008809685856e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15453936159610748,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3943.8,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 4.741100323624595,
|
|
"grad_norm": 0.7947679996073709,
|
|
"learning_rate": 1.141904566639652e-05,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.159670889377594,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3625.1,
|
|
"valid_targets_min": 1755
|
|
},
|
|
{
|
|
"epoch": 4.749190938511327,
|
|
"grad_norm": 0.6820283456884491,
|
|
"learning_rate": 1.1346222226176606e-05,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13358992338180542,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3843.3,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 4.757281553398058,
|
|
"grad_norm": 0.688613911291754,
|
|
"learning_rate": 1.1273539674635296e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14009517431259155,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3796.8,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 4.76537216828479,
|
|
"grad_norm": 0.741239831951198,
|
|
"learning_rate": 1.1200999195088e-05,
|
|
"loss": 0.1376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13354428112506866,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3529.9,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 4.773462783171521,
|
|
"grad_norm": 0.6855567557895231,
|
|
"learning_rate": 1.1128601968537111e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14763250946998596,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4232.2,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 4.781553398058253,
|
|
"grad_norm": 0.6953856815463785,
|
|
"learning_rate": 1.1056349173652791e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14699126780033112,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3721.8,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 4.789644012944984,
|
|
"grad_norm": 0.6429481817754351,
|
|
"learning_rate": 1.098424198675375e-05,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12160778045654297,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3576.0,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 4.797734627831716,
|
|
"grad_norm": 0.6696602169332752,
|
|
"learning_rate": 1.0912281581788138e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1489216685295105,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4214.5,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 4.805825242718447,
|
|
"grad_norm": 0.6628125000555849,
|
|
"learning_rate": 1.0840469130314382e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1368810534477234,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4117.8,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 4.813915857605178,
|
|
"grad_norm": 0.6993787147902919,
|
|
"learning_rate": 1.0768805801482151e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13343513011932373,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3098.6,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 4.82200647249191,
|
|
"grad_norm": 0.7330396364465878,
|
|
"learning_rate": 1.0697292762013304e-05,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259125918149948,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3147.1,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 4.830097087378641,
|
|
"grad_norm": 0.722436884104266,
|
|
"learning_rate": 1.0625931176182905e-05,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10967875272035599,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3203.4,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 4.8381877022653725,
|
|
"grad_norm": 0.7138949308865148,
|
|
"learning_rate": 1.0554722205800245e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445237249135971,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3971.1,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 4.8462783171521036,
|
|
"grad_norm": 0.6823622727229742,
|
|
"learning_rate": 1.0483667010189973e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1553669422864914,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3809.2,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 4.854368932038835,
|
|
"grad_norm": 0.7813029712176784,
|
|
"learning_rate": 1.0412766746173168e-05,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17688441276550293,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4146.6,
|
|
"valid_targets_min": 2345
|
|
},
|
|
{
|
|
"epoch": 4.8624595469255665,
|
|
"grad_norm": 0.7018435375236487,
|
|
"learning_rate": 1.0342022568048543e-05,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13416077196598053,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3526.8,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 4.8705501618122975,
|
|
"grad_norm": 0.7073555559675111,
|
|
"learning_rate": 1.0271435627573631e-05,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13437259197235107,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3547.7,
|
|
"valid_targets_min": 1613
|
|
},
|
|
{
|
|
"epoch": 4.878640776699029,
|
|
"grad_norm": 0.7970239445560131,
|
|
"learning_rate": 1.0201007073946041e-05,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11350645124912262,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3346.7,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 4.88673139158576,
|
|
"grad_norm": 0.6785346013570012,
|
|
"learning_rate": 1.0130738053784768e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144822895526886,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4021.6,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 4.894822006472491,
|
|
"grad_norm": 0.7767919403819858,
|
|
"learning_rate": 1.0060629711111494e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14588266611099243,
|
|
"step": 3025,
|
|
"valid_targets_mean": 3322.8,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 4.902912621359223,
|
|
"grad_norm": 0.6946416256584877,
|
|
"learning_rate": 9.99068318733195e-06,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14380928874015808,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3390.6,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 4.911003236245954,
|
|
"grad_norm": 0.6506329244470211,
|
|
"learning_rate": 9.92089962121741e-06,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11819320917129517,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4117.6,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 4.919093851132686,
|
|
"grad_norm": 0.6476148960343205,
|
|
"learning_rate": 9.851280148886061e-06,
|
|
"loss": 0.1444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12191256880760193,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3871.6,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 4.927184466019417,
|
|
"grad_norm": 0.6425251067861399,
|
|
"learning_rate": 9.78182590378455e-06,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12856820225715637,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3881.7,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 4.935275080906149,
|
|
"grad_norm": 0.6608994309697848,
|
|
"learning_rate": 9.712538016669557e-06,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16227906942367554,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4399.9,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 4.94336569579288,
|
|
"grad_norm": 0.6863530052280413,
|
|
"learning_rate": 9.643417615589299e-06,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1477239429950714,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4083.6,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 4.951456310679612,
|
|
"grad_norm": 0.6955301304136801,
|
|
"learning_rate": 9.574465825865276e-06,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14413318037986755,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3786.9,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 4.959546925566343,
|
|
"grad_norm": 0.7140674650867661,
|
|
"learning_rate": 9.50568377007386e-06,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1486683040857315,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3171.9,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 4.967637540453074,
|
|
"grad_norm": 0.6709510332322777,
|
|
"learning_rate": 9.43707256802806e-06,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13565918803215027,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4014.1,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 4.975728155339806,
|
|
"grad_norm": 0.7251112737304829,
|
|
"learning_rate": 9.368633336759292e-06,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14906924962997437,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3706.9,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 4.983818770226537,
|
|
"grad_norm": 0.6722351700077497,
|
|
"learning_rate": 9.300367190499178e-06,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1070292741060257,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3536.6,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 4.991909385113269,
|
|
"grad_norm": 0.7375188285335043,
|
|
"learning_rate": 9.232275240661403e-06,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14554689824581146,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3175.9,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.7498091683809376,
|
|
"learning_rate": 9.164358595823661e-06,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13450586795806885,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3267.0,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 5.008090614886731,
|
|
"grad_norm": 0.7255837834802069,
|
|
"learning_rate": 9.096618361709545e-06,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13300150632858276,
|
|
"step": 3095,
|
|
"valid_targets_mean": 3423.9,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 5.016181229773463,
|
|
"grad_norm": 0.705829673432509,
|
|
"learning_rate": 9.029055641170588e-06,
|
|
"loss": 0.1221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11673476547002792,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4109.0,
|
|
"valid_targets_min": 2786
|
|
},
|
|
{
|
|
"epoch": 5.024271844660194,
|
|
"grad_norm": 0.6817632348570839,
|
|
"learning_rate": 8.961671534168292e-06,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12463103979825974,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3897.5,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 5.032362459546926,
|
|
"grad_norm": 0.7346020556413199,
|
|
"learning_rate": 8.894467137756228e-06,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13819938898086548,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3778.1,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 5.040453074433657,
|
|
"grad_norm": 0.6649553794586656,
|
|
"learning_rate": 8.827443546062165e-06,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11913490295410156,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3797.8,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 5.048543689320389,
|
|
"grad_norm": 0.7359575993819188,
|
|
"learning_rate": 8.760601850270277e-06,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1277879774570465,
|
|
"step": 3120,
|
|
"valid_targets_mean": 3530.2,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 5.05663430420712,
|
|
"grad_norm": 0.7258097409461821,
|
|
"learning_rate": 8.69394313860335e-06,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1081918478012085,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3826.4,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 5.064724919093851,
|
|
"grad_norm": 0.7402612846004758,
|
|
"learning_rate": 8.62746849630508e-06,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12437973916530609,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3779.6,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 5.072815533980583,
|
|
"grad_norm": 0.7365753473236456,
|
|
"learning_rate": 8.561179005622411e-06,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13570137321949005,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3778.3,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 5.080906148867314,
|
|
"grad_norm": 0.647195839952784,
|
|
"learning_rate": 8.495075745787895e-06,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312873214483261,
|
|
"step": 3140,
|
|
"valid_targets_mean": 4459.8,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 5.088996763754046,
|
|
"grad_norm": 0.6554961316240389,
|
|
"learning_rate": 8.429159793002164e-06,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11646975576877594,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4235.4,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 5.097087378640777,
|
|
"grad_norm": 0.7274467943356496,
|
|
"learning_rate": 8.363432220416336e-06,
|
|
"loss": 0.1374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406169980764389,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3789.8,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 5.105177993527508,
|
|
"grad_norm": 0.7281449684268655,
|
|
"learning_rate": 8.297894098114612e-06,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1407594084739685,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3965.4,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 5.11326860841424,
|
|
"grad_norm": 0.645898547032898,
|
|
"learning_rate": 8.232546493096836e-06,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1111171543598175,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4013.9,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 5.121359223300971,
|
|
"grad_norm": 0.838705244056076,
|
|
"learning_rate": 8.167390469261105e-06,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1620527058839798,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3344.5,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 5.1294498381877025,
|
|
"grad_norm": 0.7039566787306656,
|
|
"learning_rate": 8.102427087386457e-06,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12740206718444824,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3907.6,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 5.1375404530744335,
|
|
"grad_norm": 0.6536128106483406,
|
|
"learning_rate": 8.037657405115611e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.111982561647892,
|
|
"step": 3175,
|
|
"valid_targets_mean": 4094.3,
|
|
"valid_targets_min": 2918
|
|
},
|
|
{
|
|
"epoch": 5.145631067961165,
|
|
"grad_norm": 0.7820516641963816,
|
|
"learning_rate": 7.973082476937728e-06,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10970476269721985,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3919.2,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 5.1537216828478964,
|
|
"grad_norm": 0.6291251865125252,
|
|
"learning_rate": 7.908703354171283e-06,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09216032922267914,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3711.6,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 5.1618122977346275,
|
|
"grad_norm": 0.7393911233590429,
|
|
"learning_rate": 7.844521084946895e-06,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511136293411255,
|
|
"step": 3190,
|
|
"valid_targets_mean": 4163.1,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 5.169902912621359,
|
|
"grad_norm": 0.7767299244512159,
|
|
"learning_rate": 7.780536714190298e-06,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440705955028534,
|
|
"step": 3195,
|
|
"valid_targets_mean": 4079.9,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 5.17799352750809,
|
|
"grad_norm": 0.7478484576498746,
|
|
"learning_rate": 7.716751283605324e-06,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15292268991470337,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3749.6,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 5.186084142394822,
|
|
"grad_norm": 0.7339598062609202,
|
|
"learning_rate": 7.653165831656937e-06,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1475408971309662,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3905.8,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 5.194174757281553,
|
|
"grad_norm": 0.8093145246529954,
|
|
"learning_rate": 7.589781393554321e-06,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13094545900821686,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3094.6,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 5.202265372168285,
|
|
"grad_norm": 0.6854868228623259,
|
|
"learning_rate": 7.526599001234058e-06,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10869428515434265,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3835.5,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 5.210355987055016,
|
|
"grad_norm": 0.7125430732031628,
|
|
"learning_rate": 7.463619683343284e-06,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1297406703233719,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3900.7,
|
|
"valid_targets_min": 1782
|
|
},
|
|
{
|
|
"epoch": 5.218446601941747,
|
|
"grad_norm": 0.7181396945472707,
|
|
"learning_rate": 7.400844465222963e-06,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11025745421648026,
|
|
"step": 3225,
|
|
"valid_targets_mean": 3510.6,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 5.226537216828479,
|
|
"grad_norm": 0.7152506418537288,
|
|
"learning_rate": 7.338274368891198e-06,
|
|
"loss": 0.1314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1154252365231514,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3738.3,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 5.23462783171521,
|
|
"grad_norm": 0.6911405200556111,
|
|
"learning_rate": 7.275910413026579e-06,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12859441339969635,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4039.9,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 5.242718446601942,
|
|
"grad_norm": 0.7006616335265696,
|
|
"learning_rate": 7.213753612951624e-06,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14889748394489288,
|
|
"step": 3240,
|
|
"valid_targets_mean": 4240.6,
|
|
"valid_targets_min": 2902
|
|
},
|
|
{
|
|
"epoch": 5.250809061488673,
|
|
"grad_norm": 0.7881678572983208,
|
|
"learning_rate": 7.1518049806162196e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11947671324014664,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3953.6,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 5.258899676375404,
|
|
"grad_norm": 0.721046849366806,
|
|
"learning_rate": 7.090065524581136e-06,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14251643419265747,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3847.2,
|
|
"valid_targets_min": 496
|
|
},
|
|
{
|
|
"epoch": 5.266990291262136,
|
|
"grad_norm": 0.754366038514852,
|
|
"learning_rate": 7.0285362500016675e-06,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330687701702118,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3853.8,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 5.275080906148867,
|
|
"grad_norm": 0.7508046199447793,
|
|
"learning_rate": 6.967218158611202e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13121311366558075,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3239.1,
|
|
"valid_targets_min": 454
|
|
},
|
|
{
|
|
"epoch": 5.283171521035599,
|
|
"grad_norm": 0.7209699847161913,
|
|
"learning_rate": 6.906112248704939e-06,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14587056636810303,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3749.4,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 5.29126213592233,
|
|
"grad_norm": 0.6904937107421983,
|
|
"learning_rate": 6.845219515123667e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12392056733369827,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3961.6,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 5.299352750809062,
|
|
"grad_norm": 0.8350113985180451,
|
|
"learning_rate": 6.784540949237484e-06,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13344715535640717,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3784.6,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 5.307443365695793,
|
|
"grad_norm": 0.7364469113973013,
|
|
"learning_rate": 6.724077538929759e-06,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1438203901052475,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3925.2,
|
|
"valid_targets_min": 2514
|
|
},
|
|
{
|
|
"epoch": 5.315533980582524,
|
|
"grad_norm": 0.7279618289406372,
|
|
"learning_rate": 6.663830268580971e-06,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13427822291851044,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3814.9,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 5.323624595469256,
|
|
"grad_norm": 0.6239991635895354,
|
|
"learning_rate": 6.6038001190527146e-06,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09467694163322449,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4052.6,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 5.331715210355987,
|
|
"grad_norm": 0.7630179063607263,
|
|
"learning_rate": 6.543988067671752e-06,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13112042844295502,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3648.1,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 5.339805825242719,
|
|
"grad_norm": 0.6959899769773115,
|
|
"learning_rate": 6.484395088214037e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262616068124771,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3619.4,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 5.34789644012945,
|
|
"grad_norm": 0.6733214313566337,
|
|
"learning_rate": 6.425022150888924e-06,
|
|
"loss": 0.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10100290179252625,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3921.1,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 5.355987055016181,
|
|
"grad_norm": 0.7645979406542432,
|
|
"learning_rate": 6.36587022232336e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10890600085258484,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3571.8,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 5.364077669902913,
|
|
"grad_norm": 0.7144191140895096,
|
|
"learning_rate": 6.306940265546117e-06,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11766283959150314,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3672.7,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 5.372168284789644,
|
|
"grad_norm": 0.686173617625609,
|
|
"learning_rate": 6.248233239972144e-06,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11546348035335541,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3776.6,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.380258899676376,
|
|
"grad_norm": 0.7404569018613136,
|
|
"learning_rate": 6.189750101386931e-06,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14495286345481873,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3686.4,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 5.388349514563107,
|
|
"grad_norm": 0.6976672794771177,
|
|
"learning_rate": 6.1314918019309535e-06,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10965608060359955,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4057.9,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 5.3964401294498385,
|
|
"grad_norm": 0.7464635834173435,
|
|
"learning_rate": 6.073459290084185e-06,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14042946696281433,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3666.9,
|
|
"valid_targets_min": 1900
|
|
},
|
|
{
|
|
"epoch": 5.4045307443365695,
|
|
"grad_norm": 0.758392528587745,
|
|
"learning_rate": 6.01565351065063e-06,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1299818456172943,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3642.3,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 5.412621359223301,
|
|
"grad_norm": 0.7761136854296529,
|
|
"learning_rate": 5.958075404742951e-06,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13393014669418335,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3275.5,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 5.4207119741100325,
|
|
"grad_norm": 0.7053876074430606,
|
|
"learning_rate": 5.900725909767155e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10762079060077667,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4022.1,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 5.4288025889967635,
|
|
"grad_norm": 0.7642056847412017,
|
|
"learning_rate": 5.843605959407326e-06,
|
|
"loss": 0.1281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14689894020557404,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3795.8,
|
|
"valid_targets_min": 2510
|
|
},
|
|
{
|
|
"epoch": 5.436893203883495,
|
|
"grad_norm": 0.7355013386223096,
|
|
"learning_rate": 5.7867164836104174e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13858428597450256,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4008.9,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 5.444983818770226,
|
|
"grad_norm": 0.7315669914910475,
|
|
"learning_rate": 5.730058408571135e-06,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10367206484079361,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3741.8,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 5.453074433656958,
|
|
"grad_norm": 0.7225588349944803,
|
|
"learning_rate": 5.673632656716825e-06,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10480669885873795,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3860.3,
|
|
"valid_targets_min": 2206
|
|
},
|
|
{
|
|
"epoch": 5.461165048543689,
|
|
"grad_norm": 0.7237332483919786,
|
|
"learning_rate": 5.617440146692485e-06,
|
|
"loss": 0.1242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12040352076292038,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3892.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 5.46925566343042,
|
|
"grad_norm": 0.7602472039558393,
|
|
"learning_rate": 5.561481793345786e-06,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12410160899162292,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3685.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 5.477346278317152,
|
|
"grad_norm": 0.7380439821179074,
|
|
"learning_rate": 5.505758507712196e-06,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12352344393730164,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4037.8,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 5.485436893203883,
|
|
"grad_norm": 0.7890588781049627,
|
|
"learning_rate": 5.450271197000128e-06,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1398930549621582,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3493.1,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 5.493527508090615,
|
|
"grad_norm": 0.8416557602820898,
|
|
"learning_rate": 5.395020764576211e-06,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332603096961975,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3456.3,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 5.501618122977346,
|
|
"grad_norm": 0.6903723606199367,
|
|
"learning_rate": 5.340008109950512e-06,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1297823190689087,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3866.8,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 5.509708737864077,
|
|
"grad_norm": 0.6579088713294159,
|
|
"learning_rate": 5.285234128761969e-06,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10706949234008789,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3828.5,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 5.517799352750809,
|
|
"grad_norm": 0.7010522478745738,
|
|
"learning_rate": 5.230699712763758e-06,
|
|
"loss": 0.1237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12073405086994171,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3811.7,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 5.52588996763754,
|
|
"grad_norm": 0.7773182598525699,
|
|
"learning_rate": 5.176405749808786e-06,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15182915329933167,
|
|
"step": 3415,
|
|
"valid_targets_mean": 4130.1,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 5.533980582524272,
|
|
"grad_norm": 0.8638464314660135,
|
|
"learning_rate": 5.122353123835262e-06,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1541995108127594,
|
|
"step": 3420,
|
|
"valid_targets_mean": 3474.5,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 5.542071197411003,
|
|
"grad_norm": 0.6892371890482751,
|
|
"learning_rate": 5.068542714852254e-06,
|
|
"loss": 0.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293623000383377,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3804.9,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 5.550161812297735,
|
|
"grad_norm": 0.7405131577003049,
|
|
"learning_rate": 5.014975398925408e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12245788425207138,
|
|
"step": 3430,
|
|
"valid_targets_mean": 3846.7,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 5.558252427184466,
|
|
"grad_norm": 0.7063009742960525,
|
|
"learning_rate": 4.9616520481626794e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11486178636550903,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3717.2,
|
|
"valid_targets_min": 480
|
|
},
|
|
{
|
|
"epoch": 5.566343042071198,
|
|
"grad_norm": 0.7074391715770296,
|
|
"learning_rate": 4.908573530700111e-06,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13461247086524963,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4163.8,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 5.574433656957929,
|
|
"grad_norm": 0.8056262006079112,
|
|
"learning_rate": 4.8557407106877175e-06,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11532510817050934,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3564.0,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 5.58252427184466,
|
|
"grad_norm": 0.6689071640066644,
|
|
"learning_rate": 4.8031544482754136e-06,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12339470535516739,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3999.7,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 5.590614886731392,
|
|
"grad_norm": 0.7727457741265664,
|
|
"learning_rate": 4.7508155995989944e-06,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12754319608211517,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3478.6,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 5.598705501618123,
|
|
"grad_norm": 0.7051623852726349,
|
|
"learning_rate": 4.6987250167662435e-06,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13093580305576324,
|
|
"step": 3460,
|
|
"valid_targets_mean": 4168.9,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 5.606796116504855,
|
|
"grad_norm": 0.7539130701198374,
|
|
"learning_rate": 4.6468835478430045e-06,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12748193740844727,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3325.4,
|
|
"valid_targets_min": 444
|
|
},
|
|
{
|
|
"epoch": 5.614886731391586,
|
|
"grad_norm": 0.7286866436675753,
|
|
"learning_rate": 4.595292036839383e-06,
|
|
"loss": 0.1225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11541740596294403,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3640.0,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 5.622977346278317,
|
|
"grad_norm": 0.7234246657508987,
|
|
"learning_rate": 4.543951323696058e-06,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11532118916511536,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3745.8,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 5.631067961165049,
|
|
"grad_norm": 0.7620959965037666,
|
|
"learning_rate": 4.492862244270544e-06,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13576315343379974,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3854.0,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 5.63915857605178,
|
|
"grad_norm": 0.7060173631264054,
|
|
"learning_rate": 4.442025630323607e-06,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12959304451942444,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4153.1,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 5.647249190938512,
|
|
"grad_norm": 0.830192046283675,
|
|
"learning_rate": 4.3914423095057516e-06,
|
|
"loss": 0.1221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11251272261142731,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3465.1,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 5.655339805825243,
|
|
"grad_norm": 0.678502014152424,
|
|
"learning_rate": 4.341113105343673e-06,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11163593083620071,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3950.2,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 5.663430420711974,
|
|
"grad_norm": 0.7849757937871225,
|
|
"learning_rate": 4.291038837226935e-06,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13538207113742828,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3912.1,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 5.671521035598706,
|
|
"grad_norm": 0.7234102654591658,
|
|
"learning_rate": 4.241220320394574e-06,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282060295343399,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3848.4,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 5.679611650485437,
|
|
"grad_norm": 0.7422885539309136,
|
|
"learning_rate": 4.191658365921838e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12916360795497894,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3610.4,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 5.6877022653721685,
|
|
"grad_norm": 0.7125773638661527,
|
|
"learning_rate": 4.1423537807070065e-06,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903668940067291,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3842.6,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 5.6957928802588995,
|
|
"grad_norm": 0.752523330730224,
|
|
"learning_rate": 4.0933073674582054e-06,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1267009973526001,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3492.9,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 5.703883495145631,
|
|
"grad_norm": 0.7270519639308166,
|
|
"learning_rate": 4.044519924680379e-06,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12510140240192413,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3632.8,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 5.711974110032362,
|
|
"grad_norm": 0.7187809324224477,
|
|
"learning_rate": 3.99599224666229e-06,
|
|
"loss": 0.1254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12706691026687622,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3854.6,
|
|
"valid_targets_min": 2008
|
|
},
|
|
{
|
|
"epoch": 5.720064724919094,
|
|
"grad_norm": 0.6986751025778004,
|
|
"learning_rate": 3.947725123463559e-06,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14450600743293762,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4264.0,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 5.728155339805825,
|
|
"grad_norm": 0.7752207817322075,
|
|
"learning_rate": 3.8997193409018245e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11388484388589859,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3403.1,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 5.736245954692556,
|
|
"grad_norm": 0.8051040328349554,
|
|
"learning_rate": 3.851975680539941e-06,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13021591305732727,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3859.8,
|
|
"valid_targets_min": 1954
|
|
},
|
|
{
|
|
"epoch": 5.744336569579288,
|
|
"grad_norm": 0.8416631487491211,
|
|
"learning_rate": 3.804494919673254e-06,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1455630362033844,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3486.1,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 5.752427184466019,
|
|
"grad_norm": 0.7539706877774351,
|
|
"learning_rate": 3.757277831316961e-06,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13057079911231995,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3653.4,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 5.760517799352751,
|
|
"grad_norm": 0.7345729665958677,
|
|
"learning_rate": 3.7103251841934993e-06,
|
|
"loss": 0.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.121879443526268,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3892.4,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 5.768608414239482,
|
|
"grad_norm": 0.7300947831568095,
|
|
"learning_rate": 3.663637742720052e-06,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12066502124071121,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3785.7,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 5.776699029126213,
|
|
"grad_norm": 0.7854108618258545,
|
|
"learning_rate": 3.617216266996093e-06,
|
|
"loss": 0.129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09828585386276245,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3547.1,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 5.784789644012945,
|
|
"grad_norm": 0.6950199968709713,
|
|
"learning_rate": 3.571061512791012e-06,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12642902135849,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4099.1,
|
|
"valid_targets_min": 2514
|
|
},
|
|
{
|
|
"epoch": 5.792880258899676,
|
|
"grad_norm": 0.6788504116739233,
|
|
"learning_rate": 3.525174231531814e-06,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12123347818851471,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3896.4,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 5.800970873786408,
|
|
"grad_norm": 0.8682257553577993,
|
|
"learning_rate": 3.4795551702908935e-06,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16144192218780518,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3735.9,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 5.809061488673139,
|
|
"grad_norm": 0.6913961014302824,
|
|
"learning_rate": 3.434205071773855e-06,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12861505150794983,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4488.1,
|
|
"valid_targets_min": 2276
|
|
},
|
|
{
|
|
"epoch": 5.81715210355987,
|
|
"grad_norm": 0.7809800057454481,
|
|
"learning_rate": 3.3891246743074245e-06,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11062463372945786,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4145.9,
|
|
"valid_targets_min": 2503
|
|
},
|
|
{
|
|
"epoch": 5.825242718446602,
|
|
"grad_norm": 0.6702913762142105,
|
|
"learning_rate": 3.344314711827441e-06,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11128797382116318,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3917.0,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 0.7697182542474907,
|
|
"learning_rate": 3.299775913866894e-06,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1268385350704193,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3472.0,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 5.841423948220065,
|
|
"grad_norm": 0.7155200788961812,
|
|
"learning_rate": 3.255509005544062e-06,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924313008785248,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4219.1,
|
|
"valid_targets_min": 3201
|
|
},
|
|
{
|
|
"epoch": 5.849514563106796,
|
|
"grad_norm": 0.6954308628669343,
|
|
"learning_rate": 3.2115147075506957e-06,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12587472796440125,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3964.8,
|
|
"valid_targets_min": 2650
|
|
},
|
|
{
|
|
"epoch": 5.857605177993528,
|
|
"grad_norm": 0.7923704942527922,
|
|
"learning_rate": 3.1677937361402654e-06,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13604333996772766,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3791.6,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 5.865695792880259,
|
|
"grad_norm": 0.7963806010437653,
|
|
"learning_rate": 3.124346803116354e-06,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12812381982803345,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3818.2,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 5.87378640776699,
|
|
"grad_norm": 0.8065922087851086,
|
|
"learning_rate": 3.0811746158210165e-06,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12494488060474396,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3470.3,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 5.881877022653722,
|
|
"grad_norm": 0.6934755628970144,
|
|
"learning_rate": 3.0382778771232766e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11633384227752686,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3957.1,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 5.889967637540453,
|
|
"grad_norm": 0.6957018475982958,
|
|
"learning_rate": 2.9956572854077205e-06,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11870764195919037,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4018.0,
|
|
"valid_targets_min": 1690
|
|
},
|
|
{
|
|
"epoch": 5.898058252427185,
|
|
"grad_norm": 0.7920517590567935,
|
|
"learning_rate": 2.9533135345630536e-06,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14628848433494568,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3822.6,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 5.906148867313916,
|
|
"grad_norm": 0.7682562383139114,
|
|
"learning_rate": 2.911247313970882e-06,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1245870590209961,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3558.5,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 5.914239482200648,
|
|
"grad_norm": 0.7764934542624735,
|
|
"learning_rate": 2.8694593084944356e-06,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11907865107059479,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4022.9,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 5.922330097087379,
|
|
"grad_norm": 0.7915955290706294,
|
|
"learning_rate": 2.8279501984674396e-06,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12054628133773804,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3649.9,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 5.93042071197411,
|
|
"grad_norm": 0.686045896267701,
|
|
"learning_rate": 2.7867206596830355e-06,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10917463898658752,
|
|
"step": 3665,
|
|
"valid_targets_mean": 4130.0,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 5.938511326860842,
|
|
"grad_norm": 0.7249342585567816,
|
|
"learning_rate": 2.7457713633827763e-06,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12481742352247238,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3999.8,
|
|
"valid_targets_min": 2119
|
|
},
|
|
{
|
|
"epoch": 5.946601941747573,
|
|
"grad_norm": 0.7335052345451426,
|
|
"learning_rate": 2.705102976245697e-06,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1226348727941513,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3779.4,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 5.9546925566343045,
|
|
"grad_norm": 0.7240696636506034,
|
|
"learning_rate": 2.6647161603774763e-06,
|
|
"loss": 0.1225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12565986812114716,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3699.1,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 5.9627831715210355,
|
|
"grad_norm": 0.9844611721489968,
|
|
"learning_rate": 2.624611573299629e-06,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12091508507728577,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3558.1,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 5.970873786407767,
|
|
"grad_norm": 0.7889095453729414,
|
|
"learning_rate": 2.5847898679388217e-06,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12522706389427185,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3903.6,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 5.9789644012944985,
|
|
"grad_norm": 0.7583140618259258,
|
|
"learning_rate": 2.5452516926162394e-06,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313149333000183,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3939.1,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 5.9870550161812295,
|
|
"grad_norm": 0.7620324187487387,
|
|
"learning_rate": 2.5059976910370255e-06,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1112971380352974,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3635.0,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 5.995145631067961,
|
|
"grad_norm": 0.7140358860481412,
|
|
"learning_rate": 2.467028502279802e-06,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11595533788204193,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3788.3,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 6.003236245954692,
|
|
"grad_norm": 0.6509891193331984,
|
|
"learning_rate": 2.428344760786283e-06,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09506258368492126,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3532.4,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 6.011326860841424,
|
|
"grad_norm": 0.7139744229116504,
|
|
"learning_rate": 2.389947096350913e-06,
|
|
"loss": 0.111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10088154673576355,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3833.9,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 6.019417475728155,
|
|
"grad_norm": 0.6897387337767092,
|
|
"learning_rate": 2.3518361341106366e-06,
|
|
"loss": 0.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10516591370105743,
|
|
"step": 3720,
|
|
"valid_targets_mean": 3907.6,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 6.027508090614886,
|
|
"grad_norm": 0.7425321933146951,
|
|
"learning_rate": 2.3140124945347188e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12143640220165253,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3391.8,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 6.035598705501618,
|
|
"grad_norm": 0.694108149750431,
|
|
"learning_rate": 2.2764767934146304e-06,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.120110422372818,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3814.7,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 6.043689320388349,
|
|
"grad_norm": 0.7002918869971856,
|
|
"learning_rate": 2.2392296418540527e-06,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09147246181964874,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3512.8,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 6.051779935275081,
|
|
"grad_norm": 0.7931772871720706,
|
|
"learning_rate": 2.20227164625888e-06,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13165760040283203,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3220.9,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 6.059870550161812,
|
|
"grad_norm": 0.7318335669267926,
|
|
"learning_rate": 2.165603408327386e-06,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09360679239034653,
|
|
"step": 3745,
|
|
"valid_targets_mean": 3538.3,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 6.067961165048544,
|
|
"grad_norm": 0.6564949740471046,
|
|
"learning_rate": 2.129225525040428e-06,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11405914276838303,
|
|
"step": 3750,
|
|
"valid_targets_mean": 4078.1,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 6.076051779935275,
|
|
"grad_norm": 0.7352580575605189,
|
|
"learning_rate": 2.0931385886517043e-06,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11465087532997131,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3844.5,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 6.084142394822006,
|
|
"grad_norm": 0.74020729169368,
|
|
"learning_rate": 2.05734318667812e-06,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10614818334579468,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3700.8,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 6.092233009708738,
|
|
"grad_norm": 0.7715214057611403,
|
|
"learning_rate": 2.0218399018902368e-06,
|
|
"loss": 0.1152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.122889444231987,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3979.6,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 6.100323624595469,
|
|
"grad_norm": 0.7281761064430898,
|
|
"learning_rate": 1.986629312302759e-06,
|
|
"loss": 0.1123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1031067818403244,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3470.1,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 6.108414239482201,
|
|
"grad_norm": 0.7454398419847319,
|
|
"learning_rate": 1.9517119911651594e-06,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11875297129154205,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3795.9,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 6.116504854368932,
|
|
"grad_norm": 0.661496166671743,
|
|
"learning_rate": 1.917088506952307e-06,
|
|
"loss": 0.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09311284124851227,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3664.3,
|
|
"valid_targets_min": 1643
|
|
},
|
|
{
|
|
"epoch": 6.124595469255663,
|
|
"grad_norm": 0.835262575264935,
|
|
"learning_rate": 1.8827594233552338e-06,
|
|
"loss": 0.1078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1381199210882187,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3730.8,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 6.132686084142395,
|
|
"grad_norm": 0.7026566817099582,
|
|
"learning_rate": 1.8487252992719562e-06,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1321025788784027,
|
|
"step": 3790,
|
|
"valid_targets_mean": 4019.1,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 6.140776699029126,
|
|
"grad_norm": 0.8163113871171781,
|
|
"learning_rate": 1.8149866887983747e-06,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11378465592861176,
|
|
"step": 3795,
|
|
"valid_targets_mean": 3231.6,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 6.148867313915858,
|
|
"grad_norm": 0.7477376471533267,
|
|
"learning_rate": 1.7815441412192447e-06,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10754677653312683,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3431.2,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 6.156957928802589,
|
|
"grad_norm": 0.728794180353719,
|
|
"learning_rate": 1.7483982009992506e-06,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08733691275119781,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3602.6,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 6.165048543689321,
|
|
"grad_norm": 0.7562254178879685,
|
|
"learning_rate": 1.715549407774124e-06,
|
|
"loss": 0.1133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12288179993629456,
|
|
"step": 3810,
|
|
"valid_targets_mean": 4288.5,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 6.173139158576052,
|
|
"grad_norm": 0.8169277563659051,
|
|
"learning_rate": 1.6829982963418667e-06,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13156858086585999,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3638.5,
|
|
"valid_targets_min": 450
|
|
},
|
|
{
|
|
"epoch": 6.181229773462783,
|
|
"grad_norm": 0.791581886463547,
|
|
"learning_rate": 1.6507453966540454e-06,
|
|
"loss": 0.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11052010953426361,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3243.2,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 6.189320388349515,
|
|
"grad_norm": 0.8071179807020908,
|
|
"learning_rate": 1.6187912338071577e-06,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09611040353775024,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3300.4,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 6.197411003236246,
|
|
"grad_norm": 0.7341034704709207,
|
|
"learning_rate": 1.5871363280340913e-06,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1326591819524765,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4006.7,
|
|
"valid_targets_min": 2289
|
|
},
|
|
{
|
|
"epoch": 6.205501618122978,
|
|
"grad_norm": 0.6722924657092954,
|
|
"learning_rate": 1.555781194695649e-06,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09945492446422577,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3795.8,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 6.213592233009709,
|
|
"grad_norm": 0.7674389171998626,
|
|
"learning_rate": 1.5247263442721494e-06,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12633197009563446,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3439.7,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 6.2216828478964405,
|
|
"grad_norm": 0.7534393253923913,
|
|
"learning_rate": 1.4939722823551428e-06,
|
|
"loss": 0.1193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1213739737868309,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3602.9,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 6.229773462783172,
|
|
"grad_norm": 0.705373170660076,
|
|
"learning_rate": 1.4635195096391463e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11145274341106415,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3498.0,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 6.237864077669903,
|
|
"grad_norm": 0.7336287787765475,
|
|
"learning_rate": 1.4333685219135163e-06,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134007066488266,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4273.0,
|
|
"valid_targets_min": 1809
|
|
},
|
|
{
|
|
"epoch": 6.2459546925566345,
|
|
"grad_norm": 0.6955768107461284,
|
|
"learning_rate": 1.403519810054379e-06,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11127742379903793,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4114.2,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 6.2540453074433655,
|
|
"grad_norm": 0.6957722780972634,
|
|
"learning_rate": 1.373973860016602e-06,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1035354882478714,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3479.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 6.262135922330097,
|
|
"grad_norm": 0.7271832148458794,
|
|
"learning_rate": 1.3447311528259354e-06,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12107687443494797,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3875.2,
|
|
"valid_targets_min": 1717
|
|
},
|
|
{
|
|
"epoch": 6.270226537216828,
|
|
"grad_norm": 0.7225262524495937,
|
|
"learning_rate": 1.3157921645711436e-06,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12497846782207489,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3914.5,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 6.2783171521035595,
|
|
"grad_norm": 0.8559665525671778,
|
|
"learning_rate": 1.2871573663962611e-06,
|
|
"loss": 0.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14065983891487122,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3619.2,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 6.286407766990291,
|
|
"grad_norm": 0.7858491084715699,
|
|
"learning_rate": 1.2588272244929401e-06,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10485831648111343,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3352.1,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 6.294498381877022,
|
|
"grad_norm": 0.8093000501376625,
|
|
"learning_rate": 1.2308022000928287e-06,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13177737593650818,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3496.9,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 6.302588996763754,
|
|
"grad_norm": 0.7852769738079134,
|
|
"learning_rate": 1.203082749460085e-06,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12200696766376495,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3838.5,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 6.310679611650485,
|
|
"grad_norm": 0.7287577190496319,
|
|
"learning_rate": 1.1756693238839566e-06,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09417057782411575,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3651.3,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 6.318770226537217,
|
|
"grad_norm": 0.8302132261220526,
|
|
"learning_rate": 1.1485623696714043e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11933784186840057,
|
|
"step": 3905,
|
|
"valid_targets_mean": 2984.4,
|
|
"valid_targets_min": 419
|
|
},
|
|
{
|
|
"epoch": 6.326860841423948,
|
|
"grad_norm": 0.7327391037417946,
|
|
"learning_rate": 1.1217623281398571e-06,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10303106904029846,
|
|
"step": 3910,
|
|
"valid_targets_mean": 3733.2,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 6.334951456310679,
|
|
"grad_norm": 0.8208833041383687,
|
|
"learning_rate": 1.0952696356100234e-06,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14065951108932495,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3564.1,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 6.343042071197411,
|
|
"grad_norm": 0.8332779535360231,
|
|
"learning_rate": 1.069084723398781e-06,
|
|
"loss": 0.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12287405133247375,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3350.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 6.351132686084142,
|
|
"grad_norm": 0.7407469695551226,
|
|
"learning_rate": 1.0432080178121695e-06,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09893627464771271,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3427.6,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 6.359223300970874,
|
|
"grad_norm": 0.6991975233530747,
|
|
"learning_rate": 1.0176399401384306e-06,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11751756072044373,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4126.1,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 6.367313915857605,
|
|
"grad_norm": 0.8407713857506784,
|
|
"learning_rate": 9.92380906641166e-07,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12533585727214813,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3362.1,
|
|
"valid_targets_min": 483
|
|
},
|
|
{
|
|
"epoch": 6.375404530744337,
|
|
"grad_norm": 0.7586330784594048,
|
|
"learning_rate": 9.674313285525484e-07,
|
|
"loss": 0.1123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09893623739480972,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3274.6,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 6.383495145631068,
|
|
"grad_norm": 0.7214335052263516,
|
|
"learning_rate": 9.427916120666314e-07,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1176610141992569,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3927.4,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 6.391585760517799,
|
|
"grad_norm": 0.7486444334046114,
|
|
"learning_rate": 9.18462158332738e-07,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11216044425964355,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3802.6,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 6.399676375404531,
|
|
"grad_norm": 0.8208321060913947,
|
|
"learning_rate": 8.944433634489335e-07,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14981669187545776,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4065.8,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 6.407766990291262,
|
|
"grad_norm": 0.7055682571578344,
|
|
"learning_rate": 8.707356184555626e-07,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1000659316778183,
|
|
"step": 3960,
|
|
"valid_targets_mean": 3765.9,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 6.415857605177994,
|
|
"grad_norm": 0.7801809393232032,
|
|
"learning_rate": 8.473393093288962e-07,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14615213871002197,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3748.4,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 6.423948220064725,
|
|
"grad_norm": 0.7478694444542925,
|
|
"learning_rate": 8.242548169748388e-07,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11330067366361618,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3463.9,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 6.432038834951456,
|
|
"grad_norm": 0.8291515604455668,
|
|
"learning_rate": 8.014825172227359e-07,
|
|
"loss": 0.1124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1242062896490097,
|
|
"step": 3975,
|
|
"valid_targets_mean": 2997.5,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 6.440129449838188,
|
|
"grad_norm": 0.7779823610215374,
|
|
"learning_rate": 7.790227808192497e-07,
|
|
"loss": 0.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13047200441360474,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3859.4,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 6.448220064724919,
|
|
"grad_norm": 0.7255560841829936,
|
|
"learning_rate": 7.568759734223263e-07,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12434525042772293,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4043.9,
|
|
"valid_targets_min": 2429
|
|
},
|
|
{
|
|
"epoch": 6.456310679611651,
|
|
"grad_norm": 0.7324000370218157,
|
|
"learning_rate": 7.350424555952318e-07,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08710053563117981,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3343.6,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 6.464401294498382,
|
|
"grad_norm": 0.7057838956585973,
|
|
"learning_rate": 7.135225828007009e-07,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12603047490119934,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4327.1,
|
|
"valid_targets_min": 2402
|
|
},
|
|
{
|
|
"epoch": 6.472491909385114,
|
|
"grad_norm": 0.7370428093673492,
|
|
"learning_rate": 6.92316705395133e-07,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10717878490686417,
|
|
"step": 4000,
|
|
"valid_targets_mean": 3758.0,
|
|
"valid_targets_min": 416
|
|
},
|
|
{
|
|
"epoch": 6.480582524271845,
|
|
"grad_norm": 0.7267237980545936,
|
|
"learning_rate": 6.714251686228968e-07,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11581575125455856,
|
|
"step": 4005,
|
|
"valid_targets_mean": 3525.8,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 6.488673139158576,
|
|
"grad_norm": 0.7017774925879499,
|
|
"learning_rate": 6.508483126107146e-07,
|
|
"loss": 0.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09369301050901413,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3786.3,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 6.496763754045308,
|
|
"grad_norm": 0.7376578583187179,
|
|
"learning_rate": 6.305864723621025e-07,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12038631737232208,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3682.2,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 6.504854368932039,
|
|
"grad_norm": 0.7144299289177106,
|
|
"learning_rate": 6.10639977751939e-07,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10524886101484299,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3945.4,
|
|
"valid_targets_min": 2737
|
|
},
|
|
{
|
|
"epoch": 6.5129449838187705,
|
|
"grad_norm": 0.7662258033033662,
|
|
"learning_rate": 5.91009153521096e-07,
|
|
"loss": 0.121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11165094375610352,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3897.4,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 6.5210355987055015,
|
|
"grad_norm": 0.785191390319755,
|
|
"learning_rate": 5.716943192711277e-07,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13537812232971191,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3815.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 6.529126213592233,
|
|
"grad_norm": 0.7320628094314221,
|
|
"learning_rate": 5.526957894590923e-07,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12145470082759857,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3545.4,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 6.5372168284789645,
|
|
"grad_norm": 0.7311528372301167,
|
|
"learning_rate": 5.340138733924161e-07,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10949958860874176,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3639.9,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 6.5453074433656955,
|
|
"grad_norm": 0.7597891188614194,
|
|
"learning_rate": 5.156488752238708e-07,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11516404151916504,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3899.2,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 6.553398058252427,
|
|
"grad_norm": 0.7310469987564887,
|
|
"learning_rate": 4.976010939466136e-07,
|
|
"loss": 0.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11317206919193268,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3734.9,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 6.561488673139158,
|
|
"grad_norm": 0.8474280332116642,
|
|
"learning_rate": 4.798708233893168e-07,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1260000318288803,
|
|
"step": 4055,
|
|
"valid_targets_mean": 2874.1,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 6.56957928802589,
|
|
"grad_norm": 0.7565848227122396,
|
|
"learning_rate": 4.624583522113879e-07,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11290078610181808,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3216.5,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 6.577669902912621,
|
|
"grad_norm": 0.7190620351644219,
|
|
"learning_rate": 4.4536396389827986e-07,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12443754076957703,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3827.2,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 6.585760517799352,
|
|
"grad_norm": 0.7681106155104229,
|
|
"learning_rate": 4.285879367568546e-07,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09754884243011475,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3443.1,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.593851132686084,
|
|
"grad_norm": 0.7045013465157438,
|
|
"learning_rate": 4.1213054391086914e-07,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11584879457950592,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4158.1,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 6.601941747572815,
|
|
"grad_norm": 0.711241502567802,
|
|
"learning_rate": 3.959920532965278e-07,
|
|
"loss": 0.1092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09637920558452606,
|
|
"step": 4080,
|
|
"valid_targets_mean": 3688.4,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 6.610032362459547,
|
|
"grad_norm": 0.7548140115130028,
|
|
"learning_rate": 3.8017272765810795e-07,
|
|
"loss": 0.1053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10875862836837769,
|
|
"step": 4085,
|
|
"valid_targets_mean": 3557.0,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 6.618122977346278,
|
|
"grad_norm": 0.7476845196613187,
|
|
"learning_rate": 3.646728245436926e-07,
|
|
"loss": 0.1052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09515687823295593,
|
|
"step": 4090,
|
|
"valid_targets_mean": 3448.2,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 6.62621359223301,
|
|
"grad_norm": 0.6760149991841168,
|
|
"learning_rate": 3.4949259630097985e-07,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09844258427619934,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4119.4,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 6.634304207119741,
|
|
"grad_norm": 0.7450570767921598,
|
|
"learning_rate": 3.346322900731602e-07,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.112000972032547,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3919.7,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 6.642394822006472,
|
|
"grad_norm": 0.7870969672189868,
|
|
"learning_rate": 3.2009214779491703e-07,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12304633855819702,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3315.8,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 6.650485436893204,
|
|
"grad_norm": 0.7864861620237504,
|
|
"learning_rate": 3.0587240618845437e-07,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12119387090206146,
|
|
"step": 4110,
|
|
"valid_targets_mean": 3842.4,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 6.658576051779935,
|
|
"grad_norm": 0.7593935726615797,
|
|
"learning_rate": 2.9197329675967556e-07,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11751753836870193,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3600.8,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.7668039686269429,
|
|
"learning_rate": 2.7839504579439734e-07,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11438554525375366,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3635.9,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 6.674757281553398,
|
|
"grad_norm": 0.7321055225681964,
|
|
"learning_rate": 2.651378743546662e-07,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09255105257034302,
|
|
"step": 4125,
|
|
"valid_targets_mean": 3288.8,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 6.68284789644013,
|
|
"grad_norm": 0.769844001201492,
|
|
"learning_rate": 2.5220199827516335e-07,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11600494384765625,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3590.6,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 6.690938511326861,
|
|
"grad_norm": 0.7654676282072668,
|
|
"learning_rate": 2.395876281596898e-07,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11756563186645508,
|
|
"step": 4135,
|
|
"valid_targets_mean": 3544.8,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 6.699029126213592,
|
|
"grad_norm": 0.7222893887701092,
|
|
"learning_rate": 2.2729496937773375e-07,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10802868008613586,
|
|
"step": 4140,
|
|
"valid_targets_mean": 4240.8,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 6.707119741100324,
|
|
"grad_norm": 0.6656401401793538,
|
|
"learning_rate": 2.1532422206113957e-07,
|
|
"loss": 0.1119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10636568814516068,
|
|
"step": 4145,
|
|
"valid_targets_mean": 4059.1,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 6.715210355987055,
|
|
"grad_norm": 0.7023584062905446,
|
|
"learning_rate": 2.036755811008284e-07,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10692491382360458,
|
|
"step": 4150,
|
|
"valid_targets_mean": 4141.0,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 6.723300970873787,
|
|
"grad_norm": 0.7815100848686617,
|
|
"learning_rate": 1.9234923614364298e-07,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12436103820800781,
|
|
"step": 4155,
|
|
"valid_targets_mean": 3200.8,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 6.731391585760518,
|
|
"grad_norm": 0.7383871231544672,
|
|
"learning_rate": 1.813453715892588e-07,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1116761788725853,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3907.4,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 6.739482200647249,
|
|
"grad_norm": 0.8179783335491289,
|
|
"learning_rate": 1.706641665871689e-07,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14059647917747498,
|
|
"step": 4165,
|
|
"valid_targets_mean": 4028.5,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 6.747572815533981,
|
|
"grad_norm": 0.778583312094626,
|
|
"learning_rate": 1.603057950337794e-07,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11424525082111359,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3257.9,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 6.755663430420712,
|
|
"grad_norm": 0.7483501823341687,
|
|
"learning_rate": 1.5027042556958083e-07,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11524796485900879,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3492.9,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 6.763754045307444,
|
|
"grad_norm": 0.7262116629208208,
|
|
"learning_rate": 1.4055822157638566e-07,
|
|
"loss": 0.1057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1036527007818222,
|
|
"step": 4180,
|
|
"valid_targets_mean": 3663.7,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 6.771844660194175,
|
|
"grad_norm": 0.8294870662688552,
|
|
"learning_rate": 1.3116934117468617e-07,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13269130885601044,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3551.8,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 6.779935275080906,
|
|
"grad_norm": 0.7952419125548885,
|
|
"learning_rate": 1.2210393722106973e-07,
|
|
"loss": 0.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1315314769744873,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3879.2,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 6.788025889967638,
|
|
"grad_norm": 0.7813021917376338,
|
|
"learning_rate": 1.1336215730573863e-07,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12635990977287292,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3937.9,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 6.796116504854369,
|
|
"grad_norm": 0.7753053311834444,
|
|
"learning_rate": 1.0494414375009642e-07,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11267022788524628,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3228.6,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 6.8042071197411005,
|
|
"grad_norm": 0.7121555430743358,
|
|
"learning_rate": 9.68500336044409e-08,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0996587723493576,
|
|
"step": 4205,
|
|
"valid_targets_mean": 3622.3,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 6.8122977346278315,
|
|
"grad_norm": 0.7164530066965938,
|
|
"learning_rate": 8.907995864572583e-08,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11022158712148666,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3876.0,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 6.820388349514563,
|
|
"grad_norm": 0.6698497069366108,
|
|
"learning_rate": 8.16340453754183e-08,
|
|
"loss": 0.1245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11347459256649017,
|
|
"step": 4215,
|
|
"valid_targets_mean": 4525.3,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 6.828478964401294,
|
|
"grad_norm": 0.6864775791788594,
|
|
"learning_rate": 7.451241501744255e-08,
|
|
"loss": 0.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09482621401548386,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3799.6,
|
|
"valid_targets_min": 463
|
|
},
|
|
{
|
|
"epoch": 6.836569579288026,
|
|
"grad_norm": 0.752535267660768,
|
|
"learning_rate": 6.771518351619932e-08,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11658801138401031,
|
|
"step": 4225,
|
|
"valid_targets_mean": 4117.6,
|
|
"valid_targets_min": 442
|
|
},
|
|
{
|
|
"epoch": 6.844660194174757,
|
|
"grad_norm": 0.7713882605978399,
|
|
"learning_rate": 6.124246153468516e-08,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12442426383495331,
|
|
"step": 4230,
|
|
"valid_targets_mean": 3770.4,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 6.852750809061488,
|
|
"grad_norm": 0.7068637325141903,
|
|
"learning_rate": 5.5094354452684964e-08,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11987002193927765,
|
|
"step": 4235,
|
|
"valid_targets_mean": 3828.8,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 6.86084142394822,
|
|
"grad_norm": 0.8714684323249712,
|
|
"learning_rate": 4.927096236505779e-08,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273711919784546,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3429.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 6.868932038834951,
|
|
"grad_norm": 0.7973774333149717,
|
|
"learning_rate": 4.3772380080111534e-08,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.120909184217453,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3205.2,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 6.877022653721683,
|
|
"grad_norm": 0.7932995409173373,
|
|
"learning_rate": 3.85986971180552e-08,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11187095940113068,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3380.6,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 6.885113268608414,
|
|
"grad_norm": 0.8134749447064917,
|
|
"learning_rate": 3.374999770954013e-08,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132695734500885,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3588.1,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 6.893203883495145,
|
|
"grad_norm": 0.7963874900596022,
|
|
"learning_rate": 2.9226360794296638e-08,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14102879166603088,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3959.9,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 6.901294498381877,
|
|
"grad_norm": 0.7062095481538098,
|
|
"learning_rate": 2.502786001983726e-08,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.106046661734581,
|
|
"step": 4265,
|
|
"valid_targets_mean": 3602.3,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 6.909385113268608,
|
|
"grad_norm": 0.7013180525553683,
|
|
"learning_rate": 2.1154563740266588e-08,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11417189240455627,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3765.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 6.91747572815534,
|
|
"grad_norm": 0.7261543939015853,
|
|
"learning_rate": 1.7606535015164405e-08,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0980582907795906,
|
|
"step": 4275,
|
|
"valid_targets_mean": 3846.7,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 6.925566343042071,
|
|
"grad_norm": 0.7465722632159841,
|
|
"learning_rate": 1.4383831608562048e-08,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12335227429866791,
|
|
"step": 4280,
|
|
"valid_targets_mean": 3942.1,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 6.933656957928802,
|
|
"grad_norm": 0.6729429573207937,
|
|
"learning_rate": 1.14865059879965e-08,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10008923709392548,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4052.6,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 6.941747572815534,
|
|
"grad_norm": 0.7904605212036413,
|
|
"learning_rate": 8.914605323664394e-09,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11163502931594849,
|
|
"step": 4290,
|
|
"valid_targets_mean": 3616.7,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 6.949838187702265,
|
|
"grad_norm": 0.7795168919241651,
|
|
"learning_rate": 6.66817148764487e-09,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13894109427928925,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3961.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 6.957928802588997,
|
|
"grad_norm": 0.6999769879445384,
|
|
"learning_rate": 4.7472410532245495e-09,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10274814814329147,
|
|
"step": 4300,
|
|
"valid_targets_mean": 3830.3,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 6.966019417475728,
|
|
"grad_norm": 1.0280666868305712,
|
|
"learning_rate": 3.151845294302458e-09,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09658776223659515,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3119.4,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 6.97411003236246,
|
|
"grad_norm": 0.6709657407395602,
|
|
"learning_rate": 1.882010184874883e-09,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08560613542795181,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3504.5,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 6.982200647249191,
|
|
"grad_norm": 0.7173680419597602,
|
|
"learning_rate": 9.377563986157078e-10,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11601495742797852,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3960.2,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 6.990291262135923,
|
|
"grad_norm": 0.7570102861156914,
|
|
"learning_rate": 3.1909930854112646e-10,
|
|
"loss": 0.1084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10227625072002411,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3076.0,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 6.998381877022654,
|
|
"grad_norm": 0.7093115701651048,
|
|
"learning_rate": 2.6048986760951466e-11,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11005377024412155,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3910.6,
|
|
"valid_targets_min": 1792
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11569061130285263,
|
|
"step": 4326,
|
|
"total_flos": 1582790264291328.0,
|
|
"train_loss": 0.19078887684569554,
|
|
"train_runtime": 21380.015,
|
|
"train_samples_per_second": 3.236,
|
|
"train_steps_per_second": 0.202,
|
|
"valid_targets_mean": 2886.1,
|
|
"valid_targets_min": 416
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4326,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1582790264291328.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|