9695 lines
269 KiB
JSON
9695 lines
269 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4389,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.007974481658692184,
|
|
"grad_norm": 18.209313989032676,
|
|
"learning_rate": 3.644646924829157e-07,
|
|
"loss": 0.6613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7083388566970825,
|
|
"step": 5,
|
|
"valid_targets_mean": 4274.7,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 0.01594896331738437,
|
|
"grad_norm": 17.565899360898413,
|
|
"learning_rate": 8.200455580865605e-07,
|
|
"loss": 0.6645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7002649307250977,
|
|
"step": 10,
|
|
"valid_targets_mean": 4057.4,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 0.023923444976076555,
|
|
"grad_norm": 15.87052705649313,
|
|
"learning_rate": 1.2756264236902052e-06,
|
|
"loss": 0.6749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6710398197174072,
|
|
"step": 15,
|
|
"valid_targets_mean": 4749.7,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 0.03189792663476874,
|
|
"grad_norm": 12.636425630261991,
|
|
"learning_rate": 1.7312072892938498e-06,
|
|
"loss": 0.6233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6070592403411865,
|
|
"step": 20,
|
|
"valid_targets_mean": 4037.9,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 0.03987240829346093,
|
|
"grad_norm": 7.301088536990888,
|
|
"learning_rate": 2.1867881548974945e-06,
|
|
"loss": 0.5955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5352090001106262,
|
|
"step": 25,
|
|
"valid_targets_mean": 4646.4,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 0.04784688995215311,
|
|
"grad_norm": 4.620154565265324,
|
|
"learning_rate": 2.642369020501139e-06,
|
|
"loss": 0.4968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4887549579143524,
|
|
"step": 30,
|
|
"valid_targets_mean": 4638.0,
|
|
"valid_targets_min": 1815
|
|
},
|
|
{
|
|
"epoch": 0.05582137161084529,
|
|
"grad_norm": 2.97904782812409,
|
|
"learning_rate": 3.0979498861047843e-06,
|
|
"loss": 0.4695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4455524981021881,
|
|
"step": 35,
|
|
"valid_targets_mean": 3816.0,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 0.06379585326953748,
|
|
"grad_norm": 1.5438713586801176,
|
|
"learning_rate": 3.5535307517084285e-06,
|
|
"loss": 0.4283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41967082023620605,
|
|
"step": 40,
|
|
"valid_targets_mean": 4929.1,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 0.07177033492822966,
|
|
"grad_norm": 1.1290889855485853,
|
|
"learning_rate": 4.009111617312073e-06,
|
|
"loss": 0.4325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3867625594139099,
|
|
"step": 45,
|
|
"valid_targets_mean": 4424.9,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 0.07974481658692185,
|
|
"grad_norm": 1.0028160461532303,
|
|
"learning_rate": 4.464692482915718e-06,
|
|
"loss": 0.4095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4304451048374176,
|
|
"step": 50,
|
|
"valid_targets_mean": 3928.1,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 0.08771929824561403,
|
|
"grad_norm": 0.9392725292438185,
|
|
"learning_rate": 4.920273348519363e-06,
|
|
"loss": 0.3932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43969500064849854,
|
|
"step": 55,
|
|
"valid_targets_mean": 4562.9,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 0.09569377990430622,
|
|
"grad_norm": 0.8202651765816429,
|
|
"learning_rate": 5.375854214123008e-06,
|
|
"loss": 0.3884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3811887204647064,
|
|
"step": 60,
|
|
"valid_targets_mean": 4266.2,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 0.10366826156299841,
|
|
"grad_norm": 0.7238246937293968,
|
|
"learning_rate": 5.831435079726651e-06,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37014704942703247,
|
|
"step": 65,
|
|
"valid_targets_mean": 3979.3,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 0.11164274322169059,
|
|
"grad_norm": 0.6194971616152736,
|
|
"learning_rate": 6.287015945330297e-06,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3885234594345093,
|
|
"step": 70,
|
|
"valid_targets_mean": 5062.5,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 0.11961722488038277,
|
|
"grad_norm": 0.7090849438825476,
|
|
"learning_rate": 6.742596810933942e-06,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3633916676044464,
|
|
"step": 75,
|
|
"valid_targets_mean": 3379.9,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 0.12759170653907495,
|
|
"grad_norm": 0.5920254851165562,
|
|
"learning_rate": 7.1981776765375854e-06,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3116822838783264,
|
|
"step": 80,
|
|
"valid_targets_mean": 3987.1,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 0.13556618819776714,
|
|
"grad_norm": 0.5737514009324758,
|
|
"learning_rate": 7.65375854214123e-06,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.345905601978302,
|
|
"step": 85,
|
|
"valid_targets_mean": 4241.7,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 0.14354066985645933,
|
|
"grad_norm": 0.6245359822401961,
|
|
"learning_rate": 8.109339407744875e-06,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34620386362075806,
|
|
"step": 90,
|
|
"valid_targets_mean": 3697.2,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 0.15151515151515152,
|
|
"grad_norm": 0.6069702878436305,
|
|
"learning_rate": 8.564920273348521e-06,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138015866279602,
|
|
"step": 95,
|
|
"valid_targets_mean": 3749.2,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 0.1594896331738437,
|
|
"grad_norm": 0.5255375106110045,
|
|
"learning_rate": 9.020501138952164e-06,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3427339792251587,
|
|
"step": 100,
|
|
"valid_targets_mean": 4708.4,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 0.1674641148325359,
|
|
"grad_norm": 0.5468424188059171,
|
|
"learning_rate": 9.47608200455581e-06,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675308585166931,
|
|
"step": 105,
|
|
"valid_targets_mean": 4770.1,
|
|
"valid_targets_min": 1643
|
|
},
|
|
{
|
|
"epoch": 0.17543859649122806,
|
|
"grad_norm": 0.4900887968653505,
|
|
"learning_rate": 9.931662870159453e-06,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730833888053894,
|
|
"step": 110,
|
|
"valid_targets_mean": 4483.8,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.18341307814992025,
|
|
"grad_norm": 0.5632756036846491,
|
|
"learning_rate": 1.03872437357631e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28536278009414673,
|
|
"step": 115,
|
|
"valid_targets_mean": 3995.8,
|
|
"valid_targets_min": 1089
|
|
},
|
|
{
|
|
"epoch": 0.19138755980861244,
|
|
"grad_norm": 0.5478149732468116,
|
|
"learning_rate": 1.0842824601366744e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3417927920818329,
|
|
"step": 120,
|
|
"valid_targets_mean": 4201.2,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 0.19936204146730463,
|
|
"grad_norm": 0.7166036845775202,
|
|
"learning_rate": 1.1298405466970387e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29145631194114685,
|
|
"step": 125,
|
|
"valid_targets_mean": 4807.0,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 0.20733652312599682,
|
|
"grad_norm": 1.0456874967461594,
|
|
"learning_rate": 1.1753986332574032e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30068278312683105,
|
|
"step": 130,
|
|
"valid_targets_mean": 3277.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.215311004784689,
|
|
"grad_norm": 0.6134986205295205,
|
|
"learning_rate": 1.2209567198177677e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3421486020088196,
|
|
"step": 135,
|
|
"valid_targets_mean": 3532.4,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 0.22328548644338117,
|
|
"grad_norm": 0.6204495084108421,
|
|
"learning_rate": 1.2665148063781323e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868116497993469,
|
|
"step": 140,
|
|
"valid_targets_mean": 3922.3,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 0.23125996810207336,
|
|
"grad_norm": 0.5300101256809046,
|
|
"learning_rate": 1.3120728929384968e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29809439182281494,
|
|
"step": 145,
|
|
"valid_targets_mean": 4709.6,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 0.23923444976076555,
|
|
"grad_norm": 0.4985004710187439,
|
|
"learning_rate": 1.357630979498861e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25828999280929565,
|
|
"step": 150,
|
|
"valid_targets_mean": 4742.8,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 0.24720893141945774,
|
|
"grad_norm": 0.4874719165109116,
|
|
"learning_rate": 1.4031890660592255e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28525930643081665,
|
|
"step": 155,
|
|
"valid_targets_mean": 4538.2,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 0.2551834130781499,
|
|
"grad_norm": 0.5160695131874979,
|
|
"learning_rate": 1.4487471526195902e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674846053123474,
|
|
"step": 160,
|
|
"valid_targets_mean": 4224.4,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 0.2631578947368421,
|
|
"grad_norm": 0.5891466415194097,
|
|
"learning_rate": 1.4943052391799546e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28863510489463806,
|
|
"step": 165,
|
|
"valid_targets_mean": 3665.6,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.2711323763955343,
|
|
"grad_norm": 0.5145361680964159,
|
|
"learning_rate": 1.539863325740319e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26548290252685547,
|
|
"step": 170,
|
|
"valid_targets_mean": 4553.8,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 0.27910685805422647,
|
|
"grad_norm": 0.6601572080361445,
|
|
"learning_rate": 1.5854214123006836e-05,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26017022132873535,
|
|
"step": 175,
|
|
"valid_targets_mean": 4356.2,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 0.28708133971291866,
|
|
"grad_norm": 0.5470567123167829,
|
|
"learning_rate": 1.630979498861048e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680243253707886,
|
|
"step": 180,
|
|
"valid_targets_mean": 4444.1,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 0.29505582137161085,
|
|
"grad_norm": 0.5746616304410853,
|
|
"learning_rate": 1.6765375854214125e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717726528644562,
|
|
"step": 185,
|
|
"valid_targets_mean": 4274.6,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"grad_norm": 0.5176129626940454,
|
|
"learning_rate": 1.722095671981777e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602561116218567,
|
|
"step": 190,
|
|
"valid_targets_mean": 4857.2,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 0.31100478468899523,
|
|
"grad_norm": 0.562347488191691,
|
|
"learning_rate": 1.7676537585421415e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27074605226516724,
|
|
"step": 195,
|
|
"valid_targets_mean": 3558.6,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 0.3189792663476874,
|
|
"grad_norm": 0.5614487605840438,
|
|
"learning_rate": 1.813211845102506e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522098124027252,
|
|
"step": 200,
|
|
"valid_targets_mean": 3830.3,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 0.3269537480063796,
|
|
"grad_norm": 0.4969424618520658,
|
|
"learning_rate": 1.8587699316628704e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2337953746318817,
|
|
"step": 205,
|
|
"valid_targets_mean": 4135.1,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 0.3349282296650718,
|
|
"grad_norm": 0.6507347004788184,
|
|
"learning_rate": 1.904328018223235e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27326855063438416,
|
|
"step": 210,
|
|
"valid_targets_mean": 3666.0,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 0.34290271132376393,
|
|
"grad_norm": 0.5995906885581999,
|
|
"learning_rate": 1.9498861047835993e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2668547034263611,
|
|
"step": 215,
|
|
"valid_targets_mean": 3878.4,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 0.3508771929824561,
|
|
"grad_norm": 0.664328034986349,
|
|
"learning_rate": 1.9954441913439638e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3150981068611145,
|
|
"step": 220,
|
|
"valid_targets_mean": 3710.6,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 0.3588516746411483,
|
|
"grad_norm": 0.5455136961351472,
|
|
"learning_rate": 2.0410022779043283e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28206348419189453,
|
|
"step": 225,
|
|
"valid_targets_mean": 4419.8,
|
|
"valid_targets_min": 1663
|
|
},
|
|
{
|
|
"epoch": 0.3668261562998405,
|
|
"grad_norm": 0.6469360356284334,
|
|
"learning_rate": 2.0865603644646927e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30276286602020264,
|
|
"step": 230,
|
|
"valid_targets_mean": 3983.5,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.3748006379585327,
|
|
"grad_norm": 0.4813930753264431,
|
|
"learning_rate": 2.1321184510250572e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24887768924236298,
|
|
"step": 235,
|
|
"valid_targets_mean": 5244.1,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 0.3827751196172249,
|
|
"grad_norm": 0.6357050229555575,
|
|
"learning_rate": 2.1776765375854217e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29940420389175415,
|
|
"step": 240,
|
|
"valid_targets_mean": 4581.2,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 0.39074960127591707,
|
|
"grad_norm": 0.5182052236757216,
|
|
"learning_rate": 2.223234624145786e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24142391979694366,
|
|
"step": 245,
|
|
"valid_targets_mean": 3957.9,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 0.39872408293460926,
|
|
"grad_norm": 0.5325592560406327,
|
|
"learning_rate": 2.2687927107061506e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26413482427597046,
|
|
"step": 250,
|
|
"valid_targets_mean": 4843.4,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 0.40669856459330145,
|
|
"grad_norm": 0.5402875675866221,
|
|
"learning_rate": 2.314350797266515e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24113714694976807,
|
|
"step": 255,
|
|
"valid_targets_mean": 3996.4,
|
|
"valid_targets_min": 1718
|
|
},
|
|
{
|
|
"epoch": 0.41467304625199364,
|
|
"grad_norm": 0.6224075019796482,
|
|
"learning_rate": 2.3599088838268792e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2829429805278778,
|
|
"step": 260,
|
|
"valid_targets_mean": 4358.6,
|
|
"valid_targets_min": 1842
|
|
},
|
|
{
|
|
"epoch": 0.4226475279106858,
|
|
"grad_norm": 0.5623312618548334,
|
|
"learning_rate": 2.4054669703872436e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24700826406478882,
|
|
"step": 265,
|
|
"valid_targets_mean": 4174.6,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 0.430622009569378,
|
|
"grad_norm": 0.5936830138426613,
|
|
"learning_rate": 2.4510250569476085e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261145681142807,
|
|
"step": 270,
|
|
"valid_targets_mean": 4012.4,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 0.43859649122807015,
|
|
"grad_norm": 0.5816172057799641,
|
|
"learning_rate": 2.496583143507973e-05,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24251684546470642,
|
|
"step": 275,
|
|
"valid_targets_mean": 3785.7,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 0.44657097288676234,
|
|
"grad_norm": 0.5363952823204787,
|
|
"learning_rate": 2.5421412300683374e-05,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21991890668869019,
|
|
"step": 280,
|
|
"valid_targets_mean": 4118.2,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 0.45454545454545453,
|
|
"grad_norm": 0.5507874631028685,
|
|
"learning_rate": 2.587699316628702e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23739475011825562,
|
|
"step": 285,
|
|
"valid_targets_mean": 4624.6,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 0.4625199362041467,
|
|
"grad_norm": 0.5892708926261455,
|
|
"learning_rate": 2.6332574031890663e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2542714476585388,
|
|
"step": 290,
|
|
"valid_targets_mean": 4042.2,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 0.4704944178628389,
|
|
"grad_norm": 0.5470604216754679,
|
|
"learning_rate": 2.6788154897494308e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24727243185043335,
|
|
"step": 295,
|
|
"valid_targets_mean": 5101.3,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 0.4784688995215311,
|
|
"grad_norm": 0.574379987246796,
|
|
"learning_rate": 2.7243735763097953e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26218515634536743,
|
|
"step": 300,
|
|
"valid_targets_mean": 4304.6,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 0.4864433811802233,
|
|
"grad_norm": 0.7523937216403082,
|
|
"learning_rate": 2.7699316628701597e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26889804005622864,
|
|
"step": 305,
|
|
"valid_targets_mean": 4536.9,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 0.4944178628389155,
|
|
"grad_norm": 0.5299100257347374,
|
|
"learning_rate": 2.815489749430524e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22799797356128693,
|
|
"step": 310,
|
|
"valid_targets_mean": 3944.1,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 0.5023923444976076,
|
|
"grad_norm": 0.5121302510549167,
|
|
"learning_rate": 2.8610478359908883e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21685777604579926,
|
|
"step": 315,
|
|
"valid_targets_mean": 3902.4,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 0.5103668261562998,
|
|
"grad_norm": 0.6666778484933589,
|
|
"learning_rate": 2.906605922551253e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527405023574829,
|
|
"step": 320,
|
|
"valid_targets_mean": 3753.1,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 0.518341307814992,
|
|
"grad_norm": 0.5629631879647229,
|
|
"learning_rate": 2.9521640091116176e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660377323627472,
|
|
"step": 325,
|
|
"valid_targets_mean": 4528.5,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 0.5263157894736842,
|
|
"grad_norm": 0.547886501540369,
|
|
"learning_rate": 2.997722095671982e-05,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25449228286743164,
|
|
"step": 330,
|
|
"valid_targets_mean": 4229.8,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 0.5342902711323764,
|
|
"grad_norm": 0.5252359642628719,
|
|
"learning_rate": 3.0432801822323465e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21056079864501953,
|
|
"step": 335,
|
|
"valid_targets_mean": 3687.8,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 0.5422647527910686,
|
|
"grad_norm": 0.627517410948377,
|
|
"learning_rate": 3.088838268792711e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23819315433502197,
|
|
"step": 340,
|
|
"valid_targets_mean": 3025.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 0.5502392344497608,
|
|
"grad_norm": 0.5124787162579827,
|
|
"learning_rate": 3.1343963553530755e-05,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2032354772090912,
|
|
"step": 345,
|
|
"valid_targets_mean": 4400.8,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 0.5582137161084529,
|
|
"grad_norm": 0.5955271698884919,
|
|
"learning_rate": 3.17995444191344e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2681797742843628,
|
|
"step": 350,
|
|
"valid_targets_mean": 3872.7,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.5661881977671451,
|
|
"grad_norm": 0.603984619491476,
|
|
"learning_rate": 3.2255125284738044e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733217179775238,
|
|
"step": 355,
|
|
"valid_targets_mean": 3847.1,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 0.5741626794258373,
|
|
"grad_norm": 0.5688812858187576,
|
|
"learning_rate": 3.271070615034169e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.221302792429924,
|
|
"step": 360,
|
|
"valid_targets_mean": 4562.5,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 0.5821371610845295,
|
|
"grad_norm": 0.5711830635381313,
|
|
"learning_rate": 3.316628701594533e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22806572914123535,
|
|
"step": 365,
|
|
"valid_targets_mean": 4239.7,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 0.5901116427432217,
|
|
"grad_norm": 0.6383939916129264,
|
|
"learning_rate": 3.362186788154898e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370656281709671,
|
|
"step": 370,
|
|
"valid_targets_mean": 4305.8,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 0.5980861244019139,
|
|
"grad_norm": 0.5156811453902431,
|
|
"learning_rate": 3.407744874715262e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2404838353395462,
|
|
"step": 375,
|
|
"valid_targets_mean": 4794.8,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"grad_norm": 0.48726742363902603,
|
|
"learning_rate": 3.453302961275627e-05,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2199721783399582,
|
|
"step": 380,
|
|
"valid_targets_mean": 4558.2,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 0.6140350877192983,
|
|
"grad_norm": 0.5411250794864436,
|
|
"learning_rate": 3.498861047835991e-05,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2332228720188141,
|
|
"step": 385,
|
|
"valid_targets_mean": 4203.2,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 0.6220095693779905,
|
|
"grad_norm": 0.5642511125075297,
|
|
"learning_rate": 3.5444191343963557e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2551465630531311,
|
|
"step": 390,
|
|
"valid_targets_mean": 4718.6,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 0.6299840510366826,
|
|
"grad_norm": 0.5566898900499291,
|
|
"learning_rate": 3.58997722095672e-05,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2283218502998352,
|
|
"step": 395,
|
|
"valid_targets_mean": 4628.2,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 0.6379585326953748,
|
|
"grad_norm": 0.5643404325017214,
|
|
"learning_rate": 3.6355353075170846e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21494194865226746,
|
|
"step": 400,
|
|
"valid_targets_mean": 3746.0,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 0.645933014354067,
|
|
"grad_norm": 0.5893865950161041,
|
|
"learning_rate": 3.681093394077449e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26212871074676514,
|
|
"step": 405,
|
|
"valid_targets_mean": 3495.2,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 0.6539074960127592,
|
|
"grad_norm": 0.5716329413997535,
|
|
"learning_rate": 3.7266514806378135e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25604891777038574,
|
|
"step": 410,
|
|
"valid_targets_mean": 4562.3,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 0.6618819776714514,
|
|
"grad_norm": 1.0481433223447794,
|
|
"learning_rate": 3.772209567198178e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24092164635658264,
|
|
"step": 415,
|
|
"valid_targets_mean": 5237.6,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 0.6698564593301436,
|
|
"grad_norm": 0.5225869724976354,
|
|
"learning_rate": 3.8177676537585425e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21885554492473602,
|
|
"step": 420,
|
|
"valid_targets_mean": 4645.8,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 0.6778309409888357,
|
|
"grad_norm": 0.5422126345688248,
|
|
"learning_rate": 3.863325740318907e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22362150251865387,
|
|
"step": 425,
|
|
"valid_targets_mean": 3966.8,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 0.6858054226475279,
|
|
"grad_norm": 0.6217299725132367,
|
|
"learning_rate": 3.9088838268792714e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27033206820487976,
|
|
"step": 430,
|
|
"valid_targets_mean": 4077.7,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 0.69377990430622,
|
|
"grad_norm": 0.5522536139744835,
|
|
"learning_rate": 3.954441913439636e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22072309255599976,
|
|
"step": 435,
|
|
"valid_targets_mean": 3992.1,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 0.7017543859649122,
|
|
"grad_norm": 0.5896141650099441,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22706490755081177,
|
|
"step": 440,
|
|
"valid_targets_mean": 3845.8,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 0.7097288676236044,
|
|
"grad_norm": 0.49918684425793947,
|
|
"learning_rate": 3.9999841858814384e-05,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2301272749900818,
|
|
"step": 445,
|
|
"valid_targets_mean": 4235.4,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 0.7177033492822966,
|
|
"grad_norm": 0.4745920857023304,
|
|
"learning_rate": 3.999936743775839e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23754261434078217,
|
|
"step": 450,
|
|
"valid_targets_mean": 5214.8,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 0.7256778309409888,
|
|
"grad_norm": 0.5726266794948712,
|
|
"learning_rate": 3.9998576744334574e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258882611989975,
|
|
"step": 455,
|
|
"valid_targets_mean": 4054.8,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 0.733652312599681,
|
|
"grad_norm": 0.5172599271671103,
|
|
"learning_rate": 3.999746979104705e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23474350571632385,
|
|
"step": 460,
|
|
"valid_targets_mean": 4465.3,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 0.7416267942583732,
|
|
"grad_norm": 0.5700018129206589,
|
|
"learning_rate": 3.999604659540131e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658040225505829,
|
|
"step": 465,
|
|
"valid_targets_mean": 4404.1,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 0.7496012759170654,
|
|
"grad_norm": 0.544608562254914,
|
|
"learning_rate": 3.999430717990395e-05,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21445083618164062,
|
|
"step": 470,
|
|
"valid_targets_mean": 4848.9,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 0.7575757575757576,
|
|
"grad_norm": 0.5440408470750774,
|
|
"learning_rate": 3.999225157206228e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246881127357483,
|
|
"step": 475,
|
|
"valid_targets_mean": 4445.8,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 0.7655502392344498,
|
|
"grad_norm": 0.4910154678701416,
|
|
"learning_rate": 3.998987980438393e-05,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21627771854400635,
|
|
"step": 480,
|
|
"valid_targets_mean": 4779.7,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 0.773524720893142,
|
|
"grad_norm": 0.49721737953299344,
|
|
"learning_rate": 3.9987191914376306e-05,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24980363249778748,
|
|
"step": 485,
|
|
"valid_targets_mean": 4823.7,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 0.7814992025518341,
|
|
"grad_norm": 0.5326435414223772,
|
|
"learning_rate": 3.998418794454604e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.233478382229805,
|
|
"step": 490,
|
|
"valid_targets_mean": 5116.4,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 0.7894736842105263,
|
|
"grad_norm": 0.47168445536055376,
|
|
"learning_rate": 3.998086794239825e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21351468563079834,
|
|
"step": 495,
|
|
"valid_targets_mean": 4241.2,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 0.7974481658692185,
|
|
"grad_norm": 0.5378456564747194,
|
|
"learning_rate": 3.997723196043585e-05,
|
|
"loss": 0.2337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24235227704048157,
|
|
"step": 500,
|
|
"valid_targets_mean": 4468.3,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 0.8054226475279107,
|
|
"grad_norm": 0.49332737950849986,
|
|
"learning_rate": 3.9973280056158695e-05,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21013407409191132,
|
|
"step": 505,
|
|
"valid_targets_mean": 4848.8,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 0.8133971291866029,
|
|
"grad_norm": 0.5084293615808084,
|
|
"learning_rate": 3.9969012292062655e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22314345836639404,
|
|
"step": 510,
|
|
"valid_targets_mean": 4942.4,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 0.8213716108452951,
|
|
"grad_norm": 0.5214468474293336,
|
|
"learning_rate": 3.996442873563866e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509833574295044,
|
|
"step": 515,
|
|
"valid_targets_mean": 4710.6,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 0.8293460925039873,
|
|
"grad_norm": 0.5606998837576238,
|
|
"learning_rate": 3.9959529459371624e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22545850276947021,
|
|
"step": 520,
|
|
"valid_targets_mean": 3715.6,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 0.8373205741626795,
|
|
"grad_norm": 0.5280478169346253,
|
|
"learning_rate": 3.9954314540739284e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2452714741230011,
|
|
"step": 525,
|
|
"valid_targets_mean": 4315.8,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.8452950558213717,
|
|
"grad_norm": 0.5422600888270647,
|
|
"learning_rate": 3.994878406221097e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24596625566482544,
|
|
"step": 530,
|
|
"valid_targets_mean": 3957.6,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 0.8532695374800638,
|
|
"grad_norm": 0.507019066579747,
|
|
"learning_rate": 3.994293811124632e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22058746218681335,
|
|
"step": 535,
|
|
"valid_targets_mean": 4338.3,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 0.861244019138756,
|
|
"grad_norm": 0.5381743057138643,
|
|
"learning_rate": 3.993677678029392e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19621753692626953,
|
|
"step": 540,
|
|
"valid_targets_mean": 3516.2,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 0.8692185007974481,
|
|
"grad_norm": 0.45991761568101813,
|
|
"learning_rate": 3.9930300166789765e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21199464797973633,
|
|
"step": 545,
|
|
"valid_targets_mean": 4659.9,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 0.8771929824561403,
|
|
"grad_norm": 0.5152683544618659,
|
|
"learning_rate": 3.992350837315581e-05,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2165919989347458,
|
|
"step": 550,
|
|
"valid_targets_mean": 4235.0,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 0.8851674641148325,
|
|
"grad_norm": 0.495899757310668,
|
|
"learning_rate": 3.991640150679826e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22138243913650513,
|
|
"step": 555,
|
|
"valid_targets_mean": 4780.8,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 0.8931419457735247,
|
|
"grad_norm": 1.0618139834975908,
|
|
"learning_rate": 3.990897968010596e-05,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2128307819366455,
|
|
"step": 560,
|
|
"valid_targets_mean": 4273.6,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 0.9011164274322169,
|
|
"grad_norm": 0.7733846462707373,
|
|
"learning_rate": 3.990124301044855e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21958939731121063,
|
|
"step": 565,
|
|
"valid_targets_mean": 4501.3,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 0.6459581832184564,
|
|
"learning_rate": 3.989319162017465e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24889922142028809,
|
|
"step": 570,
|
|
"valid_targets_mean": 3888.4,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 0.9170653907496013,
|
|
"grad_norm": 0.5233481857337777,
|
|
"learning_rate": 3.988482563660989e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22873693704605103,
|
|
"step": 575,
|
|
"valid_targets_mean": 4581.4,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 0.9250398724082934,
|
|
"grad_norm": 0.4661014756511067,
|
|
"learning_rate": 3.987614519205493e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17319852113723755,
|
|
"step": 580,
|
|
"valid_targets_mean": 4144.8,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 0.9330143540669856,
|
|
"grad_norm": 0.5123973312794503,
|
|
"learning_rate": 3.986715042378334e-05,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23227417469024658,
|
|
"step": 585,
|
|
"valid_targets_mean": 4537.4,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 0.9409888357256778,
|
|
"grad_norm": 0.5277379596279795,
|
|
"learning_rate": 3.985784147403947e-05,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23037245869636536,
|
|
"step": 590,
|
|
"valid_targets_mean": 4016.3,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 0.94896331738437,
|
|
"grad_norm": 0.5022165605708244,
|
|
"learning_rate": 3.9848218490036144e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20099911093711853,
|
|
"step": 595,
|
|
"valid_targets_mean": 4853.9,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 0.9569377990430622,
|
|
"grad_norm": 0.5558812503704039,
|
|
"learning_rate": 3.983828162395238e-05,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.210407093167305,
|
|
"step": 600,
|
|
"valid_targets_mean": 3924.6,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.9649122807017544,
|
|
"grad_norm": 0.47064454606354345,
|
|
"learning_rate": 3.9828031032930944e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22056901454925537,
|
|
"step": 605,
|
|
"valid_targets_mean": 4490.2,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 0.9728867623604466,
|
|
"grad_norm": 0.697270583820041,
|
|
"learning_rate": 3.98174668790759e-05,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25577813386917114,
|
|
"step": 610,
|
|
"valid_targets_mean": 3943.4,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 0.9808612440191388,
|
|
"grad_norm": 0.4718246379918207,
|
|
"learning_rate": 3.9806589329450045e-05,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.222915917634964,
|
|
"step": 615,
|
|
"valid_targets_mean": 4911.8,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 0.988835725677831,
|
|
"grad_norm": 0.5884919525751748,
|
|
"learning_rate": 3.979539855607222e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23299531638622284,
|
|
"step": 620,
|
|
"valid_targets_mean": 3551.5,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 0.9968102073365231,
|
|
"grad_norm": 0.4784319785885809,
|
|
"learning_rate": 3.9783894735914646e-05,
|
|
"loss": 0.2056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19052976369857788,
|
|
"step": 625,
|
|
"valid_targets_mean": 4063.5,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 1.0047846889952152,
|
|
"grad_norm": 0.46460339195494116,
|
|
"learning_rate": 3.9772078050900105e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21833254396915436,
|
|
"step": 630,
|
|
"valid_targets_mean": 4727.1,
|
|
"valid_targets_min": 2191
|
|
},
|
|
{
|
|
"epoch": 1.0127591706539074,
|
|
"grad_norm": 0.48698038036824903,
|
|
"learning_rate": 3.9759948687899055e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22654496133327484,
|
|
"step": 635,
|
|
"valid_targets_mean": 4618.6,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 1.0207336523125996,
|
|
"grad_norm": 0.4945012501967262,
|
|
"learning_rate": 3.974750683872667e-05,
|
|
"loss": 0.2017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19344402849674225,
|
|
"step": 640,
|
|
"valid_targets_mean": 4245.7,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 1.0287081339712918,
|
|
"grad_norm": 0.5315995383625693,
|
|
"learning_rate": 3.973475270013984e-05,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25702545046806335,
|
|
"step": 645,
|
|
"valid_targets_mean": 4548.3,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 1.036682615629984,
|
|
"grad_norm": 0.4612983874562097,
|
|
"learning_rate": 3.972168647383402e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16062483191490173,
|
|
"step": 650,
|
|
"valid_targets_mean": 4234.8,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 1.0446570972886762,
|
|
"grad_norm": 0.56865349444611,
|
|
"learning_rate": 3.970830836644006e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21786797046661377,
|
|
"step": 655,
|
|
"valid_targets_mean": 3699.0,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 1.0526315789473684,
|
|
"grad_norm": 0.5282858125221974,
|
|
"learning_rate": 3.9694618589520945e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22115270793437958,
|
|
"step": 660,
|
|
"valid_targets_mean": 4516.8,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 1.0606060606060606,
|
|
"grad_norm": 0.4743679626617336,
|
|
"learning_rate": 3.9680617359568414e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579694092273712,
|
|
"step": 665,
|
|
"valid_targets_mean": 4811.8,
|
|
"valid_targets_min": 499
|
|
},
|
|
{
|
|
"epoch": 1.0685805422647527,
|
|
"grad_norm": 0.49340496679156404,
|
|
"learning_rate": 3.966630489799959e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2095668911933899,
|
|
"step": 670,
|
|
"valid_targets_mean": 4462.6,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 1.076555023923445,
|
|
"grad_norm": 0.5164882487713939,
|
|
"learning_rate": 3.9651681431153445e-05,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17490506172180176,
|
|
"step": 675,
|
|
"valid_targets_mean": 4067.7,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 1.0845295055821371,
|
|
"grad_norm": 0.5713815411108527,
|
|
"learning_rate": 3.96367471902872e-05,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2347341775894165,
|
|
"step": 680,
|
|
"valid_targets_mean": 4134.6,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 1.0925039872408293,
|
|
"grad_norm": 0.47456818238837095,
|
|
"learning_rate": 3.9621502411572705e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20963379740715027,
|
|
"step": 685,
|
|
"valid_targets_mean": 5286.9,
|
|
"valid_targets_min": 3049
|
|
},
|
|
{
|
|
"epoch": 1.1004784688995215,
|
|
"grad_norm": 0.4863779478599738,
|
|
"learning_rate": 3.960594733609273e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19514256715774536,
|
|
"step": 690,
|
|
"valid_targets_mean": 4440.2,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 1.1084529505582137,
|
|
"grad_norm": 0.4932905628271792,
|
|
"learning_rate": 3.9590082209837054e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18851104378700256,
|
|
"step": 695,
|
|
"valid_targets_mean": 4061.9,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 1.1164274322169059,
|
|
"grad_norm": 0.45365103869992013,
|
|
"learning_rate": 3.957390728369867e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24006181955337524,
|
|
"step": 700,
|
|
"valid_targets_mean": 5731.4,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 1.124401913875598,
|
|
"grad_norm": 0.5058182026865845,
|
|
"learning_rate": 3.955742281346979e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19824036955833435,
|
|
"step": 705,
|
|
"valid_targets_mean": 4043.1,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 1.1323763955342903,
|
|
"grad_norm": 0.5128186001407955,
|
|
"learning_rate": 3.9540629059837767e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2106734663248062,
|
|
"step": 710,
|
|
"valid_targets_mean": 4469.8,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 1.1403508771929824,
|
|
"grad_norm": 0.5021049092608386,
|
|
"learning_rate": 3.952352628838102e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18943166732788086,
|
|
"step": 715,
|
|
"valid_targets_mean": 4009.2,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 1.1483253588516746,
|
|
"grad_norm": 0.5340344103102245,
|
|
"learning_rate": 3.95061147695648e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21626971662044525,
|
|
"step": 720,
|
|
"valid_targets_mean": 3693.9,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 1.1562998405103668,
|
|
"grad_norm": 0.4709056311492883,
|
|
"learning_rate": 3.9488394778736935e-05,
|
|
"loss": 0.2007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19259145855903625,
|
|
"step": 725,
|
|
"valid_targets_mean": 4224.8,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 1.164274322169059,
|
|
"grad_norm": 0.5656532496751977,
|
|
"learning_rate": 3.947036659612345e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2136770784854889,
|
|
"step": 730,
|
|
"valid_targets_mean": 3510.9,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 1.1722488038277512,
|
|
"grad_norm": 0.516909844827405,
|
|
"learning_rate": 3.945203050682418e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19989225268363953,
|
|
"step": 735,
|
|
"valid_targets_mean": 4603.7,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 1.1802232854864434,
|
|
"grad_norm": 0.4476575905996551,
|
|
"learning_rate": 3.94333868008082e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1822001039981842,
|
|
"step": 740,
|
|
"valid_targets_mean": 4317.9,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 1.1881977671451356,
|
|
"grad_norm": 0.5512846666952635,
|
|
"learning_rate": 3.94144357729093e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1952081322669983,
|
|
"step": 745,
|
|
"valid_targets_mean": 3049.8,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 1.1961722488038278,
|
|
"grad_norm": 0.46864139449053893,
|
|
"learning_rate": 3.939517772282127e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19514057040214539,
|
|
"step": 750,
|
|
"valid_targets_mean": 4949.1,
|
|
"valid_targets_min": 1515
|
|
},
|
|
{
|
|
"epoch": 1.20414673046252,
|
|
"grad_norm": 0.47586190601006667,
|
|
"learning_rate": 3.93756129550932e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19161838293075562,
|
|
"step": 755,
|
|
"valid_targets_mean": 3673.9,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 1.2121212121212122,
|
|
"grad_norm": 0.469259259330737,
|
|
"learning_rate": 3.935574177912465e-05,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22550539672374725,
|
|
"step": 760,
|
|
"valid_targets_mean": 4563.8,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 1.2200956937799043,
|
|
"grad_norm": 0.5595560718584884,
|
|
"learning_rate": 3.9335564509160746e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21519671380519867,
|
|
"step": 765,
|
|
"valid_targets_mean": 3809.5,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 1.2280701754385965,
|
|
"grad_norm": 0.4683457487165706,
|
|
"learning_rate": 3.931508146428724e-05,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22939258813858032,
|
|
"step": 770,
|
|
"valid_targets_mean": 4674.4,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 1.2360446570972887,
|
|
"grad_norm": 0.4631171919932267,
|
|
"learning_rate": 3.929429296842542e-05,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22505074739456177,
|
|
"step": 775,
|
|
"valid_targets_mean": 4932.9,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 1.244019138755981,
|
|
"grad_norm": 0.48793010325208086,
|
|
"learning_rate": 3.927319935032703e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.210595041513443,
|
|
"step": 780,
|
|
"valid_targets_mean": 4669.9,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 1.251993620414673,
|
|
"grad_norm": 0.5295446445015357,
|
|
"learning_rate": 3.925180094356905e-05,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17223702371120453,
|
|
"step": 785,
|
|
"valid_targets_mean": 3653.3,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 1.2599681020733653,
|
|
"grad_norm": 0.4929263607980674,
|
|
"learning_rate": 3.9230098086548414e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21135661005973816,
|
|
"step": 790,
|
|
"valid_targets_mean": 4423.8,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 1.2679425837320575,
|
|
"grad_norm": 0.5532048860088302,
|
|
"learning_rate": 3.920809112247668e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1872583031654358,
|
|
"step": 795,
|
|
"valid_targets_mean": 3538.3,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 1.2759170653907497,
|
|
"grad_norm": 0.4727052430017335,
|
|
"learning_rate": 3.918578039937459e-05,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20145076513290405,
|
|
"step": 800,
|
|
"valid_targets_mean": 4678.7,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 1.2838915470494419,
|
|
"grad_norm": 0.505747505148101,
|
|
"learning_rate": 3.916316627006656e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23686648905277252,
|
|
"step": 805,
|
|
"valid_targets_mean": 4906.5,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 1.291866028708134,
|
|
"grad_norm": 0.4783025319909653,
|
|
"learning_rate": 3.914024909217511e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20679500699043274,
|
|
"step": 810,
|
|
"valid_targets_mean": 4391.6,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 1.2998405103668262,
|
|
"grad_norm": 0.4637771888874383,
|
|
"learning_rate": 3.911702922811522e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.208163321018219,
|
|
"step": 815,
|
|
"valid_targets_mean": 5013.6,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 1.3078149920255182,
|
|
"grad_norm": 0.5347262889521851,
|
|
"learning_rate": 3.909350704508856e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21651028096675873,
|
|
"step": 820,
|
|
"valid_targets_mean": 3909.6,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 1.3157894736842106,
|
|
"grad_norm": 0.5076565141198601,
|
|
"learning_rate": 3.906968291507773e-05,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20610755681991577,
|
|
"step": 825,
|
|
"valid_targets_mean": 4341.7,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 1.3237639553429026,
|
|
"grad_norm": 0.4341142615004726,
|
|
"learning_rate": 3.904555721484034e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21865013241767883,
|
|
"step": 830,
|
|
"valid_targets_mean": 4704.8,
|
|
"valid_targets_min": 2345
|
|
},
|
|
{
|
|
"epoch": 1.331738437001595,
|
|
"grad_norm": 0.46885937407438444,
|
|
"learning_rate": 3.9021130325903076e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2055540531873703,
|
|
"step": 835,
|
|
"valid_targets_mean": 4387.2,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 1.339712918660287,
|
|
"grad_norm": 0.513948069179042,
|
|
"learning_rate": 3.899640263455566e-05,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18328502774238586,
|
|
"step": 840,
|
|
"valid_targets_mean": 3465.4,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 1.3476874003189794,
|
|
"grad_norm": 0.5375774941468009,
|
|
"learning_rate": 3.897137453184472e-05,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21731314063072205,
|
|
"step": 845,
|
|
"valid_targets_mean": 3993.8,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 1.3556618819776713,
|
|
"grad_norm": 0.5153424805710766,
|
|
"learning_rate": 3.894604641356767e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20963263511657715,
|
|
"step": 850,
|
|
"valid_targets_mean": 3976.2,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 1.3636363636363638,
|
|
"grad_norm": 0.5224765026212888,
|
|
"learning_rate": 3.8920418680266346e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1973356455564499,
|
|
"step": 855,
|
|
"valid_targets_mean": 3445.7,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 1.3716108452950557,
|
|
"grad_norm": 0.46224966479000895,
|
|
"learning_rate": 3.889449173722077e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20595070719718933,
|
|
"step": 860,
|
|
"valid_targets_mean": 3950.9,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 1.3795853269537481,
|
|
"grad_norm": 0.5114613011100089,
|
|
"learning_rate": 3.8868265994442694e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20371025800704956,
|
|
"step": 865,
|
|
"valid_targets_mean": 4787.2,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 1.38755980861244,
|
|
"grad_norm": 0.5095398969168226,
|
|
"learning_rate": 3.8841741866669126e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21170800924301147,
|
|
"step": 870,
|
|
"valid_targets_mean": 3554.1,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 1.3955342902711323,
|
|
"grad_norm": 0.4576902762391595,
|
|
"learning_rate": 3.881491977335577e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20836403965950012,
|
|
"step": 875,
|
|
"valid_targets_mean": 4527.4,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 1.4035087719298245,
|
|
"grad_norm": 0.43943299664639085,
|
|
"learning_rate": 3.878780013867038e-05,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1901773363351822,
|
|
"step": 880,
|
|
"valid_targets_mean": 4414.6,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 1.4114832535885167,
|
|
"grad_norm": 0.4861108757180198,
|
|
"learning_rate": 3.8760383391486074e-05,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19861257076263428,
|
|
"step": 885,
|
|
"valid_targets_mean": 4560.9,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 1.4194577352472089,
|
|
"grad_norm": 0.46432243246741783,
|
|
"learning_rate": 3.873266996537456e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20371460914611816,
|
|
"step": 890,
|
|
"valid_targets_mean": 4318.8,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 1.427432216905901,
|
|
"grad_norm": 0.5030299978723393,
|
|
"learning_rate": 3.8704660298599225e-05,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20227330923080444,
|
|
"step": 895,
|
|
"valid_targets_mean": 4333.7,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 1.4354066985645932,
|
|
"grad_norm": 1.3056912492125416,
|
|
"learning_rate": 3.867635483410827e-05,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18487682938575745,
|
|
"step": 900,
|
|
"valid_targets_mean": 3468.8,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 1.4433811802232854,
|
|
"grad_norm": 0.5486433235699878,
|
|
"learning_rate": 3.864775401952767e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20579613745212555,
|
|
"step": 905,
|
|
"valid_targets_mean": 3732.4,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 1.4513556618819776,
|
|
"grad_norm": 0.5039064482609992,
|
|
"learning_rate": 3.8618858307154085e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20290926098823547,
|
|
"step": 910,
|
|
"valid_targets_mean": 4168.0,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 1.4593301435406698,
|
|
"grad_norm": 0.4546777478170348,
|
|
"learning_rate": 3.8589668153947743e-05,
|
|
"loss": 0.219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19483226537704468,
|
|
"step": 915,
|
|
"valid_targets_mean": 4289.8,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 1.467304625199362,
|
|
"grad_norm": 0.5496151123592051,
|
|
"learning_rate": 3.8560184021525194e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1850767731666565,
|
|
"step": 920,
|
|
"valid_targets_mean": 3681.2,
|
|
"valid_targets_min": 813
|
|
},
|
|
{
|
|
"epoch": 1.4752791068580542,
|
|
"grad_norm": 0.528700704155516,
|
|
"learning_rate": 3.853040637615199e-05,
|
|
"loss": 0.1999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18653184175491333,
|
|
"step": 925,
|
|
"valid_targets_mean": 4080.4,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 1.4832535885167464,
|
|
"grad_norm": 0.457541888334622,
|
|
"learning_rate": 3.850033568873536e-05,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2181546837091446,
|
|
"step": 930,
|
|
"valid_targets_mean": 5131.0,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 1.4912280701754386,
|
|
"grad_norm": 0.49670304818772026,
|
|
"learning_rate": 3.8469972434816706e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20165763795375824,
|
|
"step": 935,
|
|
"valid_targets_mean": 4633.3,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 1.4992025518341308,
|
|
"grad_norm": 0.43778979813005714,
|
|
"learning_rate": 3.843931709456414e-05,
|
|
"loss": 0.2191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19123652577400208,
|
|
"step": 940,
|
|
"valid_targets_mean": 4302.0,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 1.507177033492823,
|
|
"grad_norm": 0.47314899120725346,
|
|
"learning_rate": 3.840837015276483e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19250443577766418,
|
|
"step": 945,
|
|
"valid_targets_mean": 4456.4,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 1.5151515151515151,
|
|
"grad_norm": 0.44711462883142566,
|
|
"learning_rate": 3.83771320988174e-05,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19445088505744934,
|
|
"step": 950,
|
|
"valid_targets_mean": 4738.4,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.5231259968102073,
|
|
"grad_norm": 0.4541887939160226,
|
|
"learning_rate": 3.834560342672413e-05,
|
|
"loss": 0.2098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19433936476707458,
|
|
"step": 955,
|
|
"valid_targets_mean": 4475.5,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 1.5311004784688995,
|
|
"grad_norm": 0.4929026519554441,
|
|
"learning_rate": 3.831378463508318e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1928250789642334,
|
|
"step": 960,
|
|
"valid_targets_mean": 3808.7,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 1.5390749601275917,
|
|
"grad_norm": 0.4926504106147897,
|
|
"learning_rate": 3.8281676227080694e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18234741687774658,
|
|
"step": 965,
|
|
"valid_targets_mean": 3902.6,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 1.547049441786284,
|
|
"grad_norm": 0.43977022573919494,
|
|
"learning_rate": 3.824927871048284e-05,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19778308272361755,
|
|
"step": 970,
|
|
"valid_targets_mean": 4501.8,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 1.555023923444976,
|
|
"grad_norm": 0.5612261605565512,
|
|
"learning_rate": 3.8216592597627797e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19665104150772095,
|
|
"step": 975,
|
|
"valid_targets_mean": 4264.5,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.5629984051036683,
|
|
"grad_norm": 0.5513943157494724,
|
|
"learning_rate": 3.818361840541761e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19387869536876678,
|
|
"step": 980,
|
|
"valid_targets_mean": 3928.9,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 1.5709728867623605,
|
|
"grad_norm": 0.48128273747500927,
|
|
"learning_rate": 3.815035665531008e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17882028222084045,
|
|
"step": 985,
|
|
"valid_targets_mean": 3590.7,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.5789473684210527,
|
|
"grad_norm": 0.4400657294459371,
|
|
"learning_rate": 3.811680787331047e-05,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2177354395389557,
|
|
"step": 990,
|
|
"valid_targets_mean": 4974.2,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 1.5869218500797448,
|
|
"grad_norm": 0.48902527936052437,
|
|
"learning_rate": 3.8082972589963175e-05,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2221558690071106,
|
|
"step": 995,
|
|
"valid_targets_mean": 4577.4,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 1.594896331738437,
|
|
"grad_norm": 0.4308268005136,
|
|
"learning_rate": 3.80488513403434e-05,
|
|
"loss": 0.2088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21312400698661804,
|
|
"step": 1000,
|
|
"valid_targets_mean": 5298.4,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 1.6028708133971292,
|
|
"grad_norm": 0.45902438037814475,
|
|
"learning_rate": 3.8014444664048616e-05,
|
|
"loss": 0.2077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19505807757377625,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4389.6,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 1.6108452950558214,
|
|
"grad_norm": 0.4778718629656399,
|
|
"learning_rate": 3.797975310519009e-05,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19982807338237762,
|
|
"step": 1010,
|
|
"valid_targets_mean": 4396.7,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 1.6188197767145136,
|
|
"grad_norm": 0.48622707383593916,
|
|
"learning_rate": 3.794477721238425e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18839652836322784,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3804.5,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 1.6267942583732058,
|
|
"grad_norm": 0.5665567635489686,
|
|
"learning_rate": 3.7909517538744e-05,
|
|
"loss": 0.2001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20590761303901672,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3231.2,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 1.6347687400318978,
|
|
"grad_norm": 0.5049656261407007,
|
|
"learning_rate": 3.7873974641870006e-05,
|
|
"loss": 0.2013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20510438084602356,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3790.9,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 1.6427432216905902,
|
|
"grad_norm": 0.45488255738317696,
|
|
"learning_rate": 3.7838149083841856e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20642077922821045,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4890.2,
|
|
"valid_targets_min": 2415
|
|
},
|
|
{
|
|
"epoch": 1.6507177033492821,
|
|
"grad_norm": 0.49029900036193563,
|
|
"learning_rate": 3.7802041431209166e-05,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1976226270198822,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4140.9,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 1.6586921850079746,
|
|
"grad_norm": 0.5011044831830856,
|
|
"learning_rate": 3.776565225498264e-05,
|
|
"loss": 0.2083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19664883613586426,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4005.1,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.4586604319491814,
|
|
"learning_rate": 3.7728982130625025e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19353047013282776,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4026.0,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 1.674641148325359,
|
|
"grad_norm": 0.5234617539751385,
|
|
"learning_rate": 3.769203163804202e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20830777287483215,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3961.0,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 1.682615629984051,
|
|
"grad_norm": 0.41519954391714675,
|
|
"learning_rate": 3.7654801361573076e-05,
|
|
"loss": 0.2051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23191536962985992,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5507.1,
|
|
"valid_targets_min": 3188
|
|
},
|
|
{
|
|
"epoch": 1.6905901116427433,
|
|
"grad_norm": 0.5049328781976531,
|
|
"learning_rate": 3.761729188998222e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19629567861557007,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3569.4,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 1.6985645933014353,
|
|
"grad_norm": 0.41722841535693206,
|
|
"learning_rate": 3.757950381644868e-05,
|
|
"loss": 0.1912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1903328001499176,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4751.0,
|
|
"valid_targets_min": 2434
|
|
},
|
|
{
|
|
"epoch": 1.7065390749601277,
|
|
"grad_norm": 0.4565879686383642,
|
|
"learning_rate": 3.7541437738557524e-05,
|
|
"loss": 0.2039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2171648144721985,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4532.4,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 1.7145135566188197,
|
|
"grad_norm": 0.42379264711657094,
|
|
"learning_rate": 3.750309425829022e-05,
|
|
"loss": 0.1977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1874992549419403,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4819.7,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 1.722488038277512,
|
|
"grad_norm": 0.45653864817870665,
|
|
"learning_rate": 3.746447398201512e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18069878220558167,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3834.9,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 1.730462519936204,
|
|
"grad_norm": 0.5621267832839261,
|
|
"learning_rate": 3.7425577520477846e-05,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22049005329608917,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3484.3,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 1.7384370015948964,
|
|
"grad_norm": 0.46732213705245634,
|
|
"learning_rate": 3.738640548879166e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21354782581329346,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4553.4,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 1.7464114832535884,
|
|
"grad_norm": 0.48068360854718983,
|
|
"learning_rate": 3.7346958506427696e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21564283967018127,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4019.9,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 1.7543859649122808,
|
|
"grad_norm": 0.4514305169605714,
|
|
"learning_rate": 3.730723719720523e-05,
|
|
"loss": 0.2042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20781952142715454,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4212.1,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 1.7623604465709728,
|
|
"grad_norm": 0.45030172661506773,
|
|
"learning_rate": 3.7267242189281746e-05,
|
|
"loss": 0.2088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20270563662052155,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4686.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 1.7703349282296652,
|
|
"grad_norm": 0.4623573073282462,
|
|
"learning_rate": 3.722697411514305e-05,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20517674088478088,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4608.8,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 1.7783094098883572,
|
|
"grad_norm": 0.47559932564785445,
|
|
"learning_rate": 3.7186433611593225e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21667692065238953,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3890.6,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 1.7862838915470496,
|
|
"grad_norm": 0.48853867691362973,
|
|
"learning_rate": 3.7145621319744614e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19717054069042206,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3605.8,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 1.7942583732057416,
|
|
"grad_norm": 0.4916053059930409,
|
|
"learning_rate": 3.7104537885007635e-05,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856386661529541,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3682.4,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 1.802232854864434,
|
|
"grad_norm": 0.4589326747086072,
|
|
"learning_rate": 3.7063183957080594e-05,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19615423679351807,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4138.4,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 1.810207336523126,
|
|
"grad_norm": 0.467625309156712,
|
|
"learning_rate": 3.7021560189939416e-05,
|
|
"loss": 0.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21701312065124512,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4492.3,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 1.8181818181818183,
|
|
"grad_norm": 0.4865781141772732,
|
|
"learning_rate": 3.697966724182729e-05,
|
|
"loss": 0.2015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21802526712417603,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4728.7,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 1.8261562998405103,
|
|
"grad_norm": 0.4328635012299769,
|
|
"learning_rate": 3.6937505775244246e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1958141177892685,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3928.8,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 1.8341307814992025,
|
|
"grad_norm": 0.3870188191194037,
|
|
"learning_rate": 3.689507645693674e-05,
|
|
"loss": 0.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1766659915447235,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4837.0,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 1.8421052631578947,
|
|
"grad_norm": 0.45551612548129705,
|
|
"learning_rate": 3.6852379957887025e-05,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17505857348442078,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4672.6,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 1.8500797448165869,
|
|
"grad_norm": 0.4210283880015843,
|
|
"learning_rate": 3.6809416953302606e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19575092196464539,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4542.7,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 1.858054226475279,
|
|
"grad_norm": 0.44631591817704225,
|
|
"learning_rate": 3.676618812260553e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1655678153038025,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4372.2,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 1.8660287081339713,
|
|
"grad_norm": 0.5663380180072773,
|
|
"learning_rate": 3.672269414942166e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21864964067935944,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4041.1,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 1.8740031897926634,
|
|
"grad_norm": 0.4252347480347789,
|
|
"learning_rate": 3.6678935721569825e-05,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17158284783363342,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4347.5,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 1.8819776714513556,
|
|
"grad_norm": 0.4737692445600209,
|
|
"learning_rate": 3.663491353105101e-05,
|
|
"loss": 0.2138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18983688950538635,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4607.4,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 1.8899521531100478,
|
|
"grad_norm": 0.40289904208748567,
|
|
"learning_rate": 3.659062827403735e-05,
|
|
"loss": 0.2139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19518983364105225,
|
|
"step": 1185,
|
|
"valid_targets_mean": 5030.7,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.89792663476874,
|
|
"grad_norm": 0.6292360603308512,
|
|
"learning_rate": 3.654608065086115e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17038318514823914,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3880.5,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 1.9059011164274322,
|
|
"grad_norm": 0.48651034116532005,
|
|
"learning_rate": 3.650127136600379e-05,
|
|
"loss": 0.192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19975776970386505,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4049.2,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 1.9138755980861244,
|
|
"grad_norm": 0.5289798631775916,
|
|
"learning_rate": 3.645620112808464e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20447996258735657,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3823.9,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 1.9218500797448166,
|
|
"grad_norm": 0.8251546888434402,
|
|
"learning_rate": 3.641087064984977e-05,
|
|
"loss": 0.1892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16011501848697662,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3738.0,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 1.9298245614035088,
|
|
"grad_norm": 0.4580536101431764,
|
|
"learning_rate": 3.636528064816073e-05,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1851533204317093,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3918.2,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 1.937799043062201,
|
|
"grad_norm": 0.5409518264196145,
|
|
"learning_rate": 3.6319431843983223e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2143070101737976,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3607.1,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 1.9457735247208932,
|
|
"grad_norm": 0.5060571249518142,
|
|
"learning_rate": 3.6273324962375676e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20672713220119476,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4794.8,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 1.9537480063795853,
|
|
"grad_norm": 0.49513051244273104,
|
|
"learning_rate": 3.622696073247777e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1882448047399521,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4190.8,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 1.9617224880382775,
|
|
"grad_norm": 0.4944031548603045,
|
|
"learning_rate": 3.6180339887498953e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21738609671592712,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4372.5,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 1.9696969696969697,
|
|
"grad_norm": 0.3901156493659394,
|
|
"learning_rate": 3.613346316470678e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17994585633277893,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5061.2,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 1.977671451355662,
|
|
"grad_norm": 0.5074437304760915,
|
|
"learning_rate": 3.60863313054153e-05,
|
|
"loss": 0.1965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17410674691200256,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3685.0,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 1.985645933014354,
|
|
"grad_norm": 0.4767214757499225,
|
|
"learning_rate": 3.6038945054973334e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2028658390045166,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4104.2,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 1.9936204146730463,
|
|
"grad_norm": 0.46368393342255176,
|
|
"learning_rate": 3.599130516275266e-05,
|
|
"loss": 0.2073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20923325419425964,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4206.9,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 2.0015948963317385,
|
|
"grad_norm": 0.519506088271778,
|
|
"learning_rate": 3.594341238213618e-05,
|
|
"loss": 0.1977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23402440547943115,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3437.1,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 2.0095693779904304,
|
|
"grad_norm": 0.5269865905988084,
|
|
"learning_rate": 3.589526747050601e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18529696762561798,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3578.9,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 2.017543859649123,
|
|
"grad_norm": 0.49040351067569954,
|
|
"learning_rate": 3.584687118923149e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1823689341545105,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3780.1,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 2.025518341307815,
|
|
"grad_norm": 0.45611287262240546,
|
|
"learning_rate": 3.579822430365714e-05,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19332489371299744,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4809.6,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 2.0334928229665072,
|
|
"grad_norm": 0.48964384094372065,
|
|
"learning_rate": 3.57493275830906e-05,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18570424616336823,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4225.8,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 2.041467304625199,
|
|
"grad_norm": 0.554839880803834,
|
|
"learning_rate": 3.570018180079037e-05,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17621077597141266,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5034.4,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 2.0494417862838916,
|
|
"grad_norm": 0.46964302061613045,
|
|
"learning_rate": 3.5650787733953715e-05,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16897454857826233,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4958.3,
|
|
"valid_targets_min": 2140
|
|
},
|
|
{
|
|
"epoch": 2.0574162679425836,
|
|
"grad_norm": 0.49125516627071425,
|
|
"learning_rate": 3.560114616370425e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20321087539196014,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4921.1,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 2.065390749601276,
|
|
"grad_norm": 0.5007972310093256,
|
|
"learning_rate": 3.555125787507964e-05,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21007579565048218,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3846.7,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 2.073365231259968,
|
|
"grad_norm": 0.4692791614460228,
|
|
"learning_rate": 3.550112365701921e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718657910823822,
|
|
"step": 1300,
|
|
"valid_targets_mean": 4610.1,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 2.0813397129186604,
|
|
"grad_norm": 0.4609945591837108,
|
|
"learning_rate": 3.545074430235142e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17682959139347076,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3913.4,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 2.0893141945773523,
|
|
"grad_norm": 0.6346413124048447,
|
|
"learning_rate": 3.540012060778137e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2052421271800995,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4634.0,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 2.0972886762360448,
|
|
"grad_norm": 0.8778374922632006,
|
|
"learning_rate": 3.534925337387816e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.181362122297287,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4312.7,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 2.1052631578947367,
|
|
"grad_norm": 0.49018678984448766,
|
|
"learning_rate": 3.529814340506226e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15189549326896667,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3797.4,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 2.113237639553429,
|
|
"grad_norm": 0.5159568152185725,
|
|
"learning_rate": 3.524679150959277e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15643197298049927,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2928.8,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 2.121212121212121,
|
|
"grad_norm": 0.4919410645352167,
|
|
"learning_rate": 3.519519849955466e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1932581067085266,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3966.1,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 2.1291866028708135,
|
|
"grad_norm": 0.46520546969641535,
|
|
"learning_rate": 3.514336519084591e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17339904606342316,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3927.1,
|
|
"valid_targets_min": 1914
|
|
},
|
|
{
|
|
"epoch": 2.1371610845295055,
|
|
"grad_norm": 0.4839751135411932,
|
|
"learning_rate": 3.509129240316461e-05,
|
|
"loss": 0.1928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19153396785259247,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3694.1,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 2.145135566188198,
|
|
"grad_norm": 0.46314065057948905,
|
|
"learning_rate": 3.5038980959995985e-05,
|
|
"loss": 0.1774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16088995337486267,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4204.6,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 2.15311004784689,
|
|
"grad_norm": 0.4451162065481105,
|
|
"learning_rate": 3.498643168859941e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1628057360649109,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3865.2,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 2.1610845295055823,
|
|
"grad_norm": 0.407226097580985,
|
|
"learning_rate": 3.493364541999529e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17469143867492676,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4585.7,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 2.1690590111642742,
|
|
"grad_norm": 0.4940624921768106,
|
|
"learning_rate": 3.488062298895194e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2189444750547409,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3964.8,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 2.1770334928229667,
|
|
"grad_norm": 0.44042123584132087,
|
|
"learning_rate": 3.482736523397237e-05,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17309799790382385,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3845.8,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 2.1850079744816586,
|
|
"grad_norm": 0.4279061814298267,
|
|
"learning_rate": 3.4773872997281026e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17056313157081604,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4312.1,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 2.192982456140351,
|
|
"grad_norm": 0.470529122065064,
|
|
"learning_rate": 3.472014712481048e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18978451192378998,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4365.9,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 2.200956937799043,
|
|
"grad_norm": 0.48678062674774814,
|
|
"learning_rate": 3.466618846618806e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17547622323036194,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4055.9,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 2.2089314194577354,
|
|
"grad_norm": 0.47946864591799493,
|
|
"learning_rate": 3.461199787472238e-05,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18118873238563538,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4465.2,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 2.2169059011164274,
|
|
"grad_norm": 0.5331785246246664,
|
|
"learning_rate": 3.455757620738989e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16469255089759827,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4402.5,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 2.22488038277512,
|
|
"grad_norm": 0.5683494920373958,
|
|
"learning_rate": 3.450292432482127e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20690706372261047,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3103.3,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 2.2328548644338118,
|
|
"grad_norm": 0.5162004102525409,
|
|
"learning_rate": 3.444804309128789e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1757841855287552,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4547.0,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 2.240829346092504,
|
|
"grad_norm": 0.46834618305506404,
|
|
"learning_rate": 3.439293337468808e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16722646355628967,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3848.1,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 2.248803827751196,
|
|
"grad_norm": 0.4867042847347927,
|
|
"learning_rate": 3.4337596046533426e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1993994116783142,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4098.9,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 2.2567783094098885,
|
|
"grad_norm": 0.5156576852812531,
|
|
"learning_rate": 3.4282031981935e-05,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1997295469045639,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4022.2,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 2.2647527910685805,
|
|
"grad_norm": 0.4731562906212673,
|
|
"learning_rate": 3.42262420595895e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21227261424064636,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4413.1,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 2.2727272727272725,
|
|
"grad_norm": 0.42860117305838263,
|
|
"learning_rate": 3.417022716176539e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1850120574235916,
|
|
"step": 1425,
|
|
"valid_targets_mean": 4540.2,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 2.280701754385965,
|
|
"grad_norm": 0.45606962391361333,
|
|
"learning_rate": 3.411398817428889e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21006545424461365,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5374.3,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 2.2886762360446573,
|
|
"grad_norm": 0.41589658400002255,
|
|
"learning_rate": 3.4057525986530016e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21275420486927032,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5168.5,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 2.2966507177033493,
|
|
"grad_norm": 0.49426732712273036,
|
|
"learning_rate": 3.400084149138851e-05,
|
|
"loss": 0.1888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19159185886383057,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3927.2,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 2.3046251993620412,
|
|
"grad_norm": 0.44126516688368794,
|
|
"learning_rate": 3.394393558527969e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18314291536808014,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4429.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 2.3125996810207337,
|
|
"grad_norm": 0.5697872973779129,
|
|
"learning_rate": 3.388680916812031e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.184618279337883,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4403.7,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 2.320574162679426,
|
|
"grad_norm": 0.41256388130487637,
|
|
"learning_rate": 3.382946314331429e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1506471186876297,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5022.7,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 2.328548644338118,
|
|
"grad_norm": 0.4836224476402349,
|
|
"learning_rate": 3.377189841773848e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18470489978790283,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4299.0,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 2.33652312599681,
|
|
"grad_norm": 0.45936715788216503,
|
|
"learning_rate": 3.371411590172827e-05,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19627892971038818,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4471.8,
|
|
"valid_targets_min": 1933
|
|
},
|
|
{
|
|
"epoch": 2.3444976076555024,
|
|
"grad_norm": 0.5172983323598414,
|
|
"learning_rate": 3.365611650906321e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16794899106025696,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3185.7,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 2.352472089314195,
|
|
"grad_norm": 0.42021952218197095,
|
|
"learning_rate": 3.359790115695259e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1857813596725464,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5048.8,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 2.360446570972887,
|
|
"grad_norm": 0.5120057713456933,
|
|
"learning_rate": 3.353947076602088e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16809436678886414,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3932.3,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 2.3684210526315788,
|
|
"grad_norm": 0.4312234325941399,
|
|
"learning_rate": 3.34808262602932e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16601577401161194,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4678.6,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 2.376395534290271,
|
|
"grad_norm": 0.45816102695690875,
|
|
"learning_rate": 3.342196856718074e-05,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18682478368282318,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4355.9,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 2.384370015948963,
|
|
"grad_norm": 0.5445318121560287,
|
|
"learning_rate": 3.336289861746602e-05,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16605886816978455,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3463.1,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 2.3923444976076556,
|
|
"grad_norm": 0.41620331471060057,
|
|
"learning_rate": 3.330361734528823e-05,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17274364829063416,
|
|
"step": 1500,
|
|
"valid_targets_mean": 5080.4,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 2.4003189792663475,
|
|
"grad_norm": 0.4525728695475107,
|
|
"learning_rate": 3.324412568812844e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1925831437110901,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4351.5,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 2.40829346092504,
|
|
"grad_norm": 0.4499220665108352,
|
|
"learning_rate": 3.318442458679477e-05,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1566651612520218,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3807.0,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 2.416267942583732,
|
|
"grad_norm": 0.4669093793648422,
|
|
"learning_rate": 3.312451498540751e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1781565546989441,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4532.7,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 2.4242424242424243,
|
|
"grad_norm": 0.5198392020262639,
|
|
"learning_rate": 3.306439783138421e-05,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18451040983200073,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3543.7,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 2.4322169059011163,
|
|
"grad_norm": 0.5976391999962071,
|
|
"learning_rate": 3.3004074075424666e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17789512872695923,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4801.3,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 2.4401913875598087,
|
|
"grad_norm": 0.4612874498343982,
|
|
"learning_rate": 3.29435446714959e-05,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19112403690814972,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4440.7,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 2.4481658692185007,
|
|
"grad_norm": 0.46332566228851163,
|
|
"learning_rate": 3.288281057681709e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1989520788192749,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4258.1,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 2.456140350877193,
|
|
"grad_norm": 0.45323489367655784,
|
|
"learning_rate": 3.28218727518444e-05,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18074513971805573,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4876.2,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 2.464114832535885,
|
|
"grad_norm": 0.4782853908134842,
|
|
"learning_rate": 3.2760732160255835e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20088213682174683,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4526.9,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 2.4720893141945774,
|
|
"grad_norm": 0.4440793855907412,
|
|
"learning_rate": 3.269938976893595e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17873185873031616,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4363.1,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 2.4800637958532694,
|
|
"grad_norm": 0.46438965956348993,
|
|
"learning_rate": 3.2637846547960596e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17281189560890198,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3993.3,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 2.488038277511962,
|
|
"grad_norm": 0.48168318397194837,
|
|
"learning_rate": 3.2576103470581564e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18900908529758453,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3762.0,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 2.496012759170654,
|
|
"grad_norm": 0.47019410192144145,
|
|
"learning_rate": 3.25141615132112e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1827808916568756,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3981.6,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 2.503987240829346,
|
|
"grad_norm": 0.49388451517599286,
|
|
"learning_rate": 3.245202165540697e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17718592286109924,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3785.9,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 2.511961722488038,
|
|
"grad_norm": 0.43255096922902325,
|
|
"learning_rate": 3.238968487985594e-05,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19639550149440765,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4851.2,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 2.5199362041467306,
|
|
"grad_norm": 0.4685043591834641,
|
|
"learning_rate": 3.232715217235927e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16756711900234222,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4069.0,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 2.5279106858054226,
|
|
"grad_norm": 0.5505112356406169,
|
|
"learning_rate": 3.226442452181662e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16592299938201904,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3441.1,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 2.535885167464115,
|
|
"grad_norm": 0.41784371935534703,
|
|
"learning_rate": 3.220150292021049e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15617865324020386,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4339.2,
|
|
"valid_targets_min": 1783
|
|
},
|
|
{
|
|
"epoch": 2.543859649122807,
|
|
"grad_norm": 0.4172123616120955,
|
|
"learning_rate": 3.213838836259055e-05,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1682884842157364,
|
|
"step": 1595,
|
|
"valid_targets_mean": 5281.4,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 2.5518341307814993,
|
|
"grad_norm": 0.4276002779595094,
|
|
"learning_rate": 3.2075081847057886e-05,
|
|
"loss": 0.1758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15267080068588257,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4200.4,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 2.5598086124401913,
|
|
"grad_norm": 0.43036761439685606,
|
|
"learning_rate": 3.201158437474925e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19284360110759735,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4582.4,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 2.5677830940988837,
|
|
"grad_norm": 0.46693016249804425,
|
|
"learning_rate": 3.194789694982119e-05,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17372292280197144,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4306.8,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 2.5757575757575757,
|
|
"grad_norm": 0.5650979854032511,
|
|
"learning_rate": 3.1884020579434216e-05,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18677470088005066,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3293.8,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 2.583732057416268,
|
|
"grad_norm": 0.45212575221450013,
|
|
"learning_rate": 3.181995627373679e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18418222665786743,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3959.9,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 2.59170653907496,
|
|
"grad_norm": 0.44905388524472045,
|
|
"learning_rate": 3.1755705045849465e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15992020070552826,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4001.1,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 2.5996810207336525,
|
|
"grad_norm": 0.49184151808837695,
|
|
"learning_rate": 3.1691267911848765e-05,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20928852260112762,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4297.2,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 2.6076555023923444,
|
|
"grad_norm": 0.4976310508881261,
|
|
"learning_rate": 3.1626645890751167e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17502568662166595,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3787.2,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 2.6156299840510364,
|
|
"grad_norm": 0.5497089107214261,
|
|
"learning_rate": 3.156184000449697e-05,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1869225800037384,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4371.3,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 2.623604465709729,
|
|
"grad_norm": 0.5084849193280979,
|
|
"learning_rate": 3.149685127793415e-05,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19224831461906433,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3351.9,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 2.6315789473684212,
|
|
"grad_norm": 0.47512659399529006,
|
|
"learning_rate": 3.143168073880214e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17445094883441925,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3682.1,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 2.639553429027113,
|
|
"grad_norm": 0.3970470499213257,
|
|
"learning_rate": 3.1366329417715556e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893550604581833,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4963.8,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 2.647527910685805,
|
|
"grad_norm": 0.9555744019218789,
|
|
"learning_rate": 3.1300798348147954e-05,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1694471836090088,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4084.9,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 2.6555023923444976,
|
|
"grad_norm": 0.4753811647502407,
|
|
"learning_rate": 3.123508856641542e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.173654705286026,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3810.8,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 2.66347687400319,
|
|
"grad_norm": 0.47905768137552995,
|
|
"learning_rate": 3.116920111166025e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18475550413131714,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3539.8,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 2.671451355661882,
|
|
"grad_norm": 0.5353090270082027,
|
|
"learning_rate": 3.1103137025834456e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16288885474205017,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3948.3,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 2.679425837320574,
|
|
"grad_norm": 0.4480707130807755,
|
|
"learning_rate": 3.103689735368333e-05,
|
|
"loss": 0.1793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1761370152235031,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4556.8,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.6874003189792663,
|
|
"grad_norm": 0.4864805722693758,
|
|
"learning_rate": 3.097048314272889e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1910707652568817,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3888.9,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 2.6953748006379588,
|
|
"grad_norm": 0.4437445222147613,
|
|
"learning_rate": 3.090389544325335e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.173038050532341,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4338.5,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 2.7033492822966507,
|
|
"grad_norm": 0.42911286735663395,
|
|
"learning_rate": 3.08371353082825e-05,
|
|
"loss": 0.173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17834161221981049,
|
|
"step": 1695,
|
|
"valid_targets_mean": 4724.9,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 2.7113237639553427,
|
|
"grad_norm": 0.4571143283395991,
|
|
"learning_rate": 3.0770203793568994e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18316294252872467,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4421.2,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 2.719298245614035,
|
|
"grad_norm": 0.40453772129881677,
|
|
"learning_rate": 3.0703101957575765e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1506449580192566,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4804.1,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 2.7272727272727275,
|
|
"grad_norm": 0.42354702376015874,
|
|
"learning_rate": 3.0635830861459204e-05,
|
|
"loss": 0.1969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19284044206142426,
|
|
"step": 1710,
|
|
"valid_targets_mean": 4895.8,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 2.7352472089314195,
|
|
"grad_norm": 0.41732341407575596,
|
|
"learning_rate": 3.056839156905239e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1627749353647232,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4072.1,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 2.7432216905901115,
|
|
"grad_norm": 0.45306515005728004,
|
|
"learning_rate": 3.0500785146848303e-05,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1957072764635086,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5065.7,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 2.751196172248804,
|
|
"grad_norm": 0.49158127723486994,
|
|
"learning_rate": 3.04330126639829e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21867769956588745,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4066.2,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 2.7591706539074963,
|
|
"grad_norm": 0.4407077966369007,
|
|
"learning_rate": 3.0365075192218278e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1759914755821228,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3877.3,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 2.7671451355661882,
|
|
"grad_norm": 0.42265790823347477,
|
|
"learning_rate": 3.0296973805925663e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16038598120212555,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3991.6,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 2.77511961722488,
|
|
"grad_norm": 0.4032884842933645,
|
|
"learning_rate": 3.022870958206845e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16309818625450134,
|
|
"step": 1740,
|
|
"valid_targets_mean": 5105.6,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 2.7830940988835726,
|
|
"grad_norm": 0.44017044941850964,
|
|
"learning_rate": 3.0160283600185168e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17103657126426697,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4181.8,
|
|
"valid_targets_min": 2168
|
|
},
|
|
{
|
|
"epoch": 2.7910685805422646,
|
|
"grad_norm": 0.4908848562785871,
|
|
"learning_rate": 3.0091696942372412e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17112907767295837,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5055.2,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 2.799043062200957,
|
|
"grad_norm": 0.4501854405957578,
|
|
"learning_rate": 3.002295069326772e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2242158055305481,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4598.9,
|
|
"valid_targets_min": 1663
|
|
},
|
|
{
|
|
"epoch": 2.807017543859649,
|
|
"grad_norm": 0.45529810406476556,
|
|
"learning_rate": 2.9954045940032423e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1715032309293747,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3785.8,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 2.8149920255183414,
|
|
"grad_norm": 0.419471310020662,
|
|
"learning_rate": 2.988498377233446e-05,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1674804836511612,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4545.6,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 2.8229665071770333,
|
|
"grad_norm": 0.49471261198607247,
|
|
"learning_rate": 2.981576528233114e-05,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16480019688606262,
|
|
"step": 1770,
|
|
"valid_targets_mean": 3463.1,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 2.8309409888357258,
|
|
"grad_norm": 0.43101507558197494,
|
|
"learning_rate": 2.9746391564651867e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1700749397277832,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3947.5,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 2.8389154704944177,
|
|
"grad_norm": 0.47378950386747526,
|
|
"learning_rate": 2.9676863716380845e-05,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19406452775001526,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4588.1,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 2.84688995215311,
|
|
"grad_norm": 0.452665314271665,
|
|
"learning_rate": 2.9607182837039697e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16163918375968933,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3660.5,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 2.854864433811802,
|
|
"grad_norm": 0.4002297328897464,
|
|
"learning_rate": 2.9537350028570126e-05,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15586945414543152,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4642.9,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 2.8628389154704945,
|
|
"grad_norm": 0.45851045505769783,
|
|
"learning_rate": 2.946736639531643e-05,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17294219136238098,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4074.2,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 2.8708133971291865,
|
|
"grad_norm": 0.40460789347679105,
|
|
"learning_rate": 2.9397233044008092e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15655416250228882,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4750.2,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 2.878787878787879,
|
|
"grad_norm": 0.43004503809969075,
|
|
"learning_rate": 2.9326951083742243e-05,
|
|
"loss": 0.1928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1678469032049179,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4040.9,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 2.886762360446571,
|
|
"grad_norm": 0.4386772227886357,
|
|
"learning_rate": 2.925652162596613e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18081369996070862,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4572.5,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 2.8947368421052633,
|
|
"grad_norm": 0.4594707135226717,
|
|
"learning_rate": 2.9185945784459558e-05,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.171533465385437,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3714.7,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 2.9027113237639552,
|
|
"grad_norm": 0.4393834579336601,
|
|
"learning_rate": 2.9115224675317252e-05,
|
|
"loss": 0.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.179039865732193,
|
|
"step": 1820,
|
|
"valid_targets_mean": 4410.4,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 2.9106858054226477,
|
|
"grad_norm": 0.6384724564509537,
|
|
"learning_rate": 2.9044359416931206e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21434181928634644,
|
|
"step": 1825,
|
|
"valid_targets_mean": 4121.4,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 2.9186602870813396,
|
|
"grad_norm": 0.4085176048547987,
|
|
"learning_rate": 2.897335112997302e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17466455698013306,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4953.9,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 2.926634768740032,
|
|
"grad_norm": 0.4035884199515583,
|
|
"learning_rate": 2.8902200937376173e-05,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15716539323329926,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4157.4,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 2.934609250398724,
|
|
"grad_norm": 0.42116632110197916,
|
|
"learning_rate": 2.8830909964318242e-05,
|
|
"loss": 0.1873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19213268160820007,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4495.4,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 2.9425837320574164,
|
|
"grad_norm": 0.4672392392151659,
|
|
"learning_rate": 2.875947933820312e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20701715350151062,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4326.4,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 2.9505582137161084,
|
|
"grad_norm": 0.41984821875482387,
|
|
"learning_rate": 2.868791018864321e-05,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1970323622226715,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5258.4,
|
|
"valid_targets_min": 3503
|
|
},
|
|
{
|
|
"epoch": 2.958532695374801,
|
|
"grad_norm": 0.4141821804013884,
|
|
"learning_rate": 2.861620364744151e-05,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17296859622001648,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4434.2,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 2.9665071770334928,
|
|
"grad_norm": 0.4076233446452013,
|
|
"learning_rate": 2.854436084857379e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17421773076057434,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4778.6,
|
|
"valid_targets_min": 2269
|
|
},
|
|
{
|
|
"epoch": 2.974481658692185,
|
|
"grad_norm": 0.5172206460764991,
|
|
"learning_rate": 2.847238292817057e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1736307442188263,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4658.1,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 2.982456140350877,
|
|
"grad_norm": 0.46017617695819213,
|
|
"learning_rate": 2.8400271024499212e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.191249817609787,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4299.7,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 2.990430622009569,
|
|
"grad_norm": 0.46199360392557665,
|
|
"learning_rate": 2.832802627794593e-05,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14475810527801514,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3926.4,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 2.9984051036682615,
|
|
"grad_norm": 0.4246729180705717,
|
|
"learning_rate": 2.8255649830997704e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16516268253326416,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4231.8,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 3.006379585326954,
|
|
"grad_norm": 0.4520273317116176,
|
|
"learning_rate": 2.8183142828224238e-05,
|
|
"loss": 0.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18012793362140656,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4729.9,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 3.014354066985646,
|
|
"grad_norm": 0.4323654161545945,
|
|
"learning_rate": 2.811050641625988e-05,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16575147211551666,
|
|
"step": 1890,
|
|
"valid_targets_mean": 5525.6,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 3.0223285486443383,
|
|
"grad_norm": 0.526877716961556,
|
|
"learning_rate": 2.8037741743785458e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15317729115486145,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3065.1,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 3.0303030303030303,
|
|
"grad_norm": 0.4563775946766069,
|
|
"learning_rate": 2.796484996151013e-05,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1825929582118988,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4303.6,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.0382775119617227,
|
|
"grad_norm": 0.510145254220633,
|
|
"learning_rate": 2.7891832222153188e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17809626460075378,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4312.4,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 3.0462519936204147,
|
|
"grad_norm": 0.4511498819756649,
|
|
"learning_rate": 2.7818689680425807e-05,
|
|
"loss": 0.1594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1656397581100464,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4301.7,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 3.054226475279107,
|
|
"grad_norm": 0.4531154594779209,
|
|
"learning_rate": 2.7745423493012827e-05,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18964077532291412,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4744.4,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 3.062200956937799,
|
|
"grad_norm": 0.49158888631702524,
|
|
"learning_rate": 2.767203481855441e-05,
|
|
"loss": 0.1758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18415114283561707,
|
|
"step": 1920,
|
|
"valid_targets_mean": 4203.1,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 3.0701754385964914,
|
|
"grad_norm": 0.5370963017271485,
|
|
"learning_rate": 2.7598524817627764e-05,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19024474918842316,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3489.6,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 3.0781499202551834,
|
|
"grad_norm": 0.47811106669503245,
|
|
"learning_rate": 2.7524894652728754e-05,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16935645043849945,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3866.7,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 3.0861244019138754,
|
|
"grad_norm": 0.46078009694442296,
|
|
"learning_rate": 2.7451145488253544e-05,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20130181312561035,
|
|
"step": 1935,
|
|
"valid_targets_mean": 5193.8,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 3.094098883572568,
|
|
"grad_norm": 0.505888529956278,
|
|
"learning_rate": 2.7377278490480157e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15962490439414978,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4584.7,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 3.1020733652312598,
|
|
"grad_norm": 0.46842693356793075,
|
|
"learning_rate": 2.730329482755006e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15817397832870483,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4386.8,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 3.110047846889952,
|
|
"grad_norm": 0.4992321815662437,
|
|
"learning_rate": 2.7229195669449667e-05,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17625224590301514,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3426.9,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 3.118022328548644,
|
|
"grad_norm": 0.5009553283649976,
|
|
"learning_rate": 2.7154982187991855e-05,
|
|
"loss": 0.1616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1838223934173584,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3949.8,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 3.1259968102073366,
|
|
"grad_norm": 0.47496049446739746,
|
|
"learning_rate": 2.7080655556797406e-05,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1881379783153534,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3986.9,
|
|
"valid_targets_min": 235
|
|
},
|
|
{
|
|
"epoch": 3.1339712918660285,
|
|
"grad_norm": 0.4523108206354683,
|
|
"learning_rate": 2.700621695127649e-05,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1722801923751831,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3984.7,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 3.141945773524721,
|
|
"grad_norm": 0.4393426656032285,
|
|
"learning_rate": 2.693166754861003e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1484699845314026,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4479.4,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 3.149920255183413,
|
|
"grad_norm": 0.4252053047018431,
|
|
"learning_rate": 2.685700852773113e-05,
|
|
"loss": 0.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13861039280891418,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4281.2,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 3.1578947368421053,
|
|
"grad_norm": 0.46021674595154327,
|
|
"learning_rate": 2.6782241069306395e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16409502923488617,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4018.0,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 3.1658692185007973,
|
|
"grad_norm": 0.4479020851252004,
|
|
"learning_rate": 2.6707366355717268e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15245331823825836,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3932.9,
|
|
"valid_targets_min": 1842
|
|
},
|
|
{
|
|
"epoch": 3.1738437001594897,
|
|
"grad_norm": 0.43948809549480133,
|
|
"learning_rate": 2.663238557104136e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1566871702671051,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4768.4,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 3.1818181818181817,
|
|
"grad_norm": 0.4025143431103952,
|
|
"learning_rate": 2.655729990103368e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15614749491214752,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4999.6,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 3.189792663476874,
|
|
"grad_norm": 0.43647717032348127,
|
|
"learning_rate": 2.648211053310792e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14193618297576904,
|
|
"step": 2000,
|
|
"valid_targets_mean": 4367.1,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 3.197767145135566,
|
|
"grad_norm": 0.4769822995434698,
|
|
"learning_rate": 2.6406818656317654e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15051321685314178,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3930.6,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 3.2057416267942584,
|
|
"grad_norm": 0.5031629564541453,
|
|
"learning_rate": 2.6331425461337557e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20424841344356537,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4543.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 3.2137161084529504,
|
|
"grad_norm": 0.7378168615789559,
|
|
"learning_rate": 2.6255932140444546e-05,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18289820849895477,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3856.7,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 3.221690590111643,
|
|
"grad_norm": 0.43412240781080647,
|
|
"learning_rate": 2.618033988749895e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16081905364990234,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4748.2,
|
|
"valid_targets_min": 1516
|
|
},
|
|
{
|
|
"epoch": 3.229665071770335,
|
|
"grad_norm": 0.5086844897803551,
|
|
"learning_rate": 2.6104649897925622e-05,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1750149428844452,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4165.4,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 3.237639553429027,
|
|
"grad_norm": 0.46478010424069277,
|
|
"learning_rate": 2.602886336869503e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1662854254245758,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4036.5,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 3.245614035087719,
|
|
"grad_norm": 0.6328566126812523,
|
|
"learning_rate": 2.595298149830433e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14597457647323608,
|
|
"step": 2035,
|
|
"valid_targets_mean": 4552.4,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 3.2535885167464116,
|
|
"grad_norm": 0.4961300261282645,
|
|
"learning_rate": 2.5877005486758424e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.197544127702713,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3992.8,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 3.2615629984051036,
|
|
"grad_norm": 0.485403892584791,
|
|
"learning_rate": 2.5800936535550954e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1379692554473877,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4061.4,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 3.269537480063796,
|
|
"grad_norm": 0.5005005141038639,
|
|
"learning_rate": 2.5724775847645345e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16013890504837036,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3665.0,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 3.277511961722488,
|
|
"grad_norm": 0.43576462970929086,
|
|
"learning_rate": 2.5648524627455738e-05,
|
|
"loss": 0.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15433132648468018,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4429.9,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 3.2854864433811803,
|
|
"grad_norm": 0.45502204625071513,
|
|
"learning_rate": 2.557218408082798e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18158936500549316,
|
|
"step": 2060,
|
|
"valid_targets_mean": 5073.0,
|
|
"valid_targets_min": 3128
|
|
},
|
|
{
|
|
"epoch": 3.2934609250398723,
|
|
"grad_norm": 0.48990064469106065,
|
|
"learning_rate": 2.5495755415020504e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16774031519889832,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4767.1,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 3.3014354066985647,
|
|
"grad_norm": 0.4139306192252983,
|
|
"learning_rate": 2.5419239838685325e-05,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13758450746536255,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4740.1,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 3.3094098883572567,
|
|
"grad_norm": 0.46559383286959294,
|
|
"learning_rate": 2.5342638561848817e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16002754867076874,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4318.2,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 3.317384370015949,
|
|
"grad_norm": 0.452506749895589,
|
|
"learning_rate": 2.526595279589265e-05,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15410088002681732,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4514.7,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 3.325358851674641,
|
|
"grad_norm": 0.6275137907871954,
|
|
"learning_rate": 2.5189183753534634e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15736757218837738,
|
|
"step": 2085,
|
|
"valid_targets_mean": 5075.4,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.4518729419529901,
|
|
"learning_rate": 2.5112332648809495e-05,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17312079668045044,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4445.9,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.3413078149920254,
|
|
"grad_norm": 0.4794701140141355,
|
|
"learning_rate": 2.503540069704973e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15761785209178925,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3327.2,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 3.349282296650718,
|
|
"grad_norm": 0.41177674327783687,
|
|
"learning_rate": 2.4958389114866326e-05,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15020941197872162,
|
|
"step": 2100,
|
|
"valid_targets_mean": 5081.7,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 3.35725677830941,
|
|
"grad_norm": 0.4377574454047289,
|
|
"learning_rate": 2.488129912012958e-05,
|
|
"loss": 0.1527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1599995493888855,
|
|
"step": 2105,
|
|
"valid_targets_mean": 4370.5,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 3.3652312599681022,
|
|
"grad_norm": 0.5007461571446744,
|
|
"learning_rate": 2.4804131931949823e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18287134170532227,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4095.7,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 3.373205741626794,
|
|
"grad_norm": 0.510519774232625,
|
|
"learning_rate": 2.4726888770658103e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15940503776073456,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3793.1,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 3.3811802232854866,
|
|
"grad_norm": 0.43622723527511154,
|
|
"learning_rate": 2.4649570857786928e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13715532422065735,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4163.7,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 3.3891547049441786,
|
|
"grad_norm": 0.6123027027256595,
|
|
"learning_rate": 2.4572179416050953e-05,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16690824925899506,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2991.3,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 3.397129186602871,
|
|
"grad_norm": 0.5007925011224601,
|
|
"learning_rate": 2.449471566932761e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17128664255142212,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3593.0,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 3.405103668261563,
|
|
"grad_norm": 0.3962774808910384,
|
|
"learning_rate": 2.4417180842637764e-05,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1485387682914734,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5086.2,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 3.4130781499202554,
|
|
"grad_norm": 0.4230929315784565,
|
|
"learning_rate": 2.4339576162126362e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1680546998977661,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4294.2,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 3.4210526315789473,
|
|
"grad_norm": 0.4415415566146335,
|
|
"learning_rate": 2.4261902855043027e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16772609949111938,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4842.6,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 3.4290271132376393,
|
|
"grad_norm": 0.43828675722303184,
|
|
"learning_rate": 2.418416214972265e-05,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19463646411895752,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4911.9,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 3.4370015948963317,
|
|
"grad_norm": 0.4577068882859903,
|
|
"learning_rate": 2.4106355275565937e-05,
|
|
"loss": 0.1666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18860068917274475,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4501.9,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 3.444976076555024,
|
|
"grad_norm": 0.4376339947453344,
|
|
"learning_rate": 2.4028483463020053e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340947300195694,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4086.2,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 3.452950558213716,
|
|
"grad_norm": 0.454806156094289,
|
|
"learning_rate": 2.3950547943559056e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16001570224761963,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4962.9,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 3.460925039872408,
|
|
"grad_norm": 0.45460642856632766,
|
|
"learning_rate": 2.3872549949664493e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15232044458389282,
|
|
"step": 2170,
|
|
"valid_targets_mean": 4010.1,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 3.4688995215311005,
|
|
"grad_norm": 0.5242691213969036,
|
|
"learning_rate": 2.3794490714805897e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17195919156074524,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3983.2,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 3.476874003189793,
|
|
"grad_norm": 0.4739619712180746,
|
|
"learning_rate": 2.3716371473421242e-05,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15904149413108826,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3595.8,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 3.484848484848485,
|
|
"grad_norm": 0.43101264223903824,
|
|
"learning_rate": 2.36381934608975e-05,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1860746443271637,
|
|
"step": 2185,
|
|
"valid_targets_mean": 5064.6,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 3.492822966507177,
|
|
"grad_norm": 0.5398407568197218,
|
|
"learning_rate": 2.3559957913551014e-05,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16632530093193054,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3722.2,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.5007974481658692,
|
|
"grad_norm": 0.45293140792376,
|
|
"learning_rate": 2.3481666068607994e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16119389235973358,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4607.5,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 3.5087719298245617,
|
|
"grad_norm": 0.45106253752260966,
|
|
"learning_rate": 2.3403319164184986e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17215946316719055,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4280.8,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 3.5167464114832536,
|
|
"grad_norm": 0.46680610490292446,
|
|
"learning_rate": 2.3324918439269206e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16993430256843567,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4270.3,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 3.5247208931419456,
|
|
"grad_norm": 0.4264161927809688,
|
|
"learning_rate": 2.3246465133699024e-05,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1665218025445938,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4959.2,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 3.532695374800638,
|
|
"grad_norm": 0.407029871149465,
|
|
"learning_rate": 2.31679604881443e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15708976984024048,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5416.6,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 3.5406698564593304,
|
|
"grad_norm": 0.41752567173892347,
|
|
"learning_rate": 2.3089405744086823e-05,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13425885140895844,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4462.0,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 3.5486443381180224,
|
|
"grad_norm": 0.4692487414825984,
|
|
"learning_rate": 2.3010802143800626e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16589203476905823,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4523.8,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 3.5566188197767143,
|
|
"grad_norm": 0.46286582668043214,
|
|
"learning_rate": 2.2932150930332363e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1823793351650238,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4480.4,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 3.5645933014354068,
|
|
"grad_norm": 0.4798773755938297,
|
|
"learning_rate": 2.2853453347481635e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1499791443347931,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3986.0,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 3.5725677830940987,
|
|
"grad_norm": 0.42646341211145544,
|
|
"learning_rate": 2.277471063978137e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1509476751089096,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4191.2,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 3.580542264752791,
|
|
"grad_norm": 0.4462910504530878,
|
|
"learning_rate": 2.2695924052478065e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17410409450531006,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4789.9,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 3.588516746411483,
|
|
"grad_norm": 0.4622778859744224,
|
|
"learning_rate": 2.2617094831512167e-05,
|
|
"loss": 0.1666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15502643585205078,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3819.8,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 3.5964912280701755,
|
|
"grad_norm": 0.44193695965108665,
|
|
"learning_rate": 2.253822422349831e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.169322669506073,
|
|
"step": 2255,
|
|
"valid_targets_mean": 4659.6,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 3.6044657097288675,
|
|
"grad_norm": 0.424266516953393,
|
|
"learning_rate": 2.2459313475705645e-05,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13052059710025787,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4785.7,
|
|
"valid_targets_min": 2090
|
|
},
|
|
{
|
|
"epoch": 3.61244019138756,
|
|
"grad_norm": 0.4943788110229207,
|
|
"learning_rate": 2.23803638360381e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1514415144920349,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3811.1,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 3.620414673046252,
|
|
"grad_norm": 0.4760998472512275,
|
|
"learning_rate": 2.2301376553014625e-05,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14183993637561798,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4127.6,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 3.6283891547049443,
|
|
"grad_norm": 0.44311008935115587,
|
|
"learning_rate": 2.2222352875749493e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16410455107688904,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4465.2,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 3.6363636363636362,
|
|
"grad_norm": 0.446961968586708,
|
|
"learning_rate": 2.214329405393249e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1542586386203766,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4753.0,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 3.6443381180223287,
|
|
"grad_norm": 0.447183714568317,
|
|
"learning_rate": 2.2064201337809212e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1685873419046402,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4194.9,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 3.6523125996810206,
|
|
"grad_norm": 0.45144987141764226,
|
|
"learning_rate": 2.1985075978161236e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1775444746017456,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4095.1,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 3.660287081339713,
|
|
"grad_norm": 0.46951692419492547,
|
|
"learning_rate": 2.1905919226286385e-05,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15636685490608215,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4320.4,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 3.668261562998405,
|
|
"grad_norm": 0.45708781179968716,
|
|
"learning_rate": 2.1826732333978924e-05,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1831934005022049,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4012.3,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 3.6762360446570974,
|
|
"grad_norm": 0.5160188300528976,
|
|
"learning_rate": 2.1747516553509755e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16620203852653503,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4086.6,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 3.6842105263157894,
|
|
"grad_norm": 0.49554359556174826,
|
|
"learning_rate": 2.166827313760662e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13182459771633148,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3512.9,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 3.692185007974482,
|
|
"grad_norm": 0.4436194940242228,
|
|
"learning_rate": 2.1589003339434292e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17071856558322906,
|
|
"step": 2315,
|
|
"valid_targets_mean": 5048.2,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 3.7001594896331738,
|
|
"grad_norm": 0.5072255287266337,
|
|
"learning_rate": 2.150970841257476e-05,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1575286090373993,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3204.7,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 3.708133971291866,
|
|
"grad_norm": 0.4890820514775427,
|
|
"learning_rate": 2.1430389611007393e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16297124326229095,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4078.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.716108452950558,
|
|
"grad_norm": 0.4879275351091008,
|
|
"learning_rate": 2.135104818908913e-05,
|
|
"loss": 0.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19434574246406555,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3913.0,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 3.7240829346092506,
|
|
"grad_norm": 0.4854250301626308,
|
|
"learning_rate": 2.1271685401534617e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15000009536743164,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3880.9,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 3.7320574162679425,
|
|
"grad_norm": 0.47865421484942133,
|
|
"learning_rate": 2.11923025033964e-05,
|
|
"loss": 0.1545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15569142997264862,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3807.2,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 3.740031897926635,
|
|
"grad_norm": 0.5053297922326104,
|
|
"learning_rate": 2.111290075004503e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1683412790298462,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3278.0,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.748006379585327,
|
|
"grad_norm": 0.4509897917516485,
|
|
"learning_rate": 2.103348139714925e-05,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1671190857887268,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4464.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 3.7559808612440193,
|
|
"grad_norm": 0.418061694277768,
|
|
"learning_rate": 2.0954045700656126e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1704232394695282,
|
|
"step": 2355,
|
|
"valid_targets_mean": 5156.8,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 3.7639553429027113,
|
|
"grad_norm": 0.4293652375556542,
|
|
"learning_rate": 2.0874594916771174e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16052809357643127,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4574.3,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 3.7719298245614032,
|
|
"grad_norm": 0.47350241734644954,
|
|
"learning_rate": 2.079513030193852e-05,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21240681409835815,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4154.7,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 3.7799043062200957,
|
|
"grad_norm": 0.4695104022626879,
|
|
"learning_rate": 2.071565311282099e-05,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16481779515743256,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4477.1,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 3.787878787878788,
|
|
"grad_norm": 0.5428932293411882,
|
|
"learning_rate": 2.063616460628029e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1805332899093628,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3889.8,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 3.79585326953748,
|
|
"grad_norm": 0.502274861441207,
|
|
"learning_rate": 2.0556666039357084e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14422598481178284,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3955.4,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 3.803827751196172,
|
|
"grad_norm": 0.49594359083311446,
|
|
"learning_rate": 2.047715866925113e-05,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1489827185869217,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3452.2,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 3.8118022328548644,
|
|
"grad_norm": 0.4580027150325296,
|
|
"learning_rate": 2.0397643753301403e-05,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18850800395011902,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4559.8,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 3.819776714513557,
|
|
"grad_norm": 0.4184878966986943,
|
|
"learning_rate": 2.0318122548966225e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1715143769979477,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5096.4,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 3.827751196172249,
|
|
"grad_norm": 0.39416604970188135,
|
|
"learning_rate": 2.0238596313803337e-05,
|
|
"loss": 0.1562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16019895672798157,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5120.4,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 3.8357256778309408,
|
|
"grad_norm": 0.41519162105832647,
|
|
"learning_rate": 2.015906630545005e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15690618753433228,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4438.2,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 3.843700159489633,
|
|
"grad_norm": 0.4405642997796185,
|
|
"learning_rate": 2.0079533781603352e-05,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14348512887954712,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4542.1,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 3.8516746411483256,
|
|
"grad_norm": 0.4761957820839387,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15683415532112122,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4740.6,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 3.8596491228070176,
|
|
"grad_norm": 0.47532815303041137,
|
|
"learning_rate": 1.9920466218396655e-05,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1671157032251358,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4179.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.8676236044657095,
|
|
"grad_norm": 0.4496312283152147,
|
|
"learning_rate": 1.9840933694549956e-05,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16024084389209747,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4530.7,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 3.875598086124402,
|
|
"grad_norm": 0.4019135434849342,
|
|
"learning_rate": 1.976140368619667e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13773368299007416,
|
|
"step": 2430,
|
|
"valid_targets_mean": 5622.4,
|
|
"valid_targets_min": 1558
|
|
},
|
|
{
|
|
"epoch": 3.8835725677830943,
|
|
"grad_norm": 0.4184038675104054,
|
|
"learning_rate": 1.9681877451033778e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18108975887298584,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4559.6,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 3.8915470494417863,
|
|
"grad_norm": 0.418838320176284,
|
|
"learning_rate": 1.9602356246698597e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16737952828407288,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4500.6,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 3.8995215311004783,
|
|
"grad_norm": 0.5029565286443105,
|
|
"learning_rate": 1.9522841330748877e-05,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15453889966011047,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3727.2,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 3.9074960127591707,
|
|
"grad_norm": 0.5060398680142083,
|
|
"learning_rate": 1.944333396064292e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17415592074394226,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4523.3,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 3.915470494417863,
|
|
"grad_norm": 0.43637143449295923,
|
|
"learning_rate": 1.936383539371971e-05,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15245279669761658,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4722.9,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 3.923444976076555,
|
|
"grad_norm": 0.4380466161151669,
|
|
"learning_rate": 1.9284346887179016e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16576790809631348,
|
|
"step": 2460,
|
|
"valid_targets_mean": 4650.0,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 3.931419457735247,
|
|
"grad_norm": 0.4600904354902821,
|
|
"learning_rate": 1.9204869698061493e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18284443020820618,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4279.1,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 3.9393939393939394,
|
|
"grad_norm": 0.48410532927991606,
|
|
"learning_rate": 1.9125405083228833e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15718451142311096,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3794.4,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 3.9473684210526314,
|
|
"grad_norm": 0.4321782909832125,
|
|
"learning_rate": 1.9045954299343884e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376398503780365,
|
|
"step": 2475,
|
|
"valid_targets_mean": 4641.6,
|
|
"valid_targets_min": 1964
|
|
},
|
|
{
|
|
"epoch": 3.955342902711324,
|
|
"grad_norm": 0.4662915810921282,
|
|
"learning_rate": 1.896651860285076e-05,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1539989411830902,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4233.6,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 3.963317384370016,
|
|
"grad_norm": 0.44497241275747546,
|
|
"learning_rate": 1.8887099249954976e-05,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14902281761169434,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4280.7,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 3.971291866028708,
|
|
"grad_norm": 0.4668559589038274,
|
|
"learning_rate": 1.8807697496603604e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13627278804779053,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3500.7,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 3.9792663476874,
|
|
"grad_norm": 0.470571459435835,
|
|
"learning_rate": 1.8728314598465386e-05,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1651896834373474,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4091.2,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 3.9872408293460926,
|
|
"grad_norm": 0.47370861434677497,
|
|
"learning_rate": 1.8648951810910878e-05,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18329718708992004,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3797.4,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 3.9952153110047846,
|
|
"grad_norm": 1.3838577054348022,
|
|
"learning_rate": 1.856961038899261e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16169145703315735,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4442.9,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 4.003189792663477,
|
|
"grad_norm": 0.46082837335573706,
|
|
"learning_rate": 1.849029158742525e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15621662139892578,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3709.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 4.011164274322169,
|
|
"grad_norm": 0.4847160070208682,
|
|
"learning_rate": 1.8410996660565714e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13752679526805878,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4029.2,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 4.019138755980861,
|
|
"grad_norm": 0.46606472233336216,
|
|
"learning_rate": 1.8331726862393385e-05,
|
|
"loss": 0.1603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14721858501434326,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4269.6,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 4.027113237639553,
|
|
"grad_norm": 0.5351199747678579,
|
|
"learning_rate": 1.8252483446490245e-05,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17011567950248718,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3549.1,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 4.035087719298246,
|
|
"grad_norm": 0.46306101115928566,
|
|
"learning_rate": 1.8173267666021075e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13476046919822693,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4095.9,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 4.043062200956938,
|
|
"grad_norm": 0.4560054745569957,
|
|
"learning_rate": 1.8094080773713615e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1370489001274109,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4567.5,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 4.05103668261563,
|
|
"grad_norm": 0.46559488315408987,
|
|
"learning_rate": 1.8014924021838774e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15365129709243774,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4686.8,
|
|
"valid_targets_min": 2251
|
|
},
|
|
{
|
|
"epoch": 4.059011164274322,
|
|
"grad_norm": 0.44072925582075784,
|
|
"learning_rate": 1.7935798662190798e-05,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144924134016037,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4126.1,
|
|
"valid_targets_min": 300
|
|
},
|
|
{
|
|
"epoch": 4.0669856459330145,
|
|
"grad_norm": 0.4567797817414844,
|
|
"learning_rate": 1.785670594606752e-05,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1467863768339157,
|
|
"step": 2550,
|
|
"valid_targets_mean": 4592.9,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 4.074960127591707,
|
|
"grad_norm": 0.49727011728755927,
|
|
"learning_rate": 1.777764712425052e-05,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593673825263977,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4975.1,
|
|
"valid_targets_min": 1987
|
|
},
|
|
{
|
|
"epoch": 4.082934609250398,
|
|
"grad_norm": 0.509173154032002,
|
|
"learning_rate": 1.7698623446985378e-05,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14089062809944153,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3507.3,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 4.090909090909091,
|
|
"grad_norm": 0.39582620377952693,
|
|
"learning_rate": 1.7619636163961907e-05,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13495709002017975,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5221.4,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 4.098883572567783,
|
|
"grad_norm": 0.53478767990482,
|
|
"learning_rate": 1.754068652429436e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17476224899291992,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3976.9,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 4.106858054226476,
|
|
"grad_norm": 0.4528337768210523,
|
|
"learning_rate": 1.74617757765017e-05,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1639062762260437,
|
|
"step": 2575,
|
|
"valid_targets_mean": 5353.8,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 4.114832535885167,
|
|
"grad_norm": 0.45912679039243104,
|
|
"learning_rate": 1.738290516848784e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1560673713684082,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4231.4,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 4.12280701754386,
|
|
"grad_norm": 0.46799829114899083,
|
|
"learning_rate": 1.730407594752194e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13552908599376678,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3899.2,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 4.130781499202552,
|
|
"grad_norm": 0.4468301285983953,
|
|
"learning_rate": 1.722528936021864e-05,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14040517807006836,
|
|
"step": 2590,
|
|
"valid_targets_mean": 5062.9,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 4.138755980861244,
|
|
"grad_norm": 0.4493283894661109,
|
|
"learning_rate": 1.714654665251837e-05,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14356711506843567,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4597.7,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 4.146730462519936,
|
|
"grad_norm": 0.4574396418326936,
|
|
"learning_rate": 1.7067849069667644e-05,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13516147434711456,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4308.8,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 4.154704944178628,
|
|
"grad_norm": 0.6156066189983693,
|
|
"learning_rate": 1.6989197856199377e-05,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14342327415943146,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3615.1,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 4.162679425837321,
|
|
"grad_norm": 0.48957054085609664,
|
|
"learning_rate": 1.6910594255913177e-05,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14552995562553406,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3894.9,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 4.170653907496013,
|
|
"grad_norm": 0.4637911047440085,
|
|
"learning_rate": 1.6832039511855702e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13293996453285217,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4283.1,
|
|
"valid_targets_min": 2283
|
|
},
|
|
{
|
|
"epoch": 4.178628389154705,
|
|
"grad_norm": 0.5827257403084201,
|
|
"learning_rate": 1.6753534866300983e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12907828390598297,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4594.6,
|
|
"valid_targets_min": 2126
|
|
},
|
|
{
|
|
"epoch": 4.186602870813397,
|
|
"grad_norm": 0.45226433836241126,
|
|
"learning_rate": 1.6675081560730804e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13174115121364594,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4257.4,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 4.1945773524720895,
|
|
"grad_norm": 0.4921810305252334,
|
|
"learning_rate": 1.6596680835815024e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13274423778057098,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3713.9,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 4.202551834130782,
|
|
"grad_norm": 0.4348531695475078,
|
|
"learning_rate": 1.651833393139201e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14396558701992035,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4623.3,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 4.2105263157894735,
|
|
"grad_norm": 0.4935047419367527,
|
|
"learning_rate": 1.6440042086449e-05,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1877938061952591,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4243.1,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.218500797448166,
|
|
"grad_norm": 0.4585916262397164,
|
|
"learning_rate": 1.6361806539102508e-05,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1204279214143753,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4467.3,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 4.226475279106858,
|
|
"grad_norm": 0.4398290216390354,
|
|
"learning_rate": 1.628362852657876e-05,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1392260044813156,
|
|
"step": 2650,
|
|
"valid_targets_mean": 5095.4,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 4.23444976076555,
|
|
"grad_norm": 0.41643326236787404,
|
|
"learning_rate": 1.620550928519411e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12012036144733429,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5408.5,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 4.242424242424242,
|
|
"grad_norm": 0.48969461908138195,
|
|
"learning_rate": 1.612745005033551e-05,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13264614343643188,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3746.6,
|
|
"valid_targets_min": 1307
|
|
},
|
|
{
|
|
"epoch": 4.250398724082935,
|
|
"grad_norm": 0.4253994042327264,
|
|
"learning_rate": 1.6049452056440948e-05,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15716338157653809,
|
|
"step": 2665,
|
|
"valid_targets_mean": 5313.2,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 4.258373205741627,
|
|
"grad_norm": 0.4421556361223009,
|
|
"learning_rate": 1.597151653697995e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14517253637313843,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4433.8,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 4.266347687400319,
|
|
"grad_norm": 0.46923848272555246,
|
|
"learning_rate": 1.5893644724434066e-05,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1459263563156128,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3924.7,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 4.274322169059011,
|
|
"grad_norm": 0.5390625970276209,
|
|
"learning_rate": 1.581583785027736e-05,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17137733101844788,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3988.8,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 4.282296650717703,
|
|
"grad_norm": 0.46656506724335617,
|
|
"learning_rate": 1.5738097144956976e-05,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16981856524944305,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4525.6,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 4.290271132376396,
|
|
"grad_norm": 0.47382334811310256,
|
|
"learning_rate": 1.566042383787364e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15621820092201233,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4412.1,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 4.298245614035087,
|
|
"grad_norm": 0.47423930222029365,
|
|
"learning_rate": 1.558281915736224e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13917627930641174,
|
|
"step": 2695,
|
|
"valid_targets_mean": 3911.6,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 4.30622009569378,
|
|
"grad_norm": 0.47677205908607906,
|
|
"learning_rate": 1.550528433067239e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13741689920425415,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4266.3,
|
|
"valid_targets_min": 1515
|
|
},
|
|
{
|
|
"epoch": 4.314194577352472,
|
|
"grad_norm": 0.48380459123203234,
|
|
"learning_rate": 1.5427820583949054e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12418796867132187,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3508.4,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 4.3221690590111645,
|
|
"grad_norm": 0.4860315277331652,
|
|
"learning_rate": 1.5350429142213075e-05,
|
|
"loss": 0.1536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14992234110832214,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3954.4,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 4.330143540669856,
|
|
"grad_norm": 0.5665136589738596,
|
|
"learning_rate": 1.5273111229341907e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18508478999137878,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4132.3,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 4.3381180223285485,
|
|
"grad_norm": 0.4978673152805232,
|
|
"learning_rate": 1.5195868068050185e-05,
|
|
"loss": 0.1555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14541754126548767,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3971.9,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 4.346092503987241,
|
|
"grad_norm": 0.4600427561356555,
|
|
"learning_rate": 1.5118700879870426e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14078018069267273,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4672.2,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 4.354066985645933,
|
|
"grad_norm": 0.5641369861857926,
|
|
"learning_rate": 1.5041610885133681e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1585184782743454,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4193.2,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 4.362041467304625,
|
|
"grad_norm": 0.5290010651891721,
|
|
"learning_rate": 1.496459930295028e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1647966206073761,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3715.3,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 4.370015948963317,
|
|
"grad_norm": 0.45651558819093896,
|
|
"learning_rate": 1.4887667351190508e-05,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15185299515724182,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4561.3,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 4.37799043062201,
|
|
"grad_norm": 0.4424403595613681,
|
|
"learning_rate": 1.4810816246465374e-05,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16188275814056396,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4640.9,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 4.385964912280702,
|
|
"grad_norm": 0.5457009532660597,
|
|
"learning_rate": 1.4734047204107358e-05,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15317600965499878,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3320.0,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 4.393939393939394,
|
|
"grad_norm": 0.4754628766285464,
|
|
"learning_rate": 1.4657361438151192e-05,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13708147406578064,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3947.8,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 4.401913875598086,
|
|
"grad_norm": 0.443881890517658,
|
|
"learning_rate": 1.458076016131468e-05,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14646129310131073,
|
|
"step": 2760,
|
|
"valid_targets_mean": 4935.9,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 4.409888357256778,
|
|
"grad_norm": 0.47181126917751165,
|
|
"learning_rate": 1.4504244584979494e-05,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15936020016670227,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4039.1,
|
|
"valid_targets_min": 2112
|
|
},
|
|
{
|
|
"epoch": 4.417862838915471,
|
|
"grad_norm": 0.46430318115738506,
|
|
"learning_rate": 1.442781591917203e-05,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14855515956878662,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4880.4,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 4.425837320574162,
|
|
"grad_norm": 0.4444912696895987,
|
|
"learning_rate": 1.4351475372544262e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11446563154459,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3706.1,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 4.433811802232855,
|
|
"grad_norm": 0.49303282374079027,
|
|
"learning_rate": 1.4275224152354658e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1683465540409088,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4650.8,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.441786283891547,
|
|
"grad_norm": 0.47407953449346757,
|
|
"learning_rate": 1.4199063464449047e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1486625224351883,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4090.1,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 4.44976076555024,
|
|
"grad_norm": 0.4626133584795092,
|
|
"learning_rate": 1.4122994513241587e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15956081449985504,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4818.5,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 4.457735247208931,
|
|
"grad_norm": 0.4368754340077179,
|
|
"learning_rate": 1.4047018501695677e-05,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1544092297554016,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4656.7,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 4.4657097288676235,
|
|
"grad_norm": 0.4392857155285604,
|
|
"learning_rate": 1.3971136631304978e-05,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15302875638008118,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4749.9,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 4.473684210526316,
|
|
"grad_norm": 0.4966938133468106,
|
|
"learning_rate": 1.3895350102074386e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1587035357952118,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3911.9,
|
|
"valid_targets_min": 1307
|
|
},
|
|
{
|
|
"epoch": 4.481658692185008,
|
|
"grad_norm": 0.45523491343520706,
|
|
"learning_rate": 1.3819660112501054e-05,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1573181450366974,
|
|
"step": 2810,
|
|
"valid_targets_mean": 4757.9,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 4.4896331738437,
|
|
"grad_norm": 0.43034430283661584,
|
|
"learning_rate": 1.3744067859555461e-05,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1443377435207367,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4685.4,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 4.497607655502392,
|
|
"grad_norm": 0.5178892326148206,
|
|
"learning_rate": 1.3668574538662451e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15837419033050537,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4160.9,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 4.505582137161085,
|
|
"grad_norm": 0.5117654759844766,
|
|
"learning_rate": 1.3593181343682353e-05,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17854346334934235,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3810.8,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 4.513556618819777,
|
|
"grad_norm": 0.505159240220825,
|
|
"learning_rate": 1.3517889466892085e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1388031542301178,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3791.3,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 4.521531100478469,
|
|
"grad_norm": 0.46151872838952157,
|
|
"learning_rate": 1.3442700098966326e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14517709612846375,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3671.3,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 4.529505582137161,
|
|
"grad_norm": 0.47243442118001044,
|
|
"learning_rate": 1.3367614428958646e-05,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14471352100372314,
|
|
"step": 2840,
|
|
"valid_targets_mean": 4208.1,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 4.5374800637958534,
|
|
"grad_norm": 0.45731480079278636,
|
|
"learning_rate": 1.3292633644282735e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13014629483222961,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4258.7,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 4.545454545454545,
|
|
"grad_norm": 0.5135556888733501,
|
|
"learning_rate": 1.3217758930693608e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15074163675308228,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4038.9,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 4.553429027113237,
|
|
"grad_norm": 0.4685417898072257,
|
|
"learning_rate": 1.314299147226887e-05,
|
|
"loss": 0.1529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609649509191513,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4444.9,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 4.56140350877193,
|
|
"grad_norm": 0.4842755523706133,
|
|
"learning_rate": 1.3068332451389969e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16042378544807434,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4721.2,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 4.569377990430622,
|
|
"grad_norm": 0.42561307448092917,
|
|
"learning_rate": 1.2993783048723515e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10039359331130981,
|
|
"step": 2865,
|
|
"valid_targets_mean": 4152.1,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 4.577352472089315,
|
|
"grad_norm": 0.43387482081225964,
|
|
"learning_rate": 1.2919344443202602e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14912056922912598,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4900.8,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 4.585326953748006,
|
|
"grad_norm": 0.48056763534270636,
|
|
"learning_rate": 1.2845017812008158e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14239303767681122,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4296.3,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 4.5933014354066986,
|
|
"grad_norm": 0.4721750227730203,
|
|
"learning_rate": 1.277080433055034e-05,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14755162596702576,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4274.8,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 4.601275917065391,
|
|
"grad_norm": 0.4905525710007373,
|
|
"learning_rate": 1.2696705172449944e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14556947350502014,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3678.1,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 4.6092503987240825,
|
|
"grad_norm": 0.4619945755885209,
|
|
"learning_rate": 1.2622721509519846e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17254452407360077,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4930.2,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 4.617224880382775,
|
|
"grad_norm": 0.5351148586758343,
|
|
"learning_rate": 1.2548854511746461e-05,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16579893231391907,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3432.1,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 4.625199362041467,
|
|
"grad_norm": 0.4119464808206694,
|
|
"learning_rate": 1.247510534727125e-05,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13362929224967957,
|
|
"step": 2900,
|
|
"valid_targets_mean": 5311.8,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 4.63317384370016,
|
|
"grad_norm": 0.5032968815022231,
|
|
"learning_rate": 1.240147518237224e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1579529047012329,
|
|
"step": 2905,
|
|
"valid_targets_mean": 3713.4,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 4.641148325358852,
|
|
"grad_norm": 0.5210469622309966,
|
|
"learning_rate": 1.2327965181445593e-05,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12542816996574402,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3344.5,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 4.649122807017544,
|
|
"grad_norm": 0.49500584853941293,
|
|
"learning_rate": 1.2254576506987182e-05,
|
|
"loss": 0.1441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12624770402908325,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3989.5,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 4.657097288676236,
|
|
"grad_norm": 0.4973415860079086,
|
|
"learning_rate": 1.21813103195742e-05,
|
|
"loss": 0.165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1785549819469452,
|
|
"step": 2920,
|
|
"valid_targets_mean": 4154.9,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 4.6650717703349285,
|
|
"grad_norm": 0.4463321803196797,
|
|
"learning_rate": 1.2108167777846815e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16145628690719604,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4791.5,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 4.67304625199362,
|
|
"grad_norm": 0.4329027441138823,
|
|
"learning_rate": 1.203515003848987e-05,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15695135295391083,
|
|
"step": 2930,
|
|
"valid_targets_mean": 4929.6,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 4.681020733652312,
|
|
"grad_norm": 0.4682865018503108,
|
|
"learning_rate": 1.1962258256214545e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1467132717370987,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3955.1,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 4.688995215311005,
|
|
"grad_norm": 0.5300846226450522,
|
|
"learning_rate": 1.1889493583740124e-05,
|
|
"loss": 0.1556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13545726239681244,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3952.9,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 4.696969696969697,
|
|
"grad_norm": 0.4615985227476111,
|
|
"learning_rate": 1.1816857171775767e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1520180106163025,
|
|
"step": 2945,
|
|
"valid_targets_mean": 4239.2,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 4.70494417862839,
|
|
"grad_norm": 0.49706930023086016,
|
|
"learning_rate": 1.1744350169002308e-05,
|
|
"loss": 0.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16231472790241241,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3363.7,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 4.712918660287081,
|
|
"grad_norm": 0.4550537787741078,
|
|
"learning_rate": 1.1671973722054077e-05,
|
|
"loss": 0.1632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16848677396774292,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4635.4,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 4.720893141945774,
|
|
"grad_norm": 0.5508955824601227,
|
|
"learning_rate": 1.159972897550079e-05,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19454948604106903,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3949.9,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 4.728867623604466,
|
|
"grad_norm": 0.49080599771575706,
|
|
"learning_rate": 1.1527617071829447e-05,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17117540538311005,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4317.9,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 4.7368421052631575,
|
|
"grad_norm": 0.485879218643565,
|
|
"learning_rate": 1.145563915142622e-05,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17503622174263,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4471.3,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 4.74481658692185,
|
|
"grad_norm": 0.45309971660065484,
|
|
"learning_rate": 1.1383796352558489e-05,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14421647787094116,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4405.3,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 4.752791068580542,
|
|
"grad_norm": 0.44175010781716906,
|
|
"learning_rate": 1.1312089811356803e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13868224620819092,
|
|
"step": 2980,
|
|
"valid_targets_mean": 4774.1,
|
|
"valid_targets_min": 1690
|
|
},
|
|
{
|
|
"epoch": 4.760765550239235,
|
|
"grad_norm": 0.5085366206165324,
|
|
"learning_rate": 1.1240520661796882e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14815345406532288,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3883.9,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 4.768740031897926,
|
|
"grad_norm": 0.4966512930785184,
|
|
"learning_rate": 1.1169090035681772e-05,
|
|
"loss": 0.1556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15026727318763733,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4754.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.776714513556619,
|
|
"grad_norm": 0.47136793390853915,
|
|
"learning_rate": 1.109779906262383e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15511369705200195,
|
|
"step": 2995,
|
|
"valid_targets_mean": 4124.1,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 4.784688995215311,
|
|
"grad_norm": 0.4895038704563147,
|
|
"learning_rate": 1.102664887002698e-05,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.135273277759552,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3452.1,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 4.7926634768740035,
|
|
"grad_norm": 0.4481325194334063,
|
|
"learning_rate": 1.0955640583068802e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14666332304477692,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4176.9,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 4.800637958532695,
|
|
"grad_norm": 0.5288323710380043,
|
|
"learning_rate": 1.0884775324682755e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16423480212688446,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3525.9,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 4.8086124401913874,
|
|
"grad_norm": 0.4207422151375285,
|
|
"learning_rate": 1.081405421554044e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13425526022911072,
|
|
"step": 3015,
|
|
"valid_targets_mean": 5107.1,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 4.81658692185008,
|
|
"grad_norm": 0.5381318861985674,
|
|
"learning_rate": 1.074347837403387e-05,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14620235562324524,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3482.4,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 4.824561403508772,
|
|
"grad_norm": 0.4509875650718798,
|
|
"learning_rate": 1.067304891625776e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.155997171998024,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4475.4,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 4.832535885167464,
|
|
"grad_norm": 0.486029289131057,
|
|
"learning_rate": 1.0602766955991913e-05,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1824781745672226,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4924.7,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 4.840510366826156,
|
|
"grad_norm": 0.4892199181906616,
|
|
"learning_rate": 1.053263360468358e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15650701522827148,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4032.4,
|
|
"valid_targets_min": 1592
|
|
},
|
|
{
|
|
"epoch": 4.848484848484849,
|
|
"grad_norm": 0.5170321196784736,
|
|
"learning_rate": 1.0462649971429884e-05,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12321928888559341,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4659.4,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 4.856459330143541,
|
|
"grad_norm": 0.583636712426874,
|
|
"learning_rate": 1.0392817162960304e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17830780148506165,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3765.5,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 4.8644338118022326,
|
|
"grad_norm": 0.4505666435781183,
|
|
"learning_rate": 1.0323136283619167e-05,
|
|
"loss": 0.1654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15542301535606384,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4611.1,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 4.872408293460925,
|
|
"grad_norm": 0.40990096637164847,
|
|
"learning_rate": 1.0253608435348136e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1208207979798317,
|
|
"step": 3055,
|
|
"valid_targets_mean": 5011.8,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 4.880382775119617,
|
|
"grad_norm": 0.5131477233882601,
|
|
"learning_rate": 1.0184234717668867e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17018970847129822,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3625.8,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 4.88835725677831,
|
|
"grad_norm": 0.5164222082799219,
|
|
"learning_rate": 1.0115016227665544e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16871359944343567,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3732.2,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 4.896331738437001,
|
|
"grad_norm": 0.5123562466545297,
|
|
"learning_rate": 1.0045954059967577e-05,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11625786125659943,
|
|
"step": 3070,
|
|
"valid_targets_mean": 2793.7,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 4.904306220095694,
|
|
"grad_norm": 0.6314287536261705,
|
|
"learning_rate": 9.977049306732287e-06,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1648997962474823,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3762.4,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 4.912280701754386,
|
|
"grad_norm": 0.5233568152413559,
|
|
"learning_rate": 9.908303057627591e-06,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15821966528892517,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3318.2,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 4.920255183413078,
|
|
"grad_norm": 0.49251543537219217,
|
|
"learning_rate": 9.83971639981484e-06,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16771940886974335,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4808.2,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 4.92822966507177,
|
|
"grad_norm": 0.4751127690095813,
|
|
"learning_rate": 9.771290417931559e-06,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14133797585964203,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4278.2,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 4.9362041467304625,
|
|
"grad_norm": 0.4237847140491428,
|
|
"learning_rate": 9.703026194074342e-06,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13128527998924255,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4798.2,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 4.944178628389155,
|
|
"grad_norm": 0.4284810944082244,
|
|
"learning_rate": 9.634924807781729e-06,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17445242404937744,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5354.4,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 4.952153110047847,
|
|
"grad_norm": 0.45550095094418386,
|
|
"learning_rate": 9.566987336017102e-06,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14677980542182922,
|
|
"step": 3105,
|
|
"valid_targets_mean": 4834.2,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 4.960127591706539,
|
|
"grad_norm": 0.4496028648205438,
|
|
"learning_rate": 9.499214853151699e-06,
|
|
"loss": 0.1464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14263808727264404,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4590.4,
|
|
"valid_targets_min": 1931
|
|
},
|
|
{
|
|
"epoch": 4.968102073365231,
|
|
"grad_norm": 0.43090740648430836,
|
|
"learning_rate": 9.431608430947619e-06,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15924668312072754,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4833.8,
|
|
"valid_targets_min": 2355
|
|
},
|
|
{
|
|
"epoch": 4.976076555023924,
|
|
"grad_norm": 0.441859289886078,
|
|
"learning_rate": 9.364169138540805e-06,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13983158767223358,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4533.5,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 4.984051036682615,
|
|
"grad_norm": 0.45465339803485433,
|
|
"learning_rate": 9.296898042424237e-06,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14128567278385162,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4593.1,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 4.992025518341308,
|
|
"grad_norm": 0.4935688704725174,
|
|
"learning_rate": 9.229796206431015e-06,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16522493958473206,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4003.9,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4822960757121396,
|
|
"learning_rate": 9.162864691717513e-06,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13867652416229248,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4029.5,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 5.007974481658692,
|
|
"grad_norm": 0.5629533274707689,
|
|
"learning_rate": 9.096104556746654e-06,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14311188459396362,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3809.9,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 5.015948963317384,
|
|
"grad_norm": 0.4330842900478362,
|
|
"learning_rate": 9.029516857271115e-06,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12040582299232483,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4493.6,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 5.023923444976076,
|
|
"grad_norm": 0.4930662303885301,
|
|
"learning_rate": 8.963102646316677e-06,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14462250471115112,
|
|
"step": 3150,
|
|
"valid_targets_mean": 4185.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.031897926634769,
|
|
"grad_norm": 0.48366921098168475,
|
|
"learning_rate": 8.896862974165553e-06,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.148525670170784,
|
|
"step": 3155,
|
|
"valid_targets_mean": 4491.6,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 5.039872408293461,
|
|
"grad_norm": 0.49454262034129004,
|
|
"learning_rate": 8.830798888339756e-06,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11371143162250519,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3655.9,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 5.047846889952153,
|
|
"grad_norm": 0.535921178104603,
|
|
"learning_rate": 8.764911433584581e-06,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12518128752708435,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3578.3,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 5.055821371610845,
|
|
"grad_norm": 0.5150703943203615,
|
|
"learning_rate": 8.699201651852056e-06,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16986435651779175,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3954.2,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 5.0637958532695375,
|
|
"grad_norm": 0.4140833244386174,
|
|
"learning_rate": 8.633670582284446e-06,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12663370370864868,
|
|
"step": 3175,
|
|
"valid_targets_mean": 4840.8,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 5.07177033492823,
|
|
"grad_norm": 0.9948336163286478,
|
|
"learning_rate": 8.56831926119787e-06,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386774480342865,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5047.3,
|
|
"valid_targets_min": 2421
|
|
},
|
|
{
|
|
"epoch": 5.0797448165869215,
|
|
"grad_norm": 0.43335227882245414,
|
|
"learning_rate": 8.503148722065851e-06,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14397971332073212,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5075.3,
|
|
"valid_targets_min": 2055
|
|
},
|
|
{
|
|
"epoch": 5.087719298245614,
|
|
"grad_norm": 0.5496760378117667,
|
|
"learning_rate": 8.43815999550303e-06,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15840576589107513,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3079.4,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 5.095693779904306,
|
|
"grad_norm": 0.5423429995278034,
|
|
"learning_rate": 8.373354109248842e-06,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417490839958191,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3819.1,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 5.103668261562999,
|
|
"grad_norm": 0.49255252102848646,
|
|
"learning_rate": 8.308732088151245e-06,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12895487248897552,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3898.2,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 5.11164274322169,
|
|
"grad_norm": 0.5511524262494027,
|
|
"learning_rate": 8.24429495415054e-06,
|
|
"loss": 0.1516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14600226283073425,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3842.6,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 5.119617224880383,
|
|
"grad_norm": 0.543775827614343,
|
|
"learning_rate": 8.180043726263216e-06,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.131940096616745,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4112.2,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 5.127591706539075,
|
|
"grad_norm": 0.4456041058741551,
|
|
"learning_rate": 8.115979420565794e-06,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14391078054904938,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5077.2,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 5.1355661881977674,
|
|
"grad_norm": 0.46293425860546167,
|
|
"learning_rate": 8.052103050178806e-06,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14202432334423065,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4340.3,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 5.143540669856459,
|
|
"grad_norm": 0.5090940536993411,
|
|
"learning_rate": 7.988415625250755e-06,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14304105937480927,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4960.6,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 5.151515151515151,
|
|
"grad_norm": 0.5101048564414876,
|
|
"learning_rate": 7.924918152942117e-06,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1257377415895462,
|
|
"step": 3230,
|
|
"valid_targets_mean": 4034.9,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 5.159489633173844,
|
|
"grad_norm": 0.5314361761865393,
|
|
"learning_rate": 7.861611637409462e-06,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15294279158115387,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4144.6,
|
|
"valid_targets_min": 565
|
|
},
|
|
{
|
|
"epoch": 5.167464114832536,
|
|
"grad_norm": 0.49163639148809263,
|
|
"learning_rate": 7.798497079789513e-06,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1356612890958786,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3899.4,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 5.175438596491228,
|
|
"grad_norm": 0.49636193358911074,
|
|
"learning_rate": 7.735575478183381e-06,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1626090705394745,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4522.2,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 5.18341307814992,
|
|
"grad_norm": 0.5913197549607332,
|
|
"learning_rate": 7.672847827640735e-06,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15321679413318634,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3812.7,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 5.1913875598086126,
|
|
"grad_norm": 0.5168227349212295,
|
|
"learning_rate": 7.610315120144067e-06,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13285885751247406,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3996.3,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 5.199362041467305,
|
|
"grad_norm": 0.523504800396223,
|
|
"learning_rate": 7.5479783445930414e-06,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15055041015148163,
|
|
"step": 3260,
|
|
"valid_targets_mean": 4184.9,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 5.2073365231259965,
|
|
"grad_norm": 0.5049506088221044,
|
|
"learning_rate": 7.485838486788803e-06,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13988947868347168,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3776.5,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 5.215311004784689,
|
|
"grad_norm": 0.48615345373651536,
|
|
"learning_rate": 7.4238965294184374e-06,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15568292140960693,
|
|
"step": 3270,
|
|
"valid_targets_mean": 5016.6,
|
|
"valid_targets_min": 2019
|
|
},
|
|
{
|
|
"epoch": 5.223285486443381,
|
|
"grad_norm": 0.4595944961175771,
|
|
"learning_rate": 7.362153452039409e-06,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13161596655845642,
|
|
"step": 3275,
|
|
"valid_targets_mean": 4705.2,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 5.231259968102074,
|
|
"grad_norm": 0.48959225632425923,
|
|
"learning_rate": 7.300610231064056e-06,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12434960901737213,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5278.2,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 5.239234449760765,
|
|
"grad_norm": 0.5756971048505636,
|
|
"learning_rate": 7.239267839744166e-06,
|
|
"loss": 0.147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1595425009727478,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3223.8,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 5.247208931419458,
|
|
"grad_norm": 0.4306316009209797,
|
|
"learning_rate": 7.178127248155604e-06,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13886387646198273,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4823.2,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 5.25518341307815,
|
|
"grad_norm": 0.49807166216076687,
|
|
"learning_rate": 7.117189423182917e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13178473711013794,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3933.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 5.2631578947368425,
|
|
"grad_norm": 0.4751606381127374,
|
|
"learning_rate": 7.056455328504104e-06,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1461402326822281,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4372.4,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 5.271132376395534,
|
|
"grad_norm": 0.4680876821657924,
|
|
"learning_rate": 6.995925924575342e-06,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1421845555305481,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4742.0,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 5.279106858054226,
|
|
"grad_norm": 0.5112017713409847,
|
|
"learning_rate": 6.935602168615792e-06,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14893358945846558,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4514.3,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 5.287081339712919,
|
|
"grad_norm": 0.42805032421320105,
|
|
"learning_rate": 6.875485014592493e-06,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1472356915473938,
|
|
"step": 3315,
|
|
"valid_targets_mean": 5231.3,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 5.295055821371611,
|
|
"grad_norm": 0.5372630838896657,
|
|
"learning_rate": 6.815575413205235e-06,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1298196017742157,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3285.2,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 5.303030303030303,
|
|
"grad_norm": 0.4664181130969157,
|
|
"learning_rate": 6.755874311871562e-06,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12359993159770966,
|
|
"step": 3325,
|
|
"valid_targets_mean": 4089.0,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 5.311004784688995,
|
|
"grad_norm": 0.5642982255750836,
|
|
"learning_rate": 6.696382654711777e-06,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12850235402584076,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4628.2,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 5.318979266347688,
|
|
"grad_norm": 0.5301351417801308,
|
|
"learning_rate": 6.637101382533986e-06,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16986438632011414,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4231.4,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 5.32695374800638,
|
|
"grad_norm": 0.45861433728037887,
|
|
"learning_rate": 6.578031432819263e-06,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12941965460777283,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4545.4,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 5.3349282296650715,
|
|
"grad_norm": 0.508031461041961,
|
|
"learning_rate": 6.5191737397068015e-06,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14651808142662048,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4734.7,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 5.342902711323764,
|
|
"grad_norm": 0.4477976484414104,
|
|
"learning_rate": 6.460529233979127e-06,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280069351196289,
|
|
"step": 3350,
|
|
"valid_targets_mean": 5043.2,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 5.350877192982456,
|
|
"grad_norm": 0.47644444515766377,
|
|
"learning_rate": 6.402098843047417e-06,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16699884831905365,
|
|
"step": 3355,
|
|
"valid_targets_mean": 5008.2,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 5.358851674641148,
|
|
"grad_norm": 0.6308081755591421,
|
|
"learning_rate": 6.343883490936791e-06,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1526477336883545,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3804.2,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 5.36682615629984,
|
|
"grad_norm": 0.5114865224679561,
|
|
"learning_rate": 6.285884098271739e-06,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13057807087898254,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4165.1,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 5.374800637958533,
|
|
"grad_norm": 0.5606726011810421,
|
|
"learning_rate": 6.228101582261532e-06,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15006095170974731,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3717.6,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 5.382775119617225,
|
|
"grad_norm": 0.43888395141516984,
|
|
"learning_rate": 6.170536856685716e-06,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13810545206069946,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5302.4,
|
|
"valid_targets_min": 1564
|
|
},
|
|
{
|
|
"epoch": 5.3907496012759175,
|
|
"grad_norm": 0.5396965703768997,
|
|
"learning_rate": 6.113190831879698e-06,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14851754903793335,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3936.1,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 5.398724082934609,
|
|
"grad_norm": 0.5018908784337385,
|
|
"learning_rate": 6.056064414720317e-06,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13717985153198242,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4414.8,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 5.4066985645933014,
|
|
"grad_norm": 0.5356484696276723,
|
|
"learning_rate": 5.999158508611496e-06,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15134429931640625,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3617.5,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 5.414673046251994,
|
|
"grad_norm": 0.4492827217308216,
|
|
"learning_rate": 5.942474013469983e-06,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15601211786270142,
|
|
"step": 3395,
|
|
"valid_targets_mean": 5470.9,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 5.422647527910685,
|
|
"grad_norm": 0.4687500001817781,
|
|
"learning_rate": 5.886011825711117e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14672626554965973,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4285.2,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 5.430622009569378,
|
|
"grad_norm": 0.49634514895738063,
|
|
"learning_rate": 5.829772838234615e-06,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14952832460403442,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4263.7,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 5.43859649122807,
|
|
"grad_norm": 0.4764017325935456,
|
|
"learning_rate": 5.773757940410503e-06,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16976264119148254,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5094.1,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 5.446570972886763,
|
|
"grad_norm": 0.5451805389107484,
|
|
"learning_rate": 5.7179680180650055e-06,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13701461255550385,
|
|
"step": 3415,
|
|
"valid_targets_mean": 4419.2,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 5.454545454545454,
|
|
"grad_norm": 0.5077507052768231,
|
|
"learning_rate": 5.6624039534665775e-06,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17690125107765198,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4066.4,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 5.4625199362041466,
|
|
"grad_norm": 0.5081548100092786,
|
|
"learning_rate": 5.607066625311925e-06,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14744240045547485,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4529.1,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 5.470494417862839,
|
|
"grad_norm": 0.49390796533489184,
|
|
"learning_rate": 5.55195690871211e-06,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13167548179626465,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4310.8,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 5.478468899521531,
|
|
"grad_norm": 0.505090424739089,
|
|
"learning_rate": 5.497075675178727e-06,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14970368146896362,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4268.6,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 5.486443381180223,
|
|
"grad_norm": 0.4731977276503727,
|
|
"learning_rate": 5.442423792610118e-06,
|
|
"loss": 0.1404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303643435239792,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4338.5,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 5.494417862838915,
|
|
"grad_norm": 0.5413010986244872,
|
|
"learning_rate": 5.388002125277627e-06,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15297143161296844,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4006.3,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 5.502392344497608,
|
|
"grad_norm": 0.5815598360093224,
|
|
"learning_rate": 5.333811533811945e-06,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12038137763738632,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3107.1,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 5.5103668261563,
|
|
"grad_norm": 0.44942851109167925,
|
|
"learning_rate": 5.2798528751895265e-06,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10756224393844604,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5097.1,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 5.518341307814992,
|
|
"grad_norm": 0.5038096888918236,
|
|
"learning_rate": 5.226127002718984e-06,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15056130290031433,
|
|
"step": 3460,
|
|
"valid_targets_mean": 4270.6,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 5.526315789473684,
|
|
"grad_norm": 0.6207114640675514,
|
|
"learning_rate": 5.1726347660276424e-06,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13879022002220154,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3986.3,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 5.5342902711323765,
|
|
"grad_norm": 0.5465276778259095,
|
|
"learning_rate": 5.119377011048066e-06,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12108422070741653,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3316.1,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 5.542264752791069,
|
|
"grad_norm": 0.471858306402603,
|
|
"learning_rate": 5.066354580004713e-06,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15028458833694458,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4637.7,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.55023923444976,
|
|
"grad_norm": 0.9782002480685525,
|
|
"learning_rate": 5.013568311400599e-06,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14348694682121277,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4303.2,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 5.558213716108453,
|
|
"grad_norm": 0.5354460996757844,
|
|
"learning_rate": 4.96101904000402e-06,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11374548077583313,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3311.1,
|
|
"valid_targets_min": 510
|
|
},
|
|
{
|
|
"epoch": 5.566188197767145,
|
|
"grad_norm": 0.5279583848653548,
|
|
"learning_rate": 4.908707596835396e-06,
|
|
"loss": 0.1419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552698016166687,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4141.5,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 5.574162679425838,
|
|
"grad_norm": 0.48268266670945026,
|
|
"learning_rate": 4.856634809154093e-06,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15394705533981323,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4594.0,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 5.582137161084529,
|
|
"grad_norm": 0.5116805436729173,
|
|
"learning_rate": 4.804801500445338e-06,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.147267684340477,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4635.1,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 5.590111642743222,
|
|
"grad_norm": 0.6240842377600343,
|
|
"learning_rate": 4.753208490407233e-06,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13472461700439453,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3050.1,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 5.598086124401914,
|
|
"grad_norm": 0.5394738635196873,
|
|
"learning_rate": 4.701856594937744e-06,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15325218439102173,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3656.1,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 5.606060606060606,
|
|
"grad_norm": 0.5617309379547368,
|
|
"learning_rate": 4.650746626121838e-06,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13995513319969177,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3566.6,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 5.614035087719298,
|
|
"grad_norm": 0.4817253860253568,
|
|
"learning_rate": 4.5998793922186315e-06,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13665953278541565,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4073.0,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 5.62200956937799,
|
|
"grad_norm": 0.5068331856123081,
|
|
"learning_rate": 4.549255697648576e-06,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14600390195846558,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4159.8,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 5.629984051036683,
|
|
"grad_norm": 0.5126279999344675,
|
|
"learning_rate": 4.498876342980796e-06,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1622035801410675,
|
|
"step": 3530,
|
|
"valid_targets_mean": 4443.4,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 5.637958532695375,
|
|
"grad_norm": 0.5225403357427435,
|
|
"learning_rate": 4.448742124920368e-06,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13578708469867706,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3949.8,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 5.645933014354067,
|
|
"grad_norm": 0.4722609222695677,
|
|
"learning_rate": 4.39885383629576e-06,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14870738983154297,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4374.1,
|
|
"valid_targets_min": 1987
|
|
},
|
|
{
|
|
"epoch": 5.653907496012759,
|
|
"grad_norm": 0.48006247324563467,
|
|
"learning_rate": 4.349212266046285e-06,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11711093783378601,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4232.9,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 5.6618819776714515,
|
|
"grad_norm": 0.46577581261332457,
|
|
"learning_rate": 4.299818199209629e-06,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12458941340446472,
|
|
"step": 3550,
|
|
"valid_targets_mean": 4339.9,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 5.669856459330144,
|
|
"grad_norm": 0.5239144196312671,
|
|
"learning_rate": 4.250672416909407e-06,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12059792876243591,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3512.6,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 5.6778309409888355,
|
|
"grad_norm": 0.5097733128579093,
|
|
"learning_rate": 4.201775696342862e-06,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15765796601772308,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4620.9,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 5.685805422647528,
|
|
"grad_norm": 0.42033651044246023,
|
|
"learning_rate": 4.153128810768517e-06,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13111995160579681,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4884.5,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 5.69377990430622,
|
|
"grad_norm": 0.4922074424397772,
|
|
"learning_rate": 4.104732529493991e-06,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13891097903251648,
|
|
"step": 3570,
|
|
"valid_targets_mean": 4164.4,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 5.701754385964913,
|
|
"grad_norm": 0.45171890910506723,
|
|
"learning_rate": 4.056587617863825e-06,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13288962841033936,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4949.0,
|
|
"valid_targets_min": 1949
|
|
},
|
|
{
|
|
"epoch": 5.709728867623604,
|
|
"grad_norm": 0.8054658993692118,
|
|
"learning_rate": 4.008694837247345e-06,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15030181407928467,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3371.1,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 5.717703349282297,
|
|
"grad_norm": 0.4633402688895893,
|
|
"learning_rate": 3.961054945026674e-06,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.118366539478302,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4758.7,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 5.725677830940989,
|
|
"grad_norm": 0.5133518729532466,
|
|
"learning_rate": 3.913668694584705e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14707781374454498,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4438.4,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 5.733652312599681,
|
|
"grad_norm": 0.48767101612168134,
|
|
"learning_rate": 3.866536835293227e-06,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13892671465873718,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4381.1,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 5.741626794258373,
|
|
"grad_norm": 0.6092459675465707,
|
|
"learning_rate": 3.819660112501053e-06,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13951140642166138,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3673.3,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 5.749601275917065,
|
|
"grad_norm": 0.5600018176954344,
|
|
"learning_rate": 3.773039267522227e-06,
|
|
"loss": 0.1444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14764851331710815,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3343.8,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 5.757575757575758,
|
|
"grad_norm": 0.5467638248935707,
|
|
"learning_rate": 3.72667503762433e-06,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15274560451507568,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4381.1,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 5.76555023923445,
|
|
"grad_norm": 0.5261084070830147,
|
|
"learning_rate": 3.680568156016786e-06,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.148003488779068,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4397.2,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 5.773524720893142,
|
|
"grad_norm": 0.48898573629669023,
|
|
"learning_rate": 3.6347193518392776e-06,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515229344367981,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4443.4,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 5.781499202551834,
|
|
"grad_norm": 0.4955784004050343,
|
|
"learning_rate": 3.58912935015024e-06,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16365936398506165,
|
|
"step": 3625,
|
|
"valid_targets_mean": 4447.0,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 5.7894736842105265,
|
|
"grad_norm": 0.5354224851103092,
|
|
"learning_rate": 3.543798871915367e-06,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12071128189563751,
|
|
"step": 3630,
|
|
"valid_targets_mean": 4032.9,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 5.797448165869218,
|
|
"grad_norm": 0.43982909891623767,
|
|
"learning_rate": 3.498728633996209e-06,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.130459725856781,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4732.8,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 5.8054226475279105,
|
|
"grad_norm": 0.4662791358864798,
|
|
"learning_rate": 3.453919349138859e-06,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14181789755821228,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4711.2,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 5.813397129186603,
|
|
"grad_norm": 0.429871868067878,
|
|
"learning_rate": 3.4093717259626514e-06,
|
|
"loss": 0.1408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11633894592523575,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4756.2,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 5.821371610845295,
|
|
"grad_norm": 0.508122727926778,
|
|
"learning_rate": 3.365086468948988e-06,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15025562047958374,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4223.5,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 5.829346092503988,
|
|
"grad_norm": 0.44098513678875584,
|
|
"learning_rate": 3.321064278430175e-06,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1253623068332672,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4567.9,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 5.837320574162679,
|
|
"grad_norm": 0.4797706599882918,
|
|
"learning_rate": 3.277305850578345e-06,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14150691032409668,
|
|
"step": 3660,
|
|
"valid_targets_mean": 4465.7,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 5.845295055821372,
|
|
"grad_norm": 0.5254674993378927,
|
|
"learning_rate": 3.2338118773944684e-06,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15104851126670837,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3567.0,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 5.853269537480064,
|
|
"grad_norm": 0.7616401268290005,
|
|
"learning_rate": 3.1905830466973975e-06,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13616244494915009,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4769.1,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 5.861244019138756,
|
|
"grad_norm": 0.5902406634650262,
|
|
"learning_rate": 3.14762004211298e-06,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16810569167137146,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3292.2,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 5.869218500797448,
|
|
"grad_norm": 0.45117184562989077,
|
|
"learning_rate": 3.1049235430632696e-06,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13781927525997162,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4636.6,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 5.87719298245614,
|
|
"grad_norm": 0.5252616776971513,
|
|
"learning_rate": 3.062494224755759e-06,
|
|
"loss": 0.1283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12710627913475037,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3951.2,
|
|
"valid_targets_min": 293
|
|
},
|
|
{
|
|
"epoch": 5.885167464114833,
|
|
"grad_norm": 0.5010548890650447,
|
|
"learning_rate": 3.0203327581727195e-06,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391611397266388,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4292.1,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 5.893141945773524,
|
|
"grad_norm": 0.4882128606185147,
|
|
"learning_rate": 2.9784398100605937e-06,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1623266339302063,
|
|
"step": 3695,
|
|
"valid_targets_mean": 5119.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 5.901116427432217,
|
|
"grad_norm": 0.4602714959022085,
|
|
"learning_rate": 2.9368160429194127e-06,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505228728055954,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4960.1,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 5.909090909090909,
|
|
"grad_norm": 0.4898614391622105,
|
|
"learning_rate": 2.895462114992371e-06,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11824354529380798,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3324.9,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 5.917065390749602,
|
|
"grad_norm": 0.5081298996402158,
|
|
"learning_rate": 2.8543786802553943e-06,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13177210092544556,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3510.8,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 5.925039872408293,
|
|
"grad_norm": 0.5087024088955797,
|
|
"learning_rate": 2.813566388406781e-06,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13253457844257355,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3829.1,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 5.9330143540669855,
|
|
"grad_norm": 0.46511476299111676,
|
|
"learning_rate": 2.773025884856957e-06,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1427678018808365,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4314.4,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 5.940988835725678,
|
|
"grad_norm": 0.4877273215151772,
|
|
"learning_rate": 2.7327578107182585e-06,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1322350800037384,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4624.2,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 5.94896331738437,
|
|
"grad_norm": 0.6207666467802064,
|
|
"learning_rate": 2.692762802794775e-06,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19305045902729034,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3293.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.956937799043062,
|
|
"grad_norm": 0.48448380027310656,
|
|
"learning_rate": 2.6530414935723104e-06,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13811522722244263,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4057.9,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 5.964912280701754,
|
|
"grad_norm": 0.47673620082586243,
|
|
"learning_rate": 2.6135945112083506e-06,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12966938316822052,
|
|
"step": 3740,
|
|
"valid_targets_mean": 4303.1,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 5.972886762360447,
|
|
"grad_norm": 0.43740811989492157,
|
|
"learning_rate": 2.574422479522156e-06,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14098475873470306,
|
|
"step": 3745,
|
|
"valid_targets_mean": 5016.1,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 5.980861244019139,
|
|
"grad_norm": 0.5766534224576564,
|
|
"learning_rate": 2.535526017984884e-06,
|
|
"loss": 0.1527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19375832378864288,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3527.2,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 5.988835725677831,
|
|
"grad_norm": 0.5083716544828836,
|
|
"learning_rate": 2.4969057417097807e-06,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12867873907089233,
|
|
"step": 3755,
|
|
"valid_targets_mean": 4092.6,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 5.996810207336523,
|
|
"grad_norm": 0.5879766550187514,
|
|
"learning_rate": 2.458562261442483e-06,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18230052292346954,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3517.2,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 6.0047846889952154,
|
|
"grad_norm": 0.4867791254000974,
|
|
"learning_rate": 2.4204961835513263e-06,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15070700645446777,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3920.6,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 6.012759170653908,
|
|
"grad_norm": 0.4576111437935239,
|
|
"learning_rate": 2.3827081100177797e-06,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1281348168849945,
|
|
"step": 3770,
|
|
"valid_targets_mean": 4359.0,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 6.020733652312599,
|
|
"grad_norm": 0.4513964755180104,
|
|
"learning_rate": 2.3451986384269266e-06,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14962506294250488,
|
|
"step": 3775,
|
|
"valid_targets_mean": 4804.0,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 6.028708133971292,
|
|
"grad_norm": 0.4885503687934067,
|
|
"learning_rate": 2.307968361957993e-06,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1187099814414978,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3923.4,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 6.036682615629984,
|
|
"grad_norm": 0.49937332444021726,
|
|
"learning_rate": 2.2710178693749805e-06,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1488434374332428,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4948.9,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 6.044657097288677,
|
|
"grad_norm": 0.5238418889893515,
|
|
"learning_rate": 2.2343477450173665e-06,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15195894241333008,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3776.6,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 6.052631578947368,
|
|
"grad_norm": 0.4780779436450832,
|
|
"learning_rate": 2.197958568790839e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12773722410202026,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4244.6,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 6.0606060606060606,
|
|
"grad_norm": 0.525367580269447,
|
|
"learning_rate": 2.161850916158148e-06,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10581042617559433,
|
|
"step": 3800,
|
|
"valid_targets_mean": 4312.6,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 6.068580542264753,
|
|
"grad_norm": 0.6386288261314822,
|
|
"learning_rate": 2.1260253581299996e-06,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14499042928218842,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4119.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 6.076555023923445,
|
|
"grad_norm": 0.46788217409542343,
|
|
"learning_rate": 2.0904824612560046e-06,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12945741415023804,
|
|
"step": 3810,
|
|
"valid_targets_mean": 4809.6,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 6.084529505582137,
|
|
"grad_norm": 0.5037352456345322,
|
|
"learning_rate": 2.0552227876157536e-06,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13779643177986145,
|
|
"step": 3815,
|
|
"valid_targets_mean": 4907.7,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 6.092503987240829,
|
|
"grad_norm": 0.4837606313795429,
|
|
"learning_rate": 2.020246894809912e-06,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12583968043327332,
|
|
"step": 3820,
|
|
"valid_targets_mean": 4317.7,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 6.100478468899522,
|
|
"grad_norm": 0.8361655965084933,
|
|
"learning_rate": 1.9855553359513836e-06,
|
|
"loss": 0.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11619827896356583,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3825.2,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 6.108452950558214,
|
|
"grad_norm": 0.4862272715590184,
|
|
"learning_rate": 1.9511486596566054e-06,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13793030381202698,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4142.4,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 6.116427432216906,
|
|
"grad_norm": 0.43886017634259966,
|
|
"learning_rate": 1.917027410036825e-06,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12418035417795181,
|
|
"step": 3835,
|
|
"valid_targets_mean": 5001.1,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 6.124401913875598,
|
|
"grad_norm": 0.5338515679927818,
|
|
"learning_rate": 1.8831921266895348e-06,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1431020200252533,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3957.2,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 6.1323763955342905,
|
|
"grad_norm": 0.4652532059140747,
|
|
"learning_rate": 1.8496433446899197e-06,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12275486439466476,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4142.4,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 6.140350877192983,
|
|
"grad_norm": 0.4748266800666233,
|
|
"learning_rate": 1.8163815945823881e-06,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.131401926279068,
|
|
"step": 3850,
|
|
"valid_targets_mean": 5186.0,
|
|
"valid_targets_min": 3113
|
|
},
|
|
{
|
|
"epoch": 6.148325358851674,
|
|
"grad_norm": 0.5276744500841081,
|
|
"learning_rate": 1.7834074023722082e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13628077507019043,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3653.4,
|
|
"valid_targets_min": 305
|
|
},
|
|
{
|
|
"epoch": 6.156299840510367,
|
|
"grad_norm": 0.43614877732232665,
|
|
"learning_rate": 1.7507212895171632e-06,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1286666840314865,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5135.9,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 6.164274322169059,
|
|
"grad_norm": 0.5221782922577798,
|
|
"learning_rate": 1.7183237729193081e-06,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15163499116897583,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4744.5,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 6.172248803827751,
|
|
"grad_norm": 0.48654629519289816,
|
|
"learning_rate": 1.6862153649168211e-06,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13112099468708038,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4215.8,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 6.180223285486443,
|
|
"grad_norm": 0.467425389107099,
|
|
"learning_rate": 1.6543965732758737e-06,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12230046093463898,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4817.2,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 6.188197767145136,
|
|
"grad_norm": 0.4736792340558681,
|
|
"learning_rate": 1.6228679011826032e-06,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14318901300430298,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4379.9,
|
|
"valid_targets_min": 2010
|
|
},
|
|
{
|
|
"epoch": 6.196172248803828,
|
|
"grad_norm": 0.47726476184015376,
|
|
"learning_rate": 1.591629847235172e-06,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13036486506462097,
|
|
"step": 3885,
|
|
"valid_targets_mean": 4381.5,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 6.2041467304625195,
|
|
"grad_norm": 0.5925176153243794,
|
|
"learning_rate": 1.5606829054358686e-06,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1272105723619461,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4618.2,
|
|
"valid_targets_min": 1813
|
|
},
|
|
{
|
|
"epoch": 6.212121212121212,
|
|
"grad_norm": 0.5138056978357366,
|
|
"learning_rate": 1.5300275651832963e-06,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14890676736831665,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3916.7,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 6.220095693779904,
|
|
"grad_norm": 0.5228495093455434,
|
|
"learning_rate": 1.499664311264648e-06,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12577253580093384,
|
|
"step": 3900,
|
|
"valid_targets_mean": 4215.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 6.228070175438597,
|
|
"grad_norm": 0.5752629328809632,
|
|
"learning_rate": 1.4695936238480135e-06,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14973625540733337,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3516.6,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 6.236044657097288,
|
|
"grad_norm": 0.45507632902527884,
|
|
"learning_rate": 1.4398159784748144e-06,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14144280552864075,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4764.2,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 6.244019138755981,
|
|
"grad_norm": 0.4960342085161718,
|
|
"learning_rate": 1.4103318460522598e-06,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386122703552246,
|
|
"step": 3915,
|
|
"valid_targets_mean": 4583.9,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 6.251993620414673,
|
|
"grad_norm": 0.5342458624866506,
|
|
"learning_rate": 1.3811416928459177e-06,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12849336862564087,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3761.2,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 6.2599681020733655,
|
|
"grad_norm": 0.45948214829535294,
|
|
"learning_rate": 1.3522459804723353e-06,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1402260959148407,
|
|
"step": 3925,
|
|
"valid_targets_mean": 4882.9,
|
|
"valid_targets_min": 1906
|
|
},
|
|
{
|
|
"epoch": 6.267942583732057,
|
|
"grad_norm": 0.5366692319676248,
|
|
"learning_rate": 1.3236451658917293e-06,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09341390430927277,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3883.6,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 6.2759170653907494,
|
|
"grad_norm": 0.48993177041174607,
|
|
"learning_rate": 1.2953397014007728e-06,
|
|
"loss": 0.1242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12492413818836212,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4239.9,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 6.283891547049442,
|
|
"grad_norm": 0.4828667740590487,
|
|
"learning_rate": 1.2673300346254447e-06,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18056991696357727,
|
|
"step": 3940,
|
|
"valid_targets_mean": 5296.0,
|
|
"valid_targets_min": 2186
|
|
},
|
|
{
|
|
"epoch": 6.291866028708134,
|
|
"grad_norm": 0.49854455271607523,
|
|
"learning_rate": 1.239616608513925e-06,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1370605230331421,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4469.8,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 6.299840510366826,
|
|
"grad_norm": 0.46533138364576815,
|
|
"learning_rate": 1.2121998613296259e-06,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15436951816082,
|
|
"step": 3950,
|
|
"valid_targets_mean": 5061.5,
|
|
"valid_targets_min": 2240
|
|
},
|
|
{
|
|
"epoch": 6.307814992025518,
|
|
"grad_norm": 0.4546974360288042,
|
|
"learning_rate": 1.1850802266442396e-06,
|
|
"loss": 0.1376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12396486103534698,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4702.3,
|
|
"valid_targets_min": 2541
|
|
},
|
|
{
|
|
"epoch": 6.315789473684211,
|
|
"grad_norm": 0.49128726765041997,
|
|
"learning_rate": 1.1582581333308784e-06,
|
|
"loss": 0.1385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1290614753961563,
|
|
"step": 3960,
|
|
"valid_targets_mean": 3849.6,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 6.323763955342903,
|
|
"grad_norm": 0.5173256070147583,
|
|
"learning_rate": 1.1317340055573122e-06,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1329921931028366,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4107.6,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 6.3317384370015946,
|
|
"grad_norm": 0.4827647304172112,
|
|
"learning_rate": 1.1055082627792357e-06,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14561286568641663,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4779.8,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 6.339712918660287,
|
|
"grad_norm": 0.5559417535198338,
|
|
"learning_rate": 1.0795813197336602e-06,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14679063856601715,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3389.5,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 6.347687400318979,
|
|
"grad_norm": 0.5369002757249298,
|
|
"learning_rate": 1.0539535864323391e-06,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12552815675735474,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3821.6,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 6.355661881977672,
|
|
"grad_norm": 0.4986520370864808,
|
|
"learning_rate": 1.0286254681552777e-06,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13379716873168945,
|
|
"step": 3985,
|
|
"valid_targets_mean": 4038.2,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 6.363636363636363,
|
|
"grad_norm": 0.49981705966319073,
|
|
"learning_rate": 1.0035973654443466e-06,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15339066088199615,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4539.6,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 6.371610845295056,
|
|
"grad_norm": 0.5012388371558716,
|
|
"learning_rate": 9.788696740969295e-07,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12608125805854797,
|
|
"step": 3995,
|
|
"valid_targets_mean": 3913.4,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 6.379585326953748,
|
|
"grad_norm": 0.45975589001358835,
|
|
"learning_rate": 9.544427851596661e-07,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11922608315944672,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4402.2,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 6.3875598086124405,
|
|
"grad_norm": 0.5168179832467762,
|
|
"learning_rate": 9.303170849222764e-07,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12895463407039642,
|
|
"step": 4005,
|
|
"valid_targets_mean": 3488.1,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 6.395534290271132,
|
|
"grad_norm": 0.6273222457442021,
|
|
"learning_rate": 9.064929549114421e-07,
|
|
"loss": 0.1385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1379334032535553,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4525.6,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 6.4035087719298245,
|
|
"grad_norm": 0.5010611087378276,
|
|
"learning_rate": 8.829707718847835e-07,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13202281296253204,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4277.4,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 6.411483253588517,
|
|
"grad_norm": 0.5269589030705695,
|
|
"learning_rate": 8.597509078248923e-07,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15238788723945618,
|
|
"step": 4020,
|
|
"valid_targets_mean": 4262.2,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 6.419457735247209,
|
|
"grad_norm": 0.4457295851779031,
|
|
"learning_rate": 8.368337299334461e-07,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12342938780784607,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4488.1,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 6.427432216905901,
|
|
"grad_norm": 0.46816589487899885,
|
|
"learning_rate": 8.142196006254144e-07,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14023220539093018,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4224.9,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 6.435406698564593,
|
|
"grad_norm": 0.4961712962184903,
|
|
"learning_rate": 7.919088775233264e-07,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1331174373626709,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3866.6,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 6.443381180223286,
|
|
"grad_norm": 0.43934117839308373,
|
|
"learning_rate": 7.699019134515917e-07,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12831982970237732,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4917.0,
|
|
"valid_targets_min": 1830
|
|
},
|
|
{
|
|
"epoch": 6.451355661881978,
|
|
"grad_norm": 0.4463118268379071,
|
|
"learning_rate": 7.48199056430956e-07,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1254979372024536,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5217.8,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 6.45933014354067,
|
|
"grad_norm": 0.49509104874797416,
|
|
"learning_rate": 7.268006496729762e-07,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13959988951683044,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4100.3,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 6.467304625199362,
|
|
"grad_norm": 0.4972485298500034,
|
|
"learning_rate": 7.057070315745851e-07,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15970812737941742,
|
|
"step": 4055,
|
|
"valid_targets_mean": 4283.6,
|
|
"valid_targets_min": 828
|
|
},
|
|
{
|
|
"epoch": 6.475279106858054,
|
|
"grad_norm": 0.5015822648586085,
|
|
"learning_rate": 6.849185357127686e-07,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1395566314458847,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3975.1,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 6.483253588516747,
|
|
"grad_norm": 0.42932158019971345,
|
|
"learning_rate": 6.64435490839257e-07,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13057997822761536,
|
|
"step": 4065,
|
|
"valid_targets_mean": 4823.8,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 6.491228070175438,
|
|
"grad_norm": 0.5363904860298843,
|
|
"learning_rate": 6.442582208753578e-07,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16167891025543213,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4411.5,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 6.499202551834131,
|
|
"grad_norm": 0.6139438948970695,
|
|
"learning_rate": 6.243870449068068e-07,
|
|
"loss": 0.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1393304467201233,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4442.0,
|
|
"valid_targets_min": 2492
|
|
},
|
|
{
|
|
"epoch": 6.507177033492823,
|
|
"grad_norm": 0.4910147165658172,
|
|
"learning_rate": 6.048222771787382e-07,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14208069443702698,
|
|
"step": 4080,
|
|
"valid_targets_mean": 4303.0,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 6.515151515151516,
|
|
"grad_norm": 0.49038920280527987,
|
|
"learning_rate": 5.85564227090707e-07,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15606990456581116,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5180.6,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 6.523125996810207,
|
|
"grad_norm": 0.4989838466718204,
|
|
"learning_rate": 5.666131991917989e-07,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1295931339263916,
|
|
"step": 4090,
|
|
"valid_targets_mean": 4469.1,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 6.5311004784688995,
|
|
"grad_norm": 0.47172806775296705,
|
|
"learning_rate": 5.479694931758194e-07,
|
|
"loss": 0.1259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11878771334886551,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4374.2,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 6.539074960127592,
|
|
"grad_norm": 0.5099208453251617,
|
|
"learning_rate": 5.296334038765483e-07,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12706753611564636,
|
|
"step": 4100,
|
|
"valid_targets_mean": 4058.2,
|
|
"valid_targets_min": 1949
|
|
},
|
|
{
|
|
"epoch": 6.5470494417862835,
|
|
"grad_norm": 0.4731797815398483,
|
|
"learning_rate": 5.116052212630696e-07,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16153037548065186,
|
|
"step": 4105,
|
|
"valid_targets_mean": 5232.8,
|
|
"valid_targets_min": 2155
|
|
},
|
|
{
|
|
"epoch": 6.555023923444976,
|
|
"grad_norm": 0.4723995414308366,
|
|
"learning_rate": 4.938852304352026e-07,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12810708582401276,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4324.7,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 6.562998405103668,
|
|
"grad_norm": 0.5362997617672288,
|
|
"learning_rate": 4.7647371161898547e-07,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.137948676943779,
|
|
"step": 4115,
|
|
"valid_targets_mean": 4449.2,
|
|
"valid_targets_min": 2269
|
|
},
|
|
{
|
|
"epoch": 6.570972886762361,
|
|
"grad_norm": 0.5638870949882109,
|
|
"learning_rate": 4.593709401622359e-07,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14873097836971283,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3944.8,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 6.578947368421053,
|
|
"grad_norm": 0.5184277775628652,
|
|
"learning_rate": 4.425771865302153e-07,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14540082216262817,
|
|
"step": 4125,
|
|
"valid_targets_mean": 4518.7,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 6.586921850079745,
|
|
"grad_norm": 0.5642098295207258,
|
|
"learning_rate": 4.2609271630133174e-07,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126871258020401,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3508.1,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 6.594896331738437,
|
|
"grad_norm": 0.4989184106262872,
|
|
"learning_rate": 4.099177901629525e-07,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13896486163139343,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4295.7,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 6.6028708133971294,
|
|
"grad_norm": 0.4585773777096482,
|
|
"learning_rate": 3.9405266390727836e-07,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12985950708389282,
|
|
"step": 4140,
|
|
"valid_targets_mean": 4850.8,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 6.610845295055821,
|
|
"grad_norm": 0.5419963959512158,
|
|
"learning_rate": 3.7849758842729344e-07,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376979649066925,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3609.9,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 6.618819776714513,
|
|
"grad_norm": 0.4501985169540063,
|
|
"learning_rate": 3.632528097128085e-07,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11680396646261215,
|
|
"step": 4150,
|
|
"valid_targets_mean": 4162.4,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 6.626794258373206,
|
|
"grad_norm": 0.5679601801896299,
|
|
"learning_rate": 3.48318568846564e-07,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12601180374622345,
|
|
"step": 4155,
|
|
"valid_targets_mean": 3427.5,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 6.634768740031898,
|
|
"grad_norm": 0.6824361969704661,
|
|
"learning_rate": 3.336951020004087e-07,
|
|
"loss": 0.1245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12077543884515762,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3663.2,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 6.64274322169059,
|
|
"grad_norm": 0.5377388200983646,
|
|
"learning_rate": 3.1938264043158694e-07,
|
|
"loss": 0.1419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1328967809677124,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3592.2,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 6.650717703349282,
|
|
"grad_norm": 0.4615393565403878,
|
|
"learning_rate": 3.05381410479062e-07,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442861109972,
|
|
"step": 4170,
|
|
"valid_targets_mean": 4682.3,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 6.6586921850079746,
|
|
"grad_norm": 0.4874212095671253,
|
|
"learning_rate": 2.916916335599407e-07,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12358016520738602,
|
|
"step": 4175,
|
|
"valid_targets_mean": 4667.3,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.4915382837106488,
|
|
"learning_rate": 2.783135261659831e-07,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13244980573654175,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4289.5,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 6.6746411483253585,
|
|
"grad_norm": 0.5004882013247511,
|
|
"learning_rate": 2.6524729986016293e-07,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12342001497745514,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3745.0,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 6.682615629984051,
|
|
"grad_norm": 0.48233043623349514,
|
|
"learning_rate": 2.524931612733328e-07,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1261105090379715,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3988.2,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 6.690590111642743,
|
|
"grad_norm": 0.4883877261536948,
|
|
"learning_rate": 2.400513121009529e-07,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1644919216632843,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4774.5,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 6.698564593301436,
|
|
"grad_norm": 0.4937110841067935,
|
|
"learning_rate": 2.279219490998985e-07,
|
|
"loss": 0.1545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15075641870498657,
|
|
"step": 4200,
|
|
"valid_targets_mean": 4449.6,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 6.706539074960127,
|
|
"grad_norm": 0.48028390731089865,
|
|
"learning_rate": 2.161052640853578e-07,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11133769899606705,
|
|
"step": 4205,
|
|
"valid_targets_mean": 3895.3,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 6.71451355661882,
|
|
"grad_norm": 0.44217210446570454,
|
|
"learning_rate": 2.0460144392778768e-07,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12350152432918549,
|
|
"step": 4210,
|
|
"valid_targets_mean": 4985.4,
|
|
"valid_targets_min": 1847
|
|
},
|
|
{
|
|
"epoch": 6.722488038277512,
|
|
"grad_norm": 0.4493621600448026,
|
|
"learning_rate": 1.9341067054996277e-07,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262882947921753,
|
|
"step": 4215,
|
|
"valid_targets_mean": 4532.6,
|
|
"valid_targets_min": 2126
|
|
},
|
|
{
|
|
"epoch": 6.7304625199362045,
|
|
"grad_norm": 0.5262135692115378,
|
|
"learning_rate": 1.8253312092409992e-07,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13845784962177277,
|
|
"step": 4220,
|
|
"valid_targets_mean": 4281.3,
|
|
"valid_targets_min": 899
|
|
},
|
|
{
|
|
"epoch": 6.738437001594896,
|
|
"grad_norm": 0.4588485180792467,
|
|
"learning_rate": 1.7196896706906273e-07,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13960394263267517,
|
|
"step": 4225,
|
|
"valid_targets_mean": 4976.4,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 6.746411483253588,
|
|
"grad_norm": 0.5228563796698417,
|
|
"learning_rate": 1.6171837604762597e-07,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14636912941932678,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4578.1,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 6.754385964912281,
|
|
"grad_norm": 0.5145311755308797,
|
|
"learning_rate": 1.5178150996385755e-07,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13876822590827942,
|
|
"step": 4235,
|
|
"valid_targets_mean": 3597.2,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 6.762360446570973,
|
|
"grad_norm": 0.4886129304017077,
|
|
"learning_rate": 1.421585259605318e-07,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1215449869632721,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4064.8,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 6.770334928229665,
|
|
"grad_norm": 0.5076696174736534,
|
|
"learning_rate": 1.3284957621666039e-07,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12839660048484802,
|
|
"step": 4245,
|
|
"valid_targets_mean": 4979.1,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 6.778309409888357,
|
|
"grad_norm": 0.453167339546044,
|
|
"learning_rate": 1.2385480794507853e-07,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14902392029762268,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4552.2,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 6.78628389154705,
|
|
"grad_norm": 0.4801594917342421,
|
|
"learning_rate": 1.1517436339011589e-07,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13310003280639648,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4099.1,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 6.794258373205742,
|
|
"grad_norm": 0.5276813767704727,
|
|
"learning_rate": 1.0680837982535607e-07,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14655044674873352,
|
|
"step": 4260,
|
|
"valid_targets_mean": 4145.2,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 6.8022328548644335,
|
|
"grad_norm": 0.5369046398498746,
|
|
"learning_rate": 9.875698955145174e-08,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12759120762348175,
|
|
"step": 4265,
|
|
"valid_targets_mean": 3829.4,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 6.810207336523126,
|
|
"grad_norm": 0.4985523464960007,
|
|
"learning_rate": 9.102031989404403e-08,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15829971432685852,
|
|
"step": 4270,
|
|
"valid_targets_mean": 4364.6,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 6.818181818181818,
|
|
"grad_norm": 0.42585764191369635,
|
|
"learning_rate": 8.359849320174196e-08,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12608718872070312,
|
|
"step": 4275,
|
|
"valid_targets_mean": 5336.8,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 6.826156299840511,
|
|
"grad_norm": 0.5587893061139727,
|
|
"learning_rate": 7.649162684419731e-08,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16089703142642975,
|
|
"step": 4280,
|
|
"valid_targets_mean": 3538.2,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 6.834130781499202,
|
|
"grad_norm": 0.4820641038171948,
|
|
"learning_rate": 6.969983321023499e-08,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14253315329551697,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4188.0,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 6.842105263157895,
|
|
"grad_norm": 0.539159626673032,
|
|
"learning_rate": 6.322321970608337e-08,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16274034976959229,
|
|
"step": 4290,
|
|
"valid_targets_mean": 4119.6,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 6.850079744816587,
|
|
"grad_norm": 0.500967936387429,
|
|
"learning_rate": 5.7061888753677796e-08,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.160202294588089,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4478.0,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 6.858054226475279,
|
|
"grad_norm": 0.5024372674631381,
|
|
"learning_rate": 5.121593778903755e-08,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14446912705898285,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4215.1,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 6.866028708133971,
|
|
"grad_norm": 0.47871597136730415,
|
|
"learning_rate": 4.5685459260722544e-08,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12565824389457703,
|
|
"step": 4305,
|
|
"valid_targets_mean": 5086.6,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 6.8740031897926634,
|
|
"grad_norm": 0.4872811181871933,
|
|
"learning_rate": 4.047054062837452e-08,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14549192786216736,
|
|
"step": 4310,
|
|
"valid_targets_mean": 4505.1,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 6.881977671451356,
|
|
"grad_norm": 0.47460986584548537,
|
|
"learning_rate": 3.55712643613404e-08,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14065739512443542,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4180.5,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 6.889952153110048,
|
|
"grad_norm": 0.5117167735614471,
|
|
"learning_rate": 3.0987707937351066e-08,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1402340829372406,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4187.2,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 6.89792663476874,
|
|
"grad_norm": 0.4687594629345176,
|
|
"learning_rate": 2.6719943841311268e-08,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12454405426979065,
|
|
"step": 4325,
|
|
"valid_targets_mean": 4335.8,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 6.905901116427432,
|
|
"grad_norm": 0.4727410371483239,
|
|
"learning_rate": 2.2768039564151635e-08,
|
|
"loss": 0.129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12318654358386993,
|
|
"step": 4330,
|
|
"valid_targets_mean": 4585.9,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 6.913875598086125,
|
|
"grad_norm": 0.511028528734843,
|
|
"learning_rate": 1.913205760175174e-08,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12416049838066101,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3569.9,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 6.921850079744816,
|
|
"grad_norm": 0.4427706778733975,
|
|
"learning_rate": 1.5812055453963136e-08,
|
|
"loss": 0.129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14769014716148376,
|
|
"step": 4340,
|
|
"valid_targets_mean": 5517.8,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 6.9298245614035086,
|
|
"grad_norm": 0.505843379490127,
|
|
"learning_rate": 1.280808562369229e-08,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10900227725505829,
|
|
"step": 4345,
|
|
"valid_targets_mean": 3826.6,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 6.937799043062201,
|
|
"grad_norm": 0.510889915065178,
|
|
"learning_rate": 1.01201956160768e-08,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14890338480472565,
|
|
"step": 4350,
|
|
"valid_targets_mean": 4390.7,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 6.945773524720893,
|
|
"grad_norm": 0.492856358469063,
|
|
"learning_rate": 7.74842793772601e-09,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14718027412891388,
|
|
"step": 4355,
|
|
"valid_targets_mean": 4542.9,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 6.953748006379586,
|
|
"grad_norm": 0.5541187099187123,
|
|
"learning_rate": 5.692820096054874e-09,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15420426428318024,
|
|
"step": 4360,
|
|
"valid_targets_mean": 5050.1,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 6.961722488038277,
|
|
"grad_norm": 0.43930127380029793,
|
|
"learning_rate": 3.9534045986888706e-09,
|
|
"loss": 0.1359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11261337995529175,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4508.2,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 6.96969696969697,
|
|
"grad_norm": 0.4555404101937215,
|
|
"learning_rate": 2.530208952953306e-09,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11587637662887573,
|
|
"step": 4370,
|
|
"valid_targets_mean": 4091.3,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 6.977671451355662,
|
|
"grad_norm": 0.4826546842830042,
|
|
"learning_rate": 1.4232556654314445e-09,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1243821233510971,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4191.7,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 6.985645933014354,
|
|
"grad_norm": 0.4219476628421706,
|
|
"learning_rate": 6.325622416136767e-10,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11712378263473511,
|
|
"step": 4380,
|
|
"valid_targets_mean": 4918.1,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 6.993620414673046,
|
|
"grad_norm": 0.4931328910537562,
|
|
"learning_rate": 1.581411856199644e-10,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11857933551073074,
|
|
"step": 4385,
|
|
"valid_targets_mean": 4174.8,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12271773815155029,
|
|
"step": 4389,
|
|
"total_flos": 1511188165689344.0,
|
|
"train_loss": 0.1804693197981321,
|
|
"train_runtime": 26261.3176,
|
|
"train_samples_per_second": 2.67,
|
|
"train_steps_per_second": 0.167,
|
|
"valid_targets_mean": 4882.1,
|
|
"valid_targets_min": 1664
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4389,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1511188165689344.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|