Model: laion/exp-syh-r2egym-swesmith-mixed_glm_4_7_traces_jupiter_cleaned Source: Original Platform
9717 lines
269 KiB
JSON
9717 lines
269 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4396,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00796812749003984,
|
|
"grad_norm": 24.37495754265772,
|
|
"learning_rate": 3.6363636363636366e-07,
|
|
"loss": 0.9065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4462226629257202,
|
|
"step": 5,
|
|
"valid_targets_mean": 3722.1,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 0.01593625498007968,
|
|
"grad_norm": 20.77111310942199,
|
|
"learning_rate": 8.181818181818182e-07,
|
|
"loss": 0.8968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42043066024780273,
|
|
"step": 10,
|
|
"valid_targets_mean": 4730.2,
|
|
"valid_targets_min": 3538
|
|
},
|
|
{
|
|
"epoch": 0.02390438247011952,
|
|
"grad_norm": 16.688145610134242,
|
|
"learning_rate": 1.2727272727272728e-06,
|
|
"loss": 0.862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4235057234764099,
|
|
"step": 15,
|
|
"valid_targets_mean": 4623.1,
|
|
"valid_targets_min": 3361
|
|
},
|
|
{
|
|
"epoch": 0.03187250996015936,
|
|
"grad_norm": 14.122564484492695,
|
|
"learning_rate": 1.7272727272727275e-06,
|
|
"loss": 0.814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44331303238868713,
|
|
"step": 20,
|
|
"valid_targets_mean": 4180.2,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 0.0398406374501992,
|
|
"grad_norm": 7.696548351443415,
|
|
"learning_rate": 2.181818181818182e-06,
|
|
"loss": 0.7668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32791611552238464,
|
|
"step": 25,
|
|
"valid_targets_mean": 3369.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 0.04780876494023904,
|
|
"grad_norm": 4.864489337884109,
|
|
"learning_rate": 2.6363636363636364e-06,
|
|
"loss": 0.7323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34147197008132935,
|
|
"step": 30,
|
|
"valid_targets_mean": 3219.8,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 0.055776892430278883,
|
|
"grad_norm": 2.65286621457725,
|
|
"learning_rate": 3.090909090909091e-06,
|
|
"loss": 0.7072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.310516357421875,
|
|
"step": 35,
|
|
"valid_targets_mean": 3807.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 0.06374501992031872,
|
|
"grad_norm": 2.0820354739310676,
|
|
"learning_rate": 3.5454545454545458e-06,
|
|
"loss": 0.6397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30131199955940247,
|
|
"step": 40,
|
|
"valid_targets_mean": 3464.8,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 0.07171314741035857,
|
|
"grad_norm": 1.5126088563806854,
|
|
"learning_rate": 4.000000000000001e-06,
|
|
"loss": 0.6265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4162129759788513,
|
|
"step": 45,
|
|
"valid_targets_mean": 4792.8,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 0.0796812749003984,
|
|
"grad_norm": 1.218992918685487,
|
|
"learning_rate": 4.454545454545455e-06,
|
|
"loss": 0.5906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3314875662326813,
|
|
"step": 50,
|
|
"valid_targets_mean": 4466.9,
|
|
"valid_targets_min": 2862
|
|
},
|
|
{
|
|
"epoch": 0.08764940239043825,
|
|
"grad_norm": 0.996370822345656,
|
|
"learning_rate": 4.90909090909091e-06,
|
|
"loss": 0.5989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32610633969306946,
|
|
"step": 55,
|
|
"valid_targets_mean": 4328.5,
|
|
"valid_targets_min": 3474
|
|
},
|
|
{
|
|
"epoch": 0.09561752988047809,
|
|
"grad_norm": 0.9322106306908817,
|
|
"learning_rate": 5.3636363636363645e-06,
|
|
"loss": 0.5821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26030367612838745,
|
|
"step": 60,
|
|
"valid_targets_mean": 3929.0,
|
|
"valid_targets_min": 3433
|
|
},
|
|
{
|
|
"epoch": 0.10358565737051793,
|
|
"grad_norm": 0.8948869786691818,
|
|
"learning_rate": 5.8181818181818185e-06,
|
|
"loss": 0.5673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33863985538482666,
|
|
"step": 65,
|
|
"valid_targets_mean": 4560.4,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.11155378486055777,
|
|
"grad_norm": 0.8563354410048826,
|
|
"learning_rate": 6.2727272727272734e-06,
|
|
"loss": 0.5388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24827717244625092,
|
|
"step": 70,
|
|
"valid_targets_mean": 3210.8,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 0.11952191235059761,
|
|
"grad_norm": 0.844051842541015,
|
|
"learning_rate": 6.7272727272727275e-06,
|
|
"loss": 0.5336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24409377574920654,
|
|
"step": 75,
|
|
"valid_targets_mean": 3379.8,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 0.12749003984063745,
|
|
"grad_norm": 0.7965069165140631,
|
|
"learning_rate": 7.181818181818182e-06,
|
|
"loss": 0.5445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620013952255249,
|
|
"step": 80,
|
|
"valid_targets_mean": 3820.8,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 0.13545816733067728,
|
|
"grad_norm": 0.9317949602217058,
|
|
"learning_rate": 7.636363636363638e-06,
|
|
"loss": 0.5108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24453184008598328,
|
|
"step": 85,
|
|
"valid_targets_mean": 2961.5,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 0.14342629482071714,
|
|
"grad_norm": 0.7842445927901606,
|
|
"learning_rate": 8.090909090909092e-06,
|
|
"loss": 0.4959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22229188680648804,
|
|
"step": 90,
|
|
"valid_targets_mean": 3972.0,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 0.15139442231075698,
|
|
"grad_norm": 0.701161735833168,
|
|
"learning_rate": 8.545454545454546e-06,
|
|
"loss": 0.4978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1909436583518982,
|
|
"step": 95,
|
|
"valid_targets_mean": 3284.6,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 0.1593625498007968,
|
|
"grad_norm": 0.7184451189685676,
|
|
"learning_rate": 9e-06,
|
|
"loss": 0.4816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2568347156047821,
|
|
"step": 100,
|
|
"valid_targets_mean": 4385.9,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 0.16733067729083664,
|
|
"grad_norm": 0.7312987260079192,
|
|
"learning_rate": 9.454545454545456e-06,
|
|
"loss": 0.4759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2005646824836731,
|
|
"step": 105,
|
|
"valid_targets_mean": 3237.5,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 0.1752988047808765,
|
|
"grad_norm": 0.7873006873988114,
|
|
"learning_rate": 9.90909090909091e-06,
|
|
"loss": 0.4655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670203447341919,
|
|
"step": 110,
|
|
"valid_targets_mean": 4206.8,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.18326693227091634,
|
|
"grad_norm": 0.7473638820821069,
|
|
"learning_rate": 1.0363636363636364e-05,
|
|
"loss": 0.4568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21891140937805176,
|
|
"step": 115,
|
|
"valid_targets_mean": 4152.9,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 0.19123505976095617,
|
|
"grad_norm": 0.8257698623740443,
|
|
"learning_rate": 1.0818181818181818e-05,
|
|
"loss": 0.4528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18611110746860504,
|
|
"step": 120,
|
|
"valid_targets_mean": 3085.9,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 0.199203187250996,
|
|
"grad_norm": 0.8625112182566368,
|
|
"learning_rate": 1.1272727272727272e-05,
|
|
"loss": 0.4818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30518537759780884,
|
|
"step": 125,
|
|
"valid_targets_mean": 3337.6,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 0.20717131474103587,
|
|
"grad_norm": 0.8399932702279239,
|
|
"learning_rate": 1.1727272727272728e-05,
|
|
"loss": 0.4572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19899678230285645,
|
|
"step": 130,
|
|
"valid_targets_mean": 3083.8,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 0.2151394422310757,
|
|
"grad_norm": 0.7197396607669662,
|
|
"learning_rate": 1.2181818181818184e-05,
|
|
"loss": 0.4459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1982295960187912,
|
|
"step": 135,
|
|
"valid_targets_mean": 3466.6,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.22310756972111553,
|
|
"grad_norm": 0.7587803874195033,
|
|
"learning_rate": 1.2636363636363638e-05,
|
|
"loss": 0.4371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1760505735874176,
|
|
"step": 140,
|
|
"valid_targets_mean": 2922.8,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 0.23107569721115537,
|
|
"grad_norm": 0.9910760086364537,
|
|
"learning_rate": 1.3090909090909092e-05,
|
|
"loss": 0.4506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816995680332184,
|
|
"step": 145,
|
|
"valid_targets_mean": 4115.8,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 0.23904382470119523,
|
|
"grad_norm": 0.7363394591670118,
|
|
"learning_rate": 1.3545454545454546e-05,
|
|
"loss": 0.4451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24589544534683228,
|
|
"step": 150,
|
|
"valid_targets_mean": 3740.2,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 0.24701195219123506,
|
|
"grad_norm": 0.8608747635138786,
|
|
"learning_rate": 1.4e-05,
|
|
"loss": 0.444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1670471429824829,
|
|
"step": 155,
|
|
"valid_targets_mean": 1975.4,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 0.2549800796812749,
|
|
"grad_norm": 0.766843036608719,
|
|
"learning_rate": 1.4454545454545457e-05,
|
|
"loss": 0.4459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2611636817455292,
|
|
"step": 160,
|
|
"valid_targets_mean": 4575.8,
|
|
"valid_targets_min": 3423
|
|
},
|
|
{
|
|
"epoch": 0.26294820717131473,
|
|
"grad_norm": 0.7373314833536057,
|
|
"learning_rate": 1.4909090909090911e-05,
|
|
"loss": 0.4336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21863976120948792,
|
|
"step": 165,
|
|
"valid_targets_mean": 4157.1,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.27091633466135456,
|
|
"grad_norm": 0.7386566425113588,
|
|
"learning_rate": 1.5363636363636365e-05,
|
|
"loss": 0.4242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16179677844047546,
|
|
"step": 170,
|
|
"valid_targets_mean": 2649.9,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 0.2788844621513944,
|
|
"grad_norm": 0.9052696105145834,
|
|
"learning_rate": 1.5818181818181818e-05,
|
|
"loss": 0.442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3230035901069641,
|
|
"step": 175,
|
|
"valid_targets_mean": 3192.4,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 0.2868525896414343,
|
|
"grad_norm": 0.8419040745577814,
|
|
"learning_rate": 1.6272727272727273e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24606087803840637,
|
|
"step": 180,
|
|
"valid_targets_mean": 4607.6,
|
|
"valid_targets_min": 3713
|
|
},
|
|
{
|
|
"epoch": 0.2948207171314741,
|
|
"grad_norm": 0.9473280325221405,
|
|
"learning_rate": 1.672727272727273e-05,
|
|
"loss": 0.4262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1998061090707779,
|
|
"step": 185,
|
|
"valid_targets_mean": 3167.9,
|
|
"valid_targets_min": 250
|
|
},
|
|
{
|
|
"epoch": 0.30278884462151395,
|
|
"grad_norm": 0.8456848959662562,
|
|
"learning_rate": 1.7181818181818185e-05,
|
|
"loss": 0.411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19775201380252838,
|
|
"step": 190,
|
|
"valid_targets_mean": 4045.4,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 0.3107569721115538,
|
|
"grad_norm": 1.0336922885537096,
|
|
"learning_rate": 1.7636363636363637e-05,
|
|
"loss": 0.4189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24612963199615479,
|
|
"step": 195,
|
|
"valid_targets_mean": 3500.5,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 0.3187250996015936,
|
|
"grad_norm": 0.6790199436123496,
|
|
"learning_rate": 1.8090909090909093e-05,
|
|
"loss": 0.4154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26405617594718933,
|
|
"step": 200,
|
|
"valid_targets_mean": 5522.4,
|
|
"valid_targets_min": 4352
|
|
},
|
|
{
|
|
"epoch": 0.32669322709163345,
|
|
"grad_norm": 0.7333217562417574,
|
|
"learning_rate": 1.8545454545454545e-05,
|
|
"loss": 0.4171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23619253933429718,
|
|
"step": 205,
|
|
"valid_targets_mean": 4490.4,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 0.3346613545816733,
|
|
"grad_norm": 0.726066810728795,
|
|
"learning_rate": 1.9e-05,
|
|
"loss": 0.4273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3351441025733948,
|
|
"step": 210,
|
|
"valid_targets_mean": 4791.6,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 0.3426294820717131,
|
|
"grad_norm": 0.6917093562564017,
|
|
"learning_rate": 1.9454545454545457e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1841329038143158,
|
|
"step": 215,
|
|
"valid_targets_mean": 3697.2,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 0.350597609561753,
|
|
"grad_norm": 0.7693607796480706,
|
|
"learning_rate": 1.9909090909090913e-05,
|
|
"loss": 0.412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25000637769699097,
|
|
"step": 220,
|
|
"valid_targets_mean": 4362.8,
|
|
"valid_targets_min": 3874
|
|
},
|
|
{
|
|
"epoch": 0.35856573705179284,
|
|
"grad_norm": 0.8096629223285201,
|
|
"learning_rate": 2.0363636363636365e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22220614552497864,
|
|
"step": 225,
|
|
"valid_targets_mean": 3821.4,
|
|
"valid_targets_min": 275
|
|
},
|
|
{
|
|
"epoch": 0.3665338645418327,
|
|
"grad_norm": 1.0177917321036343,
|
|
"learning_rate": 2.081818181818182e-05,
|
|
"loss": 0.3877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1806771159172058,
|
|
"step": 230,
|
|
"valid_targets_mean": 2983.0,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 0.3745019920318725,
|
|
"grad_norm": 0.7333916228070542,
|
|
"learning_rate": 2.1272727272727276e-05,
|
|
"loss": 0.391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17516730725765228,
|
|
"step": 235,
|
|
"valid_targets_mean": 3388.1,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.38247011952191234,
|
|
"grad_norm": 0.9425301322102779,
|
|
"learning_rate": 2.172727272727273e-05,
|
|
"loss": 0.404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1691473424434662,
|
|
"step": 240,
|
|
"valid_targets_mean": 2758.1,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 0.3904382470119522,
|
|
"grad_norm": 0.7597593582484266,
|
|
"learning_rate": 2.2181818181818184e-05,
|
|
"loss": 0.3992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1942702978849411,
|
|
"step": 245,
|
|
"valid_targets_mean": 3456.2,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 0.398406374501992,
|
|
"grad_norm": 0.7719937467195934,
|
|
"learning_rate": 2.263636363636364e-05,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22239413857460022,
|
|
"step": 250,
|
|
"valid_targets_mean": 3850.1,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.4063745019920319,
|
|
"grad_norm": 0.8051124024795501,
|
|
"learning_rate": 2.3090909090909093e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16510990262031555,
|
|
"step": 255,
|
|
"valid_targets_mean": 3311.1,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 0.41434262948207173,
|
|
"grad_norm": 0.774678875688182,
|
|
"learning_rate": 2.3545454545454548e-05,
|
|
"loss": 0.3969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19007673859596252,
|
|
"step": 260,
|
|
"valid_targets_mean": 3814.5,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 0.42231075697211157,
|
|
"grad_norm": 0.9737556189745716,
|
|
"learning_rate": 2.4e-05,
|
|
"loss": 0.3869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17725446820259094,
|
|
"step": 265,
|
|
"valid_targets_mean": 3379.5,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 0.4302788844621514,
|
|
"grad_norm": 0.6869188610961593,
|
|
"learning_rate": 2.4454545454545456e-05,
|
|
"loss": 0.4012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20431405305862427,
|
|
"step": 270,
|
|
"valid_targets_mean": 4211.4,
|
|
"valid_targets_min": 2762
|
|
},
|
|
{
|
|
"epoch": 0.43824701195219123,
|
|
"grad_norm": 0.7518435455875745,
|
|
"learning_rate": 2.490909090909091e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22350144386291504,
|
|
"step": 275,
|
|
"valid_targets_mean": 5223.8,
|
|
"valid_targets_min": 4425
|
|
},
|
|
{
|
|
"epoch": 0.44621513944223107,
|
|
"grad_norm": 0.6801525553872718,
|
|
"learning_rate": 2.5363636363636364e-05,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21610978245735168,
|
|
"step": 280,
|
|
"valid_targets_mean": 5043.2,
|
|
"valid_targets_min": 4456
|
|
},
|
|
{
|
|
"epoch": 0.4541832669322709,
|
|
"grad_norm": 0.7534382485600102,
|
|
"learning_rate": 2.5818181818181824e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17026452720165253,
|
|
"step": 285,
|
|
"valid_targets_mean": 3922.9,
|
|
"valid_targets_min": 3022
|
|
},
|
|
{
|
|
"epoch": 0.46215139442231074,
|
|
"grad_norm": 0.7600823974106152,
|
|
"learning_rate": 2.6272727272727276e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14615437388420105,
|
|
"step": 290,
|
|
"valid_targets_mean": 2542.1,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 0.4701195219123506,
|
|
"grad_norm": 0.8131679536977819,
|
|
"learning_rate": 2.672727272727273e-05,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18951882421970367,
|
|
"step": 295,
|
|
"valid_targets_mean": 4425.0,
|
|
"valid_targets_min": 2871
|
|
},
|
|
{
|
|
"epoch": 0.47808764940239046,
|
|
"grad_norm": 0.704158682861056,
|
|
"learning_rate": 2.7181818181818184e-05,
|
|
"loss": 0.3817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2157367765903473,
|
|
"step": 300,
|
|
"valid_targets_mean": 4395.0,
|
|
"valid_targets_min": 2776
|
|
},
|
|
{
|
|
"epoch": 0.4860557768924303,
|
|
"grad_norm": 0.7334485862615044,
|
|
"learning_rate": 2.763636363636364e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.149525448679924,
|
|
"step": 305,
|
|
"valid_targets_mean": 3410.2,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.4940239043824701,
|
|
"grad_norm": 0.6398803355562287,
|
|
"learning_rate": 2.8090909090909092e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.155901700258255,
|
|
"step": 310,
|
|
"valid_targets_mean": 3745.0,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 0.50199203187251,
|
|
"grad_norm": 0.7424072063522883,
|
|
"learning_rate": 2.8545454545454548e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1601351946592331,
|
|
"step": 315,
|
|
"valid_targets_mean": 3158.5,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 0.5099601593625498,
|
|
"grad_norm": 0.697644493947305,
|
|
"learning_rate": 2.9e-05,
|
|
"loss": 0.379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17844626307487488,
|
|
"step": 320,
|
|
"valid_targets_mean": 3966.8,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 0.5179282868525896,
|
|
"grad_norm": 0.8334718158966897,
|
|
"learning_rate": 2.9454545454545456e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1996501088142395,
|
|
"step": 325,
|
|
"valid_targets_mean": 3763.9,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 0.5258964143426295,
|
|
"grad_norm": 0.6542673297605693,
|
|
"learning_rate": 2.9909090909090908e-05,
|
|
"loss": 0.3615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1474393904209137,
|
|
"step": 330,
|
|
"valid_targets_mean": 3912.9,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.5338645418326693,
|
|
"grad_norm": 1.0898122627042852,
|
|
"learning_rate": 3.0363636363636364e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20399470627307892,
|
|
"step": 335,
|
|
"valid_targets_mean": 4056.0,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 0.5418326693227091,
|
|
"grad_norm": 0.7745918715566812,
|
|
"learning_rate": 3.081818181818182e-05,
|
|
"loss": 0.3795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25055158138275146,
|
|
"step": 340,
|
|
"valid_targets_mean": 4587.9,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 0.549800796812749,
|
|
"grad_norm": 0.7230172934249015,
|
|
"learning_rate": 3.127272727272728e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1816451996564865,
|
|
"step": 345,
|
|
"valid_targets_mean": 4361.4,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 0.5577689243027888,
|
|
"grad_norm": 0.7003320872740779,
|
|
"learning_rate": 3.172727272727273e-05,
|
|
"loss": 0.3843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16478148102760315,
|
|
"step": 350,
|
|
"valid_targets_mean": 3774.4,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 0.5657370517928287,
|
|
"grad_norm": 0.6804494412793238,
|
|
"learning_rate": 3.2181818181818184e-05,
|
|
"loss": 0.3659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20293974876403809,
|
|
"step": 355,
|
|
"valid_targets_mean": 3941.6,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 0.5737051792828686,
|
|
"grad_norm": 0.7743000131108165,
|
|
"learning_rate": 3.263636363636364e-05,
|
|
"loss": 0.3748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20128706097602844,
|
|
"step": 360,
|
|
"valid_targets_mean": 3755.8,
|
|
"valid_targets_min": 1966
|
|
},
|
|
{
|
|
"epoch": 0.5816733067729084,
|
|
"grad_norm": 0.7472256621802407,
|
|
"learning_rate": 3.3090909090909095e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19128525257110596,
|
|
"step": 365,
|
|
"valid_targets_mean": 4262.8,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 0.5896414342629482,
|
|
"grad_norm": 0.9585374708292493,
|
|
"learning_rate": 3.354545454545455e-05,
|
|
"loss": 0.3795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2316116988658905,
|
|
"step": 370,
|
|
"valid_targets_mean": 4323.4,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 0.5976095617529881,
|
|
"grad_norm": 0.9480533929864912,
|
|
"learning_rate": 3.4e-05,
|
|
"loss": 0.3749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23274560272693634,
|
|
"step": 375,
|
|
"valid_targets_mean": 3637.2,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 0.6055776892430279,
|
|
"grad_norm": 0.7021130871658416,
|
|
"learning_rate": 3.4454545454545455e-05,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2204621136188507,
|
|
"step": 380,
|
|
"valid_targets_mean": 4790.4,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 0.6135458167330677,
|
|
"grad_norm": 0.8628186416823741,
|
|
"learning_rate": 3.490909090909091e-05,
|
|
"loss": 0.377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17570717632770538,
|
|
"step": 385,
|
|
"valid_targets_mean": 3729.4,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 0.6215139442231076,
|
|
"grad_norm": 0.7210336759502691,
|
|
"learning_rate": 3.536363636363637e-05,
|
|
"loss": 0.3712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21611185371875763,
|
|
"step": 390,
|
|
"valid_targets_mean": 4098.4,
|
|
"valid_targets_min": 3422
|
|
},
|
|
{
|
|
"epoch": 0.6294820717131474,
|
|
"grad_norm": 0.7615523380276196,
|
|
"learning_rate": 3.581818181818182e-05,
|
|
"loss": 0.3688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14733503758907318,
|
|
"step": 395,
|
|
"valid_targets_mean": 2655.8,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 0.6374501992031872,
|
|
"grad_norm": 0.7351812946098347,
|
|
"learning_rate": 3.627272727272728e-05,
|
|
"loss": 0.3631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17947062849998474,
|
|
"step": 400,
|
|
"valid_targets_mean": 3434.1,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 0.6454183266932271,
|
|
"grad_norm": 0.6473507101556931,
|
|
"learning_rate": 3.6727272727272734e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18027062714099884,
|
|
"step": 405,
|
|
"valid_targets_mean": 4050.0,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.6533864541832669,
|
|
"grad_norm": 0.8170944353027576,
|
|
"learning_rate": 3.718181818181818e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18333405256271362,
|
|
"step": 410,
|
|
"valid_targets_mean": 3280.4,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 0.6613545816733067,
|
|
"grad_norm": 0.6522389597007201,
|
|
"learning_rate": 3.763636363636364e-05,
|
|
"loss": 0.3653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15798671543598175,
|
|
"step": 415,
|
|
"valid_targets_mean": 3973.9,
|
|
"valid_targets_min": 3181
|
|
},
|
|
{
|
|
"epoch": 0.6693227091633466,
|
|
"grad_norm": 0.7010470762692088,
|
|
"learning_rate": 3.8090909090909095e-05,
|
|
"loss": 0.3688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15806664526462555,
|
|
"step": 420,
|
|
"valid_targets_mean": 3355.4,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 0.6772908366533864,
|
|
"grad_norm": 0.6759026499243866,
|
|
"learning_rate": 3.854545454545455e-05,
|
|
"loss": 0.381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26875221729278564,
|
|
"step": 425,
|
|
"valid_targets_mean": 4280.9,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 0.6852589641434262,
|
|
"grad_norm": 0.7683397961540518,
|
|
"learning_rate": 3.9e-05,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14716662466526031,
|
|
"step": 430,
|
|
"valid_targets_mean": 3156.4,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 0.6932270916334662,
|
|
"grad_norm": 0.7376715268170536,
|
|
"learning_rate": 3.9454545454545455e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21732556819915771,
|
|
"step": 435,
|
|
"valid_targets_mean": 3848.8,
|
|
"valid_targets_min": 232
|
|
},
|
|
{
|
|
"epoch": 0.701195219123506,
|
|
"grad_norm": 0.6728184615746164,
|
|
"learning_rate": 3.990909090909091e-05,
|
|
"loss": 0.3629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16482621431350708,
|
|
"step": 440,
|
|
"valid_targets_mean": 3314.9,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 0.7091633466135459,
|
|
"grad_norm": 0.6675755697367736,
|
|
"learning_rate": 3.999989909636843e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22296589612960815,
|
|
"step": 445,
|
|
"valid_targets_mean": 4027.6,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 0.7171314741035857,
|
|
"grad_norm": 0.6380622207496793,
|
|
"learning_rate": 3.999948917711013e-05,
|
|
"loss": 0.3685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18178203701972961,
|
|
"step": 450,
|
|
"valid_targets_mean": 4977.4,
|
|
"valid_targets_min": 3157
|
|
},
|
|
{
|
|
"epoch": 0.7250996015936255,
|
|
"grad_norm": 0.7249748715728896,
|
|
"learning_rate": 3.999876394220603e-05,
|
|
"loss": 0.3507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15942233800888062,
|
|
"step": 455,
|
|
"valid_targets_mean": 3487.6,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 0.7330677290836654,
|
|
"grad_norm": 0.6364193871643289,
|
|
"learning_rate": 3.999772340309031e-05,
|
|
"loss": 0.3623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17309603095054626,
|
|
"step": 460,
|
|
"valid_targets_mean": 4024.1,
|
|
"valid_targets_min": 2346
|
|
},
|
|
{
|
|
"epoch": 0.7410358565737052,
|
|
"grad_norm": 0.6534069223911941,
|
|
"learning_rate": 3.999636757616831e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18539786338806152,
|
|
"step": 465,
|
|
"valid_targets_mean": 4150.1,
|
|
"valid_targets_min": 2355
|
|
},
|
|
{
|
|
"epoch": 0.749003984063745,
|
|
"grad_norm": 0.7623546621780052,
|
|
"learning_rate": 3.999469648281624e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18554510176181793,
|
|
"step": 470,
|
|
"valid_targets_mean": 3821.2,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 0.7569721115537849,
|
|
"grad_norm": 0.7478027946512054,
|
|
"learning_rate": 3.9992710149380875e-05,
|
|
"loss": 0.359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16699925065040588,
|
|
"step": 475,
|
|
"valid_targets_mean": 3427.5,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 0.7649402390438247,
|
|
"grad_norm": 0.696767701299578,
|
|
"learning_rate": 3.999040860717911e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17681604623794556,
|
|
"step": 480,
|
|
"valid_targets_mean": 3752.8,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.7729083665338645,
|
|
"grad_norm": 0.6967704351910736,
|
|
"learning_rate": 3.998779189249749e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511075496673584,
|
|
"step": 485,
|
|
"valid_targets_mean": 3213.6,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 0.7808764940239044,
|
|
"grad_norm": 0.771706695920937,
|
|
"learning_rate": 3.998486004659162e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18651717901229858,
|
|
"step": 490,
|
|
"valid_targets_mean": 2901.6,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 0.7888446215139442,
|
|
"grad_norm": 0.6775930753323367,
|
|
"learning_rate": 3.9981613115685516e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1885891556739807,
|
|
"step": 495,
|
|
"valid_targets_mean": 3702.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 0.796812749003984,
|
|
"grad_norm": 0.695902107525504,
|
|
"learning_rate": 3.9978051150970906e-05,
|
|
"loss": 0.3595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1813029646873474,
|
|
"step": 500,
|
|
"valid_targets_mean": 4663.6,
|
|
"valid_targets_min": 2700
|
|
},
|
|
{
|
|
"epoch": 0.8047808764940239,
|
|
"grad_norm": 0.728929159452013,
|
|
"learning_rate": 3.9974174208606376e-05,
|
|
"loss": 0.3556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1546524614095688,
|
|
"step": 505,
|
|
"valid_targets_mean": 2868.9,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 0.8127490039840638,
|
|
"grad_norm": 0.8272162026985286,
|
|
"learning_rate": 3.996998234971652e-05,
|
|
"loss": 0.3581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17594462633132935,
|
|
"step": 510,
|
|
"valid_targets_mean": 3446.9,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 0.8207171314741036,
|
|
"grad_norm": 0.9446986936787952,
|
|
"learning_rate": 3.996547564039096e-05,
|
|
"loss": 0.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1588212251663208,
|
|
"step": 515,
|
|
"valid_targets_mean": 3624.6,
|
|
"valid_targets_min": 489
|
|
},
|
|
{
|
|
"epoch": 0.8286852589641435,
|
|
"grad_norm": 0.8017563324472775,
|
|
"learning_rate": 3.99606541516833e-05,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19778478145599365,
|
|
"step": 520,
|
|
"valid_targets_mean": 4328.5,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 0.8366533864541833,
|
|
"grad_norm": 0.7612012764629698,
|
|
"learning_rate": 3.995551795961004e-05,
|
|
"loss": 0.3571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18733835220336914,
|
|
"step": 525,
|
|
"valid_targets_mean": 3156.9,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 0.8446215139442231,
|
|
"grad_norm": 0.6954057631209731,
|
|
"learning_rate": 3.995006714514932e-05,
|
|
"loss": 0.3663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2069414258003235,
|
|
"step": 530,
|
|
"valid_targets_mean": 4626.6,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 0.852589641434263,
|
|
"grad_norm": 0.7380261470303369,
|
|
"learning_rate": 3.99443017942397e-05,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14619982242584229,
|
|
"step": 535,
|
|
"valid_targets_mean": 3010.8,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 0.8605577689243028,
|
|
"grad_norm": 0.8077177991808565,
|
|
"learning_rate": 3.993822199777876e-05,
|
|
"loss": 0.3734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21335554122924805,
|
|
"step": 540,
|
|
"valid_targets_mean": 4808.2,
|
|
"valid_targets_min": 2905
|
|
},
|
|
{
|
|
"epoch": 0.8685258964143426,
|
|
"grad_norm": 0.674443430613216,
|
|
"learning_rate": 3.9931827851621694e-05,
|
|
"loss": 0.3531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20609551668167114,
|
|
"step": 545,
|
|
"valid_targets_mean": 4898.8,
|
|
"valid_targets_min": 3383
|
|
},
|
|
{
|
|
"epoch": 0.8764940239043825,
|
|
"grad_norm": 0.6954171773040285,
|
|
"learning_rate": 3.99251194565798e-05,
|
|
"loss": 0.3537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16853263974189758,
|
|
"step": 550,
|
|
"valid_targets_mean": 3665.2,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 0.8844621513944223,
|
|
"grad_norm": 0.723199475265153,
|
|
"learning_rate": 3.991809691841888e-05,
|
|
"loss": 0.3615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16584350168704987,
|
|
"step": 555,
|
|
"valid_targets_mean": 2947.6,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 0.8924302788844621,
|
|
"grad_norm": 0.6430285946102091,
|
|
"learning_rate": 3.9910760347857554e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15580511093139648,
|
|
"step": 560,
|
|
"valid_targets_mean": 3587.9,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 0.900398406374502,
|
|
"grad_norm": 0.6820102983014715,
|
|
"learning_rate": 3.9903109860565565e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18337659537792206,
|
|
"step": 565,
|
|
"valid_targets_mean": 3625.2,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 0.9083665338645418,
|
|
"grad_norm": 0.7533993179464932,
|
|
"learning_rate": 3.98951455771619e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1618381142616272,
|
|
"step": 570,
|
|
"valid_targets_mean": 3087.0,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 0.9163346613545816,
|
|
"grad_norm": 0.5956981450863615,
|
|
"learning_rate": 3.988686762321293e-05,
|
|
"loss": 0.3469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21477267146110535,
|
|
"step": 575,
|
|
"valid_targets_mean": 5261.5,
|
|
"valid_targets_min": 2841
|
|
},
|
|
{
|
|
"epoch": 0.9243027888446215,
|
|
"grad_norm": 0.7564199919542755,
|
|
"learning_rate": 3.987827612923041e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1756840944290161,
|
|
"step": 580,
|
|
"valid_targets_mean": 3110.1,
|
|
"valid_targets_min": 481
|
|
},
|
|
{
|
|
"epoch": 0.9322709163346613,
|
|
"grad_norm": 0.7011814423323345,
|
|
"learning_rate": 3.986937123066942e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1822158396244049,
|
|
"step": 585,
|
|
"valid_targets_mean": 4179.5,
|
|
"valid_targets_min": 2827
|
|
},
|
|
{
|
|
"epoch": 0.9402390438247012,
|
|
"grad_norm": 1.2458428255009688,
|
|
"learning_rate": 3.9860153067926235e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14599651098251343,
|
|
"step": 590,
|
|
"valid_targets_mean": 3402.9,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 0.9482071713147411,
|
|
"grad_norm": 0.6929967658492394,
|
|
"learning_rate": 3.985062178633612e-05,
|
|
"loss": 0.3475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20835021138191223,
|
|
"step": 595,
|
|
"valid_targets_mean": 3821.9,
|
|
"valid_targets_min": 3372
|
|
},
|
|
{
|
|
"epoch": 0.9561752988047809,
|
|
"grad_norm": 0.7061228324123497,
|
|
"learning_rate": 3.9840777536171026e-05,
|
|
"loss": 0.347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14604054391384125,
|
|
"step": 600,
|
|
"valid_targets_mean": 3521.1,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 0.9641434262948207,
|
|
"grad_norm": 0.6151137860103864,
|
|
"learning_rate": 3.9830620472637214e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378212571144104,
|
|
"step": 605,
|
|
"valid_targets_mean": 3869.6,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 0.9721115537848606,
|
|
"grad_norm": 0.7078980854647497,
|
|
"learning_rate": 3.982015075587283e-05,
|
|
"loss": 0.3457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18525102734565735,
|
|
"step": 610,
|
|
"valid_targets_mean": 3604.5,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 0.9800796812749004,
|
|
"grad_norm": 0.6675761629356032,
|
|
"learning_rate": 3.980936855094537e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.173541858792305,
|
|
"step": 615,
|
|
"valid_targets_mean": 4400.9,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 0.9880478087649402,
|
|
"grad_norm": 0.7097226127876368,
|
|
"learning_rate": 3.979827402784906e-05,
|
|
"loss": 0.3748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17709887027740479,
|
|
"step": 620,
|
|
"valid_targets_mean": 2642.9,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 0.9960159362549801,
|
|
"grad_norm": 0.6953431918337851,
|
|
"learning_rate": 3.978686736150221e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15647362172603607,
|
|
"step": 625,
|
|
"valid_targets_mean": 3125.0,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.003187250996016,
|
|
"grad_norm": 0.8380016134970487,
|
|
"learning_rate": 3.977514873174443e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18529632687568665,
|
|
"step": 630,
|
|
"valid_targets_mean": 3895.5,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 1.0111553784860559,
|
|
"grad_norm": 0.6180113975538215,
|
|
"learning_rate": 3.976311832333381e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14978092908859253,
|
|
"step": 635,
|
|
"valid_targets_mean": 4151.5,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 1.0191235059760957,
|
|
"grad_norm": 0.6477332225509156,
|
|
"learning_rate": 3.9750776325943984e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18527810275554657,
|
|
"step": 640,
|
|
"valid_targets_mean": 4084.5,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 1.0270916334661355,
|
|
"grad_norm": 0.7186538375171995,
|
|
"learning_rate": 3.9738122934161174e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17046117782592773,
|
|
"step": 645,
|
|
"valid_targets_mean": 3528.1,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 1.0350597609561754,
|
|
"grad_norm": 0.6735156586160675,
|
|
"learning_rate": 3.97251583474811e-05,
|
|
"loss": 0.3482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13899904489517212,
|
|
"step": 650,
|
|
"valid_targets_mean": 2988.0,
|
|
"valid_targets_min": 232
|
|
},
|
|
{
|
|
"epoch": 1.0430278884462152,
|
|
"grad_norm": 0.6583706162534825,
|
|
"learning_rate": 3.971188277030582e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.202030748128891,
|
|
"step": 655,
|
|
"valid_targets_mean": 4802.2,
|
|
"valid_targets_min": 4000
|
|
},
|
|
{
|
|
"epoch": 1.050996015936255,
|
|
"grad_norm": 0.6522611837588745,
|
|
"learning_rate": 3.969829641194055e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15542668104171753,
|
|
"step": 660,
|
|
"valid_targets_mean": 3543.8,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.0589641434262949,
|
|
"grad_norm": 0.5991682727250625,
|
|
"learning_rate": 3.968439948659033e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15176421403884888,
|
|
"step": 665,
|
|
"valid_targets_mean": 4226.6,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 1.0669322709163347,
|
|
"grad_norm": 0.7906438594031066,
|
|
"learning_rate": 3.967019221335664e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22275003790855408,
|
|
"step": 670,
|
|
"valid_targets_mean": 4171.0,
|
|
"valid_targets_min": 3268
|
|
},
|
|
{
|
|
"epoch": 1.0749003984063745,
|
|
"grad_norm": 0.9001112914083158,
|
|
"learning_rate": 3.965567481623399e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16322645545005798,
|
|
"step": 675,
|
|
"valid_targets_mean": 3059.8,
|
|
"valid_targets_min": 189
|
|
},
|
|
{
|
|
"epoch": 1.0828685258964144,
|
|
"grad_norm": 0.6929341128744392,
|
|
"learning_rate": 3.9640847524106356e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1515749990940094,
|
|
"step": 680,
|
|
"valid_targets_mean": 3472.5,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 1.0908366533864542,
|
|
"grad_norm": 0.6655570486924588,
|
|
"learning_rate": 3.9625710570743556e-05,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16119533777236938,
|
|
"step": 685,
|
|
"valid_targets_mean": 3803.9,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 1.098804780876494,
|
|
"grad_norm": 0.656880149153456,
|
|
"learning_rate": 3.96102641947976e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1461721807718277,
|
|
"step": 690,
|
|
"valid_targets_mean": 2939.8,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 1.1067729083665339,
|
|
"grad_norm": 0.9014777398348911,
|
|
"learning_rate": 3.959450863979891e-05,
|
|
"loss": 0.3415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18205542862415314,
|
|
"step": 695,
|
|
"valid_targets_mean": 3970.0,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 1.1147410358565737,
|
|
"grad_norm": 0.7286105371948465,
|
|
"learning_rate": 3.957844415415248e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16195717453956604,
|
|
"step": 700,
|
|
"valid_targets_mean": 3414.6,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.1227091633466135,
|
|
"grad_norm": 0.7140375479244369,
|
|
"learning_rate": 3.956207099113396e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1416376531124115,
|
|
"step": 705,
|
|
"valid_targets_mean": 3106.2,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 1.1306772908366534,
|
|
"grad_norm": 0.6387675684821813,
|
|
"learning_rate": 3.954538940888567e-05,
|
|
"loss": 0.3385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16593831777572632,
|
|
"step": 710,
|
|
"valid_targets_mean": 3963.5,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 1.1386454183266932,
|
|
"grad_norm": 0.7108262306982716,
|
|
"learning_rate": 3.952839967041252e-05,
|
|
"loss": 0.3539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1952948421239853,
|
|
"step": 715,
|
|
"valid_targets_mean": 3867.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.146613545816733,
|
|
"grad_norm": 0.6923790988667464,
|
|
"learning_rate": 3.951110204357787e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1314292848110199,
|
|
"step": 720,
|
|
"valid_targets_mean": 2712.4,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 1.1545816733067729,
|
|
"grad_norm": 0.7761718139472028,
|
|
"learning_rate": 3.9493496801099306e-05,
|
|
"loss": 0.3379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17276610434055328,
|
|
"step": 725,
|
|
"valid_targets_mean": 3137.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.1625498007968127,
|
|
"grad_norm": 0.6421701750586802,
|
|
"learning_rate": 3.9475584220544335e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1793321669101715,
|
|
"step": 730,
|
|
"valid_targets_mean": 5372.2,
|
|
"valid_targets_min": 2713
|
|
},
|
|
{
|
|
"epoch": 1.1705179282868525,
|
|
"grad_norm": 0.7749510793480261,
|
|
"learning_rate": 3.9457364584326005e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1492847204208374,
|
|
"step": 735,
|
|
"valid_targets_mean": 3576.1,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.1784860557768924,
|
|
"grad_norm": 0.7949932249458553,
|
|
"learning_rate": 3.94388381796985e-05,
|
|
"loss": 0.3388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16831986606121063,
|
|
"step": 740,
|
|
"valid_targets_mean": 4208.0,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 1.1864541832669322,
|
|
"grad_norm": 0.6130986984958665,
|
|
"learning_rate": 3.942000529875251e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1676281839609146,
|
|
"step": 745,
|
|
"valid_targets_mean": 4924.5,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.194422310756972,
|
|
"grad_norm": 0.6689286573258375,
|
|
"learning_rate": 3.9400866238410736e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14720451831817627,
|
|
"step": 750,
|
|
"valid_targets_mean": 3518.8,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 1.2023904382470119,
|
|
"grad_norm": 0.6475785428629414,
|
|
"learning_rate": 3.9381421300423145e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18296414613723755,
|
|
"step": 755,
|
|
"valid_targets_mean": 3739.0,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 1.2103585657370517,
|
|
"grad_norm": 0.6781254648218166,
|
|
"learning_rate": 3.936167079136222e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15673884749412537,
|
|
"step": 760,
|
|
"valid_targets_mean": 3204.5,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 1.2183266932270915,
|
|
"grad_norm": 0.6427243656380253,
|
|
"learning_rate": 3.934161502261814e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19556352496147156,
|
|
"step": 765,
|
|
"valid_targets_mean": 4977.9,
|
|
"valid_targets_min": 3219
|
|
},
|
|
{
|
|
"epoch": 1.2262948207171314,
|
|
"grad_norm": 0.7264390882508804,
|
|
"learning_rate": 3.932125431039387e-05,
|
|
"loss": 0.3412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15922430157661438,
|
|
"step": 770,
|
|
"valid_targets_mean": 2880.5,
|
|
"valid_targets_min": 266
|
|
},
|
|
{
|
|
"epoch": 1.2342629482071712,
|
|
"grad_norm": 0.7144672783858196,
|
|
"learning_rate": 3.930058897570016e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332297921180725,
|
|
"step": 775,
|
|
"valid_targets_mean": 2939.6,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 1.2422310756972113,
|
|
"grad_norm": 0.6867672735917674,
|
|
"learning_rate": 3.9279619344350505e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13223578035831451,
|
|
"step": 780,
|
|
"valid_targets_mean": 3395.1,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.2501992031872509,
|
|
"grad_norm": 0.6135527493382645,
|
|
"learning_rate": 3.925834574695599e-05,
|
|
"loss": 0.3295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15247970819473267,
|
|
"step": 785,
|
|
"valid_targets_mean": 3609.8,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 1.258167330677291,
|
|
"grad_norm": 0.6088121739198172,
|
|
"learning_rate": 3.923676851892008e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15359815955162048,
|
|
"step": 790,
|
|
"valid_targets_mean": 3986.2,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 1.2661354581673308,
|
|
"grad_norm": 0.646293293945006,
|
|
"learning_rate": 3.921488800043335e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14105680584907532,
|
|
"step": 795,
|
|
"valid_targets_mean": 3519.8,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 1.2741035856573706,
|
|
"grad_norm": 0.6234293152369844,
|
|
"learning_rate": 3.9192704536468106e-05,
|
|
"loss": 0.3446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406230628490448,
|
|
"step": 800,
|
|
"valid_targets_mean": 3263.4,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 1.2820717131474104,
|
|
"grad_norm": 0.6443699988991758,
|
|
"learning_rate": 3.917021847677294e-05,
|
|
"loss": 0.3549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1499442160129547,
|
|
"step": 805,
|
|
"valid_targets_mean": 3704.4,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 1.2900398406374503,
|
|
"grad_norm": 0.602243226611313,
|
|
"learning_rate": 3.914743017586722e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15198102593421936,
|
|
"step": 810,
|
|
"valid_targets_mean": 4328.8,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 1.29800796812749,
|
|
"grad_norm": 0.7296266783180043,
|
|
"learning_rate": 3.912433999303552e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13439792394638062,
|
|
"step": 815,
|
|
"valid_targets_mean": 2737.6,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 1.30597609561753,
|
|
"grad_norm": 0.6350956369195538,
|
|
"learning_rate": 3.910094829232194e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1784883439540863,
|
|
"step": 820,
|
|
"valid_targets_mean": 4386.4,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 1.3139442231075698,
|
|
"grad_norm": 0.7219352485252675,
|
|
"learning_rate": 3.907725544252436e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1668669879436493,
|
|
"step": 825,
|
|
"valid_targets_mean": 3696.9,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 1.3219123505976096,
|
|
"grad_norm": 0.6504102122640691,
|
|
"learning_rate": 3.905326181718862e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1484387218952179,
|
|
"step": 830,
|
|
"valid_targets_mean": 3092.1,
|
|
"valid_targets_min": 359
|
|
},
|
|
{
|
|
"epoch": 1.3298804780876494,
|
|
"grad_norm": 0.6288078179306616,
|
|
"learning_rate": 3.902896779460266e-05,
|
|
"loss": 0.3266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14200851321220398,
|
|
"step": 835,
|
|
"valid_targets_mean": 3019.5,
|
|
"valid_targets_min": 514
|
|
},
|
|
{
|
|
"epoch": 1.3378486055776893,
|
|
"grad_norm": 0.8076767091954578,
|
|
"learning_rate": 3.900437375779055e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1670466810464859,
|
|
"step": 840,
|
|
"valid_targets_mean": 3895.8,
|
|
"valid_targets_min": 498
|
|
},
|
|
{
|
|
"epoch": 1.345816733067729,
|
|
"grad_norm": 0.6140652183984848,
|
|
"learning_rate": 3.8979480094506394e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14599671959877014,
|
|
"step": 845,
|
|
"valid_targets_mean": 3340.5,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 1.353784860557769,
|
|
"grad_norm": 0.6107968473694149,
|
|
"learning_rate": 3.895428719722832e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1698405146598816,
|
|
"step": 850,
|
|
"valid_targets_mean": 4689.1,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 1.3617529880478088,
|
|
"grad_norm": 0.9361963934323948,
|
|
"learning_rate": 3.8928795463152186e-05,
|
|
"loss": 0.3476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17818495631217957,
|
|
"step": 855,
|
|
"valid_targets_mean": 3841.5,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 1.3697211155378486,
|
|
"grad_norm": 0.5664514572215686,
|
|
"learning_rate": 3.89030052941854e-05,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609460860490799,
|
|
"step": 860,
|
|
"valid_targets_mean": 4430.2,
|
|
"valid_targets_min": 2145
|
|
},
|
|
{
|
|
"epoch": 1.3776892430278884,
|
|
"grad_norm": 0.6652826855162303,
|
|
"learning_rate": 3.8876917096940536e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12637989223003387,
|
|
"step": 865,
|
|
"valid_targets_mean": 2741.5,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 1.3856573705179283,
|
|
"grad_norm": 0.671985336767791,
|
|
"learning_rate": 3.8850531282728934e-05,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19516661763191223,
|
|
"step": 870,
|
|
"valid_targets_mean": 3528.1,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 1.393625498007968,
|
|
"grad_norm": 0.6477967928804484,
|
|
"learning_rate": 3.882384826755422e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19669313728809357,
|
|
"step": 875,
|
|
"valid_targets_mean": 4708.4,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 1.401593625498008,
|
|
"grad_norm": 0.6262389017941296,
|
|
"learning_rate": 3.8796868472105746e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1708228588104248,
|
|
"step": 880,
|
|
"valid_targets_mean": 5133.6,
|
|
"valid_targets_min": 3140
|
|
},
|
|
{
|
|
"epoch": 1.4095617529880478,
|
|
"grad_norm": 0.5665004228792191,
|
|
"learning_rate": 3.8769592321751964e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1891600489616394,
|
|
"step": 885,
|
|
"valid_targets_mean": 4765.6,
|
|
"valid_targets_min": 2599
|
|
},
|
|
{
|
|
"epoch": 1.4175298804780876,
|
|
"grad_norm": 0.7184391120878334,
|
|
"learning_rate": 3.87420202465337e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1749991774559021,
|
|
"step": 890,
|
|
"valid_targets_mean": 3819.4,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 1.4254980079681274,
|
|
"grad_norm": 0.6043018244167944,
|
|
"learning_rate": 3.871415268115739e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2269725501537323,
|
|
"step": 895,
|
|
"valid_targets_mean": 4961.9,
|
|
"valid_targets_min": 3451
|
|
},
|
|
{
|
|
"epoch": 1.4334661354581673,
|
|
"grad_norm": 0.5887212474653807,
|
|
"learning_rate": 3.868599006498823e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1578642725944519,
|
|
"step": 900,
|
|
"valid_targets_mean": 4013.4,
|
|
"valid_targets_min": 184
|
|
},
|
|
{
|
|
"epoch": 1.441434262948207,
|
|
"grad_norm": 0.6249449772924667,
|
|
"learning_rate": 3.865753284204324e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13083437085151672,
|
|
"step": 905,
|
|
"valid_targets_mean": 2994.2,
|
|
"valid_targets_min": 283
|
|
},
|
|
{
|
|
"epoch": 1.4494023904382471,
|
|
"grad_norm": 0.6444643290345756,
|
|
"learning_rate": 3.862878146098426e-05,
|
|
"loss": 0.3301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21578340232372284,
|
|
"step": 910,
|
|
"valid_targets_mean": 3560.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 1.457370517928287,
|
|
"grad_norm": 0.6345924176843412,
|
|
"learning_rate": 3.859973637511086e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16586630046367645,
|
|
"step": 915,
|
|
"valid_targets_mean": 4145.2,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 1.4653386454183268,
|
|
"grad_norm": 0.7293833283228522,
|
|
"learning_rate": 3.857039804235327e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20936918258666992,
|
|
"step": 920,
|
|
"valid_targets_mean": 3901.1,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.4733067729083666,
|
|
"grad_norm": 0.6665691585288822,
|
|
"learning_rate": 3.854076692526505e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19952437281608582,
|
|
"step": 925,
|
|
"valid_targets_mean": 4302.2,
|
|
"valid_targets_min": 3314
|
|
},
|
|
{
|
|
"epoch": 1.4812749003984065,
|
|
"grad_norm": 0.5880262605957536,
|
|
"learning_rate": 3.8510843491015874e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16966915130615234,
|
|
"step": 930,
|
|
"valid_targets_mean": 4073.6,
|
|
"valid_targets_min": 3095
|
|
},
|
|
{
|
|
"epoch": 1.4892430278884463,
|
|
"grad_norm": 0.5671415238129563,
|
|
"learning_rate": 3.848062821138415e-05,
|
|
"loss": 0.3174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13870111107826233,
|
|
"step": 935,
|
|
"valid_targets_mean": 3917.1,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 1.4972111553784861,
|
|
"grad_norm": 0.5664536721175603,
|
|
"learning_rate": 3.8450121562749565e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15465731918811798,
|
|
"step": 940,
|
|
"valid_targets_mean": 3790.1,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.505179282868526,
|
|
"grad_norm": 0.6650849090179763,
|
|
"learning_rate": 3.841932402608557e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365033984184265,
|
|
"step": 945,
|
|
"valid_targets_mean": 2987.6,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 1.5131474103585658,
|
|
"grad_norm": 0.5881270123449338,
|
|
"learning_rate": 3.838823608695185e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19470025599002838,
|
|
"step": 950,
|
|
"valid_targets_mean": 4070.0,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 1.5211155378486056,
|
|
"grad_norm": 0.6805344923656982,
|
|
"learning_rate": 3.835685823548659e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1532375067472458,
|
|
"step": 955,
|
|
"valid_targets_mean": 3163.1,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 1.5290836653386455,
|
|
"grad_norm": 0.6804754134582519,
|
|
"learning_rate": 3.832519096639879e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352333426475525,
|
|
"step": 960,
|
|
"valid_targets_mean": 3507.5,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.5370517928286853,
|
|
"grad_norm": 0.6789653560379708,
|
|
"learning_rate": 3.829323477896048e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16350319981575012,
|
|
"step": 965,
|
|
"valid_targets_mean": 2874.0,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 1.5450199203187251,
|
|
"grad_norm": 0.6915509492873587,
|
|
"learning_rate": 3.8260990176998835e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1757764220237732,
|
|
"step": 970,
|
|
"valid_targets_mean": 4496.0,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.552988047808765,
|
|
"grad_norm": 0.6426073811661999,
|
|
"learning_rate": 3.82284576688882e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18171828985214233,
|
|
"step": 975,
|
|
"valid_targets_mean": 3385.2,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 1.5609561752988048,
|
|
"grad_norm": 1.1859300440389073,
|
|
"learning_rate": 3.819563776754212e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10818570852279663,
|
|
"step": 980,
|
|
"valid_targets_mean": 2652.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.5689243027888446,
|
|
"grad_norm": 0.7378665803791654,
|
|
"learning_rate": 3.8162530990405235e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18052585422992706,
|
|
"step": 985,
|
|
"valid_targets_mean": 4138.9,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 1.5768924302788845,
|
|
"grad_norm": 0.7884738730445345,
|
|
"learning_rate": 3.8129137859445106e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11578461527824402,
|
|
"step": 990,
|
|
"valid_targets_mean": 2230.2,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 1.5848605577689243,
|
|
"grad_norm": 0.6764216933538416,
|
|
"learning_rate": 3.8095458901144014e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14588657021522522,
|
|
"step": 995,
|
|
"valid_targets_mean": 2651.1,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 1.5928286852589641,
|
|
"grad_norm": 0.7499955425166605,
|
|
"learning_rate": 3.806149464649066e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16508588194847107,
|
|
"step": 1000,
|
|
"valid_targets_mean": 2787.5,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 1.600796812749004,
|
|
"grad_norm": 0.5916363548759627,
|
|
"learning_rate": 3.802724563097175e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14954009652137756,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3623.4,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 1.6087649402390438,
|
|
"grad_norm": 0.734952901318973,
|
|
"learning_rate": 3.7992712394563606e-05,
|
|
"loss": 0.3412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241578996181488,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3893.6,
|
|
"valid_targets_min": 2274
|
|
},
|
|
{
|
|
"epoch": 1.6167330677290837,
|
|
"grad_norm": 0.6634751175431612,
|
|
"learning_rate": 3.795789548172362e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16153842210769653,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3391.4,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 1.6247011952191235,
|
|
"grad_norm": 0.6907352514309517,
|
|
"learning_rate": 3.7922795441381674e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15194550156593323,
|
|
"step": 1020,
|
|
"valid_targets_mean": 2836.1,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 1.6326693227091633,
|
|
"grad_norm": 0.6434692779607148,
|
|
"learning_rate": 3.78874128269315e-05,
|
|
"loss": 0.3335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14861451089382172,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3566.8,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 1.6406374501992032,
|
|
"grad_norm": 0.6221059084208682,
|
|
"learning_rate": 3.785174819622195e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1275479793548584,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3273.4,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 1.648605577689243,
|
|
"grad_norm": 0.6489634971991323,
|
|
"learning_rate": 3.7815802111548185e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17398403584957123,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4010.5,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 1.6565737051792828,
|
|
"grad_norm": 0.5827419736850272,
|
|
"learning_rate": 3.777957513964282e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1529952734708786,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4105.8,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 1.6645418326693227,
|
|
"grad_norm": 0.605605540428135,
|
|
"learning_rate": 3.7743067851666994e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1617601215839386,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3637.4,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 1.6725099601593625,
|
|
"grad_norm": 0.5958018681284462,
|
|
"learning_rate": 3.770628082320137e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18859803676605225,
|
|
"step": 1050,
|
|
"valid_targets_mean": 4714.4,
|
|
"valid_targets_min": 3961
|
|
},
|
|
{
|
|
"epoch": 1.6804780876494023,
|
|
"grad_norm": 0.5548342819557698,
|
|
"learning_rate": 3.766921463423704e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1481020450592041,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3693.8,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 1.6884462151394422,
|
|
"grad_norm": 0.6270607165257036,
|
|
"learning_rate": 3.7631869869166396e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16427603363990784,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3412.0,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 1.696414342629482,
|
|
"grad_norm": 0.6298749105786624,
|
|
"learning_rate": 3.759424711677391e-05,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17540724575519562,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4121.4,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 1.7043824701195218,
|
|
"grad_norm": 0.6045268705855914,
|
|
"learning_rate": 3.755634697022686e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19350652396678925,
|
|
"step": 1070,
|
|
"valid_targets_mean": 4344.9,
|
|
"valid_targets_min": 3294
|
|
},
|
|
{
|
|
"epoch": 1.7123505976095617,
|
|
"grad_norm": 0.778685455221528,
|
|
"learning_rate": 3.751817002706596e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20157337188720703,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4317.4,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 1.7203187250996015,
|
|
"grad_norm": 0.676232749412372,
|
|
"learning_rate": 3.747971688919597e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13573341071605682,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2960.9,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 1.7282868525896413,
|
|
"grad_norm": 0.5653574577331923,
|
|
"learning_rate": 3.744098816287616e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1345827579498291,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3466.1,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 1.7362549800796812,
|
|
"grad_norm": 0.6351690816506902,
|
|
"learning_rate": 3.7401984458710796e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14693620800971985,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3326.4,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 1.744223107569721,
|
|
"grad_norm": 0.6561424657402385,
|
|
"learning_rate": 3.73627063916395e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14549244940280914,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3336.9,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 1.7521912350597608,
|
|
"grad_norm": 0.7612659059318599,
|
|
"learning_rate": 3.732315458092754e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15325888991355896,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3915.9,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 1.7601593625498007,
|
|
"grad_norm": 0.5708159840236217,
|
|
"learning_rate": 3.728332965015608e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1499689221382141,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4427.4,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 1.7681274900398405,
|
|
"grad_norm": 0.6187362281801017,
|
|
"learning_rate": 3.724323222721234e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14799684286117554,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4080.1,
|
|
"valid_targets_min": 3371
|
|
},
|
|
{
|
|
"epoch": 1.7760956175298803,
|
|
"grad_norm": 0.623009628447439,
|
|
"learning_rate": 3.720286294427972e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13788515329360962,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3574.0,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 1.7840637450199202,
|
|
"grad_norm": 0.5652559360866017,
|
|
"learning_rate": 3.716222243782778e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12565413117408752,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3111.9,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.7920318725099602,
|
|
"grad_norm": 0.5851792904377354,
|
|
"learning_rate": 3.712131134860229e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15786227583885193,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3497.2,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.8,
|
|
"grad_norm": 0.5995685863739088,
|
|
"learning_rate": 3.708013032161502e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15148508548736572,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3953.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 1.8079681274900399,
|
|
"grad_norm": 0.6910535563908953,
|
|
"learning_rate": 3.703868000613369e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17042243480682373,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4433.4,
|
|
"valid_targets_min": 3430
|
|
},
|
|
{
|
|
"epoch": 1.8159362549800797,
|
|
"grad_norm": 0.6466466879164442,
|
|
"learning_rate": 3.6996961055671625e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17712724208831787,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4915.8,
|
|
"valid_targets_min": 3569
|
|
},
|
|
{
|
|
"epoch": 1.8239043824701195,
|
|
"grad_norm": 0.7685923945953174,
|
|
"learning_rate": 3.695497412797751e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342412382364273,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2233.5,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 1.8318725099601594,
|
|
"grad_norm": 0.5981690154674036,
|
|
"learning_rate": 3.6912719885025026e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15309564769268036,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3787.4,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.8398406374501992,
|
|
"grad_norm": 0.5774296637019503,
|
|
"learning_rate": 3.687019899300238e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1481582522392273,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4259.6,
|
|
"valid_targets_min": 2558
|
|
},
|
|
{
|
|
"epoch": 1.847808764940239,
|
|
"grad_norm": 0.6634038779165066,
|
|
"learning_rate": 3.6827412122301805e-05,
|
|
"loss": 0.3357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17690718173980713,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3654.9,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 1.8557768924302789,
|
|
"grad_norm": 0.6847713609664849,
|
|
"learning_rate": 3.678435994750905e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1776587814092636,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3298.9,
|
|
"valid_targets_min": 543
|
|
},
|
|
{
|
|
"epoch": 1.8637450199203187,
|
|
"grad_norm": 0.6440304329296582,
|
|
"learning_rate": 3.6741043147392634e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16797837615013123,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4261.9,
|
|
"valid_targets_min": 3587
|
|
},
|
|
{
|
|
"epoch": 1.8717131474103585,
|
|
"grad_norm": 0.6507238996694699,
|
|
"learning_rate": 3.6697462404893257e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15259000658988953,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3302.1,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 1.8796812749003984,
|
|
"grad_norm": 0.7378478562605171,
|
|
"learning_rate": 3.665361840711297e-05,
|
|
"loss": 0.3231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13854113221168518,
|
|
"step": 1180,
|
|
"valid_targets_mean": 2631.8,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 1.8876494023904382,
|
|
"grad_norm": 0.6457405994361323,
|
|
"learning_rate": 3.660951184530434e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1787298023700714,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3707.5,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 1.895617529880478,
|
|
"grad_norm": 0.6591963554947874,
|
|
"learning_rate": 3.656514341485959e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1772817075252533,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4114.0,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 1.9035856573705179,
|
|
"grad_norm": 0.5344285689213272,
|
|
"learning_rate": 3.65205138152996e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16616226732730865,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4997.6,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 1.9115537848605577,
|
|
"grad_norm": 0.7522572262927913,
|
|
"learning_rate": 3.647562375026289e-05,
|
|
"loss": 0.3301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16722427308559418,
|
|
"step": 1200,
|
|
"valid_targets_mean": 4735.5,
|
|
"valid_targets_min": 3503
|
|
},
|
|
{
|
|
"epoch": 1.9195219123505978,
|
|
"grad_norm": 0.6619997058919527,
|
|
"learning_rate": 3.643047392749453e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2077215015888214,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4616.6,
|
|
"valid_targets_min": 3346
|
|
},
|
|
{
|
|
"epoch": 1.9274900398406376,
|
|
"grad_norm": 0.6349179521539451,
|
|
"learning_rate": 3.638506505883497e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1884724348783493,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4218.8,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 1.9354581673306774,
|
|
"grad_norm": 0.5523245913514429,
|
|
"learning_rate": 3.633939786020884e-05,
|
|
"loss": 0.3176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13215897977352142,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3504.0,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 1.9434262948207173,
|
|
"grad_norm": 0.5915678062428961,
|
|
"learning_rate": 3.629347305161365e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16069427132606506,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3688.9,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 1.951394422310757,
|
|
"grad_norm": 0.6738068743559218,
|
|
"learning_rate": 3.62472913571084e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14029105007648468,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3101.8,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 1.959362549800797,
|
|
"grad_norm": 0.6403284264064206,
|
|
"learning_rate": 3.620085350480226e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12314704805612564,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2816.9,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 1.9673306772908368,
|
|
"grad_norm": 0.6325279233165706,
|
|
"learning_rate": 3.615416022684298e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16894102096557617,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3623.5,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 1.9752988047808766,
|
|
"grad_norm": 0.6533014597720369,
|
|
"learning_rate": 3.610721225940542e-05,
|
|
"loss": 0.3339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20440031588077545,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4533.1,
|
|
"valid_targets_min": 3572
|
|
},
|
|
{
|
|
"epoch": 1.9832669322709164,
|
|
"grad_norm": 0.7340801153525159,
|
|
"learning_rate": 3.606001034267992e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15113261342048645,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3354.2,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 1.9912350597609563,
|
|
"grad_norm": 0.6771335347513721,
|
|
"learning_rate": 3.6012555220860634e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15480926632881165,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2704.0,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 1.999203187250996,
|
|
"grad_norm": 0.5818311473977106,
|
|
"learning_rate": 3.596484764213379e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16493937373161316,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3971.1,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 2.006374501992032,
|
|
"grad_norm": 0.5836251192424022,
|
|
"learning_rate": 3.591688835866589e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17885245382785797,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4970.4,
|
|
"valid_targets_min": 3162
|
|
},
|
|
{
|
|
"epoch": 2.014342629482072,
|
|
"grad_norm": 0.5629975095356654,
|
|
"learning_rate": 3.5868678126591884e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1434982866048813,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4153.0,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 2.0223107569721117,
|
|
"grad_norm": 0.619157611992127,
|
|
"learning_rate": 3.5820217706003194e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1476842761039734,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4041.4,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 2.0302788844621515,
|
|
"grad_norm": 0.6346864666948292,
|
|
"learning_rate": 3.577150786093579e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16996094584465027,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3509.5,
|
|
"valid_targets_min": 411
|
|
},
|
|
{
|
|
"epoch": 2.0382470119521914,
|
|
"grad_norm": 0.6778719480532754,
|
|
"learning_rate": 3.572254935935808e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1883283257484436,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3941.2,
|
|
"valid_targets_min": 584
|
|
},
|
|
{
|
|
"epoch": 2.046215139442231,
|
|
"grad_norm": 0.6049255619263011,
|
|
"learning_rate": 3.567334297315887e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14070260524749756,
|
|
"step": 1285,
|
|
"valid_targets_mean": 3641.1,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 2.054183266932271,
|
|
"grad_norm": 0.6374919156353854,
|
|
"learning_rate": 3.562388947813514e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15400946140289307,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3828.2,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 2.062151394422311,
|
|
"grad_norm": 0.6895996869045218,
|
|
"learning_rate": 3.557418965397985e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1403256058692932,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2555.0,
|
|
"valid_targets_min": 407
|
|
},
|
|
{
|
|
"epoch": 2.0701195219123507,
|
|
"grad_norm": 0.7053623742408442,
|
|
"learning_rate": 3.552424428426962e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1919977366924286,
|
|
"step": 1300,
|
|
"valid_targets_mean": 4301.8,
|
|
"valid_targets_min": 3630
|
|
},
|
|
{
|
|
"epoch": 2.0780876494023905,
|
|
"grad_norm": 0.6095070399751719,
|
|
"learning_rate": 3.547405415645237e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13988962769508362,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3645.9,
|
|
"valid_targets_min": 969
|
|
},
|
|
{
|
|
"epoch": 2.0860557768924304,
|
|
"grad_norm": 0.7010958997601787,
|
|
"learning_rate": 3.542362006183496e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16029886901378632,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4184.8,
|
|
"valid_targets_min": 2397
|
|
},
|
|
{
|
|
"epoch": 2.09402390438247,
|
|
"grad_norm": 0.681550955504812,
|
|
"learning_rate": 3.5372942795570644e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13907872140407562,
|
|
"step": 1315,
|
|
"valid_targets_mean": 2587.6,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 2.10199203187251,
|
|
"grad_norm": 0.6872243323376277,
|
|
"learning_rate": 3.532202315664658e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16166797280311584,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3661.5,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 2.10996015936255,
|
|
"grad_norm": 0.678192201244478,
|
|
"learning_rate": 3.527086194787121e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16050675511360168,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3741.9,
|
|
"valid_targets_min": 233
|
|
},
|
|
{
|
|
"epoch": 2.1179282868525897,
|
|
"grad_norm": 0.6561493441706079,
|
|
"learning_rate": 3.521945997586162e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.154448464512825,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3963.8,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 2.1258964143426295,
|
|
"grad_norm": 0.6432189194511014,
|
|
"learning_rate": 3.51678180510308e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16677416861057281,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4409.4,
|
|
"valid_targets_min": 2325
|
|
},
|
|
{
|
|
"epoch": 2.1338645418326694,
|
|
"grad_norm": 0.6549823203011039,
|
|
"learning_rate": 3.511593698757491e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1379939168691635,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3496.5,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 2.141832669322709,
|
|
"grad_norm": 0.5257721865369313,
|
|
"learning_rate": 3.506381760346037e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14774326980113983,
|
|
"step": 1345,
|
|
"valid_targets_mean": 5081.5,
|
|
"valid_targets_min": 234
|
|
},
|
|
{
|
|
"epoch": 2.149800796812749,
|
|
"grad_norm": 0.6432511783636684,
|
|
"learning_rate": 3.501146072041104e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10675960779190063,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3111.6,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 2.157768924302789,
|
|
"grad_norm": 0.7306004464900633,
|
|
"learning_rate": 3.495886716389523e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17563018202781677,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4250.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.1657370517928287,
|
|
"grad_norm": 0.5859975987411148,
|
|
"learning_rate": 3.4906037763112665e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14896699786186218,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3614.5,
|
|
"valid_targets_min": 218
|
|
},
|
|
{
|
|
"epoch": 2.1737051792828685,
|
|
"grad_norm": 0.6383870688004181,
|
|
"learning_rate": 3.4852973350981464e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17357799410820007,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4153.1,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 2.1816733067729084,
|
|
"grad_norm": 0.660721797475077,
|
|
"learning_rate": 3.4799674764124956e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358649879693985,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3674.2,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 2.189641434262948,
|
|
"grad_norm": 0.5956420229552498,
|
|
"learning_rate": 3.474614284285852e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17175112664699554,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4028.9,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 2.197609561752988,
|
|
"grad_norm": 0.6745536992357251,
|
|
"learning_rate": 3.469237843117634e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17130735516548157,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3824.5,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 2.205577689243028,
|
|
"grad_norm": 0.6404088633926213,
|
|
"learning_rate": 3.4638382376738064e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14818412065505981,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4813.4,
|
|
"valid_targets_min": 3722
|
|
},
|
|
{
|
|
"epoch": 2.2135458167330677,
|
|
"grad_norm": 0.6647290971651754,
|
|
"learning_rate": 3.458415553085548e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16095389425754547,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3601.6,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.2215139442231076,
|
|
"grad_norm": 0.6505561635346703,
|
|
"learning_rate": 3.4529698748479075e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18228718638420105,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4198.2,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 2.2294820717131474,
|
|
"grad_norm": 0.5705495422269455,
|
|
"learning_rate": 3.4475012888184536e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16400834918022156,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4684.5,
|
|
"valid_targets_min": 2803
|
|
},
|
|
{
|
|
"epoch": 2.237450199203187,
|
|
"grad_norm": 0.6139488510987134,
|
|
"learning_rate": 3.4420098812159266e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334049552679062,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3723.1,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 2.245418326693227,
|
|
"grad_norm": 0.612388930146584,
|
|
"learning_rate": 3.4364957386188744e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16954825818538666,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4213.2,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.253386454183267,
|
|
"grad_norm": 0.5997111383613154,
|
|
"learning_rate": 3.4309589479642894e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1729383021593094,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3919.0,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 2.2613545816733067,
|
|
"grad_norm": 0.9734635696926245,
|
|
"learning_rate": 3.425399596546237e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13724632561206818,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3753.0,
|
|
"valid_targets_min": 1731
|
|
},
|
|
{
|
|
"epoch": 2.2693227091633466,
|
|
"grad_norm": 0.6508062353110562,
|
|
"learning_rate": 3.4198177720144794e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14353463053703308,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3168.0,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 2.2772908366533864,
|
|
"grad_norm": 0.6387766951543817,
|
|
"learning_rate": 3.4142135623730954e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1348034292459488,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3531.2,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 2.285258964143426,
|
|
"grad_norm": 0.6473111516507845,
|
|
"learning_rate": 3.4085870559790905e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1499640792608261,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3610.5,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 2.293227091633466,
|
|
"grad_norm": 0.5837371285483895,
|
|
"learning_rate": 3.402938341541005e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15251114964485168,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3497.9,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.301195219123506,
|
|
"grad_norm": 0.643445157252701,
|
|
"learning_rate": 3.397267508117517e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16282296180725098,
|
|
"step": 1445,
|
|
"valid_targets_mean": 4048.0,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 2.3091633466135457,
|
|
"grad_norm": 0.626513759608805,
|
|
"learning_rate": 3.391574645116034e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480455845594406,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4264.8,
|
|
"valid_targets_min": 3640
|
|
},
|
|
{
|
|
"epoch": 2.3171314741035856,
|
|
"grad_norm": 0.6006561956598667,
|
|
"learning_rate": 3.385859842291287e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14290261268615723,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4117.2,
|
|
"valid_targets_min": 3252
|
|
},
|
|
{
|
|
"epoch": 2.3250996015936254,
|
|
"grad_norm": 0.6704738132790149,
|
|
"learning_rate": 3.380123189743914e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08275922387838364,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2191.5,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 2.333067729083665,
|
|
"grad_norm": 0.5883954078707374,
|
|
"learning_rate": 3.374364777919042e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14832335710525513,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3908.2,
|
|
"valid_targets_min": 2545
|
|
},
|
|
{
|
|
"epoch": 2.341035856573705,
|
|
"grad_norm": 0.6709760261263392,
|
|
"learning_rate": 3.368584697604856e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15644146502017975,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3209.9,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 2.349003984063745,
|
|
"grad_norm": 0.6223432364326,
|
|
"learning_rate": 3.362783039931172e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1454828828573227,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3254.8,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 2.3569721115537847,
|
|
"grad_norm": 0.7653458669062861,
|
|
"learning_rate": 3.356959896367997e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20823490619659424,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4071.9,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 2.3649402390438246,
|
|
"grad_norm": 0.6555162840657145,
|
|
"learning_rate": 3.351115358724089e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11996582895517349,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2636.9,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 2.3729083665338644,
|
|
"grad_norm": 0.6131098404603604,
|
|
"learning_rate": 3.345249519145512e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13851338624954224,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3307.1,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 2.380876494023904,
|
|
"grad_norm": 0.587155183133548,
|
|
"learning_rate": 3.339362470114176e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1548629105091095,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4667.6,
|
|
"valid_targets_min": 2986
|
|
},
|
|
{
|
|
"epoch": 2.388844621513944,
|
|
"grad_norm": 0.6205162623877221,
|
|
"learning_rate": 3.333454304446385e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15081962943077087,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3304.5,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 2.396812749003984,
|
|
"grad_norm": 0.6088762919984781,
|
|
"learning_rate": 3.3275251152913735e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1803358793258667,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3561.1,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.4047808764940237,
|
|
"grad_norm": 0.6165816905297514,
|
|
"learning_rate": 3.3215749961298324e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14795807003974915,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3361.0,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 2.4127490039840636,
|
|
"grad_norm": 0.7093923887292809,
|
|
"learning_rate": 3.315604040772442e-05,
|
|
"loss": 0.3083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511913388967514,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4108.2,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 2.4207171314741034,
|
|
"grad_norm": 0.5461063106221496,
|
|
"learning_rate": 3.3096123433583886e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1274980902671814,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3921.0,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 2.4286852589641432,
|
|
"grad_norm": 0.5754317670287957,
|
|
"learning_rate": 3.303599998353882e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15238189697265625,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3932.9,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 2.436653386454183,
|
|
"grad_norm": 0.6267899882855434,
|
|
"learning_rate": 3.297567100550667e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574801802635193,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3298.8,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 2.444621513944223,
|
|
"grad_norm": 0.6178767344720022,
|
|
"learning_rate": 3.2915137450645245e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14424434304237366,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3714.1,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 2.4525896414342627,
|
|
"grad_norm": 0.5925864662994356,
|
|
"learning_rate": 3.285440027333777e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15461206436157227,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4021.6,
|
|
"valid_targets_min": 3645
|
|
},
|
|
{
|
|
"epoch": 2.4605577689243026,
|
|
"grad_norm": 0.5067020381686023,
|
|
"learning_rate": 3.2793460431177827e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13205289840698242,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4157.4,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 2.4685258964143424,
|
|
"grad_norm": 0.5570806905733114,
|
|
"learning_rate": 3.273231888495424e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14321979880332947,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4194.4,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 2.4764940239043822,
|
|
"grad_norm": 0.715776387657958,
|
|
"learning_rate": 3.267097659863592e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14710450172424316,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3978.6,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 2.4844621513944225,
|
|
"grad_norm": 0.6019126948812167,
|
|
"learning_rate": 3.2609434539356726e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13887368142604828,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3164.2,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 2.4924302788844623,
|
|
"grad_norm": 0.5842347130598545,
|
|
"learning_rate": 3.2547693677400126e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16767601668834686,
|
|
"step": 1565,
|
|
"valid_targets_mean": 4596.9,
|
|
"valid_targets_min": 2690
|
|
},
|
|
{
|
|
"epoch": 2.5003984063745017,
|
|
"grad_norm": 0.6014151039522305,
|
|
"learning_rate": 3.248575498618398e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13801634311676025,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3576.6,
|
|
"valid_targets_min": 2358
|
|
},
|
|
{
|
|
"epoch": 2.5083665338645416,
|
|
"grad_norm": 0.5988669443893535,
|
|
"learning_rate": 3.242361944224515e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1550188958644867,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4412.0,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 2.516334661354582,
|
|
"grad_norm": 1.1602051750236582,
|
|
"learning_rate": 3.236128802522411e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16259071230888367,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3310.6,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 2.5243027888446217,
|
|
"grad_norm": 0.619395798668418,
|
|
"learning_rate": 3.229876171784952e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16469460725784302,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3745.2,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 2.5322709163346615,
|
|
"grad_norm": 0.5811774984361462,
|
|
"learning_rate": 3.22360415059227e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15185478329658508,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3504.4,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 2.5402390438247013,
|
|
"grad_norm": 0.6592375730142807,
|
|
"learning_rate": 3.217312837830212e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12678351998329163,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2578.0,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 2.548207171314741,
|
|
"grad_norm": 0.5982285666928621,
|
|
"learning_rate": 3.211002332688779e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14462913572788239,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3755.1,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 2.556175298804781,
|
|
"grad_norm": 0.634918588914544,
|
|
"learning_rate": 3.2046727346605604e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10820312798023224,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2966.2,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 2.564143426294821,
|
|
"grad_norm": 0.5690604204451986,
|
|
"learning_rate": 3.198324143539172e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273760050535202,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3376.6,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 2.5721115537848607,
|
|
"grad_norm": 0.6754675686773965,
|
|
"learning_rate": 3.191956659417674e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14878042042255402,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2537.5,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 2.5800796812749005,
|
|
"grad_norm": 0.6115452619485489,
|
|
"learning_rate": 3.185570382687e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1950845569372177,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4007.8,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 2.5880478087649403,
|
|
"grad_norm": 0.5469768203006049,
|
|
"learning_rate": 3.17916541403437e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14569422602653503,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3992.4,
|
|
"valid_targets_min": 3098
|
|
},
|
|
{
|
|
"epoch": 2.59601593625498,
|
|
"grad_norm": 0.5415260690855109,
|
|
"learning_rate": 3.172741854441704e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13133631646633148,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4142.5,
|
|
"valid_targets_min": 3498
|
|
},
|
|
{
|
|
"epoch": 2.60398406374502,
|
|
"grad_norm": 0.6228669410969733,
|
|
"learning_rate": 3.1662998051840306e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15364228188991547,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3908.6,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 2.61195219123506,
|
|
"grad_norm": 0.7163920939053808,
|
|
"learning_rate": 3.159839367827891e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16317997872829437,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2720.8,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 2.6199203187250997,
|
|
"grad_norm": 0.5093151040383717,
|
|
"learning_rate": 3.153360644229735e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13812774419784546,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4209.6,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 2.6278884462151395,
|
|
"grad_norm": 0.5611384387918703,
|
|
"learning_rate": 3.146863736534317e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440148502588272,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3927.5,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 2.6358565737051793,
|
|
"grad_norm": 0.5949756135166688,
|
|
"learning_rate": 3.140348747173086e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11527255177497864,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3250.5,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 2.643824701195219,
|
|
"grad_norm": 0.6260767960636067,
|
|
"learning_rate": 3.1338157788625695e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17812351882457733,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4397.9,
|
|
"valid_targets_min": 2449
|
|
},
|
|
{
|
|
"epoch": 2.651792828685259,
|
|
"grad_norm": 0.5851065166943961,
|
|
"learning_rate": 3.127264934602754e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1575583666563034,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3895.4,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 2.659760956175299,
|
|
"grad_norm": 0.6238084747272821,
|
|
"learning_rate": 3.120696317675462e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15226903557777405,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3680.2,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 2.6677290836653387,
|
|
"grad_norm": 0.6150614739558937,
|
|
"learning_rate": 3.114110031642723e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17283207178115845,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4499.8,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 2.6756972111553785,
|
|
"grad_norm": 1.2399919193311193,
|
|
"learning_rate": 3.1075061803451405e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13976256549358368,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3865.8,
|
|
"valid_targets_min": 2788
|
|
},
|
|
{
|
|
"epoch": 2.6836653386454183,
|
|
"grad_norm": 0.5350477326551074,
|
|
"learning_rate": 3.100884867900257e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1667133867740631,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4074.8,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 2.691633466135458,
|
|
"grad_norm": 0.6672128201797747,
|
|
"learning_rate": 3.094246198700907e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11858796328306198,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2839.6,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 2.699601593625498,
|
|
"grad_norm": 0.6128810636239489,
|
|
"learning_rate": 3.087590277413578e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1811891496181488,
|
|
"step": 1695,
|
|
"valid_targets_mean": 4235.6,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 2.707569721115538,
|
|
"grad_norm": 0.6116153083916679,
|
|
"learning_rate": 3.0809172089767576e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19102010130882263,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3887.8,
|
|
"valid_targets_min": 219
|
|
},
|
|
{
|
|
"epoch": 2.7155378486055777,
|
|
"grad_norm": 0.5705410245041,
|
|
"learning_rate": 3.0742270985992765e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1410629004240036,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3804.8,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 2.7235059760956175,
|
|
"grad_norm": 0.6645715015139396,
|
|
"learning_rate": 3.067520051758651e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1776059865951538,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3863.4,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 2.7314741035856573,
|
|
"grad_norm": 0.6074009839238762,
|
|
"learning_rate": 3.060796174199424e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13484247028827667,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2893.8,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 2.739442231075697,
|
|
"grad_norm": 0.528055350299334,
|
|
"learning_rate": 3.0540555719314914e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14803050458431244,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4187.0,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 2.747410358565737,
|
|
"grad_norm": 0.587075736779692,
|
|
"learning_rate": 3.0472983512284366e-05,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17222487926483154,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4210.5,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 2.755378486055777,
|
|
"grad_norm": 0.6109700877997402,
|
|
"learning_rate": 3.04052461862585e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13396194577217102,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3010.6,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 2.7633466135458167,
|
|
"grad_norm": 0.6188577067928057,
|
|
"learning_rate": 3.0337344809196547e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07617945224046707,
|
|
"step": 1735,
|
|
"valid_targets_mean": 2067.8,
|
|
"valid_targets_min": 292
|
|
},
|
|
{
|
|
"epoch": 2.7713147410358565,
|
|
"grad_norm": 0.5239023857632885,
|
|
"learning_rate": 3.0269280451644155e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15986491739749908,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4151.2,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 2.7792828685258963,
|
|
"grad_norm": 0.5362837482230216,
|
|
"learning_rate": 3.020105418671659e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1509944051504135,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4105.1,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 2.787250996015936,
|
|
"grad_norm": 0.5560190067839896,
|
|
"learning_rate": 3.0132667090081758e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1459665596485138,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3637.9,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 2.795219123505976,
|
|
"grad_norm": 0.6117304860481148,
|
|
"learning_rate": 3.006412023994328e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1714390516281128,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3695.4,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 2.803187250996016,
|
|
"grad_norm": 0.6054263142624877,
|
|
"learning_rate": 2.999541471702347e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16323651373386383,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4197.4,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 2.8111553784860557,
|
|
"grad_norm": 0.5947895791279191,
|
|
"learning_rate": 2.9926551604546312e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15745720267295837,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3358.5,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 2.8191235059760955,
|
|
"grad_norm": 0.6001862046050473,
|
|
"learning_rate": 2.9857531988220385e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11560022830963135,
|
|
"step": 1770,
|
|
"valid_targets_mean": 2782.0,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 2.8270916334661353,
|
|
"grad_norm": 0.6300307004340259,
|
|
"learning_rate": 2.9788356956221712e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13865865767002106,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3863.4,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 2.835059760956175,
|
|
"grad_norm": 0.6182945054047809,
|
|
"learning_rate": 2.971902759917665e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466536521911621,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3917.1,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.843027888446215,
|
|
"grad_norm": 0.5298397429107443,
|
|
"learning_rate": 2.964954501014467e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11321134120225906,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3792.2,
|
|
"valid_targets_min": 2869
|
|
},
|
|
{
|
|
"epoch": 2.850996015936255,
|
|
"grad_norm": 0.6273210190956005,
|
|
"learning_rate": 2.957991028460112e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14284269511699677,
|
|
"step": 1790,
|
|
"valid_targets_mean": 2845.5,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 2.8589641434262947,
|
|
"grad_norm": 0.5804735026959892,
|
|
"learning_rate": 2.951012452041997e-05,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.174922913312912,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4494.4,
|
|
"valid_targets_min": 2700
|
|
},
|
|
{
|
|
"epoch": 2.8669322709163345,
|
|
"grad_norm": 0.7419190432191575,
|
|
"learning_rate": 2.9440188817856478e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13581791520118713,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3082.6,
|
|
"valid_targets_min": 339
|
|
},
|
|
{
|
|
"epoch": 2.8749003984063743,
|
|
"grad_norm": 0.5665491958828685,
|
|
"learning_rate": 2.937010427952986e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15435200929641724,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4402.1,
|
|
"valid_targets_min": 3260
|
|
},
|
|
{
|
|
"epoch": 2.882868525896414,
|
|
"grad_norm": 0.6495565332447018,
|
|
"learning_rate": 2.929987201040593e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.188032865524292,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4567.4,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 2.8908366533864545,
|
|
"grad_norm": 0.5342486023226761,
|
|
"learning_rate": 2.922949311777962e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19005250930786133,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4846.8,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 2.8988047808764943,
|
|
"grad_norm": 0.6653894464444504,
|
|
"learning_rate": 2.9158968711257576e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14081129431724548,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2541.4,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 2.906772908366534,
|
|
"grad_norm": 0.6262578124048611,
|
|
"learning_rate": 2.908829990274064e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14443422853946686,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3446.8,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 2.914741035856574,
|
|
"grad_norm": 0.6347752827432807,
|
|
"learning_rate": 2.9017487806406312e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19679532945156097,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4701.9,
|
|
"valid_targets_min": 3952
|
|
},
|
|
{
|
|
"epoch": 2.922709163346614,
|
|
"grad_norm": 0.6689696315532441,
|
|
"learning_rate": 2.89465335386912e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12756000459194183,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2977.2,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 2.9306772908366536,
|
|
"grad_norm": 0.616831376153753,
|
|
"learning_rate": 2.8875438218273423e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18263155221939087,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4662.1,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 2.9386454183266935,
|
|
"grad_norm": 0.6565628369554742,
|
|
"learning_rate": 2.880420296605494e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19445011019706726,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3611.1,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 2.9466135458167333,
|
|
"grad_norm": 0.6340297598862685,
|
|
"learning_rate": 2.8732828905143938e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15914231538772583,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3565.1,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 2.954581673306773,
|
|
"grad_norm": 0.6130063359000373,
|
|
"learning_rate": 2.8661317160837038e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15856942534446716,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3286.4,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 2.962549800796813,
|
|
"grad_norm": 0.5882736715432598,
|
|
"learning_rate": 2.8589668860601643e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15061055123806,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3854.0,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 2.970517928286853,
|
|
"grad_norm": 0.5304952983378881,
|
|
"learning_rate": 2.85178851340581e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10105285048484802,
|
|
"step": 1865,
|
|
"valid_targets_mean": 2839.4,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 2.9784860557768926,
|
|
"grad_norm": 0.5395075495945896,
|
|
"learning_rate": 2.8445967112961928e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13914275169372559,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4269.6,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 2.9864541832669325,
|
|
"grad_norm": 0.5231744288139889,
|
|
"learning_rate": 2.8373915931185946e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15183329582214355,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4896.2,
|
|
"valid_targets_min": 3180
|
|
},
|
|
{
|
|
"epoch": 2.9944223107569723,
|
|
"grad_norm": 0.5612640713997488,
|
|
"learning_rate": 2.830173272470241e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16358435153961182,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4041.8,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 3.001593625498008,
|
|
"grad_norm": 0.5995604976964911,
|
|
"learning_rate": 2.822941863156512e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13486389815807343,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3476.0,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 3.0095617529880476,
|
|
"grad_norm": 0.6366474422140981,
|
|
"learning_rate": 2.8156974791891425e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563360095024109,
|
|
"step": 1890,
|
|
"valid_targets_mean": 4201.0,
|
|
"valid_targets_min": 2953
|
|
},
|
|
{
|
|
"epoch": 3.0175298804780875,
|
|
"grad_norm": 0.7305768682977837,
|
|
"learning_rate": 2.8084402347844316e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1641981601715088,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4095.1,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 3.0254980079681273,
|
|
"grad_norm": 0.6142059058779309,
|
|
"learning_rate": 2.801170244361436e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12227625399827957,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4612.0,
|
|
"valid_targets_min": 3237
|
|
},
|
|
{
|
|
"epoch": 3.033466135458167,
|
|
"grad_norm": 0.5802754329647101,
|
|
"learning_rate": 2.7938876225401714e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15479183197021484,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4091.9,
|
|
"valid_targets_min": 2449
|
|
},
|
|
{
|
|
"epoch": 3.041434262948207,
|
|
"grad_norm": 0.5122200468681064,
|
|
"learning_rate": 2.7865924841397985e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14310495555400848,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4970.1,
|
|
"valid_targets_min": 2545
|
|
},
|
|
{
|
|
"epoch": 3.049402390438247,
|
|
"grad_norm": 0.5468786402493747,
|
|
"learning_rate": 2.7792849441768194e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11139404028654099,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3889.2,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 3.057370517928287,
|
|
"grad_norm": 0.6358516773300867,
|
|
"learning_rate": 2.7719651178632605e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16447865962982178,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3449.5,
|
|
"valid_targets_min": 288
|
|
},
|
|
{
|
|
"epoch": 3.065338645418327,
|
|
"grad_norm": 0.6357450523853115,
|
|
"learning_rate": 2.7646331206048586e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1413956582546234,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3402.6,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 3.0733067729083667,
|
|
"grad_norm": 0.5958195249935571,
|
|
"learning_rate": 2.7572890679992376e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14067938923835754,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3301.1,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 3.0812749003984066,
|
|
"grad_norm": 0.5947140196007367,
|
|
"learning_rate": 2.7499330758340898e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16272953152656555,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4757.6,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.0892430278884464,
|
|
"grad_norm": 0.6594534851414603,
|
|
"learning_rate": 2.742565260085348e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1049199253320694,
|
|
"step": 1940,
|
|
"valid_targets_mean": 2760.9,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 3.0972111553784862,
|
|
"grad_norm": 0.5898841686461007,
|
|
"learning_rate": 2.7351857369153595e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12935137748718262,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3645.2,
|
|
"valid_targets_min": 225
|
|
},
|
|
{
|
|
"epoch": 3.105179282868526,
|
|
"grad_norm": 0.5935297691180758,
|
|
"learning_rate": 2.72779462267105e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15314815938472748,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4015.2,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 3.113147410358566,
|
|
"grad_norm": 0.6018144142743058,
|
|
"learning_rate": 2.720392033882094e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15182068943977356,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3905.0,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 3.1211155378486057,
|
|
"grad_norm": 0.5963825090220352,
|
|
"learning_rate": 2.7129780872590768e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14685726165771484,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4461.5,
|
|
"valid_targets_min": 3677
|
|
},
|
|
{
|
|
"epoch": 3.1290836653386456,
|
|
"grad_norm": 0.6917481323512222,
|
|
"learning_rate": 2.705552899691652e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1306656301021576,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3440.0,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 3.1370517928286854,
|
|
"grad_norm": 1.2865263066763915,
|
|
"learning_rate": 2.6981165882466994e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16542966663837433,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4034.5,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 3.1450199203187252,
|
|
"grad_norm": 0.5862591014399262,
|
|
"learning_rate": 2.6906692701664817e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17163851857185364,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4001.5,
|
|
"valid_targets_min": 2274
|
|
},
|
|
{
|
|
"epoch": 3.152988047808765,
|
|
"grad_norm": 0.5709660535338027,
|
|
"learning_rate": 2.683211062866792e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13916179537773132,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4306.9,
|
|
"valid_targets_min": 307
|
|
},
|
|
{
|
|
"epoch": 3.160956175298805,
|
|
"grad_norm": 0.6046602837151367,
|
|
"learning_rate": 2.6757420839351077e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09660614281892776,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2486.8,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 3.1689243027888447,
|
|
"grad_norm": 0.6491917941119068,
|
|
"learning_rate": 2.6682624511287315e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13069605827331543,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3085.6,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 3.1768924302788846,
|
|
"grad_norm": 0.6423556168753874,
|
|
"learning_rate": 2.660772282372938e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10765212774276733,
|
|
"step": 1995,
|
|
"valid_targets_mean": 2737.4,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 3.1848605577689244,
|
|
"grad_norm": 0.6396926948889184,
|
|
"learning_rate": 2.6532716957591128e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13146337866783142,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3224.9,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 3.1928286852589642,
|
|
"grad_norm": 0.5818042509176177,
|
|
"learning_rate": 2.6457608095428925e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09944739937782288,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3452.5,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 3.200796812749004,
|
|
"grad_norm": 0.6402551975996129,
|
|
"learning_rate": 2.6382397421422986e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1358262300491333,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3142.8,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 3.208764940239044,
|
|
"grad_norm": 0.6042377415732846,
|
|
"learning_rate": 2.6307086121358706e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11764895915985107,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4003.0,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 3.2167330677290837,
|
|
"grad_norm": 0.6493343175064425,
|
|
"learning_rate": 2.6231675382607974e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12565159797668457,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2952.9,
|
|
"valid_targets_min": 562
|
|
},
|
|
{
|
|
"epoch": 3.2247011952191236,
|
|
"grad_norm": 0.6490494009274449,
|
|
"learning_rate": 2.6156166394110447e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16414640843868256,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3937.4,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 3.2326693227091634,
|
|
"grad_norm": 0.6132887447825962,
|
|
"learning_rate": 2.60805603463548e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1475113481283188,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4180.5,
|
|
"valid_targets_min": 3326
|
|
},
|
|
{
|
|
"epoch": 3.2406374501992032,
|
|
"grad_norm": 0.679247026494716,
|
|
"learning_rate": 2.6004858431359972e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11567876487970352,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2849.9,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 3.248605577689243,
|
|
"grad_norm": 0.5648846871150982,
|
|
"learning_rate": 2.592906184265635e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1182098463177681,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4033.9,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 3.256573705179283,
|
|
"grad_norm": 0.6571665065508722,
|
|
"learning_rate": 2.585317177526699e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11440694332122803,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2681.2,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 3.2645418326693227,
|
|
"grad_norm": 0.6077727307789242,
|
|
"learning_rate": 2.5777189425688714e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13683414459228516,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3754.1,
|
|
"valid_targets_min": 1609
|
|
},
|
|
{
|
|
"epoch": 3.2725099601593626,
|
|
"grad_norm": 0.687572452632822,
|
|
"learning_rate": 2.570111599187331e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259164810180664,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2959.9,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 3.2804780876494024,
|
|
"grad_norm": 0.5609394014948552,
|
|
"learning_rate": 2.5624952673208608e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13147097826004028,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4716.4,
|
|
"valid_targets_min": 2818
|
|
},
|
|
{
|
|
"epoch": 3.2884462151394422,
|
|
"grad_norm": 0.5757109006459055,
|
|
"learning_rate": 2.5548700670499577e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13211728632450104,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3288.0,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 3.296414342629482,
|
|
"grad_norm": 0.6737818677216962,
|
|
"learning_rate": 2.5472361185949387e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1399116814136505,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3550.8,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 3.304382470119522,
|
|
"grad_norm": 0.5984387043370892,
|
|
"learning_rate": 2.5395935423140487e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11631085723638535,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2886.1,
|
|
"valid_targets_min": 185
|
|
},
|
|
{
|
|
"epoch": 3.3123505976095617,
|
|
"grad_norm": 0.6826229831277852,
|
|
"learning_rate": 2.5319424587015587e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13441094756126404,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3741.4,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 3.3203187250996016,
|
|
"grad_norm": 0.6222883129214012,
|
|
"learning_rate": 2.524282988385867e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09123936295509338,
|
|
"step": 2085,
|
|
"valid_targets_mean": 2420.9,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 3.3282868525896414,
|
|
"grad_norm": 0.6036436875416724,
|
|
"learning_rate": 2.5166152521276014e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13198913633823395,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3586.8,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 3.3362549800796812,
|
|
"grad_norm": 0.6259771803154971,
|
|
"learning_rate": 2.5089393708177083e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13058066368103027,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3258.1,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.344223107569721,
|
|
"grad_norm": 0.5539352449420292,
|
|
"learning_rate": 2.501255465475553e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12083698064088821,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3466.0,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 3.352191235059761,
|
|
"grad_norm": 0.6063032233740718,
|
|
"learning_rate": 2.4935636572470085e-05,
|
|
"loss": 0.2974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511157900094986,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3660.9,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 3.3601593625498007,
|
|
"grad_norm": 0.5836958251674884,
|
|
"learning_rate": 2.4858640674025464e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1489568054676056,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3728.9,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 3.3681274900398406,
|
|
"grad_norm": 0.7744785403219803,
|
|
"learning_rate": 2.4781568173353234e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14987047016620636,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4306.0,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 3.3760956175298804,
|
|
"grad_norm": 0.5292398262457924,
|
|
"learning_rate": 2.4704420285592718e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1497262567281723,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4323.2,
|
|
"valid_targets_min": 3539
|
|
},
|
|
{
|
|
"epoch": 3.3840637450199202,
|
|
"grad_norm": 0.6125782189718384,
|
|
"learning_rate": 2.4627198227071764e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15631897747516632,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4071.4,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 3.39203187250996,
|
|
"grad_norm": 0.5797963912159338,
|
|
"learning_rate": 2.4549903215287635e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15331532061100006,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4717.6,
|
|
"valid_targets_min": 3245
|
|
},
|
|
{
|
|
"epoch": 3.4,
|
|
"grad_norm": 0.6244701259197334,
|
|
"learning_rate": 2.4472536468887795e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11838929355144501,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3324.8,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 3.4079681274900397,
|
|
"grad_norm": 0.6127920759841025,
|
|
"learning_rate": 2.4395099207650673e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13430649042129517,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3650.8,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 3.4159362549800796,
|
|
"grad_norm": 0.6998391345627126,
|
|
"learning_rate": 2.4317592652466444e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09987592697143555,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2595.4,
|
|
"valid_targets_min": 279
|
|
},
|
|
{
|
|
"epoch": 3.4239043824701194,
|
|
"grad_norm": 0.7244425160964371,
|
|
"learning_rate": 2.4240018025317812e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16513580083847046,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3812.9,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 3.4318725099601592,
|
|
"grad_norm": 0.5458440187409629,
|
|
"learning_rate": 2.4162376549260685e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14888599514961243,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4864.2,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 3.439840637450199,
|
|
"grad_norm": 0.6849029917454437,
|
|
"learning_rate": 2.408466944840494e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1478743851184845,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2458.1,
|
|
"valid_targets_min": 312
|
|
},
|
|
{
|
|
"epoch": 3.447808764940239,
|
|
"grad_norm": 0.5230213886309137,
|
|
"learning_rate": 2.4006897947895097e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15073207020759583,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4047.8,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 3.4557768924302787,
|
|
"grad_norm": 0.6894144536145977,
|
|
"learning_rate": 2.392906327389103e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12685012817382812,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3707.5,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 3.4637450199203186,
|
|
"grad_norm": 0.6182744233014824,
|
|
"learning_rate": 2.3851166653548603e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278611719608307,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3423.9,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 3.4717131474103584,
|
|
"grad_norm": 0.6072984899474033,
|
|
"learning_rate": 2.3773209315000344e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17405426502227783,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4520.2,
|
|
"valid_targets_min": 3757
|
|
},
|
|
{
|
|
"epoch": 3.4796812749003982,
|
|
"grad_norm": 0.6075109522285683,
|
|
"learning_rate": 2.3695192487336064e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332045942544937,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3539.1,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 3.487649402390438,
|
|
"grad_norm": 0.579318209786998,
|
|
"learning_rate": 2.361711740058351e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16303032636642456,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4565.4,
|
|
"valid_targets_min": 3886
|
|
},
|
|
{
|
|
"epoch": 3.495617529880478,
|
|
"grad_norm": 0.565535935820369,
|
|
"learning_rate": 2.3538985285688934e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396387815475464,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3660.1,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 3.503585657370518,
|
|
"grad_norm": 0.6680724888114902,
|
|
"learning_rate": 2.3460797374497714e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17963314056396484,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3727.4,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 3.511553784860558,
|
|
"grad_norm": 0.7378675334761234,
|
|
"learning_rate": 2.3382554899734917e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15196353197097778,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3584.1,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 3.519521912350598,
|
|
"grad_norm": 0.6117817117897517,
|
|
"learning_rate": 2.3304259094985883e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16188624501228333,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4811.2,
|
|
"valid_targets_min": 2638
|
|
},
|
|
{
|
|
"epoch": 3.5274900398406377,
|
|
"grad_norm": 0.7152359494553315,
|
|
"learning_rate": 2.322591119467674e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12665317952632904,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3055.8,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 3.5354581673306775,
|
|
"grad_norm": 0.6521648585968394,
|
|
"learning_rate": 2.3147512434054988e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15076977014541626,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3450.8,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 3.5434262948207174,
|
|
"grad_norm": 0.6047539156215063,
|
|
"learning_rate": 2.3069064049169985e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13174653053283691,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3577.0,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 3.551394422310757,
|
|
"grad_norm": 0.6521975076989384,
|
|
"learning_rate": 2.299056727685348e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12156250327825546,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3268.8,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 3.559362549800797,
|
|
"grad_norm": 0.5930785261067133,
|
|
"learning_rate": 2.2912023354700105e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13798466324806213,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4144.4,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 3.567330677290837,
|
|
"grad_norm": 0.6593164729144471,
|
|
"learning_rate": 2.2833433521047853e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15233337879180908,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3792.4,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 3.5752988047808767,
|
|
"grad_norm": 0.5871339084922141,
|
|
"learning_rate": 2.2754799014958597e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1288946568965912,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3557.2,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 3.5832669322709165,
|
|
"grad_norm": 0.5445899656318003,
|
|
"learning_rate": 2.26761210761985e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12100610136985779,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3635.0,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 3.5912350597609564,
|
|
"grad_norm": 0.5465128751833384,
|
|
"learning_rate": 2.259740094521849e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13375595211982727,
|
|
"step": 2255,
|
|
"valid_targets_mean": 4219.5,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 3.599203187250996,
|
|
"grad_norm": 0.5582905069234687,
|
|
"learning_rate": 2.251863986313472e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12912191450595856,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4051.5,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 3.607171314741036,
|
|
"grad_norm": 0.5508307471622537,
|
|
"learning_rate": 2.2439839071708988e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14723606407642365,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4390.9,
|
|
"valid_targets_min": 3072
|
|
},
|
|
{
|
|
"epoch": 3.615139442231076,
|
|
"grad_norm": 0.7005723624233458,
|
|
"learning_rate": 2.2360999813329126e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13096565008163452,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3261.4,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 3.6231075697211157,
|
|
"grad_norm": 0.5858584315320217,
|
|
"learning_rate": 2.2282123330989482e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13934998214244843,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4086.1,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 3.6310756972111555,
|
|
"grad_norm": 0.5484935581764541,
|
|
"learning_rate": 2.220321086827126e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1496485471725464,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3966.0,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 3.6390438247011954,
|
|
"grad_norm": 0.5558339001247016,
|
|
"learning_rate": 2.2124263669322948e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396896243095398,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4098.0,
|
|
"valid_targets_min": 2939
|
|
},
|
|
{
|
|
"epoch": 3.647011952191235,
|
|
"grad_norm": 0.6369737232360697,
|
|
"learning_rate": 2.2045282978840684e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09870846569538116,
|
|
"step": 2290,
|
|
"valid_targets_mean": 2270.4,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 3.654980079681275,
|
|
"grad_norm": 0.6153969894022138,
|
|
"learning_rate": 2.1966270042048655e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1183319166302681,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3203.2,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 3.662948207171315,
|
|
"grad_norm": 0.538426036654963,
|
|
"learning_rate": 2.188722610467942e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11018185317516327,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3402.2,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 3.6709163346613547,
|
|
"grad_norm": 0.5383962469152709,
|
|
"learning_rate": 2.180815241295433e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12926092743873596,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4177.4,
|
|
"valid_targets_min": 3502
|
|
},
|
|
{
|
|
"epoch": 3.6788844621513945,
|
|
"grad_norm": 0.6003161923557355,
|
|
"learning_rate": 2.172905021356383e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12183894962072372,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3276.9,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.6868525896414344,
|
|
"grad_norm": 0.6518112235339004,
|
|
"learning_rate": 2.1649920753647828e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13412031531333923,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3182.9,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 3.694820717131474,
|
|
"grad_norm": 0.5205412554322436,
|
|
"learning_rate": 2.157076528077603e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1912461221218109,
|
|
"step": 2320,
|
|
"valid_targets_mean": 6015.6,
|
|
"valid_targets_min": 3175
|
|
},
|
|
{
|
|
"epoch": 3.702788844621514,
|
|
"grad_norm": 0.5955171494796777,
|
|
"learning_rate": 2.149158504292826e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12436293810606003,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3384.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 3.710756972111554,
|
|
"grad_norm": 0.6612962440281269,
|
|
"learning_rate": 2.1412381288474793e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12987712025642395,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3765.6,
|
|
"valid_targets_min": 1065
|
|
},
|
|
{
|
|
"epoch": 3.7187250996015937,
|
|
"grad_norm": 0.6398347675218756,
|
|
"learning_rate": 2.1333155266156676e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11875618249177933,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3575.2,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 3.7266932270916335,
|
|
"grad_norm": 0.6478178575347608,
|
|
"learning_rate": 2.1253908225066027e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15330427885055542,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2914.2,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.7346613545816734,
|
|
"grad_norm": 0.5707342179380125,
|
|
"learning_rate": 2.1174641414626366e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14285320043563843,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3983.9,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 3.742629482071713,
|
|
"grad_norm": 0.5923028249924766,
|
|
"learning_rate": 2.109535608457287e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14899635314941406,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3962.4,
|
|
"valid_targets_min": 3094
|
|
},
|
|
{
|
|
"epoch": 3.750597609561753,
|
|
"grad_norm": 0.6040850863740893,
|
|
"learning_rate": 2.101605348493274e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14101773500442505,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3419.9,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 3.758565737051793,
|
|
"grad_norm": 0.5613157416279575,
|
|
"learning_rate": 2.093673486600542e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12926945090293884,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3742.1,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 3.7665338645418327,
|
|
"grad_norm": 0.5399070734817591,
|
|
"learning_rate": 2.0857401478342925e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1257469207048416,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3741.6,
|
|
"valid_targets_min": 2927
|
|
},
|
|
{
|
|
"epoch": 3.7745019920318725,
|
|
"grad_norm": 0.6817669899399026,
|
|
"learning_rate": 2.077805457273012e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1523137092590332,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3375.4,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 3.7824701195219124,
|
|
"grad_norm": 0.5796059703785094,
|
|
"learning_rate": 2.0698695400165e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1267370581626892,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3524.1,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 3.790438247011952,
|
|
"grad_norm": 0.5908006727183092,
|
|
"learning_rate": 2.061932521183896e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11956535279750824,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3446.1,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 3.798406374501992,
|
|
"grad_norm": 0.5835483122225003,
|
|
"learning_rate": 2.0539945259117075e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13289514183998108,
|
|
"step": 2385,
|
|
"valid_targets_mean": 3167.5,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 3.806374501992032,
|
|
"grad_norm": 0.5565972103068599,
|
|
"learning_rate": 2.046055679351835e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12216920405626297,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3260.6,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 3.8143426294820717,
|
|
"grad_norm": 0.5902112613689473,
|
|
"learning_rate": 2.0381161066696025e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12644429504871368,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3576.1,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 3.8223107569721115,
|
|
"grad_norm": 0.5636459832337665,
|
|
"learning_rate": 2.030175933041782e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13322488963603973,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4208.4,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 3.8302788844621514,
|
|
"grad_norm": 0.6362095035054844,
|
|
"learning_rate": 2.022235283654619e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15388166904449463,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3620.5,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 3.838247011952191,
|
|
"grad_norm": 0.622345383320758,
|
|
"learning_rate": 2.014294283701862e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1341124176979065,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3622.9,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 3.846215139442231,
|
|
"grad_norm": 0.6346754661160846,
|
|
"learning_rate": 2.006353058382783e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13256362080574036,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3270.0,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 3.854183266932271,
|
|
"grad_norm": 0.5268023793829587,
|
|
"learning_rate": 1.9984117329002112e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1353156566619873,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4941.2,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 3.8621513944223107,
|
|
"grad_norm": 0.5300525675059939,
|
|
"learning_rate": 1.9904704324585516e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14425653219223022,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4485.6,
|
|
"valid_targets_min": 3172
|
|
},
|
|
{
|
|
"epoch": 3.8701195219123505,
|
|
"grad_norm": 0.5388020022950969,
|
|
"learning_rate": 1.9825292822618167e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312248259782791,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3972.4,
|
|
"valid_targets_min": 239
|
|
},
|
|
{
|
|
"epoch": 3.8780876494023904,
|
|
"grad_norm": 0.560189511402876,
|
|
"learning_rate": 1.9745884075116498e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13984884321689606,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4401.2,
|
|
"valid_targets_min": 2858
|
|
},
|
|
{
|
|
"epoch": 3.88605577689243,
|
|
"grad_norm": 0.6421038748598413,
|
|
"learning_rate": 1.9666479334053496e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12848687171936035,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4030.8,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 3.89402390438247,
|
|
"grad_norm": 0.609169241185786,
|
|
"learning_rate": 1.9587079851339016e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273653656244278,
|
|
"step": 2445,
|
|
"valid_targets_mean": 2882.1,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 3.90199203187251,
|
|
"grad_norm": 0.6736514002310635,
|
|
"learning_rate": 1.9507686878799974e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1528712809085846,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4030.1,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 3.9099601593625497,
|
|
"grad_norm": 0.6055893558771849,
|
|
"learning_rate": 1.9428301668160674e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15029467642307281,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4603.8,
|
|
"valid_targets_min": 2911
|
|
},
|
|
{
|
|
"epoch": 3.9179282868525895,
|
|
"grad_norm": 0.574308867524337,
|
|
"learning_rate": 1.9348925471023023e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13215994834899902,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3528.2,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 3.9258964143426294,
|
|
"grad_norm": 0.5651712320533091,
|
|
"learning_rate": 1.9269559538846823e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14778804779052734,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3976.8,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 3.933864541832669,
|
|
"grad_norm": 0.566217304542824,
|
|
"learning_rate": 1.9190205122930056e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13560554385185242,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3654.2,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 3.941832669322709,
|
|
"grad_norm": 0.6866702891545322,
|
|
"learning_rate": 1.911086347438911e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11984483152627945,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3167.0,
|
|
"valid_targets_min": 455
|
|
},
|
|
{
|
|
"epoch": 3.949800796812749,
|
|
"grad_norm": 0.6790922137521931,
|
|
"learning_rate": 1.90315358441391e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1607200801372528,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3555.1,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 3.9577689243027887,
|
|
"grad_norm": 0.6047354406366248,
|
|
"learning_rate": 1.8952223482874114e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13502906262874603,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3240.8,
|
|
"valid_targets_min": 340
|
|
},
|
|
{
|
|
"epoch": 3.9657370517928285,
|
|
"grad_norm": 0.5974440933919509,
|
|
"learning_rate": 1.88729276410475e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10749666392803192,
|
|
"step": 2490,
|
|
"valid_targets_mean": 2338.4,
|
|
"valid_targets_min": 230
|
|
},
|
|
{
|
|
"epoch": 3.9737051792828684,
|
|
"grad_norm": 0.6287521749380471,
|
|
"learning_rate": 1.8793649568852192e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14456066489219666,
|
|
"step": 2495,
|
|
"valid_targets_mean": 3407.4,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 3.981673306772908,
|
|
"grad_norm": 0.5612414508377164,
|
|
"learning_rate": 1.871439051620092e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.117288738489151,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3686.9,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 3.989641434262948,
|
|
"grad_norm": 0.6472855144328568,
|
|
"learning_rate": 1.8635151732706586e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14853492379188538,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4304.1,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 3.997609561752988,
|
|
"grad_norm": 0.5683164926052543,
|
|
"learning_rate": 1.8555934467662485e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16348713636398315,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4443.6,
|
|
"valid_targets_min": 3530
|
|
},
|
|
{
|
|
"epoch": 4.004780876494024,
|
|
"grad_norm": 0.5720072003254708,
|
|
"learning_rate": 1.84767399700227e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10269935429096222,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3126.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 4.012749003984064,
|
|
"grad_norm": 0.6401327200606226,
|
|
"learning_rate": 1.839756948838231e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14503172039985657,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3696.2,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 4.020717131474104,
|
|
"grad_norm": 0.6175905443941119,
|
|
"learning_rate": 1.831842427095778e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13108932971954346,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4221.6,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 4.028685258964144,
|
|
"grad_norm": 0.609025602287618,
|
|
"learning_rate": 1.823930556556724e-05,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1457751840353012,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4213.2,
|
|
"valid_targets_min": 3146
|
|
},
|
|
{
|
|
"epoch": 4.036653386454184,
|
|
"grad_norm": 0.5324632778008639,
|
|
"learning_rate": 1.8160214619610843e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13213565945625305,
|
|
"step": 2535,
|
|
"valid_targets_mean": 5396.5,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 4.044621513944223,
|
|
"grad_norm": 0.5968218009537902,
|
|
"learning_rate": 1.8081152680051075e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11637388169765472,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3910.0,
|
|
"valid_targets_min": 2294
|
|
},
|
|
{
|
|
"epoch": 4.052589641434263,
|
|
"grad_norm": 0.5436350172753162,
|
|
"learning_rate": 1.8002120993393095e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11976222693920135,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3767.1,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 4.060557768924303,
|
|
"grad_norm": 0.5963199711651942,
|
|
"learning_rate": 1.7923120805665087e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15962520241737366,
|
|
"step": 2550,
|
|
"valid_targets_mean": 4453.5,
|
|
"valid_targets_min": 3735
|
|
},
|
|
{
|
|
"epoch": 4.068525896414343,
|
|
"grad_norm": 0.5958728257310438,
|
|
"learning_rate": 1.7844153362398638e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11870819330215454,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3804.8,
|
|
"valid_targets_min": 2116
|
|
},
|
|
{
|
|
"epoch": 4.076494023904383,
|
|
"grad_norm": 0.6362877665061702,
|
|
"learning_rate": 1.776521990860905e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14715123176574707,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3874.2,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 4.084462151394423,
|
|
"grad_norm": 0.627540440735645,
|
|
"learning_rate": 1.7686321688775772e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13024848699569702,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4178.6,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 4.092430278884462,
|
|
"grad_norm": 0.619079317681379,
|
|
"learning_rate": 1.7607459946822717e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1171807050704956,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3078.2,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 4.100398406374502,
|
|
"grad_norm": 0.5968668664984488,
|
|
"learning_rate": 1.7528635926098715e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10291929543018341,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2617.1,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 4.108366533864542,
|
|
"grad_norm": 0.6002374068933708,
|
|
"learning_rate": 1.7449850869357846e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13879713416099548,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4197.0,
|
|
"valid_targets_min": 3299
|
|
},
|
|
{
|
|
"epoch": 4.116334661354582,
|
|
"grad_norm": 0.5889840886046992,
|
|
"learning_rate": 1.7371106018739886e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16842882335186005,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4845.5,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 4.124302788844622,
|
|
"grad_norm": 0.6293201945894611,
|
|
"learning_rate": 1.729240261575072e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09744669497013092,
|
|
"step": 2590,
|
|
"valid_targets_mean": 2754.2,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 4.132270916334662,
|
|
"grad_norm": 0.699730460411323,
|
|
"learning_rate": 1.7213741901242747e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18035660684108734,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3327.5,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 4.140239043824701,
|
|
"grad_norm": 0.615311548996262,
|
|
"learning_rate": 1.713512511539536e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12695595622062683,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2938.9,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 4.148207171314741,
|
|
"grad_norm": 0.6054858047449538,
|
|
"learning_rate": 1.705655349769534e-05,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12634170055389404,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3309.4,
|
|
"valid_targets_min": 561
|
|
},
|
|
{
|
|
"epoch": 4.156175298804781,
|
|
"grad_norm": 0.6011487774505819,
|
|
"learning_rate": 1.6978028286917336e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.150556743144989,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4359.9,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 4.164143426294821,
|
|
"grad_norm": 0.6354149285950226,
|
|
"learning_rate": 1.6899550721104362e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10773111134767532,
|
|
"step": 2615,
|
|
"valid_targets_mean": 2906.2,
|
|
"valid_targets_min": 236
|
|
},
|
|
{
|
|
"epoch": 4.172111553784861,
|
|
"grad_norm": 0.6472900571793592,
|
|
"learning_rate": 1.6821122037548223e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14156454801559448,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3117.5,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 4.180079681274901,
|
|
"grad_norm": 1.7388435945929173,
|
|
"learning_rate": 1.6742743472770063e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13756445050239563,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3921.9,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 4.18804780876494,
|
|
"grad_norm": 0.5719938609987489,
|
|
"learning_rate": 1.666441626250083e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13680854439735413,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4360.4,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 4.19601593625498,
|
|
"grad_norm": 0.5598967171434873,
|
|
"learning_rate": 1.65861416416618e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14067170023918152,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4220.8,
|
|
"valid_targets_min": 2567
|
|
},
|
|
{
|
|
"epoch": 4.20398406374502,
|
|
"grad_norm": 0.5954103735840044,
|
|
"learning_rate": 1.6507920844345135e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10944996029138565,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3185.2,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 4.21195219123506,
|
|
"grad_norm": 0.6274287229021701,
|
|
"learning_rate": 1.642975510379439e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11779697239398956,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3483.0,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 4.2199203187251,
|
|
"grad_norm": 0.6013008613155694,
|
|
"learning_rate": 1.6351645652385095e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.136363223195076,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4350.0,
|
|
"valid_targets_min": 3070
|
|
},
|
|
{
|
|
"epoch": 4.22788844621514,
|
|
"grad_norm": 0.5579786913359722,
|
|
"learning_rate": 1.6273593721605295e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1384572684764862,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4822.0,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 4.235856573705179,
|
|
"grad_norm": 0.6151644516447127,
|
|
"learning_rate": 1.6195600542036188e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11860912293195724,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3096.6,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 4.243824701195219,
|
|
"grad_norm": 0.6013137436944016,
|
|
"learning_rate": 1.6117667343332658e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1265154927968979,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3917.5,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 4.251792828685259,
|
|
"grad_norm": 0.5578421855801583,
|
|
"learning_rate": 1.6039795354203925e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13051439821720123,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4113.4,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 4.259760956175299,
|
|
"grad_norm": 0.6366682354964354,
|
|
"learning_rate": 1.5961985802394195e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13450387120246887,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3713.9,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 4.267729083665339,
|
|
"grad_norm": 0.5645704713350099,
|
|
"learning_rate": 1.5884239914663232e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11777573078870773,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3160.2,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 4.275697211155379,
|
|
"grad_norm": 0.5992604755923369,
|
|
"learning_rate": 1.58065589167671e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13701510429382324,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3644.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 4.283665338645418,
|
|
"grad_norm": 0.5809410715544644,
|
|
"learning_rate": 1.572894403343878e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.150016188621521,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4719.0,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 4.291633466135458,
|
|
"grad_norm": 0.6333561999558883,
|
|
"learning_rate": 1.5651396488368863e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11701355874538422,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2797.4,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 4.299601593625498,
|
|
"grad_norm": 0.6265872692246324,
|
|
"learning_rate": 1.5573917504186306e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09892305731773376,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2866.5,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 4.307569721115538,
|
|
"grad_norm": 0.7058453282378055,
|
|
"learning_rate": 1.5496508302439096e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10564042627811432,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3128.2,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 4.315537848605578,
|
|
"grad_norm": 0.5810402160845162,
|
|
"learning_rate": 1.5419170103575037e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12401340156793594,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3585.1,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 4.323505976095618,
|
|
"grad_norm": 0.6021195692971588,
|
|
"learning_rate": 1.534190412692246e-05,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14363420009613037,
|
|
"step": 2715,
|
|
"valid_targets_mean": 4491.1,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 4.331474103585657,
|
|
"grad_norm": 0.6421757152001665,
|
|
"learning_rate": 1.5264711590671067e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09448320418596268,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2747.0,
|
|
"valid_targets_min": 302
|
|
},
|
|
{
|
|
"epoch": 4.339442231075697,
|
|
"grad_norm": 0.64221293405125,
|
|
"learning_rate": 1.5187593711852653e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14570492506027222,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3436.6,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.347410358565737,
|
|
"grad_norm": 0.6063579652481405,
|
|
"learning_rate": 1.5110551706321952e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15537774562835693,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5108.0,
|
|
"valid_targets_min": 3796
|
|
},
|
|
{
|
|
"epoch": 4.355378486055777,
|
|
"grad_norm": 0.5627554063810711,
|
|
"learning_rate": 1.5033586788737492e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16793255507946014,
|
|
"step": 2735,
|
|
"valid_targets_mean": 6144.2,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 4.363346613545817,
|
|
"grad_norm": 0.6693086211462621,
|
|
"learning_rate": 1.495670017254238e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11684629321098328,
|
|
"step": 2740,
|
|
"valid_targets_mean": 2792.8,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 4.371314741035857,
|
|
"grad_norm": 0.5520813726926395,
|
|
"learning_rate": 1.487989306994525e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14985960721969604,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4697.1,
|
|
"valid_targets_min": 3255
|
|
},
|
|
{
|
|
"epoch": 4.379282868525896,
|
|
"grad_norm": 0.5770594788479113,
|
|
"learning_rate": 1.480316669190108e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14154091477394104,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4440.0,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 4.387250996015936,
|
|
"grad_norm": 0.6444824733637786,
|
|
"learning_rate": 1.4726522248092132e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14305931329727173,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4230.0,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 4.395219123505976,
|
|
"grad_norm": 0.651398166700227,
|
|
"learning_rate": 1.4649960946908897e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13604432344436646,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3322.8,
|
|
"valid_targets_min": 212
|
|
},
|
|
{
|
|
"epoch": 4.403187250996016,
|
|
"grad_norm": 1.0035239212131737,
|
|
"learning_rate": 1.4573483995430992e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0961909294128418,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3236.6,
|
|
"valid_targets_min": 320
|
|
},
|
|
{
|
|
"epoch": 4.411155378486056,
|
|
"grad_norm": 0.6149666100569019,
|
|
"learning_rate": 1.4497092599408207e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14706659317016602,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3674.4,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 4.419123505976096,
|
|
"grad_norm": 0.5944075105572708,
|
|
"learning_rate": 1.4420787963241399e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14063294231891632,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4041.8,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 4.427091633466135,
|
|
"grad_norm": 0.5683592411277911,
|
|
"learning_rate": 1.4344571289963592e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12611077725887299,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4117.4,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 4.435059760956175,
|
|
"grad_norm": 0.6042269294027321,
|
|
"learning_rate": 1.426844378122095e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12424126267433167,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3238.1,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 4.443027888446215,
|
|
"grad_norm": 0.6076612649120969,
|
|
"learning_rate": 1.4192406637253853e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15406498312950134,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4695.2,
|
|
"valid_targets_min": 3038
|
|
},
|
|
{
|
|
"epoch": 4.450996015936255,
|
|
"grad_norm": 0.610314242877089,
|
|
"learning_rate": 1.4116461056877986e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.139997661113739,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3597.5,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 4.458964143426295,
|
|
"grad_norm": 0.6407491494730748,
|
|
"learning_rate": 1.4040608237465412e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259598433971405,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3842.8,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 4.466932270916335,
|
|
"grad_norm": 0.626833320757597,
|
|
"learning_rate": 1.3964849374925712e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13756072521209717,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3469.4,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 4.474900398406374,
|
|
"grad_norm": 0.6113636018899783,
|
|
"learning_rate": 1.3889185663687133e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16179078817367554,
|
|
"step": 2810,
|
|
"valid_targets_mean": 4718.1,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.482868525896414,
|
|
"grad_norm": 0.6016491127594051,
|
|
"learning_rate": 1.3813618296677734e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11874759197235107,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3507.0,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 4.490836653386454,
|
|
"grad_norm": 0.587263270292815,
|
|
"learning_rate": 1.3738148465306607e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12201830744743347,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3844.5,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 4.498804780876494,
|
|
"grad_norm": 0.5440429270288707,
|
|
"learning_rate": 1.3662777359445065e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12927262485027313,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4035.0,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 4.506772908366534,
|
|
"grad_norm": 0.6806185747715815,
|
|
"learning_rate": 1.3587506167407922e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13687384128570557,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3160.6,
|
|
"valid_targets_min": 276
|
|
},
|
|
{
|
|
"epoch": 4.514741035856574,
|
|
"grad_norm": 1.0865846082181532,
|
|
"learning_rate": 1.3512336075934704e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1198994517326355,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3477.8,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 4.522709163346613,
|
|
"grad_norm": 0.6067100547590222,
|
|
"learning_rate": 1.3437268270170969e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296362578868866,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3557.1,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 4.530677290836653,
|
|
"grad_norm": 0.6530018117969328,
|
|
"learning_rate": 1.3362303933649648e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1688781976699829,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4241.9,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 4.538645418326693,
|
|
"grad_norm": 0.5678765737865717,
|
|
"learning_rate": 1.328744424827232e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11141425371170044,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3766.9,
|
|
"valid_targets_min": 205
|
|
},
|
|
{
|
|
"epoch": 4.546613545816733,
|
|
"grad_norm": 0.7560314376646058,
|
|
"learning_rate": 1.3212690394290646e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1437414586544037,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4350.5,
|
|
"valid_targets_min": 749
|
|
},
|
|
{
|
|
"epoch": 4.554581673306773,
|
|
"grad_norm": 0.5845211493049349,
|
|
"learning_rate": 1.3138043550287707e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12323597818613052,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3659.0,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 4.562549800796813,
|
|
"grad_norm": 0.6486812981624971,
|
|
"learning_rate": 1.3063504893159458e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1795152723789215,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3804.5,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 4.570517928286852,
|
|
"grad_norm": 0.6301843093431468,
|
|
"learning_rate": 1.2989075598096148e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16830292344093323,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4378.8,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 4.578486055776892,
|
|
"grad_norm": 0.6268135224867947,
|
|
"learning_rate": 1.2914756838563816e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396118551492691,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3876.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 4.586454183266932,
|
|
"grad_norm": 0.5408965385460501,
|
|
"learning_rate": 1.2840549786285776e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.170441135764122,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4401.6,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 4.594422310756972,
|
|
"grad_norm": 0.5579675203394512,
|
|
"learning_rate": 1.2766455611224127e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09349963814020157,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3135.0,
|
|
"valid_targets_min": 286
|
|
},
|
|
{
|
|
"epoch": 4.602390438247012,
|
|
"grad_norm": 0.5940088764164068,
|
|
"learning_rate": 1.2692475481561357e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1439450979232788,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4247.5,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 4.610358565737052,
|
|
"grad_norm": 0.5896375920810674,
|
|
"learning_rate": 1.2618610563681863e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14034605026245117,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4082.8,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 4.618326693227091,
|
|
"grad_norm": 0.6081159186670604,
|
|
"learning_rate": 1.2544862022153601e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338728368282318,
|
|
"step": 2900,
|
|
"valid_targets_mean": 3608.2,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 4.626294820717131,
|
|
"grad_norm": 0.6308840394377083,
|
|
"learning_rate": 1.2471231019709732e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18884161114692688,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4549.6,
|
|
"valid_targets_min": 3188
|
|
},
|
|
{
|
|
"epoch": 4.634262948207171,
|
|
"grad_norm": 0.6307301868840895,
|
|
"learning_rate": 1.2397718717230243e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11760175228118896,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3715.1,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 4.642231075697211,
|
|
"grad_norm": 0.6637131816811684,
|
|
"learning_rate": 1.2324326273723707e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13860295712947845,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3125.9,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 4.650199203187251,
|
|
"grad_norm": 0.7079389576834997,
|
|
"learning_rate": 1.225105484630896e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16028892993927002,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3774.5,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 4.658167330677291,
|
|
"grad_norm": 0.6342776074519758,
|
|
"learning_rate": 1.2177905590196884e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14029663801193237,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3839.9,
|
|
"valid_targets_min": 3242
|
|
},
|
|
{
|
|
"epoch": 4.66613545816733,
|
|
"grad_norm": 0.5830877226753675,
|
|
"learning_rate": 1.2104879658672175e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13618014752864838,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3554.6,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 4.67410358565737,
|
|
"grad_norm": 0.6253219779106525,
|
|
"learning_rate": 1.2031978203075172e-05,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1254906952381134,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3197.5,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 4.68207171314741,
|
|
"grad_norm": 0.6141152336245717,
|
|
"learning_rate": 1.1959202372783728e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13074913620948792,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4284.1,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 4.69003984063745,
|
|
"grad_norm": 0.6183014945928188,
|
|
"learning_rate": 1.188655331519502e-05,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11938630044460297,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3510.5,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 4.69800796812749,
|
|
"grad_norm": 0.6036958793292667,
|
|
"learning_rate": 1.1814032175707556e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12805697321891785,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3902.5,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 4.70597609561753,
|
|
"grad_norm": 0.762495220481728,
|
|
"learning_rate": 1.1741640097703018e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1260424256324768,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4108.6,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 4.713944223107569,
|
|
"grad_norm": 0.6123259514957903,
|
|
"learning_rate": 1.1669378222528303e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17273962497711182,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4228.9,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 4.721912350597609,
|
|
"grad_norm": 0.5751366213718874,
|
|
"learning_rate": 1.1597247689477502e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1270749866962433,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3677.0,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 4.729880478087649,
|
|
"grad_norm": 0.5121925447196681,
|
|
"learning_rate": 1.1525249635773935e-05,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13193365931510925,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4637.4,
|
|
"valid_targets_min": 2272
|
|
},
|
|
{
|
|
"epoch": 4.737848605577689,
|
|
"grad_norm": 0.5622471227682847,
|
|
"learning_rate": 1.1453385196552247e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12934821844100952,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3515.5,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 4.745816733067729,
|
|
"grad_norm": 0.5589445788982488,
|
|
"learning_rate": 1.1381655504840468e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12661080062389374,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3686.2,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 4.753784860557769,
|
|
"grad_norm": 0.5686016273636768,
|
|
"learning_rate": 1.1310061691542198e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10433939844369888,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2660.1,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.761752988047808,
|
|
"grad_norm": 0.62086351110411,
|
|
"learning_rate": 1.1238604885418734e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13418859243392944,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3186.5,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 4.769721115537848,
|
|
"grad_norm": 0.6999014478550488,
|
|
"learning_rate": 1.1167286213071293e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17473706603050232,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3881.5,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 4.777689243027888,
|
|
"grad_norm": 0.6581397019022929,
|
|
"learning_rate": 1.109610679892327e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1315963864326477,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3558.2,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 4.785657370517928,
|
|
"grad_norm": 0.5138979491365111,
|
|
"learning_rate": 1.102506776520246e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13793763518333435,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4831.6,
|
|
"valid_targets_min": 3467
|
|
},
|
|
{
|
|
"epoch": 4.793625498007968,
|
|
"grad_norm": 0.6293574910876729,
|
|
"learning_rate": 1.0954170231923422e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1142679750919342,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2987.9,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 4.801593625498008,
|
|
"grad_norm": 0.5640515125693804,
|
|
"learning_rate": 1.0883415316869775e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12808850407600403,
|
|
"step": 3015,
|
|
"valid_targets_mean": 4490.0,
|
|
"valid_targets_min": 3071
|
|
},
|
|
{
|
|
"epoch": 4.8095617529880474,
|
|
"grad_norm": 0.5351200613662318,
|
|
"learning_rate": 1.0812804135576588e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12939012050628662,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4176.9,
|
|
"valid_targets_min": 2148
|
|
},
|
|
{
|
|
"epoch": 4.817529880478087,
|
|
"grad_norm": 0.5478240000204622,
|
|
"learning_rate": 1.0742337801312823e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1086738258600235,
|
|
"step": 3025,
|
|
"valid_targets_mean": 3624.8,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 4.825498007968127,
|
|
"grad_norm": 0.6441739518704989,
|
|
"learning_rate": 1.0672017425063727e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15140973031520844,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3325.4,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 4.833466135458167,
|
|
"grad_norm": 0.5964090477642284,
|
|
"learning_rate": 1.0601844115513376e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15871793031692505,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3672.0,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 4.841434262948207,
|
|
"grad_norm": 0.5476839569514874,
|
|
"learning_rate": 1.0531818979027136e-05,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13524594902992249,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4092.1,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 4.849402390438247,
|
|
"grad_norm": 0.6237182251210356,
|
|
"learning_rate": 1.0461943119634257e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13669374585151672,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3500.8,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 4.8573705179282864,
|
|
"grad_norm": 0.6452188999210264,
|
|
"learning_rate": 1.0392217639010478e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14306673407554626,
|
|
"step": 3050,
|
|
"valid_targets_mean": 3621.4,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 4.865338645418326,
|
|
"grad_norm": 0.556578150084834,
|
|
"learning_rate": 1.0322643636460619e-05,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14264307916164398,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4356.8,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 4.873306772908366,
|
|
"grad_norm": 0.6371613150664773,
|
|
"learning_rate": 1.0253222208901263e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12909969687461853,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3354.9,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 4.881274900398406,
|
|
"grad_norm": 0.5501207886817936,
|
|
"learning_rate": 1.0183954450843493e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14595386385917664,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5109.0,
|
|
"valid_targets_min": 3403
|
|
},
|
|
{
|
|
"epoch": 4.889243027888446,
|
|
"grad_norm": 0.5872788387480288,
|
|
"learning_rate": 1.0114841454375592e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14299675822257996,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4005.0,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 4.897211155378486,
|
|
"grad_norm": 0.577514005160258,
|
|
"learning_rate": 1.0045884309145846e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1589789092540741,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4120.2,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 4.9051792828685254,
|
|
"grad_norm": 0.7255061859176135,
|
|
"learning_rate": 9.97708410234535e-06,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10865312069654465,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3547.6,
|
|
"valid_targets_min": 271
|
|
},
|
|
{
|
|
"epoch": 4.913147410358565,
|
|
"grad_norm": 0.6320521664668232,
|
|
"learning_rate": 9.90844191869091e-06,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15063154697418213,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4336.4,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 4.921115537848605,
|
|
"grad_norm": 0.6382363135199773,
|
|
"learning_rate": 9.839958840407873e-06,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08313880115747452,
|
|
"step": 3090,
|
|
"valid_targets_mean": 1961.0,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 4.929083665338645,
|
|
"grad_norm": 0.597234618248688,
|
|
"learning_rate": 9.771635947213135e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442553997039795,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4394.2,
|
|
"valid_targets_min": 3447
|
|
},
|
|
{
|
|
"epoch": 4.937051792828685,
|
|
"grad_norm": 0.5433107080604842,
|
|
"learning_rate": 9.703474316298066e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334853172302246,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4664.4,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.945019920318725,
|
|
"grad_norm": 0.547178955754686,
|
|
"learning_rate": 9.635475022311528e-06,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09971381723880768,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3355.2,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 4.9529880478087644,
|
|
"grad_norm": 0.5494820644025523,
|
|
"learning_rate": 9.567639137342997e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13351447880268097,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4463.0,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 4.960956175298804,
|
|
"grad_norm": 0.5956550952853131,
|
|
"learning_rate": 9.499967730905557e-06,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1420593410730362,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3492.9,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 4.968924302788845,
|
|
"grad_norm": 0.5890941314104784,
|
|
"learning_rate": 9.43246186991914e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.107373908162117,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2851.2,
|
|
"valid_targets_min": 581
|
|
},
|
|
{
|
|
"epoch": 4.976892430278885,
|
|
"grad_norm": 0.6330487368510908,
|
|
"learning_rate": 9.365122618693623e-06,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14127232134342194,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3870.2,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 4.984860557768925,
|
|
"grad_norm": 0.5684114548678937,
|
|
"learning_rate": 9.29795103891211e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10140170902013779,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3053.9,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 4.9928286852589645,
|
|
"grad_norm": 0.5531437463797109,
|
|
"learning_rate": 9.230948189614144e-06,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14458362758159637,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3941.5,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.7851376467007583,
|
|
"learning_rate": 9.164115127179038e-06,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.245457261800766,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3788.1,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 5.00796812749004,
|
|
"grad_norm": 0.6120181494813008,
|
|
"learning_rate": 9.09745290530923e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13366028666496277,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3292.1,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 5.01593625498008,
|
|
"grad_norm": 0.6173310357599049,
|
|
"learning_rate": 9.030962575013622e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10277126729488373,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2644.2,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 5.0239043824701195,
|
|
"grad_norm": 0.6320852166475259,
|
|
"learning_rate": 8.964645184591082e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1892041563987732,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3817.2,
|
|
"valid_targets_min": 2602
|
|
},
|
|
{
|
|
"epoch": 5.031872509960159,
|
|
"grad_norm": 0.5864932038948057,
|
|
"learning_rate": 8.898501779613842e-06,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0961039811372757,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2533.5,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 5.039840637450199,
|
|
"grad_norm": 0.6558564309235504,
|
|
"learning_rate": 8.832533402911056e-06,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1114492118358612,
|
|
"step": 3165,
|
|
"valid_targets_mean": 2848.4,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 5.047808764940239,
|
|
"grad_norm": 0.604799911749898,
|
|
"learning_rate": 8.766741094552368e-06,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14596164226531982,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4304.1,
|
|
"valid_targets_min": 2269
|
|
},
|
|
{
|
|
"epoch": 5.055776892430279,
|
|
"grad_norm": 0.6907387417878199,
|
|
"learning_rate": 8.70112589183147e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20130327343940735,
|
|
"step": 3175,
|
|
"valid_targets_mean": 4048.9,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 5.063745019920319,
|
|
"grad_norm": 0.6435296345766328,
|
|
"learning_rate": 8.63568882924979e-06,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12666499614715576,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3742.2,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 5.0717131474103585,
|
|
"grad_norm": 0.6121321847314146,
|
|
"learning_rate": 8.570430938500155e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14254778623580933,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4353.9,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 5.079681274900398,
|
|
"grad_norm": 0.5950951684074194,
|
|
"learning_rate": 8.50535324845055e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303534358739853,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3913.5,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 5.087649402390438,
|
|
"grad_norm": 0.6009818742648716,
|
|
"learning_rate": 8.44045678512787e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09683337062597275,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2937.6,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 5.095617529880478,
|
|
"grad_norm": 0.5320908668862425,
|
|
"learning_rate": 8.375742571701755e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11519855260848999,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4071.8,
|
|
"valid_targets_min": 3058
|
|
},
|
|
{
|
|
"epoch": 5.103585657370518,
|
|
"grad_norm": 0.633736222964632,
|
|
"learning_rate": 8.311211628468477e-06,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1808081418275833,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4401.2,
|
|
"valid_targets_min": 3670
|
|
},
|
|
{
|
|
"epoch": 5.111553784860558,
|
|
"grad_norm": 0.6358160265795091,
|
|
"learning_rate": 8.24686497283481e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11627238988876343,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3284.1,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 5.1195219123505975,
|
|
"grad_norm": 0.6214276392825285,
|
|
"learning_rate": 8.182703619302044e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15138496458530426,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4645.5,
|
|
"valid_targets_min": 3657
|
|
},
|
|
{
|
|
"epoch": 5.127490039840637,
|
|
"grad_norm": 0.631697844135404,
|
|
"learning_rate": 8.118728579449937e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14449623227119446,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3653.4,
|
|
"valid_targets_min": 936
|
|
},
|
|
{
|
|
"epoch": 5.135458167330677,
|
|
"grad_norm": 0.5634288323882933,
|
|
"learning_rate": 8.054940861920797e-06,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15146976709365845,
|
|
"step": 3225,
|
|
"valid_targets_mean": 5302.8,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 5.143426294820717,
|
|
"grad_norm": 0.615565070732101,
|
|
"learning_rate": 7.991341472403593e-06,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10391537845134735,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3142.2,
|
|
"valid_targets_min": 911
|
|
},
|
|
{
|
|
"epoch": 5.151394422310757,
|
|
"grad_norm": 0.6303560007127735,
|
|
"learning_rate": 7.927931413618049e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13570864498615265,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4128.4,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 5.159362549800797,
|
|
"grad_norm": 0.6195753953084229,
|
|
"learning_rate": 7.864711685298894e-06,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12660765647888184,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3844.6,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 5.1673306772908365,
|
|
"grad_norm": 0.6023833932451614,
|
|
"learning_rate": 7.80168328418005e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1314094513654709,
|
|
"step": 3245,
|
|
"valid_targets_mean": 4262.2,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 5.175298804780876,
|
|
"grad_norm": 0.6369710896263531,
|
|
"learning_rate": 7.738847203978947e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14783096313476562,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3945.1,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 5.183266932270916,
|
|
"grad_norm": 0.6290959951478746,
|
|
"learning_rate": 7.676204435380858e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13241353631019592,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4444.0,
|
|
"valid_targets_min": 2618
|
|
},
|
|
{
|
|
"epoch": 5.191235059760956,
|
|
"grad_norm": 0.5855838407019288,
|
|
"learning_rate": 7.613755966023249e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13430337607860565,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3815.6,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 5.199203187250996,
|
|
"grad_norm": 0.6121148914572989,
|
|
"learning_rate": 7.551502780480251e-06,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14205516874790192,
|
|
"step": 3265,
|
|
"valid_targets_mean": 4030.8,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 5.207171314741036,
|
|
"grad_norm": 0.6127443563266214,
|
|
"learning_rate": 7.4894458602470886e-06,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14408063888549805,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4403.8,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 5.2151394422310755,
|
|
"grad_norm": 0.5652079759756923,
|
|
"learning_rate": 7.427586183724662e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12766605615615845,
|
|
"step": 3275,
|
|
"valid_targets_mean": 4511.5,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 5.223107569721115,
|
|
"grad_norm": 0.5260422761564271,
|
|
"learning_rate": 7.365924726204063e-06,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0982002541422844,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3441.4,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 5.231075697211155,
|
|
"grad_norm": 0.6101008470360397,
|
|
"learning_rate": 7.3044624598512406e-06,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12030519545078278,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4636.6,
|
|
"valid_targets_min": 3069
|
|
},
|
|
{
|
|
"epoch": 5.239043824701195,
|
|
"grad_norm": 0.6293104823597632,
|
|
"learning_rate": 7.243200353691653e-06,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1115085631608963,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3239.8,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 5.247011952191235,
|
|
"grad_norm": 1.0769677052879854,
|
|
"learning_rate": 7.18213937359499e-06,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11736782640218735,
|
|
"step": 3295,
|
|
"valid_targets_mean": 4504.4,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 5.254980079681275,
|
|
"grad_norm": 0.6204834440821602,
|
|
"learning_rate": 7.121280482259976e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13409659266471863,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3729.5,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 5.2629482071713145,
|
|
"grad_norm": 0.6292985982003821,
|
|
"learning_rate": 7.060624639199138e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12875115871429443,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3483.2,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 5.270916334661354,
|
|
"grad_norm": 0.5638927036364227,
|
|
"learning_rate": 7.000172800723715e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13111388683319092,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3953.2,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 5.278884462151394,
|
|
"grad_norm": 0.5207585707244314,
|
|
"learning_rate": 6.939925919928585e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11312989145517349,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3930.0,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 5.286852589641434,
|
|
"grad_norm": 0.6308579300090001,
|
|
"learning_rate": 6.879884946677205e-06,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12059182673692703,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3784.1,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 5.294820717131474,
|
|
"grad_norm": 0.7137019369582966,
|
|
"learning_rate": 6.8200508275866726e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13358832895755768,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2928.1,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 5.302788844621514,
|
|
"grad_norm": 0.7600953451448714,
|
|
"learning_rate": 6.76042450601277e-06,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.135623961687088,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2927.5,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 5.3107569721115535,
|
|
"grad_norm": 0.5930150460883772,
|
|
"learning_rate": 6.701006922035125e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1559372842311859,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4291.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 5.318725099601593,
|
|
"grad_norm": 0.6303787749793871,
|
|
"learning_rate": 6.641799012442349e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11322662234306335,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3000.9,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 5.326693227091633,
|
|
"grad_norm": 0.6274733616364941,
|
|
"learning_rate": 6.582801710717291e-06,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14976182579994202,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4425.8,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 5.334661354581673,
|
|
"grad_norm": 0.6404124255568916,
|
|
"learning_rate": 6.524015947022333e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14457091689109802,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3645.4,
|
|
"valid_targets_min": 473
|
|
},
|
|
{
|
|
"epoch": 5.342629482071713,
|
|
"grad_norm": 0.7056415948297033,
|
|
"learning_rate": 6.465442648184692e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10674829035997391,
|
|
"step": 3355,
|
|
"valid_targets_mean": 4155.5,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 5.350597609561753,
|
|
"grad_norm": 0.713178600135085,
|
|
"learning_rate": 6.4070827376818424e-06,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14119845628738403,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4175.0,
|
|
"valid_targets_min": 3516
|
|
},
|
|
{
|
|
"epoch": 5.3585657370517925,
|
|
"grad_norm": 0.5363588089502721,
|
|
"learning_rate": 6.348937135626922e-06,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13907405734062195,
|
|
"step": 3365,
|
|
"valid_targets_mean": 4721.6,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 5.366533864541832,
|
|
"grad_norm": 0.584369676262613,
|
|
"learning_rate": 6.291006758754241e-06,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14466825127601624,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3820.5,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 5.374501992031872,
|
|
"grad_norm": 0.5681324843263479,
|
|
"learning_rate": 6.233292520404852e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12271925806999207,
|
|
"step": 3375,
|
|
"valid_targets_mean": 4781.9,
|
|
"valid_targets_min": 2814
|
|
},
|
|
{
|
|
"epoch": 5.382470119521912,
|
|
"grad_norm": 0.5777887543850055,
|
|
"learning_rate": 6.1757953305120975e-06,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12985867261886597,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3952.9,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 5.390438247011952,
|
|
"grad_norm": 0.5530887525834984,
|
|
"learning_rate": 6.118516095587321e-06,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10672642290592194,
|
|
"step": 3385,
|
|
"valid_targets_mean": 4004.9,
|
|
"valid_targets_min": 2262
|
|
},
|
|
{
|
|
"epoch": 5.398406374501992,
|
|
"grad_norm": 0.6313478636714507,
|
|
"learning_rate": 6.0614557187055335e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11622016131877899,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3472.5,
|
|
"valid_targets_min": 385
|
|
},
|
|
{
|
|
"epoch": 5.4063745019920315,
|
|
"grad_norm": 0.61809329445396,
|
|
"learning_rate": 6.004615099491189e-06,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13471439480781555,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3704.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 5.414342629482071,
|
|
"grad_norm": 0.7352906677939539,
|
|
"learning_rate": 5.947995134103999e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08384540677070618,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2533.5,
|
|
"valid_targets_min": 188
|
|
},
|
|
{
|
|
"epoch": 5.422310756972111,
|
|
"grad_norm": 0.618641096906809,
|
|
"learning_rate": 5.891596715224821e-06,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16095472872257233,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4694.6,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 5.430278884462151,
|
|
"grad_norm": 0.6816089290330103,
|
|
"learning_rate": 5.835420732041557e-06,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280200034379959,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3855.4,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 5.438247011952191,
|
|
"grad_norm": 0.6971808790211675,
|
|
"learning_rate": 5.779468070235139e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313139945268631,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3432.0,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 5.446215139442231,
|
|
"grad_norm": 0.6032503774011648,
|
|
"learning_rate": 5.7237396119655995e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13556715846061707,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4342.9,
|
|
"valid_targets_min": 3353
|
|
},
|
|
{
|
|
"epoch": 5.4541832669322705,
|
|
"grad_norm": 0.6273513927469573,
|
|
"learning_rate": 5.668236235858109e-06,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12618348002433777,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2915.1,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 5.46215139442231,
|
|
"grad_norm": 0.6004260835295273,
|
|
"learning_rate": 5.61295881698916e-06,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13639703392982483,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4461.4,
|
|
"valid_targets_min": 3533
|
|
},
|
|
{
|
|
"epoch": 5.47011952191235,
|
|
"grad_norm": 0.7645690026214497,
|
|
"learning_rate": 5.557908226872775e-06,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12604597210884094,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3385.5,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 5.47808764940239,
|
|
"grad_norm": 0.6086838874219482,
|
|
"learning_rate": 5.503085333446727e-06,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1220838874578476,
|
|
"step": 3440,
|
|
"valid_targets_mean": 3682.1,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 5.48605577689243,
|
|
"grad_norm": 0.606693439874937,
|
|
"learning_rate": 5.448491001058909e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14827896654605865,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4212.6,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 5.49402390438247,
|
|
"grad_norm": 0.7063285262397214,
|
|
"learning_rate": 5.394126090453655e-06,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1105622798204422,
|
|
"step": 3450,
|
|
"valid_targets_mean": 2606.2,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 5.5019920318725095,
|
|
"grad_norm": 0.5777296608486836,
|
|
"learning_rate": 5.3399914587582e-06,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15368403494358063,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5300.1,
|
|
"valid_targets_min": 3578
|
|
},
|
|
{
|
|
"epoch": 5.509960159362549,
|
|
"grad_norm": 0.6146095361868158,
|
|
"learning_rate": 5.286087959469168e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14480501413345337,
|
|
"step": 3460,
|
|
"valid_targets_mean": 4302.0,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 5.517928286852589,
|
|
"grad_norm": 0.6012435923967533,
|
|
"learning_rate": 5.232416442439092e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14101041853427887,
|
|
"step": 3465,
|
|
"valid_targets_mean": 4215.5,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 5.525896414342629,
|
|
"grad_norm": 0.5611584475868379,
|
|
"learning_rate": 5.178977753863048e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09993009269237518,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3897.8,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 5.533864541832669,
|
|
"grad_norm": 0.5563902771441435,
|
|
"learning_rate": 5.125772736265271e-06,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13861241936683655,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4227.2,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 5.541832669322709,
|
|
"grad_norm": 0.8791159048380199,
|
|
"learning_rate": 5.072802228485925e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13441188633441925,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3450.5,
|
|
"valid_targets_min": 256
|
|
},
|
|
{
|
|
"epoch": 5.5498007968127485,
|
|
"grad_norm": 0.5434635127655687,
|
|
"learning_rate": 5.020067065667826e-06,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1299361139535904,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4664.4,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 5.557768924302788,
|
|
"grad_norm": 0.5898325853645024,
|
|
"learning_rate": 4.967568079243301e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12370647490024567,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4088.5,
|
|
"valid_targets_min": 2095
|
|
},
|
|
{
|
|
"epoch": 5.565737051792829,
|
|
"grad_norm": 0.5996111126593591,
|
|
"learning_rate": 4.915306096921093e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425367295742035,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3613.0,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 5.573705179282869,
|
|
"grad_norm": 0.5839869424101981,
|
|
"learning_rate": 4.8632819426732705e-06,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13066698610782623,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3647.0,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 5.581673306772909,
|
|
"grad_norm": 0.6287370374618291,
|
|
"learning_rate": 4.811496436722285e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10785184800624847,
|
|
"step": 3505,
|
|
"valid_targets_mean": 2965.1,
|
|
"valid_targets_min": 245
|
|
},
|
|
{
|
|
"epoch": 5.589641434262949,
|
|
"grad_norm": 0.5892107222191396,
|
|
"learning_rate": 4.7599503955279945e-06,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13893276453018188,
|
|
"step": 3510,
|
|
"valid_targets_mean": 4028.8,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 5.597609561752988,
|
|
"grad_norm": 0.6003581198787067,
|
|
"learning_rate": 4.708644631774819e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262696385383606,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3744.4,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 5.605577689243028,
|
|
"grad_norm": 0.5806890541188977,
|
|
"learning_rate": 4.657579954358924e-06,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15570898354053497,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4482.2,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 5.613545816733068,
|
|
"grad_norm": 0.6051899531594201,
|
|
"learning_rate": 4.606757168375451e-06,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12347070872783661,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3937.1,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 5.621513944223108,
|
|
"grad_norm": 0.6648354607135956,
|
|
"learning_rate": 4.556177075105857e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10443254560232162,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2838.1,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 5.629482071713148,
|
|
"grad_norm": 0.8188241704628644,
|
|
"learning_rate": 4.505840472005236e-06,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11551107466220856,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3775.8,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 5.637450199203188,
|
|
"grad_norm": 0.6174445040965998,
|
|
"learning_rate": 4.455748152689796e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1448177695274353,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4228.5,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 5.645418326693227,
|
|
"grad_norm": 0.582006297269565,
|
|
"learning_rate": 4.405900906924303e-06,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11946921050548553,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3386.0,
|
|
"valid_targets_min": 232
|
|
},
|
|
{
|
|
"epoch": 5.653386454183267,
|
|
"grad_norm": 0.6235145696587125,
|
|
"learning_rate": 4.35629952060965e-06,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11263646185398102,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3223.4,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 5.661354581673307,
|
|
"grad_norm": 0.7206037687758406,
|
|
"learning_rate": 4.306944775770479e-06,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09978772699832916,
|
|
"step": 3555,
|
|
"valid_targets_mean": 2395.9,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 5.669322709163347,
|
|
"grad_norm": 0.6638192351502472,
|
|
"learning_rate": 4.2578374505428145e-06,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14399829506874084,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2907.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 5.677290836653387,
|
|
"grad_norm": 0.5882996503712633,
|
|
"learning_rate": 4.208978319161843e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12330938130617142,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3894.4,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 5.685258964143427,
|
|
"grad_norm": 0.6543040309553202,
|
|
"learning_rate": 4.160368151949659e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259610503911972,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3302.6,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 5.693227091633466,
|
|
"grad_norm": 0.6143364204199971,
|
|
"learning_rate": 4.112007715303148e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09602335095405579,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2796.8,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 5.701195219123506,
|
|
"grad_norm": 0.6206784374022006,
|
|
"learning_rate": 4.0638977716819105e-06,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13594505190849304,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3717.2,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 5.709163346613546,
|
|
"grad_norm": 0.5615628549123262,
|
|
"learning_rate": 4.016039079596204e-06,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10886578261852264,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3321.4,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 5.717131474103586,
|
|
"grad_norm": 0.5662573225062353,
|
|
"learning_rate": 3.968432393595034e-06,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13004595041275024,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4096.1,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 5.725099601593626,
|
|
"grad_norm": 0.5837892365616794,
|
|
"learning_rate": 3.921078464254204e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13288232684135437,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4558.6,
|
|
"valid_targets_min": 3170
|
|
},
|
|
{
|
|
"epoch": 5.733067729083666,
|
|
"grad_norm": 0.6089077319301412,
|
|
"learning_rate": 3.873978038164537e-06,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09915770590305328,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3429.9,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 5.741035856573705,
|
|
"grad_norm": 0.6917121360743607,
|
|
"learning_rate": 3.8271318579200565e-06,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1068388819694519,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2564.9,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 5.749003984063745,
|
|
"grad_norm": 0.6843480696806137,
|
|
"learning_rate": 3.780540662106302e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14033851027488708,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3091.6,
|
|
"valid_targets_min": 254
|
|
},
|
|
{
|
|
"epoch": 5.756972111553785,
|
|
"grad_norm": 0.6003046764796326,
|
|
"learning_rate": 3.734205185288693e-06,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11629563570022583,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3680.1,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 5.764940239043825,
|
|
"grad_norm": 0.6694111442962223,
|
|
"learning_rate": 3.6881261580009242e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1048082709312439,
|
|
"step": 3620,
|
|
"valid_targets_mean": 2768.4,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 5.772908366533865,
|
|
"grad_norm": 0.6953453592075824,
|
|
"learning_rate": 3.642304306733464e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11646781116724014,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3290.4,
|
|
"valid_targets_min": 456
|
|
},
|
|
{
|
|
"epoch": 5.780876494023905,
|
|
"grad_norm": 0.6599195373597444,
|
|
"learning_rate": 3.596740353922088e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16503630578517914,
|
|
"step": 3630,
|
|
"valid_targets_mean": 4059.8,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 5.788844621513944,
|
|
"grad_norm": 0.6682518386497261,
|
|
"learning_rate": 3.5514350179365176e-06,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10960632562637329,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3265.4,
|
|
"valid_targets_min": 258
|
|
},
|
|
{
|
|
"epoch": 5.796812749003984,
|
|
"grad_norm": 0.601595786597232,
|
|
"learning_rate": 3.5063890130690513e-06,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15612296760082245,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4444.1,
|
|
"valid_targets_min": 3802
|
|
},
|
|
{
|
|
"epoch": 5.804780876494024,
|
|
"grad_norm": 0.6496112640493846,
|
|
"learning_rate": 3.461603049523334e-06,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10309451818466187,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3254.8,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 5.812749003984064,
|
|
"grad_norm": 0.6288915508343855,
|
|
"learning_rate": 3.4170778334031595e-06,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16654735803604126,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3907.5,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 5.820717131474104,
|
|
"grad_norm": 0.6961504839578929,
|
|
"learning_rate": 3.3728140667013155e-06,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13372699916362762,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3155.8,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 5.828685258964144,
|
|
"grad_norm": 0.6223010800317257,
|
|
"learning_rate": 3.3288124472885318e-06,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15059836208820343,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3522.4,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 5.836653386454183,
|
|
"grad_norm": 0.6939497712021372,
|
|
"learning_rate": 3.2850736689024877e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1575852632522583,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3594.9,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 5.844621513944223,
|
|
"grad_norm": 0.5940310626609997,
|
|
"learning_rate": 3.2415984211368446e-06,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11697559058666229,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3811.5,
|
|
"valid_targets_min": 274
|
|
},
|
|
{
|
|
"epoch": 5.852589641434263,
|
|
"grad_norm": 0.6140475487399377,
|
|
"learning_rate": 3.1983873894304105e-06,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458444744348526,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4887.2,
|
|
"valid_targets_min": 3166
|
|
},
|
|
{
|
|
"epoch": 5.860557768924303,
|
|
"grad_norm": 0.6879843367074118,
|
|
"learning_rate": 3.1554412550562952e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1462876796722412,
|
|
"step": 3680,
|
|
"valid_targets_mean": 4436.4,
|
|
"valid_targets_min": 3259
|
|
},
|
|
{
|
|
"epoch": 5.868525896414343,
|
|
"grad_norm": 0.5953728261449194,
|
|
"learning_rate": 3.1127606951112056e-06,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16630247235298157,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4529.9,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 5.876494023904383,
|
|
"grad_norm": 0.5944339968513348,
|
|
"learning_rate": 3.070346382504743e-06,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15230220556259155,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4477.9,
|
|
"valid_targets_min": 3111
|
|
},
|
|
{
|
|
"epoch": 5.884462151394422,
|
|
"grad_norm": 0.5846695073715269,
|
|
"learning_rate": 3.028198985948796e-06,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12972813844680786,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4184.8,
|
|
"valid_targets_min": 3228
|
|
},
|
|
{
|
|
"epoch": 5.892430278884462,
|
|
"grad_norm": 0.86917738581549,
|
|
"learning_rate": 2.9863191699470295e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11308373510837555,
|
|
"step": 3700,
|
|
"valid_targets_mean": 2976.0,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 5.900398406374502,
|
|
"grad_norm": 0.5774890125931904,
|
|
"learning_rate": 2.9447075947843573e-06,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11549004912376404,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4005.8,
|
|
"valid_targets_min": 277
|
|
},
|
|
{
|
|
"epoch": 5.908366533864542,
|
|
"grad_norm": 0.5740786078772951,
|
|
"learning_rate": 2.9033649165165802e-06,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13311205804347992,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3929.8,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 5.916334661354582,
|
|
"grad_norm": 0.638209126073348,
|
|
"learning_rate": 2.8622917869600053e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13181473314762115,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3157.6,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 5.924302788844622,
|
|
"grad_norm": 0.662891750164014,
|
|
"learning_rate": 2.821488853681187e-06,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13231581449508667,
|
|
"step": 3720,
|
|
"valid_targets_mean": 3815.1,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 5.932270916334661,
|
|
"grad_norm": 0.6707296283611003,
|
|
"learning_rate": 2.7809567599867304e-06,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10229024291038513,
|
|
"step": 3725,
|
|
"valid_targets_mean": 2831.9,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 5.940239043824701,
|
|
"grad_norm": 0.6609159161446633,
|
|
"learning_rate": 2.7406961449131153e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16223978996276855,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4233.6,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 5.948207171314741,
|
|
"grad_norm": 0.6070202128566694,
|
|
"learning_rate": 2.7007076432166402e-06,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10488445311784744,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3504.9,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 5.956175298804781,
|
|
"grad_norm": 0.5693033009319906,
|
|
"learning_rate": 2.660991885363433e-06,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12675464153289795,
|
|
"step": 3740,
|
|
"valid_targets_mean": 4333.9,
|
|
"valid_targets_min": 3605
|
|
},
|
|
{
|
|
"epoch": 5.964143426294821,
|
|
"grad_norm": 0.6344388659710755,
|
|
"learning_rate": 2.621549497519471e-06,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12354440987110138,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4249.4,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 5.972111553784861,
|
|
"grad_norm": 0.6153848272313728,
|
|
"learning_rate": 2.5823811015407386e-06,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10808524489402771,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3444.8,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 5.9800796812749,
|
|
"grad_norm": 0.6100355257087904,
|
|
"learning_rate": 2.5434873149634045e-06,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09660793840885162,
|
|
"step": 3755,
|
|
"valid_targets_mean": 2964.8,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 5.98804780876494,
|
|
"grad_norm": 0.609935219988965,
|
|
"learning_rate": 2.5048687509941163e-06,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303994059562683,
|
|
"step": 3760,
|
|
"valid_targets_mean": 3852.6,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 5.99601593625498,
|
|
"grad_norm": 0.6089231262522933,
|
|
"learning_rate": 2.4665260185002815e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12429457902908325,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3790.1,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 6.003187250996016,
|
|
"grad_norm": 0.6432421277276235,
|
|
"learning_rate": 2.428459722000529e-06,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14490444958209991,
|
|
"step": 3770,
|
|
"valid_targets_mean": 4511.8,
|
|
"valid_targets_min": 3330
|
|
},
|
|
{
|
|
"epoch": 6.011155378486055,
|
|
"grad_norm": 0.5751694281008249,
|
|
"learning_rate": 2.39067046165512e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12868493795394897,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3961.5,
|
|
"valid_targets_min": 2894
|
|
},
|
|
{
|
|
"epoch": 6.019123505976095,
|
|
"grad_norm": 0.7067008056866969,
|
|
"learning_rate": 2.3531588332565238e-06,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15841418504714966,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3148.9,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 6.027091633466135,
|
|
"grad_norm": 0.6668337546814035,
|
|
"learning_rate": 2.3159254282200207e-06,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10749612748622894,
|
|
"step": 3785,
|
|
"valid_targets_mean": 2676.1,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 6.035059760956175,
|
|
"grad_norm": 0.6040929869770774,
|
|
"learning_rate": 2.27897083357435e-06,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12229761481285095,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3779.4,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 6.043027888446215,
|
|
"grad_norm": 0.5702540586454504,
|
|
"learning_rate": 2.242295631952496e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14322780072689056,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4631.1,
|
|
"valid_targets_min": 4027
|
|
},
|
|
{
|
|
"epoch": 6.050996015936255,
|
|
"grad_norm": 0.6638784558122266,
|
|
"learning_rate": 2.205900401582466e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1431213617324829,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3195.1,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 6.058964143426294,
|
|
"grad_norm": 0.6371751394503381,
|
|
"learning_rate": 2.169785716278199e-06,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12928101420402527,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3510.6,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 6.066932270916334,
|
|
"grad_norm": 0.605029891992074,
|
|
"learning_rate": 2.133952145430502e-06,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12399262934923172,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3561.8,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 6.074900398406374,
|
|
"grad_norm": 0.6587525854023784,
|
|
"learning_rate": 2.0984002539980785e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15116888284683228,
|
|
"step": 3815,
|
|
"valid_targets_mean": 4431.6,
|
|
"valid_targets_min": 3484
|
|
},
|
|
{
|
|
"epoch": 6.082868525896414,
|
|
"grad_norm": 0.5828610306338085,
|
|
"learning_rate": 2.0631306024986284e-06,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10188622772693634,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3481.1,
|
|
"valid_targets_min": 251
|
|
},
|
|
{
|
|
"epoch": 6.090836653386454,
|
|
"grad_norm": 0.6010393546690598,
|
|
"learning_rate": 2.0281437469999976e-06,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12114863097667694,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4196.4,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 6.098804780876494,
|
|
"grad_norm": 0.5702706809060334,
|
|
"learning_rate": 1.9934402391114283e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14958012104034424,
|
|
"step": 3830,
|
|
"valid_targets_mean": 5032.5,
|
|
"valid_targets_min": 3602
|
|
},
|
|
{
|
|
"epoch": 6.106772908366533,
|
|
"grad_norm": 0.6537613374201687,
|
|
"learning_rate": 1.9590206259748413e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14057272672653198,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3877.0,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 6.114741035856574,
|
|
"grad_norm": 0.585760262716983,
|
|
"learning_rate": 1.924885450256222e-06,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10234951972961426,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3211.5,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 6.122709163346614,
|
|
"grad_norm": 0.5976124419885895,
|
|
"learning_rate": 1.8910352501370677e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11288703978061676,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3741.0,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 6.130677290836654,
|
|
"grad_norm": 0.6288849321286447,
|
|
"learning_rate": 1.8574705593058962e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1731109917163849,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4558.8,
|
|
"valid_targets_min": 3570
|
|
},
|
|
{
|
|
"epoch": 6.138645418326694,
|
|
"grad_norm": 0.6929670892787564,
|
|
"learning_rate": 1.8241919069498281e-06,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12311825156211853,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4152.5,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 6.1466135458167335,
|
|
"grad_norm": 0.6693335839104627,
|
|
"learning_rate": 1.7911998177462563e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1238730400800705,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3705.8,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 6.154581673306773,
|
|
"grad_norm": 0.5686511959559429,
|
|
"learning_rate": 1.758494811854554e-06,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14562711119651794,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4748.6,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 6.162549800796813,
|
|
"grad_norm": 0.6040324036331258,
|
|
"learning_rate": 1.7260774049079044e-06,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16318969428539276,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4559.2,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 6.170517928286853,
|
|
"grad_norm": 0.6111187412398503,
|
|
"learning_rate": 1.6939481080051324e-06,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12175963819026947,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4280.0,
|
|
"valid_targets_min": 2199
|
|
},
|
|
{
|
|
"epoch": 6.178486055776893,
|
|
"grad_norm": 0.607615016751425,
|
|
"learning_rate": 1.6621074277026838e-06,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11982540041208267,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3989.1,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 6.186454183266933,
|
|
"grad_norm": 0.6190062881915447,
|
|
"learning_rate": 1.630555866006609e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12791648507118225,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3625.5,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 6.1944223107569725,
|
|
"grad_norm": 0.6544343561676963,
|
|
"learning_rate": 1.5992939203646706e-06,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11502386629581451,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3540.8,
|
|
"valid_targets_min": 223
|
|
},
|
|
{
|
|
"epoch": 6.202390438247012,
|
|
"grad_norm": 0.6488734360036593,
|
|
"learning_rate": 1.5683220836584822e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14629307389259338,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3591.6,
|
|
"valid_targets_min": 263
|
|
},
|
|
{
|
|
"epoch": 6.210358565737052,
|
|
"grad_norm": 0.6076624199204144,
|
|
"learning_rate": 1.5376408441957513e-06,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11878538131713867,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3426.6,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 6.218326693227092,
|
|
"grad_norm": 0.5920608700590033,
|
|
"learning_rate": 1.5072506857025748e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10741353780031204,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4125.2,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 6.226294820717132,
|
|
"grad_norm": 0.5830741097908088,
|
|
"learning_rate": 1.4771520873158118e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13355758786201477,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4088.6,
|
|
"valid_targets_min": 3169
|
|
},
|
|
{
|
|
"epoch": 6.234262948207172,
|
|
"grad_norm": 0.7774677063961954,
|
|
"learning_rate": 1.4473455235755385e-06,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10745563358068466,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3174.9,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 6.2422310756972115,
|
|
"grad_norm": 0.6414339471264867,
|
|
"learning_rate": 1.417831464417545e-06,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10562710464000702,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3753.1,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 6.250199203187251,
|
|
"grad_norm": 0.6040908897361035,
|
|
"learning_rate": 1.3886103751659462e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11933998763561249,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3794.1,
|
|
"valid_targets_min": 243
|
|
},
|
|
{
|
|
"epoch": 6.258167330677291,
|
|
"grad_norm": 0.5888051951119573,
|
|
"learning_rate": 1.3596827165258474e-06,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1309901475906372,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4263.9,
|
|
"valid_targets_min": 2155
|
|
},
|
|
{
|
|
"epoch": 6.266135458167331,
|
|
"grad_norm": 0.581996929840516,
|
|
"learning_rate": 1.331048944576061e-06,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14565280079841614,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4253.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 6.274103585657371,
|
|
"grad_norm": 0.5876836628531714,
|
|
"learning_rate": 1.3027095107619347e-06,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09295883029699326,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3626.9,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 6.282071713147411,
|
|
"grad_norm": 0.6080383151496606,
|
|
"learning_rate": 1.2746648618882197e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11289983987808228,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3987.1,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 6.2900398406374505,
|
|
"grad_norm": 0.5774994539577519,
|
|
"learning_rate": 1.2469154401120442e-06,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12663111090660095,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3853.5,
|
|
"valid_targets_min": 2981
|
|
},
|
|
{
|
|
"epoch": 6.29800796812749,
|
|
"grad_norm": 0.7210516288858115,
|
|
"learning_rate": 1.2194616829359206e-06,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11826236546039581,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3350.1,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 6.30597609561753,
|
|
"grad_norm": 0.7225175533232717,
|
|
"learning_rate": 1.1923040232008653e-06,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14951607584953308,
|
|
"step": 3960,
|
|
"valid_targets_mean": 3520.2,
|
|
"valid_targets_min": 278
|
|
},
|
|
{
|
|
"epoch": 6.31394422310757,
|
|
"grad_norm": 0.7291038894240529,
|
|
"learning_rate": 1.1654428890795622e-06,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13940054178237915,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3456.6,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 6.32191235059761,
|
|
"grad_norm": 0.5929982172160873,
|
|
"learning_rate": 1.1388787040696215e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11806536465883255,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3589.1,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 6.32988047808765,
|
|
"grad_norm": 0.7037752359022759,
|
|
"learning_rate": 1.1126118869868985e-06,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12819892168045044,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3012.1,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 6.3378486055776895,
|
|
"grad_norm": 0.6620313014408482,
|
|
"learning_rate": 1.0866428519588923e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13882631063461304,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4492.8,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 6.345816733067729,
|
|
"grad_norm": 0.6380382089651276,
|
|
"learning_rate": 1.060972008418204e-06,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1222672387957573,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3581.2,
|
|
"valid_targets_min": 255
|
|
},
|
|
{
|
|
"epoch": 6.353784860557769,
|
|
"grad_norm": 0.6081573976490425,
|
|
"learning_rate": 1.0355997610961132e-06,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1143135279417038,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3371.2,
|
|
"valid_targets_min": 282
|
|
},
|
|
{
|
|
"epoch": 6.361752988047809,
|
|
"grad_norm": 0.6223924820943934,
|
|
"learning_rate": 1.0105265100161564e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13434356451034546,
|
|
"step": 3995,
|
|
"valid_targets_mean": 3690.0,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 6.369721115537849,
|
|
"grad_norm": 0.8007739791330828,
|
|
"learning_rate": 9.857526504878524e-07,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10666193068027496,
|
|
"step": 4000,
|
|
"valid_targets_mean": 3907.4,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 6.377689243027889,
|
|
"grad_norm": 0.5828222085983297,
|
|
"learning_rate": 9.612785731004525e-07,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1447790563106537,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4335.6,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 6.3856573705179285,
|
|
"grad_norm": 0.6612189894555205,
|
|
"learning_rate": 9.371046637167835e-07,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12845785915851593,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3381.4,
|
|
"valid_targets_min": 491
|
|
},
|
|
{
|
|
"epoch": 6.393625498007968,
|
|
"grad_norm": 0.6542651258710315,
|
|
"learning_rate": 9.132313034671792e-07,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1381814181804657,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3207.6,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 6.401593625498008,
|
|
"grad_norm": 0.6328798463695975,
|
|
"learning_rate": 8.89658868743446e-07,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1083463802933693,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3546.5,
|
|
"valid_targets_min": 425
|
|
},
|
|
{
|
|
"epoch": 6.409561752988048,
|
|
"grad_norm": 0.6334408197404385,
|
|
"learning_rate": 8.663877311929569e-07,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11294161528348923,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3244.4,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 6.417529880478088,
|
|
"grad_norm": 0.6057134984464468,
|
|
"learning_rate": 8.43418257712767e-07,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10949505865573883,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3993.9,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 6.425498007968128,
|
|
"grad_norm": 0.5778435806875283,
|
|
"learning_rate": 8.207508104438467e-07,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10580522567033768,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4066.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 6.4334661354581675,
|
|
"grad_norm": 0.5429928605370324,
|
|
"learning_rate": 7.983857467653599e-07,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11019369959831238,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4641.2,
|
|
"valid_targets_min": 3660
|
|
},
|
|
{
|
|
"epoch": 6.441434262948207,
|
|
"grad_norm": 0.6487210534399938,
|
|
"learning_rate": 7.763234192890378e-07,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11472362279891968,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3347.8,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 6.449402390438247,
|
|
"grad_norm": 0.7047344926611184,
|
|
"learning_rate": 7.545641758536204e-07,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12119234353303909,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3398.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 6.457370517928287,
|
|
"grad_norm": 0.6369030055079338,
|
|
"learning_rate": 7.331083595193566e-07,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11330699920654297,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3417.4,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 6.465338645418327,
|
|
"grad_norm": 0.6845066985366877,
|
|
"learning_rate": 7.119563085626246e-07,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16965419054031372,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3622.8,
|
|
"valid_targets_min": 207
|
|
},
|
|
{
|
|
"epoch": 6.473306772908367,
|
|
"grad_norm": 0.5882207570553777,
|
|
"learning_rate": 6.911083564705689e-07,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13698944449424744,
|
|
"step": 4065,
|
|
"valid_targets_mean": 4761.4,
|
|
"valid_targets_min": 4057
|
|
},
|
|
{
|
|
"epoch": 6.4812749003984065,
|
|
"grad_norm": 0.6554874702398878,
|
|
"learning_rate": 6.70564831935856e-07,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11183053255081177,
|
|
"step": 4070,
|
|
"valid_targets_mean": 2970.2,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 6.489243027888446,
|
|
"grad_norm": 0.6234303433193248,
|
|
"learning_rate": 6.503260588514959e-07,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13360071182250977,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3698.2,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 6.497211155378486,
|
|
"grad_norm": 0.5855960507592379,
|
|
"learning_rate": 6.303923563057291e-07,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13242430984973907,
|
|
"step": 4080,
|
|
"valid_targets_mean": 4189.8,
|
|
"valid_targets_min": 3117
|
|
},
|
|
{
|
|
"epoch": 6.505179282868526,
|
|
"grad_norm": 0.6322204475332664,
|
|
"learning_rate": 6.107640385769964e-07,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14184370636940002,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4107.6,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 6.513147410358566,
|
|
"grad_norm": 1.0485414149705368,
|
|
"learning_rate": 5.91441415128986e-07,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12613211572170258,
|
|
"step": 4090,
|
|
"valid_targets_mean": 4314.0,
|
|
"valid_targets_min": 2611
|
|
},
|
|
{
|
|
"epoch": 6.521115537848606,
|
|
"grad_norm": 0.7762269781620317,
|
|
"learning_rate": 5.724247906057545e-07,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12296421825885773,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3463.9,
|
|
"valid_targets_min": 517
|
|
},
|
|
{
|
|
"epoch": 6.5290836653386455,
|
|
"grad_norm": 0.6006370469850979,
|
|
"learning_rate": 5.537144648269221e-07,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11015652865171432,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3809.4,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 6.537051792828685,
|
|
"grad_norm": 0.5860933713134772,
|
|
"learning_rate": 5.35310732782941e-07,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11739160865545273,
|
|
"step": 4105,
|
|
"valid_targets_mean": 4038.5,
|
|
"valid_targets_min": 2302
|
|
},
|
|
{
|
|
"epoch": 6.545019920318725,
|
|
"grad_norm": 0.6604373964511138,
|
|
"learning_rate": 5.172138846304609e-07,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12965252995491028,
|
|
"step": 4110,
|
|
"valid_targets_mean": 3493.2,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 6.552988047808765,
|
|
"grad_norm": 0.6209094419618799,
|
|
"learning_rate": 4.994242056877352e-07,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375695765018463,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3564.8,
|
|
"valid_targets_min": 248
|
|
},
|
|
{
|
|
"epoch": 6.560956175298805,
|
|
"grad_norm": 0.5516051600593797,
|
|
"learning_rate": 4.819419764301314e-07,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16178067028522491,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4802.2,
|
|
"valid_targets_min": 3542
|
|
},
|
|
{
|
|
"epoch": 6.568924302788845,
|
|
"grad_norm": 0.7135404766623891,
|
|
"learning_rate": 4.647674724857143e-07,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924779415130615,
|
|
"step": 4125,
|
|
"valid_targets_mean": 3350.8,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 6.5768924302788845,
|
|
"grad_norm": 0.5815975747573743,
|
|
"learning_rate": 4.4790096463088293e-07,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1157185435295105,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3725.5,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 6.584860557768924,
|
|
"grad_norm": 0.6273070480646679,
|
|
"learning_rate": 4.313427187861252e-07,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14831429719924927,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4227.1,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 6.592828685258964,
|
|
"grad_norm": 0.5850990481798938,
|
|
"learning_rate": 4.1509299601180553e-07,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15613797307014465,
|
|
"step": 4140,
|
|
"valid_targets_mean": 4252.5,
|
|
"valid_targets_min": 3624
|
|
},
|
|
{
|
|
"epoch": 6.600796812749004,
|
|
"grad_norm": 0.6685198631985739,
|
|
"learning_rate": 3.9915205250406153e-07,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14067862927913666,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3225.0,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 6.608764940239044,
|
|
"grad_norm": 0.5978862286711235,
|
|
"learning_rate": 3.83520139590754e-07,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1057911068201065,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3703.2,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 6.616733067729084,
|
|
"grad_norm": 0.5686980976514994,
|
|
"learning_rate": 3.6819750372751874e-07,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13568620383739471,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4560.6,
|
|
"valid_targets_min": 3183
|
|
},
|
|
{
|
|
"epoch": 6.6247011952191235,
|
|
"grad_norm": 0.5958820374658117,
|
|
"learning_rate": 3.531843864938611e-07,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12185736000537872,
|
|
"step": 4160,
|
|
"valid_targets_mean": 4474.1,
|
|
"valid_targets_min": 3739
|
|
},
|
|
{
|
|
"epoch": 6.632669322709163,
|
|
"grad_norm": 0.6726651729680071,
|
|
"learning_rate": 3.384810245893677e-07,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10705474764108658,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3190.9,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 6.640637450199203,
|
|
"grad_norm": 0.5588675453049953,
|
|
"learning_rate": 3.2408764982996056e-07,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11087557673454285,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3897.0,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 6.648605577689243,
|
|
"grad_norm": 0.6485560069091032,
|
|
"learning_rate": 3.1000448914425106e-07,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10861679911613464,
|
|
"step": 4175,
|
|
"valid_targets_mean": 2707.1,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 6.656573705179283,
|
|
"grad_norm": 0.5152247010486197,
|
|
"learning_rate": 2.9623176456995195e-07,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11714199185371399,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4544.1,
|
|
"valid_targets_min": 3225
|
|
},
|
|
{
|
|
"epoch": 6.664541832669323,
|
|
"grad_norm": 0.6924372026026602,
|
|
"learning_rate": 2.8276969325038874e-07,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1526404619216919,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3647.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 6.6725099601593625,
|
|
"grad_norm": 0.6565708140906135,
|
|
"learning_rate": 2.696184874310692e-07,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1124889999628067,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3418.2,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 6.680478087649402,
|
|
"grad_norm": 0.6085711168138929,
|
|
"learning_rate": 2.5677835445633515e-07,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09558190405368805,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3099.6,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 6.688446215139442,
|
|
"grad_norm": 0.6163718517084988,
|
|
"learning_rate": 2.44249496766098e-07,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08738566190004349,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3180.8,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 6.696414342629482,
|
|
"grad_norm": 0.6645990900178848,
|
|
"learning_rate": 2.3203211189264607e-07,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10755690932273865,
|
|
"step": 4205,
|
|
"valid_targets_mean": 3424.2,
|
|
"valid_targets_min": 244
|
|
},
|
|
{
|
|
"epoch": 6.704382470119522,
|
|
"grad_norm": 0.5784923268500493,
|
|
"learning_rate": 2.201263924575292e-07,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10449139028787613,
|
|
"step": 4210,
|
|
"valid_targets_mean": 3429.8,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 6.712350597609562,
|
|
"grad_norm": 0.6004625390135877,
|
|
"learning_rate": 2.0853252616852338e-07,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12962156534194946,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3904.2,
|
|
"valid_targets_min": 3067
|
|
},
|
|
{
|
|
"epoch": 6.7203187250996015,
|
|
"grad_norm": 0.5952887978908022,
|
|
"learning_rate": 1.9725069581666645e-07,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11442944407463074,
|
|
"step": 4220,
|
|
"valid_targets_mean": 3526.9,
|
|
"valid_targets_min": 227
|
|
},
|
|
{
|
|
"epoch": 6.728286852589641,
|
|
"grad_norm": 0.5819765604555225,
|
|
"learning_rate": 1.862810792733849e-07,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1205209344625473,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3931.8,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 6.736254980079681,
|
|
"grad_norm": 0.5935505869923664,
|
|
"learning_rate": 1.7562384948768273e-07,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12588539719581604,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4015.5,
|
|
"valid_targets_min": 2934
|
|
},
|
|
{
|
|
"epoch": 6.744223107569721,
|
|
"grad_norm": 0.611139401136095,
|
|
"learning_rate": 1.6527917448341478e-07,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11996519565582275,
|
|
"step": 4235,
|
|
"valid_targets_mean": 3138.9,
|
|
"valid_targets_min": 291
|
|
},
|
|
{
|
|
"epoch": 6.752191235059761,
|
|
"grad_norm": 0.6392028829283917,
|
|
"learning_rate": 1.5524721735663995e-07,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17184896767139435,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4052.5,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 6.760159362549801,
|
|
"grad_norm": 0.5754622223791958,
|
|
"learning_rate": 1.4552813627305208e-07,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13561061024665833,
|
|
"step": 4245,
|
|
"valid_targets_mean": 3938.9,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 6.7681274900398405,
|
|
"grad_norm": 0.6767350907038611,
|
|
"learning_rate": 1.361220844654798e-07,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09622784703969955,
|
|
"step": 4250,
|
|
"valid_targets_mean": 2196.9,
|
|
"valid_targets_min": 338
|
|
},
|
|
{
|
|
"epoch": 6.77609561752988,
|
|
"grad_norm": 0.6304243386042828,
|
|
"learning_rate": 1.270292102314752e-07,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12174540013074875,
|
|
"step": 4255,
|
|
"valid_targets_mean": 3984.8,
|
|
"valid_targets_min": 285
|
|
},
|
|
{
|
|
"epoch": 6.78406374501992,
|
|
"grad_norm": 0.6227931181046218,
|
|
"learning_rate": 1.1824965693097767e-07,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13814817368984222,
|
|
"step": 4260,
|
|
"valid_targets_mean": 4148.8,
|
|
"valid_targets_min": 267
|
|
},
|
|
{
|
|
"epoch": 6.79203187250996,
|
|
"grad_norm": 0.6487946242842916,
|
|
"learning_rate": 1.0978356298404713e-07,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14033278822898865,
|
|
"step": 4265,
|
|
"valid_targets_mean": 3367.5,
|
|
"valid_targets_min": 242
|
|
},
|
|
{
|
|
"epoch": 6.8,
|
|
"grad_norm": 0.615835745685907,
|
|
"learning_rate": 1.0163106186868777e-07,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1257311999797821,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3718.1,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 6.80796812749004,
|
|
"grad_norm": 0.5515603477154181,
|
|
"learning_rate": 9.379228211873648e-08,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278674602508545,
|
|
"step": 4275,
|
|
"valid_targets_mean": 4272.2,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 6.8159362549800795,
|
|
"grad_norm": 0.6024657842293992,
|
|
"learning_rate": 8.626734732185116e-08,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11025673151016235,
|
|
"step": 4280,
|
|
"valid_targets_mean": 3804.4,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 6.823904382470119,
|
|
"grad_norm": 0.5808961396156915,
|
|
"learning_rate": 7.905637611754114e-08,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11888125538825989,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4250.6,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 6.831872509960159,
|
|
"grad_norm": 0.6648227294388135,
|
|
"learning_rate": 7.21594821953131e-08,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16212469339370728,
|
|
"step": 4290,
|
|
"valid_targets_mean": 3200.8,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 6.839840637450199,
|
|
"grad_norm": 0.5933956324309776,
|
|
"learning_rate": 6.557677429287257e-08,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13593024015426636,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4707.9,
|
|
"valid_targets_min": 2371
|
|
},
|
|
{
|
|
"epoch": 6.847808764940239,
|
|
"grad_norm": 0.6957976732099579,
|
|
"learning_rate": 5.930835619441189e-08,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1199740320444107,
|
|
"step": 4300,
|
|
"valid_targets_mean": 2310.2,
|
|
"valid_targets_min": 261
|
|
},
|
|
{
|
|
"epoch": 6.855776892430279,
|
|
"grad_norm": 0.5561054288493201,
|
|
"learning_rate": 5.335432672896712e-08,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10510461777448654,
|
|
"step": 4305,
|
|
"valid_targets_mean": 4765.1,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 6.8637450199203185,
|
|
"grad_norm": 0.573876798093476,
|
|
"learning_rate": 4.77147797688704e-08,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14638984203338623,
|
|
"step": 4310,
|
|
"valid_targets_mean": 4447.1,
|
|
"valid_targets_min": 3156
|
|
},
|
|
{
|
|
"epoch": 6.871713147410358,
|
|
"grad_norm": 0.6475360444663981,
|
|
"learning_rate": 4.238980422826e-08,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1150745302438736,
|
|
"step": 4315,
|
|
"valid_targets_mean": 3765.0,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 6.879681274900398,
|
|
"grad_norm": 0.6220352479403992,
|
|
"learning_rate": 3.737948406168812e-08,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10800758749246597,
|
|
"step": 4320,
|
|
"valid_targets_mean": 3688.0,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 6.887649402390438,
|
|
"grad_norm": 0.6415167530434839,
|
|
"learning_rate": 3.26838982627864e-08,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14324162900447845,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3372.5,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 6.895617529880478,
|
|
"grad_norm": 0.6358132250393562,
|
|
"learning_rate": 2.8303120863033552e-08,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1304199993610382,
|
|
"step": 4330,
|
|
"valid_targets_mean": 3848.9,
|
|
"valid_targets_min": 2545
|
|
},
|
|
{
|
|
"epoch": 6.903585657370518,
|
|
"grad_norm": 0.6448123337947649,
|
|
"learning_rate": 2.4237220930571904e-08,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09287726134061813,
|
|
"step": 4335,
|
|
"valid_targets_mean": 2860.0,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 6.9115537848605575,
|
|
"grad_norm": 0.5742771882266169,
|
|
"learning_rate": 2.0486262569132664e-08,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11134955286979675,
|
|
"step": 4340,
|
|
"valid_targets_mean": 3606.9,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 6.919521912350597,
|
|
"grad_norm": 0.5609742392864686,
|
|
"learning_rate": 1.70503049170212e-08,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11778663098812103,
|
|
"step": 4345,
|
|
"valid_targets_mean": 4391.4,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 6.927490039840637,
|
|
"grad_norm": 0.7002531287014088,
|
|
"learning_rate": 1.3929402146179994e-08,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1366140991449356,
|
|
"step": 4350,
|
|
"valid_targets_mean": 3469.9,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 6.935458167330677,
|
|
"grad_norm": 0.6549722486719072,
|
|
"learning_rate": 1.1123603461340449e-08,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11728227138519287,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3456.1,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 6.943426294820717,
|
|
"grad_norm": 0.6280634012494569,
|
|
"learning_rate": 8.632953099241282e-09,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12032538652420044,
|
|
"step": 4360,
|
|
"valid_targets_mean": 4182.5,
|
|
"valid_targets_min": 3547
|
|
},
|
|
{
|
|
"epoch": 6.951394422310757,
|
|
"grad_norm": 0.5713513465827403,
|
|
"learning_rate": 6.457490327940186e-09,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11433510482311249,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4045.8,
|
|
"valid_targets_min": 3534
|
|
},
|
|
{
|
|
"epoch": 6.9593625498007965,
|
|
"grad_norm": 0.6096363669276409,
|
|
"learning_rate": 4.597249446183227e-09,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14061906933784485,
|
|
"step": 4370,
|
|
"valid_targets_mean": 4232.0,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 6.967330677290836,
|
|
"grad_norm": 0.5569203082593212,
|
|
"learning_rate": 3.0522597828719357e-09,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11819716542959213,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4388.1,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 6.975298804780876,
|
|
"grad_norm": 0.5903096458988022,
|
|
"learning_rate": 1.822545696601452e-09,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12359915673732758,
|
|
"step": 4380,
|
|
"valid_targets_mean": 4007.8,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 6.983266932270916,
|
|
"grad_norm": 0.7116026191593819,
|
|
"learning_rate": 9.081265752697299e-10,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12049850076436996,
|
|
"step": 4385,
|
|
"valid_targets_mean": 3735.2,
|
|
"valid_targets_min": 626
|
|
},
|
|
{
|
|
"epoch": 6.991235059760956,
|
|
"grad_norm": 0.6225155358400506,
|
|
"learning_rate": 3.0901683577999606e-10,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134708970785141,
|
|
"step": 4390,
|
|
"valid_targets_mean": 3669.4,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 6.999203187250996,
|
|
"grad_norm": 0.5318998756184224,
|
|
"learning_rate": 2.522592380316269e-11,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13233494758605957,
|
|
"step": 4395,
|
|
"valid_targets_mean": 4332.0,
|
|
"valid_targets_min": 3516
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2810341417789459,
|
|
"step": 4396,
|
|
"total_flos": 2.4258565470369874e+18,
|
|
"train_loss": 0.30312070208478775,
|
|
"train_runtime": 54647.127,
|
|
"train_samples_per_second": 1.285,
|
|
"train_steps_per_second": 0.08,
|
|
"valid_targets_mean": 4356.1,
|
|
"valid_targets_min": 3101
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4396,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.4258565470369874e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|