9607 lines
267 KiB
JSON
9607 lines
267 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4347,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008051529790660225,
|
|
"grad_norm": 12.6211720137527,
|
|
"learning_rate": 3.6781609195402303e-07,
|
|
"loss": 0.8023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8141472339630127,
|
|
"step": 5,
|
|
"valid_targets_mean": 6454.3,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 0.01610305958132045,
|
|
"grad_norm": 13.535375108314145,
|
|
"learning_rate": 8.275862068965518e-07,
|
|
"loss": 0.779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7463928461074829,
|
|
"step": 10,
|
|
"valid_targets_mean": 6041.3,
|
|
"valid_targets_min": 3050
|
|
},
|
|
{
|
|
"epoch": 0.024154589371980676,
|
|
"grad_norm": 11.267948292741378,
|
|
"learning_rate": 1.2873563218390806e-06,
|
|
"loss": 0.7929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7566848993301392,
|
|
"step": 15,
|
|
"valid_targets_mean": 6337.3,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 0.0322061191626409,
|
|
"grad_norm": 9.384853818135273,
|
|
"learning_rate": 1.7471264367816093e-06,
|
|
"loss": 0.7322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7497715353965759,
|
|
"step": 20,
|
|
"valid_targets_mean": 6536.2,
|
|
"valid_targets_min": 3943
|
|
},
|
|
{
|
|
"epoch": 0.040257648953301126,
|
|
"grad_norm": 6.508928535304328,
|
|
"learning_rate": 2.206896551724138e-06,
|
|
"loss": 0.6848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6616116762161255,
|
|
"step": 25,
|
|
"valid_targets_mean": 6144.0,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 0.04830917874396135,
|
|
"grad_norm": 4.939706725980711,
|
|
"learning_rate": 2.666666666666667e-06,
|
|
"loss": 0.6527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6172756552696228,
|
|
"step": 30,
|
|
"valid_targets_mean": 5658.6,
|
|
"valid_targets_min": 3195
|
|
},
|
|
{
|
|
"epoch": 0.05636070853462158,
|
|
"grad_norm": 2.5910555463911815,
|
|
"learning_rate": 3.1264367816091956e-06,
|
|
"loss": 0.626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6274504065513611,
|
|
"step": 35,
|
|
"valid_targets_mean": 5992.8,
|
|
"valid_targets_min": 3207
|
|
},
|
|
{
|
|
"epoch": 0.0644122383252818,
|
|
"grad_norm": 1.7689592025893621,
|
|
"learning_rate": 3.5862068965517243e-06,
|
|
"loss": 0.576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6012336015701294,
|
|
"step": 40,
|
|
"valid_targets_mean": 5498.4,
|
|
"valid_targets_min": 2385
|
|
},
|
|
{
|
|
"epoch": 0.07246376811594203,
|
|
"grad_norm": 1.3230501227984852,
|
|
"learning_rate": 4.0459770114942535e-06,
|
|
"loss": 0.5463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5329886078834534,
|
|
"step": 45,
|
|
"valid_targets_mean": 6101.3,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 0.08051529790660225,
|
|
"grad_norm": 1.1549301298631403,
|
|
"learning_rate": 4.505747126436782e-06,
|
|
"loss": 0.5354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5498359203338623,
|
|
"step": 50,
|
|
"valid_targets_mean": 5591.6,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 0.08856682769726248,
|
|
"grad_norm": 0.9148612268441723,
|
|
"learning_rate": 4.965517241379311e-06,
|
|
"loss": 0.5173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5667314529418945,
|
|
"step": 55,
|
|
"valid_targets_mean": 7175.1,
|
|
"valid_targets_min": 3309
|
|
},
|
|
{
|
|
"epoch": 0.0966183574879227,
|
|
"grad_norm": 0.9051707439274824,
|
|
"learning_rate": 5.42528735632184e-06,
|
|
"loss": 0.4999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49746763706207275,
|
|
"step": 60,
|
|
"valid_targets_mean": 5700.9,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 0.10466988727858294,
|
|
"grad_norm": 0.7105966724914392,
|
|
"learning_rate": 5.8850574712643685e-06,
|
|
"loss": 0.4905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49637746810913086,
|
|
"step": 65,
|
|
"valid_targets_mean": 6462.1,
|
|
"valid_targets_min": 3176
|
|
},
|
|
{
|
|
"epoch": 0.11272141706924316,
|
|
"grad_norm": 0.6112534858050221,
|
|
"learning_rate": 6.344827586206898e-06,
|
|
"loss": 0.483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44268643856048584,
|
|
"step": 70,
|
|
"valid_targets_mean": 6671.4,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 0.12077294685990338,
|
|
"grad_norm": 0.6428768043018936,
|
|
"learning_rate": 6.804597701149426e-06,
|
|
"loss": 0.4662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4915180206298828,
|
|
"step": 75,
|
|
"valid_targets_mean": 5441.8,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 0.1288244766505636,
|
|
"grad_norm": 0.9646038113879557,
|
|
"learning_rate": 7.264367816091955e-06,
|
|
"loss": 0.4314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43008822202682495,
|
|
"step": 80,
|
|
"valid_targets_mean": 5118.4,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 0.13687600644122383,
|
|
"grad_norm": 1.4574457159646115,
|
|
"learning_rate": 7.724137931034483e-06,
|
|
"loss": 0.615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6628851890563965,
|
|
"step": 85,
|
|
"valid_targets_mean": 2431.5,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 0.14492753623188406,
|
|
"grad_norm": 1.041883232166695,
|
|
"learning_rate": 8.183908045977013e-06,
|
|
"loss": 0.701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6815399527549744,
|
|
"step": 90,
|
|
"valid_targets_mean": 3045.8,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 0.1529790660225443,
|
|
"grad_norm": 0.8494250273002256,
|
|
"learning_rate": 8.643678160919541e-06,
|
|
"loss": 0.6405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6369081735610962,
|
|
"step": 95,
|
|
"valid_targets_mean": 3644.5,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 0.1610305958132045,
|
|
"grad_norm": 0.8637751062370636,
|
|
"learning_rate": 9.10344827586207e-06,
|
|
"loss": 0.6437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6281415224075317,
|
|
"step": 100,
|
|
"valid_targets_mean": 2985.6,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 0.16908212560386474,
|
|
"grad_norm": 0.7659577551413557,
|
|
"learning_rate": 9.563218390804598e-06,
|
|
"loss": 0.5845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5499722957611084,
|
|
"step": 105,
|
|
"valid_targets_mean": 4235.5,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 0.17713365539452497,
|
|
"grad_norm": 0.8658771361706771,
|
|
"learning_rate": 1.0022988505747126e-05,
|
|
"loss": 0.6017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6165111660957336,
|
|
"step": 110,
|
|
"valid_targets_mean": 3142.8,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 0.18518518518518517,
|
|
"grad_norm": 0.7434733707474204,
|
|
"learning_rate": 1.0482758620689658e-05,
|
|
"loss": 0.5981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5669258832931519,
|
|
"step": 115,
|
|
"valid_targets_mean": 3279.6,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 0.1932367149758454,
|
|
"grad_norm": 0.8722216577271487,
|
|
"learning_rate": 1.0942528735632186e-05,
|
|
"loss": 0.6172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6591456532478333,
|
|
"step": 120,
|
|
"valid_targets_mean": 2693.6,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 0.20128824476650564,
|
|
"grad_norm": 0.8310182578313449,
|
|
"learning_rate": 1.1402298850574713e-05,
|
|
"loss": 0.5898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5488319396972656,
|
|
"step": 125,
|
|
"valid_targets_mean": 2785.3,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 0.20933977455716588,
|
|
"grad_norm": 0.9235409692277453,
|
|
"learning_rate": 1.1862068965517241e-05,
|
|
"loss": 0.6038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6315065622329712,
|
|
"step": 130,
|
|
"valid_targets_mean": 2680.9,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 0.21739130434782608,
|
|
"grad_norm": 0.7693874973200193,
|
|
"learning_rate": 1.2321839080459773e-05,
|
|
"loss": 0.5719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5175089836120605,
|
|
"step": 135,
|
|
"valid_targets_mean": 3356.8,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 0.22544283413848631,
|
|
"grad_norm": 0.926054709365038,
|
|
"learning_rate": 1.2781609195402301e-05,
|
|
"loss": 0.5767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.59300696849823,
|
|
"step": 140,
|
|
"valid_targets_mean": 2648.8,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 0.23349436392914655,
|
|
"grad_norm": 0.7436423550718153,
|
|
"learning_rate": 1.324137931034483e-05,
|
|
"loss": 0.5838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5333613157272339,
|
|
"step": 145,
|
|
"valid_targets_mean": 3610.2,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 0.24154589371980675,
|
|
"grad_norm": 0.8771301167213914,
|
|
"learning_rate": 1.3701149425287356e-05,
|
|
"loss": 0.5807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5605872869491577,
|
|
"step": 150,
|
|
"valid_targets_mean": 2815.4,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 0.249597423510467,
|
|
"grad_norm": 0.7265522281955346,
|
|
"learning_rate": 1.4160919540229888e-05,
|
|
"loss": 0.5597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5334085822105408,
|
|
"step": 155,
|
|
"valid_targets_mean": 3536.7,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 0.2576489533011272,
|
|
"grad_norm": 0.8637169240692523,
|
|
"learning_rate": 1.4620689655172416e-05,
|
|
"loss": 0.5512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5399806499481201,
|
|
"step": 160,
|
|
"valid_targets_mean": 2561.4,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 0.26570048309178745,
|
|
"grad_norm": 0.7684987940501175,
|
|
"learning_rate": 1.5080459770114944e-05,
|
|
"loss": 0.5561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5099571943283081,
|
|
"step": 165,
|
|
"valid_targets_mean": 2889.4,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 0.27375201288244766,
|
|
"grad_norm": 1.2443263629609338,
|
|
"learning_rate": 1.5540229885057473e-05,
|
|
"loss": 0.5758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5990105271339417,
|
|
"step": 170,
|
|
"valid_targets_mean": 2557.4,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 0.28180354267310787,
|
|
"grad_norm": 0.7025661086648686,
|
|
"learning_rate": 1.6000000000000003e-05,
|
|
"loss": 0.5516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5607724785804749,
|
|
"step": 175,
|
|
"valid_targets_mean": 3530.8,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 0.2898550724637681,
|
|
"grad_norm": 0.9299035150917279,
|
|
"learning_rate": 1.645977011494253e-05,
|
|
"loss": 0.5504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5684704184532166,
|
|
"step": 180,
|
|
"valid_targets_mean": 2579.7,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 0.29790660225442833,
|
|
"grad_norm": 0.9029683337889093,
|
|
"learning_rate": 1.691954022988506e-05,
|
|
"loss": 0.5549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5909307599067688,
|
|
"step": 185,
|
|
"valid_targets_mean": 3024.5,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 0.3059581320450886,
|
|
"grad_norm": 0.7196257441971349,
|
|
"learning_rate": 1.7379310344827586e-05,
|
|
"loss": 0.533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5063704252243042,
|
|
"step": 190,
|
|
"valid_targets_mean": 3523.4,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 0.3140096618357488,
|
|
"grad_norm": 0.7255462834778044,
|
|
"learning_rate": 1.7839080459770116e-05,
|
|
"loss": 0.5285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5233041644096375,
|
|
"step": 195,
|
|
"valid_targets_mean": 3317.8,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 0.322061191626409,
|
|
"grad_norm": 0.8908272457952328,
|
|
"learning_rate": 1.8298850574712646e-05,
|
|
"loss": 0.566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5723013877868652,
|
|
"step": 200,
|
|
"valid_targets_mean": 2728.8,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 0.33011272141706927,
|
|
"grad_norm": 0.7237478954756121,
|
|
"learning_rate": 1.8758620689655173e-05,
|
|
"loss": 0.5403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.488622784614563,
|
|
"step": 205,
|
|
"valid_targets_mean": 3474.4,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 0.33816425120772947,
|
|
"grad_norm": 0.9731721468625133,
|
|
"learning_rate": 1.9218390804597703e-05,
|
|
"loss": 0.5479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5853530168533325,
|
|
"step": 210,
|
|
"valid_targets_mean": 2857.8,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 0.3462157809983897,
|
|
"grad_norm": 0.8620329842750755,
|
|
"learning_rate": 1.9678160919540233e-05,
|
|
"loss": 0.5582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5213043689727783,
|
|
"step": 215,
|
|
"valid_targets_mean": 2419.3,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 0.35426731078904994,
|
|
"grad_norm": 0.9094077963820549,
|
|
"learning_rate": 2.013793103448276e-05,
|
|
"loss": 0.5365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5487880706787109,
|
|
"step": 220,
|
|
"valid_targets_mean": 2817.8,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 0.36231884057971014,
|
|
"grad_norm": 0.7944146685277702,
|
|
"learning_rate": 2.059770114942529e-05,
|
|
"loss": 0.5477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5611993074417114,
|
|
"step": 225,
|
|
"valid_targets_mean": 2906.6,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"grad_norm": 0.8404791965077937,
|
|
"learning_rate": 2.1057471264367816e-05,
|
|
"loss": 0.5326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5319218635559082,
|
|
"step": 230,
|
|
"valid_targets_mean": 2699.4,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 0.3784219001610306,
|
|
"grad_norm": 0.7398954624086636,
|
|
"learning_rate": 2.1517241379310346e-05,
|
|
"loss": 0.5158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5021366477012634,
|
|
"step": 235,
|
|
"valid_targets_mean": 3474.1,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 0.3864734299516908,
|
|
"grad_norm": 0.6565950720543745,
|
|
"learning_rate": 2.1977011494252873e-05,
|
|
"loss": 0.516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4567714333534241,
|
|
"step": 240,
|
|
"valid_targets_mean": 4942.6,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 0.394524959742351,
|
|
"grad_norm": 0.7843837006160241,
|
|
"learning_rate": 2.2436781609195406e-05,
|
|
"loss": 0.5148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.525688886642456,
|
|
"step": 245,
|
|
"valid_targets_mean": 3203.9,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 0.4025764895330113,
|
|
"grad_norm": 0.786413171985749,
|
|
"learning_rate": 2.2896551724137933e-05,
|
|
"loss": 0.4953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5001181364059448,
|
|
"step": 250,
|
|
"valid_targets_mean": 2741.4,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 0.4106280193236715,
|
|
"grad_norm": 0.9340749614199906,
|
|
"learning_rate": 2.3356321839080463e-05,
|
|
"loss": 0.5043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5208650827407837,
|
|
"step": 255,
|
|
"valid_targets_mean": 2505.7,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 0.41867954911433175,
|
|
"grad_norm": 0.7974854363515583,
|
|
"learning_rate": 2.381609195402299e-05,
|
|
"loss": 0.4952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4734256863594055,
|
|
"step": 260,
|
|
"valid_targets_mean": 2908.2,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 0.42673107890499196,
|
|
"grad_norm": 0.7083832787574131,
|
|
"learning_rate": 2.427586206896552e-05,
|
|
"loss": 0.4563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4569528102874756,
|
|
"step": 265,
|
|
"valid_targets_mean": 3492.0,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 0.43478260869565216,
|
|
"grad_norm": 0.8471307752430234,
|
|
"learning_rate": 2.4735632183908046e-05,
|
|
"loss": 0.4805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4980763792991638,
|
|
"step": 270,
|
|
"valid_targets_mean": 2432.0,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 0.4428341384863124,
|
|
"grad_norm": 0.7581722015778103,
|
|
"learning_rate": 2.5195402298850576e-05,
|
|
"loss": 0.4698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4556068778038025,
|
|
"step": 275,
|
|
"valid_targets_mean": 3069.9,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 0.45088566827697263,
|
|
"grad_norm": 0.7647100815494069,
|
|
"learning_rate": 2.5655172413793103e-05,
|
|
"loss": 0.5029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5297585725784302,
|
|
"step": 280,
|
|
"valid_targets_mean": 3644.1,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 0.45893719806763283,
|
|
"grad_norm": 0.6888367368546122,
|
|
"learning_rate": 2.6114942528735636e-05,
|
|
"loss": 0.4635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4325258135795593,
|
|
"step": 285,
|
|
"valid_targets_mean": 3538.7,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 0.4669887278582931,
|
|
"grad_norm": 0.7918334936431798,
|
|
"learning_rate": 2.6574712643678166e-05,
|
|
"loss": 0.4829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48796749114990234,
|
|
"step": 290,
|
|
"valid_targets_mean": 2789.2,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 0.4750402576489533,
|
|
"grad_norm": 0.8297680238669458,
|
|
"learning_rate": 2.7034482758620693e-05,
|
|
"loss": 0.5134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47267746925354004,
|
|
"step": 295,
|
|
"valid_targets_mean": 2663.7,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 0.4830917874396135,
|
|
"grad_norm": 0.6633379830429746,
|
|
"learning_rate": 2.749425287356322e-05,
|
|
"loss": 0.4589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49964243173599243,
|
|
"step": 300,
|
|
"valid_targets_mean": 3865.0,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 0.49114331723027377,
|
|
"grad_norm": 0.6267750181705374,
|
|
"learning_rate": 2.795402298850575e-05,
|
|
"loss": 0.4686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44949406385421753,
|
|
"step": 305,
|
|
"valid_targets_mean": 4117.5,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 0.499194847020934,
|
|
"grad_norm": 0.705425101438482,
|
|
"learning_rate": 2.8413793103448276e-05,
|
|
"loss": 0.4443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4142606258392334,
|
|
"step": 310,
|
|
"valid_targets_mean": 3565.3,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 0.5072463768115942,
|
|
"grad_norm": 0.6719556250206123,
|
|
"learning_rate": 2.8873563218390806e-05,
|
|
"loss": 0.4748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46530526876449585,
|
|
"step": 315,
|
|
"valid_targets_mean": 3507.9,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 0.5152979066022544,
|
|
"grad_norm": 0.7129427212893642,
|
|
"learning_rate": 2.9333333333333333e-05,
|
|
"loss": 0.4742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4753148555755615,
|
|
"step": 320,
|
|
"valid_targets_mean": 3822.9,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 0.5233494363929146,
|
|
"grad_norm": 0.7453050849315828,
|
|
"learning_rate": 2.9793103448275866e-05,
|
|
"loss": 0.4602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4605864882469177,
|
|
"step": 325,
|
|
"valid_targets_mean": 2932.9,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 0.5314009661835749,
|
|
"grad_norm": 0.6416359264690352,
|
|
"learning_rate": 3.0252873563218396e-05,
|
|
"loss": 0.4427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3964742124080658,
|
|
"step": 330,
|
|
"valid_targets_mean": 3473.2,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 0.5394524959742351,
|
|
"grad_norm": 0.8091118683339827,
|
|
"learning_rate": 3.071264367816092e-05,
|
|
"loss": 0.4865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5222635269165039,
|
|
"step": 335,
|
|
"valid_targets_mean": 2707.5,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 0.5475040257648953,
|
|
"grad_norm": 0.6475514263706704,
|
|
"learning_rate": 3.117241379310345e-05,
|
|
"loss": 0.468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4403412640094757,
|
|
"step": 340,
|
|
"valid_targets_mean": 3770.1,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 0.6905557475577746,
|
|
"learning_rate": 3.1632183908045976e-05,
|
|
"loss": 0.4429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42495399713516235,
|
|
"step": 345,
|
|
"valid_targets_mean": 3687.9,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 0.5636070853462157,
|
|
"grad_norm": 0.7104740968387137,
|
|
"learning_rate": 3.2091954022988506e-05,
|
|
"loss": 0.455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43538206815719604,
|
|
"step": 350,
|
|
"valid_targets_mean": 3580.0,
|
|
"valid_targets_min": 1778
|
|
},
|
|
{
|
|
"epoch": 0.571658615136876,
|
|
"grad_norm": 0.7499873582780294,
|
|
"learning_rate": 3.2551724137931036e-05,
|
|
"loss": 0.471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42534762620925903,
|
|
"step": 355,
|
|
"valid_targets_mean": 3154.2,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 0.5797101449275363,
|
|
"grad_norm": 0.6642445186064259,
|
|
"learning_rate": 3.3011494252873566e-05,
|
|
"loss": 0.4456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45597341656684875,
|
|
"step": 360,
|
|
"valid_targets_mean": 3442.1,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 0.5877616747181964,
|
|
"grad_norm": 0.9240276056077943,
|
|
"learning_rate": 3.3471264367816096e-05,
|
|
"loss": 0.4582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46805498003959656,
|
|
"step": 365,
|
|
"valid_targets_mean": 2991.2,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 0.5958132045088567,
|
|
"grad_norm": 0.7060945620088765,
|
|
"learning_rate": 3.3931034482758626e-05,
|
|
"loss": 0.4711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42430639266967773,
|
|
"step": 370,
|
|
"valid_targets_mean": 3584.1,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 0.6038647342995169,
|
|
"grad_norm": 0.6951756292278192,
|
|
"learning_rate": 3.4390804597701156e-05,
|
|
"loss": 0.435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4317897856235504,
|
|
"step": 375,
|
|
"valid_targets_mean": 3283.6,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.6119162640901772,
|
|
"grad_norm": 0.6929947538123722,
|
|
"learning_rate": 3.485057471264368e-05,
|
|
"loss": 0.4654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44339442253112793,
|
|
"step": 380,
|
|
"valid_targets_mean": 3267.1,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 0.6199677938808373,
|
|
"grad_norm": 0.59684922350695,
|
|
"learning_rate": 3.531034482758621e-05,
|
|
"loss": 0.4385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38249504566192627,
|
|
"step": 385,
|
|
"valid_targets_mean": 4220.8,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 0.6280193236714976,
|
|
"grad_norm": 0.6361797000665275,
|
|
"learning_rate": 3.577011494252874e-05,
|
|
"loss": 0.4499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42138925194740295,
|
|
"step": 390,
|
|
"valid_targets_mean": 3403.3,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 0.6360708534621579,
|
|
"grad_norm": 0.7553402257839422,
|
|
"learning_rate": 3.622988505747126e-05,
|
|
"loss": 0.4251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.415188193321228,
|
|
"step": 395,
|
|
"valid_targets_mean": 3994.0,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 0.644122383252818,
|
|
"grad_norm": 0.7840399240091517,
|
|
"learning_rate": 3.668965517241379e-05,
|
|
"loss": 0.4666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4885357618331909,
|
|
"step": 400,
|
|
"valid_targets_mean": 2728.6,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 0.6521739130434783,
|
|
"grad_norm": 0.7737252334457005,
|
|
"learning_rate": 3.714942528735633e-05,
|
|
"loss": 0.4453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4366183876991272,
|
|
"step": 405,
|
|
"valid_targets_mean": 2805.7,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 0.6602254428341385,
|
|
"grad_norm": 0.8020067156942912,
|
|
"learning_rate": 3.760919540229885e-05,
|
|
"loss": 0.4458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4633807837963104,
|
|
"step": 410,
|
|
"valid_targets_mean": 3165.5,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 0.6682769726247987,
|
|
"grad_norm": 0.7674549160676873,
|
|
"learning_rate": 3.806896551724138e-05,
|
|
"loss": 0.4431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4483141601085663,
|
|
"step": 415,
|
|
"valid_targets_mean": 2954.1,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.6763285024154589,
|
|
"grad_norm": 0.6203648387560737,
|
|
"learning_rate": 3.852873563218391e-05,
|
|
"loss": 0.4338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4143407642841339,
|
|
"step": 420,
|
|
"valid_targets_mean": 4536.0,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 0.6843800322061192,
|
|
"grad_norm": 0.6999255524103093,
|
|
"learning_rate": 3.898850574712644e-05,
|
|
"loss": 0.4453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41560059785842896,
|
|
"step": 425,
|
|
"valid_targets_mean": 3440.9,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 0.6924315619967794,
|
|
"grad_norm": 0.7359356732077772,
|
|
"learning_rate": 3.9448275862068966e-05,
|
|
"loss": 0.4852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4889700412750244,
|
|
"step": 430,
|
|
"valid_targets_mean": 3381.9,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 0.7004830917874396,
|
|
"grad_norm": 0.6434530063256044,
|
|
"learning_rate": 3.9908045977011496e-05,
|
|
"loss": 0.438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43669044971466064,
|
|
"step": 435,
|
|
"valid_targets_mean": 4124.1,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 0.7085346215780999,
|
|
"grad_norm": 0.7259157801334376,
|
|
"learning_rate": 3.9999896813789735e-05,
|
|
"loss": 0.4362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46412330865859985,
|
|
"step": 440,
|
|
"valid_targets_mean": 3666.5,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 0.71658615136876,
|
|
"grad_norm": 0.6920837839026965,
|
|
"learning_rate": 3.999947762163533e-05,
|
|
"loss": 0.4364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4421723783016205,
|
|
"step": 445,
|
|
"valid_targets_mean": 3813.6,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 0.7246376811594203,
|
|
"grad_norm": 0.8229582070019114,
|
|
"learning_rate": 3.999873598115203e-05,
|
|
"loss": 0.4244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4543113112449646,
|
|
"step": 450,
|
|
"valid_targets_mean": 3184.7,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 0.7326892109500805,
|
|
"grad_norm": 0.6618965870642886,
|
|
"learning_rate": 3.999767190429718e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4174274504184723,
|
|
"step": 455,
|
|
"valid_targets_mean": 3734.6,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"grad_norm": 0.7350636189187817,
|
|
"learning_rate": 3.99962854082267e-05,
|
|
"loss": 0.4358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45041102170944214,
|
|
"step": 460,
|
|
"valid_targets_mean": 3706.6,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 0.748792270531401,
|
|
"grad_norm": 0.6527924396728313,
|
|
"learning_rate": 3.9994576515294864e-05,
|
|
"loss": 0.4562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44863760471343994,
|
|
"step": 465,
|
|
"valid_targets_mean": 3998.4,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 0.7568438003220612,
|
|
"grad_norm": 0.7162099025470566,
|
|
"learning_rate": 3.999254525305386e-05,
|
|
"loss": 0.3562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30241087079048157,
|
|
"step": 470,
|
|
"valid_targets_mean": 6379.5,
|
|
"valid_targets_min": 3100
|
|
},
|
|
{
|
|
"epoch": 0.7648953301127214,
|
|
"grad_norm": 0.5518921640398805,
|
|
"learning_rate": 3.999019165425341e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271371066570282,
|
|
"step": 475,
|
|
"valid_targets_mean": 5043.8,
|
|
"valid_targets_min": 2708
|
|
},
|
|
{
|
|
"epoch": 0.7729468599033816,
|
|
"grad_norm": 0.5572299520035154,
|
|
"learning_rate": 3.99875157568402e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725101113319397,
|
|
"step": 480,
|
|
"valid_targets_mean": 5717.3,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 0.7809983896940419,
|
|
"grad_norm": 0.6593727425698053,
|
|
"learning_rate": 3.998451760395729e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33532363176345825,
|
|
"step": 485,
|
|
"valid_targets_mean": 6198.8,
|
|
"valid_targets_min": 2599
|
|
},
|
|
{
|
|
"epoch": 0.789049919484702,
|
|
"grad_norm": 0.4904371382366237,
|
|
"learning_rate": 3.99811972439434e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27875158190727234,
|
|
"step": 490,
|
|
"valid_targets_mean": 5880.9,
|
|
"valid_targets_min": 3427
|
|
},
|
|
{
|
|
"epoch": 0.7971014492753623,
|
|
"grad_norm": 0.5384641711313225,
|
|
"learning_rate": 3.997755473033218e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27927276492118835,
|
|
"step": 495,
|
|
"valid_targets_mean": 5429.1,
|
|
"valid_targets_min": 3331
|
|
},
|
|
{
|
|
"epoch": 0.8051529790660226,
|
|
"grad_norm": 0.4872366354078386,
|
|
"learning_rate": 3.997359012185127e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2796809673309326,
|
|
"step": 500,
|
|
"valid_targets_mean": 5318.9,
|
|
"valid_targets_min": 3176
|
|
},
|
|
{
|
|
"epoch": 0.8132045088566827,
|
|
"grad_norm": 0.5513399943173102,
|
|
"learning_rate": 3.996930348242141e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34563466906547546,
|
|
"step": 505,
|
|
"valid_targets_mean": 6334.5,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 0.821256038647343,
|
|
"grad_norm": 0.5178713450708176,
|
|
"learning_rate": 3.996469488115539e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2517154812812805,
|
|
"step": 510,
|
|
"valid_targets_mean": 5575.2,
|
|
"valid_targets_min": 2982
|
|
},
|
|
{
|
|
"epoch": 0.8293075684380032,
|
|
"grad_norm": 0.5354106021483718,
|
|
"learning_rate": 3.995976439235694e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2508094310760498,
|
|
"step": 515,
|
|
"valid_targets_mean": 5045.2,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 0.8373590982286635,
|
|
"grad_norm": 0.5043478969730439,
|
|
"learning_rate": 3.995451209551953e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23723244667053223,
|
|
"step": 520,
|
|
"valid_targets_mean": 5929.9,
|
|
"valid_targets_min": 3640
|
|
},
|
|
{
|
|
"epoch": 0.8454106280193237,
|
|
"grad_norm": 0.4644833157640728,
|
|
"learning_rate": 3.994893807532509e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2522507905960083,
|
|
"step": 525,
|
|
"valid_targets_mean": 5560.6,
|
|
"valid_targets_min": 3146
|
|
},
|
|
{
|
|
"epoch": 0.8534621578099839,
|
|
"grad_norm": 0.5528859754209329,
|
|
"learning_rate": 3.994304242164265e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732771635055542,
|
|
"step": 530,
|
|
"valid_targets_mean": 5438.9,
|
|
"valid_targets_min": 3095
|
|
},
|
|
{
|
|
"epoch": 0.8615136876006442,
|
|
"grad_norm": 0.475067141876231,
|
|
"learning_rate": 3.9936825229526855e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2705361843109131,
|
|
"step": 535,
|
|
"valid_targets_mean": 6153.6,
|
|
"valid_targets_min": 3325
|
|
},
|
|
{
|
|
"epoch": 0.8695652173913043,
|
|
"grad_norm": 0.4480740353625752,
|
|
"learning_rate": 3.9930286599216506e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25481677055358887,
|
|
"step": 540,
|
|
"valid_targets_mean": 6176.7,
|
|
"valid_targets_min": 3690
|
|
},
|
|
{
|
|
"epoch": 0.8776167471819646,
|
|
"grad_norm": 0.5197397102755734,
|
|
"learning_rate": 3.9923426636132866e-05,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23781773447990417,
|
|
"step": 545,
|
|
"valid_targets_mean": 5253.8,
|
|
"valid_targets_min": 2926
|
|
},
|
|
{
|
|
"epoch": 0.8856682769726248,
|
|
"grad_norm": 0.4625888856613296,
|
|
"learning_rate": 3.991624545087801e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22309911251068115,
|
|
"step": 550,
|
|
"valid_targets_mean": 6097.0,
|
|
"valid_targets_min": 3707
|
|
},
|
|
{
|
|
"epoch": 0.893719806763285,
|
|
"grad_norm": 0.4887112175957848,
|
|
"learning_rate": 3.9908743159233016e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25249046087265015,
|
|
"step": 555,
|
|
"valid_targets_mean": 5931.1,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 0.9017713365539453,
|
|
"grad_norm": 0.4459880666225944,
|
|
"learning_rate": 3.990091988215612e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24061977863311768,
|
|
"step": 560,
|
|
"valid_targets_mean": 6370.6,
|
|
"valid_targets_min": 3665
|
|
},
|
|
{
|
|
"epoch": 0.9098228663446055,
|
|
"grad_norm": 0.5441022062942001,
|
|
"learning_rate": 3.989277574578074e-05,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26900017261505127,
|
|
"step": 565,
|
|
"valid_targets_mean": 6119.8,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 0.9178743961352657,
|
|
"grad_norm": 0.5238796115702521,
|
|
"learning_rate": 3.9884310881413473e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24351592361927032,
|
|
"step": 570,
|
|
"valid_targets_mean": 5352.8,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 0.9259259259259259,
|
|
"grad_norm": 0.4638163386116075,
|
|
"learning_rate": 3.987552542553194e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24837109446525574,
|
|
"step": 575,
|
|
"valid_targets_mean": 6064.1,
|
|
"valid_targets_min": 2800
|
|
},
|
|
{
|
|
"epoch": 0.9339774557165862,
|
|
"grad_norm": 0.4296037881388161,
|
|
"learning_rate": 3.9866419519782636e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21541734039783478,
|
|
"step": 580,
|
|
"valid_targets_mean": 6725.1,
|
|
"valid_targets_min": 4014
|
|
},
|
|
{
|
|
"epoch": 0.9420289855072463,
|
|
"grad_norm": 0.5667563203419782,
|
|
"learning_rate": 3.985699331097858e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25082963705062866,
|
|
"step": 585,
|
|
"valid_targets_mean": 5340.4,
|
|
"valid_targets_min": 3261
|
|
},
|
|
{
|
|
"epoch": 0.9500805152979066,
|
|
"grad_norm": 0.44699582805642185,
|
|
"learning_rate": 3.984724695109702e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24760377407073975,
|
|
"step": 590,
|
|
"valid_targets_mean": 6194.7,
|
|
"valid_targets_min": 3579
|
|
},
|
|
{
|
|
"epoch": 0.9581320450885669,
|
|
"grad_norm": 0.4523900544730103,
|
|
"learning_rate": 3.983718059727693e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22146061062812805,
|
|
"step": 595,
|
|
"valid_targets_mean": 6527.8,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 0.966183574879227,
|
|
"grad_norm": 0.39826806360509687,
|
|
"learning_rate": 3.9826794411816495e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23311802744865417,
|
|
"step": 600,
|
|
"valid_targets_mean": 6887.7,
|
|
"valid_targets_min": 3569
|
|
},
|
|
{
|
|
"epoch": 0.9742351046698873,
|
|
"grad_norm": 0.6229816324333505,
|
|
"learning_rate": 3.981608856217049e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21930500864982605,
|
|
"step": 605,
|
|
"valid_targets_mean": 5398.8,
|
|
"valid_targets_min": 2580
|
|
},
|
|
{
|
|
"epoch": 0.9822866344605475,
|
|
"grad_norm": 0.4999823141609161,
|
|
"learning_rate": 3.980506322094761e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2858148217201233,
|
|
"step": 610,
|
|
"valid_targets_mean": 6136.1,
|
|
"valid_targets_min": 2610
|
|
},
|
|
{
|
|
"epoch": 0.9903381642512077,
|
|
"grad_norm": 0.46445064808991693,
|
|
"learning_rate": 3.979371856590762e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21342137455940247,
|
|
"step": 615,
|
|
"valid_targets_mean": 5957.4,
|
|
"valid_targets_min": 2562
|
|
},
|
|
{
|
|
"epoch": 0.998389694041868,
|
|
"grad_norm": 0.5766730357055111,
|
|
"learning_rate": 3.978205477995856e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23376914858818054,
|
|
"step": 620,
|
|
"valid_targets_mean": 5860.3,
|
|
"valid_targets_min": 2833
|
|
},
|
|
{
|
|
"epoch": 1.0064412238325282,
|
|
"grad_norm": 0.5381872293931007,
|
|
"learning_rate": 3.9770072051153754e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38717740774154663,
|
|
"step": 625,
|
|
"valid_targets_mean": 5898.7,
|
|
"valid_targets_min": 2769
|
|
},
|
|
{
|
|
"epoch": 1.0144927536231885,
|
|
"grad_norm": 0.5254280438649158,
|
|
"learning_rate": 3.9757770572688786e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3415398895740509,
|
|
"step": 630,
|
|
"valid_targets_mean": 6068.6,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 1.0225442834138487,
|
|
"grad_norm": 0.48064107372040804,
|
|
"learning_rate": 3.9745150542898405e-05,
|
|
"loss": 0.3506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36373406648635864,
|
|
"step": 635,
|
|
"valid_targets_mean": 6519.8,
|
|
"valid_targets_min": 3314
|
|
},
|
|
{
|
|
"epoch": 1.0305958132045088,
|
|
"grad_norm": 0.4867942399417131,
|
|
"learning_rate": 3.97322121652533e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33237212896347046,
|
|
"step": 640,
|
|
"valid_targets_mean": 6028.6,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 1.038647342995169,
|
|
"grad_norm": 0.5535759217279507,
|
|
"learning_rate": 3.971895564835683e-05,
|
|
"loss": 0.3405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36408573389053345,
|
|
"step": 645,
|
|
"valid_targets_mean": 6266.9,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 1.0466988727858293,
|
|
"grad_norm": 0.5234567604492989,
|
|
"learning_rate": 3.970538120594166e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30862703919410706,
|
|
"step": 650,
|
|
"valid_targets_mean": 5759.1,
|
|
"valid_targets_min": 2568
|
|
},
|
|
{
|
|
"epoch": 1.0547504025764896,
|
|
"grad_norm": 0.5104944272693731,
|
|
"learning_rate": 3.9691489056866324e-05,
|
|
"loss": 0.335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31290531158447266,
|
|
"step": 655,
|
|
"valid_targets_mean": 5530.2,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.0628019323671498,
|
|
"grad_norm": 0.4962198988376796,
|
|
"learning_rate": 3.9677279425111684e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.292328417301178,
|
|
"step": 660,
|
|
"valid_targets_mean": 5696.9,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 1.07085346215781,
|
|
"grad_norm": 0.4883167094213102,
|
|
"learning_rate": 3.9662752539777314e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085409700870514,
|
|
"step": 665,
|
|
"valid_targets_mean": 5683.9,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 1.0789049919484701,
|
|
"grad_norm": 0.45277731991583026,
|
|
"learning_rate": 3.9647908635077845e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27562692761421204,
|
|
"step": 670,
|
|
"valid_targets_mean": 6331.4,
|
|
"valid_targets_min": 3356
|
|
},
|
|
{
|
|
"epoch": 1.0869565217391304,
|
|
"grad_norm": 0.5167803974967058,
|
|
"learning_rate": 3.963274795033913e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28318291902542114,
|
|
"step": 675,
|
|
"valid_targets_mean": 5326.1,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 1.0950080515297906,
|
|
"grad_norm": 0.5095041080696596,
|
|
"learning_rate": 3.9617270729994436e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3311567008495331,
|
|
"step": 680,
|
|
"valid_targets_mean": 6448.9,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 1.103059581320451,
|
|
"grad_norm": 0.49987418312419335,
|
|
"learning_rate": 3.960147722358046e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3058398962020874,
|
|
"step": 685,
|
|
"valid_targets_mean": 5910.2,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 0.4553139112077016,
|
|
"learning_rate": 3.958536768573335e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3590342402458191,
|
|
"step": 690,
|
|
"valid_targets_mean": 7141.1,
|
|
"valid_targets_min": 3571
|
|
},
|
|
{
|
|
"epoch": 1.1191626409017714,
|
|
"grad_norm": 0.4708599542937082,
|
|
"learning_rate": 3.956894237618456e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32140040397644043,
|
|
"step": 695,
|
|
"valid_targets_mean": 6492.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 1.1272141706924317,
|
|
"grad_norm": 0.48610528051988894,
|
|
"learning_rate": 3.955220155975669e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26136165857315063,
|
|
"step": 700,
|
|
"valid_targets_mean": 5447.6,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 1.1352657004830917,
|
|
"grad_norm": 0.7998551023631064,
|
|
"learning_rate": 3.9535145506359206e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4753114581108093,
|
|
"step": 705,
|
|
"valid_targets_mean": 3657.8,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 1.143317230273752,
|
|
"grad_norm": 0.8628326452111007,
|
|
"learning_rate": 3.951777449098408e-05,
|
|
"loss": 0.5288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5633917450904846,
|
|
"step": 710,
|
|
"valid_targets_mean": 2673.1,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 1.1513687600644122,
|
|
"grad_norm": 0.8324627405108382,
|
|
"learning_rate": 3.9500088793701387e-05,
|
|
"loss": 0.4929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4774799644947052,
|
|
"step": 715,
|
|
"valid_targets_mean": 3209.7,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 1.1594202898550725,
|
|
"grad_norm": 0.802960087915467,
|
|
"learning_rate": 3.948208869965473e-05,
|
|
"loss": 0.5032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5248502492904663,
|
|
"step": 720,
|
|
"valid_targets_mean": 3054.8,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 1.1674718196457328,
|
|
"grad_norm": 0.7752102610736108,
|
|
"learning_rate": 3.946377449905672e-05,
|
|
"loss": 0.4698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45278412103652954,
|
|
"step": 725,
|
|
"valid_targets_mean": 2978.8,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 1.1755233494363928,
|
|
"grad_norm": 0.7774619112330354,
|
|
"learning_rate": 3.9445146487184226e-05,
|
|
"loss": 0.4626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4363817572593689,
|
|
"step": 730,
|
|
"valid_targets_mean": 2849.6,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 1.183574879227053,
|
|
"grad_norm": 1.1474142413076773,
|
|
"learning_rate": 3.942620496437366e-05,
|
|
"loss": 0.4836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.519567608833313,
|
|
"step": 735,
|
|
"valid_targets_mean": 2615.8,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 1.1916264090177133,
|
|
"grad_norm": 0.7140245197333289,
|
|
"learning_rate": 3.940695023601612e-05,
|
|
"loss": 0.482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49396196007728577,
|
|
"step": 740,
|
|
"valid_targets_mean": 3296.4,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 1.1996779388083736,
|
|
"grad_norm": 0.6546808722676869,
|
|
"learning_rate": 3.938738261255247e-05,
|
|
"loss": 0.4933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46171700954437256,
|
|
"step": 745,
|
|
"valid_targets_mean": 3968.6,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 1.2077294685990339,
|
|
"grad_norm": 0.8090731054477709,
|
|
"learning_rate": 3.9367502409468315e-05,
|
|
"loss": 0.4818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5525240898132324,
|
|
"step": 750,
|
|
"valid_targets_mean": 3078.5,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 1.2157809983896941,
|
|
"grad_norm": 0.6725814967283812,
|
|
"learning_rate": 3.934730994728893e-05,
|
|
"loss": 0.4887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44579121470451355,
|
|
"step": 755,
|
|
"valid_targets_mean": 3417.5,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 1.2238325281803544,
|
|
"grad_norm": 0.7197375797510343,
|
|
"learning_rate": 3.932680555157413e-05,
|
|
"loss": 0.4645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5104143619537354,
|
|
"step": 760,
|
|
"valid_targets_mean": 3427.6,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 1.2318840579710144,
|
|
"grad_norm": 0.8543242804532502,
|
|
"learning_rate": 3.9305989552912936e-05,
|
|
"loss": 0.4923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5740408897399902,
|
|
"step": 765,
|
|
"valid_targets_mean": 2875.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 1.2399355877616747,
|
|
"grad_norm": 0.832686925614947,
|
|
"learning_rate": 3.928486228691831e-05,
|
|
"loss": 0.4789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.498767614364624,
|
|
"step": 770,
|
|
"valid_targets_mean": 2933.1,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 1.247987117552335,
|
|
"grad_norm": 0.8162275317686818,
|
|
"learning_rate": 3.926342409422175e-05,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5068527460098267,
|
|
"step": 775,
|
|
"valid_targets_mean": 2699.5,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.2560386473429952,
|
|
"grad_norm": 0.6180614081605837,
|
|
"learning_rate": 3.924167532046773e-05,
|
|
"loss": 0.4594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.426825612783432,
|
|
"step": 780,
|
|
"valid_targets_mean": 3932.2,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 1.2640901771336555,
|
|
"grad_norm": 0.8309975887016753,
|
|
"learning_rate": 3.9219616316308215e-05,
|
|
"loss": 0.475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48292607069015503,
|
|
"step": 785,
|
|
"valid_targets_mean": 2747.4,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 1.2721417069243155,
|
|
"grad_norm": 0.829396143340556,
|
|
"learning_rate": 3.919724743739694e-05,
|
|
"loss": 0.4728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4963272511959076,
|
|
"step": 790,
|
|
"valid_targets_mean": 3029.8,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 1.2801932367149758,
|
|
"grad_norm": 0.7797044579353001,
|
|
"learning_rate": 3.91745690443837e-05,
|
|
"loss": 0.471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4795590937137604,
|
|
"step": 795,
|
|
"valid_targets_mean": 3227.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 1.288244766505636,
|
|
"grad_norm": 0.7154505490752525,
|
|
"learning_rate": 3.915158150290855e-05,
|
|
"loss": 0.4652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4680216312408447,
|
|
"step": 800,
|
|
"valid_targets_mean": 3224.4,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 1.2962962962962963,
|
|
"grad_norm": 0.6366798215464128,
|
|
"learning_rate": 3.912828518359588e-05,
|
|
"loss": 0.4689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4367047846317291,
|
|
"step": 805,
|
|
"valid_targets_mean": 4021.2,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 1.3043478260869565,
|
|
"grad_norm": 0.7245476673199482,
|
|
"learning_rate": 3.910468046204846e-05,
|
|
"loss": 0.4691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41532838344573975,
|
|
"step": 810,
|
|
"valid_targets_mean": 3145.9,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 1.3123993558776168,
|
|
"grad_norm": 0.965081597376756,
|
|
"learning_rate": 3.908076771884139e-05,
|
|
"loss": 0.4461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47352737188339233,
|
|
"step": 815,
|
|
"valid_targets_mean": 2583.8,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 1.320450885668277,
|
|
"grad_norm": 0.7363364439864812,
|
|
"learning_rate": 3.905654733951595e-05,
|
|
"loss": 0.4784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4524630904197693,
|
|
"step": 820,
|
|
"valid_targets_mean": 3109.6,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 1.3285024154589373,
|
|
"grad_norm": 0.7636753657586481,
|
|
"learning_rate": 3.9032019714573366e-05,
|
|
"loss": 0.4786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4579421281814575,
|
|
"step": 825,
|
|
"valid_targets_mean": 2516.6,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 1.3365539452495974,
|
|
"grad_norm": 0.6680520600678994,
|
|
"learning_rate": 3.9007185239468554e-05,
|
|
"loss": 0.456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4591274857521057,
|
|
"step": 830,
|
|
"valid_targets_mean": 4016.8,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 1.3446054750402576,
|
|
"grad_norm": 0.7001951095419365,
|
|
"learning_rate": 3.8982044314603725e-05,
|
|
"loss": 0.4926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4835224151611328,
|
|
"step": 835,
|
|
"valid_targets_mean": 3258.7,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 1.3526570048309179,
|
|
"grad_norm": 0.6740616815716441,
|
|
"learning_rate": 3.8956597345321927e-05,
|
|
"loss": 0.4595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4707792401313782,
|
|
"step": 840,
|
|
"valid_targets_mean": 3767.5,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 1.3607085346215781,
|
|
"grad_norm": 0.678578864364496,
|
|
"learning_rate": 3.893084474190051e-05,
|
|
"loss": 0.4685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4893713593482971,
|
|
"step": 845,
|
|
"valid_targets_mean": 3205.4,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 1.3687600644122382,
|
|
"grad_norm": 0.7052919421243395,
|
|
"learning_rate": 3.890478691954452e-05,
|
|
"loss": 0.465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4288814067840576,
|
|
"step": 850,
|
|
"valid_targets_mean": 3383.2,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 1.3768115942028984,
|
|
"grad_norm": 0.7065538252025207,
|
|
"learning_rate": 3.8878424298379996e-05,
|
|
"loss": 0.4327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43889719247817993,
|
|
"step": 855,
|
|
"valid_targets_mean": 2897.8,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 1.3848631239935587,
|
|
"grad_norm": 0.826059371011481,
|
|
"learning_rate": 3.885175730344718e-05,
|
|
"loss": 0.4279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.489162802696228,
|
|
"step": 860,
|
|
"valid_targets_mean": 2376.7,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 1.392914653784219,
|
|
"grad_norm": 0.6534195388501567,
|
|
"learning_rate": 3.882478636469372e-05,
|
|
"loss": 0.4147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40982362627983093,
|
|
"step": 865,
|
|
"valid_targets_mean": 3869.4,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 1.4009661835748792,
|
|
"grad_norm": 0.5643024903852305,
|
|
"learning_rate": 3.879751191696766e-05,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3592322766780853,
|
|
"step": 870,
|
|
"valid_targets_mean": 4852.6,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 1.4090177133655395,
|
|
"grad_norm": 0.8046362197064678,
|
|
"learning_rate": 3.8769934400010506e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42762547731399536,
|
|
"step": 875,
|
|
"valid_targets_mean": 3675.9,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 1.4170692431561998,
|
|
"grad_norm": 0.6344687303861078,
|
|
"learning_rate": 3.8742054258450085e-05,
|
|
"loss": 0.4197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3801061809062958,
|
|
"step": 880,
|
|
"valid_targets_mean": 3460.8,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 1.42512077294686,
|
|
"grad_norm": 0.6323525098555549,
|
|
"learning_rate": 3.871387194179338e-05,
|
|
"loss": 0.3821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3990853428840637,
|
|
"step": 885,
|
|
"valid_targets_mean": 4104.0,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 1.43317230273752,
|
|
"grad_norm": 0.7366585526555239,
|
|
"learning_rate": 3.868538790441931e-05,
|
|
"loss": 0.3944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39089301228523254,
|
|
"step": 890,
|
|
"valid_targets_mean": 3000.4,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 1.4412238325281803,
|
|
"grad_norm": 0.6980083863459992,
|
|
"learning_rate": 3.865660260557138e-05,
|
|
"loss": 0.3997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45632731914520264,
|
|
"step": 895,
|
|
"valid_targets_mean": 3155.8,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 1.4492753623188406,
|
|
"grad_norm": 0.7473196990397595,
|
|
"learning_rate": 3.8627516509350286e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4090888500213623,
|
|
"step": 900,
|
|
"valid_targets_mean": 2876.1,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 1.4573268921095008,
|
|
"grad_norm": 0.7252851362258242,
|
|
"learning_rate": 3.859813008470644e-05,
|
|
"loss": 0.4055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4397547245025635,
|
|
"step": 905,
|
|
"valid_targets_mean": 3099.4,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 1.465378421900161,
|
|
"grad_norm": 0.8116656536711668,
|
|
"learning_rate": 3.856844380543239e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4384375810623169,
|
|
"step": 910,
|
|
"valid_targets_mean": 2574.1,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 1.4734299516908211,
|
|
"grad_norm": 0.7311144238819602,
|
|
"learning_rate": 3.8538458150155186e-05,
|
|
"loss": 0.4362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4589492678642273,
|
|
"step": 915,
|
|
"valid_targets_mean": 3223.3,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 1.4814814814814814,
|
|
"grad_norm": 0.6970970758853244,
|
|
"learning_rate": 3.850817360232869e-05,
|
|
"loss": 0.3792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39102041721343994,
|
|
"step": 920,
|
|
"valid_targets_mean": 3132.0,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 1.4895330112721417,
|
|
"grad_norm": 0.6439455681571239,
|
|
"learning_rate": 3.8477590650225735e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3645486533641815,
|
|
"step": 925,
|
|
"valid_targets_mean": 3755.3,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 1.497584541062802,
|
|
"grad_norm": 0.7469424687646696,
|
|
"learning_rate": 3.8446709786930305e-05,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36221805214881897,
|
|
"step": 930,
|
|
"valid_targets_mean": 3029.9,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 1.5056360708534622,
|
|
"grad_norm": 0.7074858488388046,
|
|
"learning_rate": 3.841553151032953e-05,
|
|
"loss": 0.3914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38697734475135803,
|
|
"step": 935,
|
|
"valid_targets_mean": 3930.0,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 1.5136876006441224,
|
|
"grad_norm": 0.6408672423143533,
|
|
"learning_rate": 3.8384056323105695e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45191776752471924,
|
|
"step": 940,
|
|
"valid_targets_mean": 3632.3,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 1.5217391304347827,
|
|
"grad_norm": 0.6999987480130047,
|
|
"learning_rate": 3.835228473272814e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40401631593704224,
|
|
"step": 945,
|
|
"valid_targets_mean": 3690.8,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 1.529790660225443,
|
|
"grad_norm": 0.7300521054387631,
|
|
"learning_rate": 3.832021725144506e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40716779232025146,
|
|
"step": 950,
|
|
"valid_targets_mean": 2800.6,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 1.537842190016103,
|
|
"grad_norm": 0.7731778429667089,
|
|
"learning_rate": 3.828785439627523e-05,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43859434127807617,
|
|
"step": 955,
|
|
"valid_targets_mean": 3744.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 1.5458937198067633,
|
|
"grad_norm": 0.6705224246744239,
|
|
"learning_rate": 3.825519668899972e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3778587877750397,
|
|
"step": 960,
|
|
"valid_targets_mean": 3365.1,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 1.5539452495974235,
|
|
"grad_norm": 0.7645910195884874,
|
|
"learning_rate": 3.8222244656153444e-05,
|
|
"loss": 0.3801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3704504668712616,
|
|
"step": 965,
|
|
"valid_targets_mean": 3473.1,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 1.5619967793880838,
|
|
"grad_norm": 0.7395785345258863,
|
|
"learning_rate": 3.818899882901666e-05,
|
|
"loss": 0.3848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4030625820159912,
|
|
"step": 970,
|
|
"valid_targets_mean": 2840.8,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 1.5700483091787438,
|
|
"grad_norm": 0.736229535025106,
|
|
"learning_rate": 3.815545974360644e-05,
|
|
"loss": 0.4024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4085889160633087,
|
|
"step": 975,
|
|
"valid_targets_mean": 2815.1,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 1.578099838969404,
|
|
"grad_norm": 0.7854099739280379,
|
|
"learning_rate": 3.812162794066802e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.379982054233551,
|
|
"step": 980,
|
|
"valid_targets_mean": 3228.2,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 1.5861513687600644,
|
|
"grad_norm": 0.7315662602632311,
|
|
"learning_rate": 3.8087503965666057e-05,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40219753980636597,
|
|
"step": 985,
|
|
"valid_targets_mean": 3174.3,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 1.5942028985507246,
|
|
"grad_norm": 0.6694592764443967,
|
|
"learning_rate": 3.805308836877586e-05,
|
|
"loss": 0.4112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37781035900115967,
|
|
"step": 990,
|
|
"valid_targets_mean": 3438.4,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 1.6022544283413849,
|
|
"grad_norm": 0.7386035260971374,
|
|
"learning_rate": 3.80183817048745e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38043975830078125,
|
|
"step": 995,
|
|
"valid_targets_mean": 2587.6,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 1.6103059581320451,
|
|
"grad_norm": 0.7294675876286597,
|
|
"learning_rate": 3.7983384533531894e-05,
|
|
"loss": 0.395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3978623151779175,
|
|
"step": 1000,
|
|
"valid_targets_mean": 2969.8,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 1.6183574879227054,
|
|
"grad_norm": 0.6439654018799925,
|
|
"learning_rate": 3.7948097419001736e-05,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.402493953704834,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3475.1,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 1.6264090177133657,
|
|
"grad_norm": 0.7469049629085611,
|
|
"learning_rate": 3.7912520930212445e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3579831123352051,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2925.9,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 1.634460547504026,
|
|
"grad_norm": 0.6828460239332308,
|
|
"learning_rate": 3.7876655640757974e-05,
|
|
"loss": 0.3646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37845996022224426,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3369.0,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 1.642512077294686,
|
|
"grad_norm": 0.916703135376311,
|
|
"learning_rate": 3.784050212888857e-05,
|
|
"loss": 0.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3890652358531952,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3307.7,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 1.6505636070853462,
|
|
"grad_norm": 0.8453007862430895,
|
|
"learning_rate": 3.780406097750141e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.393949419260025,
|
|
"step": 1025,
|
|
"valid_targets_mean": 2625.4,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 1.6586151368760065,
|
|
"grad_norm": 0.7030261980183728,
|
|
"learning_rate": 3.776733277413127e-05,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3959161043167114,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3028.0,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.6424805669240223,
|
|
"learning_rate": 3.7730318110941004e-05,
|
|
"loss": 0.3843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3867754340171814,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3680.5,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 1.6747181964573268,
|
|
"grad_norm": 0.6723162629033781,
|
|
"learning_rate": 3.7693017584712013e-05,
|
|
"loss": 0.3793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3691941499710083,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3472.8,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 1.682769726247987,
|
|
"grad_norm": 0.66473478421512,
|
|
"learning_rate": 3.765543179683462e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37193185091018677,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3390.6,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 1.6908212560386473,
|
|
"grad_norm": 0.6827138169001735,
|
|
"learning_rate": 3.7617561353298395e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.407214492559433,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3282.8,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 1.6988727858293076,
|
|
"grad_norm": 0.6054569389155301,
|
|
"learning_rate": 3.7579406864682327e-05,
|
|
"loss": 0.3876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39312368631362915,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4544.4,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 1.7069243156199678,
|
|
"grad_norm": 0.6376030347219761,
|
|
"learning_rate": 3.7540968946145036e-05,
|
|
"loss": 0.3712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37451237440109253,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3384.4,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 1.714975845410628,
|
|
"grad_norm": 0.6211030350353202,
|
|
"learning_rate": 3.750224821741486e-05,
|
|
"loss": 0.3837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38833314180374146,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4230.6,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 1.7230273752012883,
|
|
"grad_norm": 1.3513202092043939,
|
|
"learning_rate": 3.7463245302779795e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3714137673377991,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3359.7,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 1.7310789049919486,
|
|
"grad_norm": 0.6120247412950662,
|
|
"learning_rate": 3.742396083107751e-05,
|
|
"loss": 0.384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36947697401046753,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4327.3,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 1.7391304347826086,
|
|
"grad_norm": 0.6000696270867342,
|
|
"learning_rate": 3.7384395435685166e-05,
|
|
"loss": 0.3725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33209776878356934,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3812.0,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 1.747181964573269,
|
|
"grad_norm": 0.8080033004353664,
|
|
"learning_rate": 3.7344549754509196e-05,
|
|
"loss": 0.3952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4237212538719177,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2473.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 1.7552334943639292,
|
|
"grad_norm": 0.5805164381467139,
|
|
"learning_rate": 3.7304424429975046e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3186456561088562,
|
|
"step": 1090,
|
|
"valid_targets_mean": 7490.2,
|
|
"valid_targets_min": 2721
|
|
},
|
|
{
|
|
"epoch": 1.7632850241545892,
|
|
"grad_norm": 0.4969614961756743,
|
|
"learning_rate": 3.726402010901681e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25411802530288696,
|
|
"step": 1095,
|
|
"valid_targets_mean": 6344.0,
|
|
"valid_targets_min": 3073
|
|
},
|
|
{
|
|
"epoch": 1.7713365539452495,
|
|
"grad_norm": 0.5036039262445117,
|
|
"learning_rate": 3.722333744306678e-05,
|
|
"loss": 0.2202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2208937406539917,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5062.1,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 1.7793880837359097,
|
|
"grad_norm": 0.47890617580766465,
|
|
"learning_rate": 3.7182377088044984e-05,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27038395404815674,
|
|
"step": 1105,
|
|
"valid_targets_mean": 5613.6,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 1.78743961352657,
|
|
"grad_norm": 1.0317443624660205,
|
|
"learning_rate": 3.7141139704348576e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22091877460479736,
|
|
"step": 1110,
|
|
"valid_targets_mean": 6100.1,
|
|
"valid_targets_min": 3494
|
|
},
|
|
{
|
|
"epoch": 1.7954911433172303,
|
|
"grad_norm": 0.4528546728089874,
|
|
"learning_rate": 3.7099625956841175e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19431838393211365,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5711.6,
|
|
"valid_targets_min": 3761
|
|
},
|
|
{
|
|
"epoch": 1.8035426731078905,
|
|
"grad_norm": 0.48395965253856643,
|
|
"learning_rate": 3.70578365148422e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23526781797409058,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5863.1,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 1.8115942028985508,
|
|
"grad_norm": 0.40906986287532093,
|
|
"learning_rate": 3.701577205211604e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22184664011001587,
|
|
"step": 1125,
|
|
"valid_targets_mean": 6090.8,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 1.819645732689211,
|
|
"grad_norm": 0.4479703485506722,
|
|
"learning_rate": 3.697343324686119e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2089478075504303,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5746.5,
|
|
"valid_targets_min": 3334
|
|
},
|
|
{
|
|
"epoch": 1.8276972624798713,
|
|
"grad_norm": 0.5767247918497662,
|
|
"learning_rate": 3.693082078169933e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23523643612861633,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5464.9,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 1.8357487922705316,
|
|
"grad_norm": 0.47967598737718,
|
|
"learning_rate": 3.68879353436643e-05,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2230021208524704,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5526.6,
|
|
"valid_targets_min": 2793
|
|
},
|
|
{
|
|
"epoch": 1.8438003220611916,
|
|
"grad_norm": 0.4741249695731208,
|
|
"learning_rate": 3.684477762419108e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24306999146938324,
|
|
"step": 1145,
|
|
"valid_targets_mean": 6074.9,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 1.8518518518518519,
|
|
"grad_norm": 0.48153493651658874,
|
|
"learning_rate": 3.6801348319104546e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2044723629951477,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5260.6,
|
|
"valid_targets_min": 2292
|
|
},
|
|
{
|
|
"epoch": 1.8599033816425121,
|
|
"grad_norm": 0.584632590412221,
|
|
"learning_rate": 3.675764812860833e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.224099799990654,
|
|
"step": 1155,
|
|
"valid_targets_mean": 5752.9,
|
|
"valid_targets_min": 4204
|
|
},
|
|
{
|
|
"epoch": 1.8679549114331722,
|
|
"grad_norm": 0.5420753765458437,
|
|
"learning_rate": 3.671367775727353e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21649691462516785,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4836.7,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 1.8760064412238324,
|
|
"grad_norm": 0.4760661908355776,
|
|
"learning_rate": 3.666943791402726e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24728357791900635,
|
|
"step": 1165,
|
|
"valid_targets_mean": 6021.0,
|
|
"valid_targets_min": 4439
|
|
},
|
|
{
|
|
"epoch": 1.8840579710144927,
|
|
"grad_norm": 0.6720080745282876,
|
|
"learning_rate": 3.662492931214137e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21869316697120667,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5661.9,
|
|
"valid_targets_min": 3607
|
|
},
|
|
{
|
|
"epoch": 1.892109500805153,
|
|
"grad_norm": 0.4890482756646293,
|
|
"learning_rate": 3.6580152669220784e-05,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22565293312072754,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5420.4,
|
|
"valid_targets_min": 3078
|
|
},
|
|
{
|
|
"epoch": 1.9001610305958132,
|
|
"grad_norm": 0.5189964339664618,
|
|
"learning_rate": 3.6535108707192053e-05,
|
|
"loss": 0.2131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20920491218566895,
|
|
"step": 1180,
|
|
"valid_targets_mean": 5750.4,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 1.9082125603864735,
|
|
"grad_norm": 0.4214170728597196,
|
|
"learning_rate": 3.648979815229167e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20261229574680328,
|
|
"step": 1185,
|
|
"valid_targets_mean": 5996.6,
|
|
"valid_targets_min": 4004
|
|
},
|
|
{
|
|
"epoch": 1.9162640901771337,
|
|
"grad_norm": 0.43481442428355693,
|
|
"learning_rate": 3.644422173505433e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20599091053009033,
|
|
"step": 1190,
|
|
"valid_targets_mean": 6200.9,
|
|
"valid_targets_min": 3148
|
|
},
|
|
{
|
|
"epoch": 1.924315619967794,
|
|
"grad_norm": 0.45638204853677106,
|
|
"learning_rate": 3.639838019030123e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1937769055366516,
|
|
"step": 1195,
|
|
"valid_targets_mean": 6472.3,
|
|
"valid_targets_min": 3581
|
|
},
|
|
{
|
|
"epoch": 1.9323671497584543,
|
|
"grad_norm": 0.4186145666771615,
|
|
"learning_rate": 3.635227425712812e-05,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2861325144767761,
|
|
"step": 1200,
|
|
"valid_targets_mean": 7532.8,
|
|
"valid_targets_min": 3206
|
|
},
|
|
{
|
|
"epoch": 1.9404186795491143,
|
|
"grad_norm": 0.5308258730995378,
|
|
"learning_rate": 3.6305904678893504e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531401515007019,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5952.9,
|
|
"valid_targets_min": 3304
|
|
},
|
|
{
|
|
"epoch": 1.9484702093397746,
|
|
"grad_norm": 0.46680263986526405,
|
|
"learning_rate": 3.6259272203206535e-05,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21227112412452698,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5735.1,
|
|
"valid_targets_min": 3368
|
|
},
|
|
{
|
|
"epoch": 1.9565217391304348,
|
|
"grad_norm": 0.491355991243715,
|
|
"learning_rate": 3.621237758191505e-05,
|
|
"loss": 0.214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2222747504711151,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5506.7,
|
|
"valid_targets_min": 2908
|
|
},
|
|
{
|
|
"epoch": 1.9645732689210949,
|
|
"grad_norm": 0.4862497243022364,
|
|
"learning_rate": 3.616522157109342e-05,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260490208864212,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5777.9,
|
|
"valid_targets_min": 3451
|
|
},
|
|
{
|
|
"epoch": 1.9726247987117551,
|
|
"grad_norm": 0.4051875398639865,
|
|
"learning_rate": 3.6117804931030324e-05,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19149649143218994,
|
|
"step": 1225,
|
|
"valid_targets_mean": 6045.9,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 1.9806763285024154,
|
|
"grad_norm": 0.48575560614945534,
|
|
"learning_rate": 3.607012842621657e-05,
|
|
"loss": 0.2131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21349817514419556,
|
|
"step": 1230,
|
|
"valid_targets_mean": 6113.4,
|
|
"valid_targets_min": 3912
|
|
},
|
|
{
|
|
"epoch": 1.9887278582930756,
|
|
"grad_norm": 0.4334111032895171,
|
|
"learning_rate": 3.602219282533269e-05,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1997205913066864,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5837.0,
|
|
"valid_targets_min": 3528
|
|
},
|
|
{
|
|
"epoch": 1.996779388083736,
|
|
"grad_norm": 0.4425270232705891,
|
|
"learning_rate": 3.597399890123659e-05,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22543010115623474,
|
|
"step": 1240,
|
|
"valid_targets_mean": 6356.6,
|
|
"valid_targets_min": 3725
|
|
},
|
|
{
|
|
"epoch": 2.004830917874396,
|
|
"grad_norm": 0.49848016805395046,
|
|
"learning_rate": 3.5925547430951094e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30809903144836426,
|
|
"step": 1245,
|
|
"valid_targets_mean": 6803.5,
|
|
"valid_targets_min": 3040
|
|
},
|
|
{
|
|
"epoch": 2.0128824476650564,
|
|
"grad_norm": 0.5893052782286002,
|
|
"learning_rate": 3.587683919565136e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27029603719711304,
|
|
"step": 1250,
|
|
"valid_targets_mean": 5952.2,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 2.0209339774557167,
|
|
"grad_norm": 0.5557870949459839,
|
|
"learning_rate": 3.582787498065237e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3327678442001343,
|
|
"step": 1255,
|
|
"valid_targets_mean": 5764.1,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 2.028985507246377,
|
|
"grad_norm": 0.4584733838358168,
|
|
"learning_rate": 3.577865557539621e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30011194944381714,
|
|
"step": 1260,
|
|
"valid_targets_mean": 6953.2,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 2.037037037037037,
|
|
"grad_norm": 1.64628767564844,
|
|
"learning_rate": 3.572918177343935e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30400946736335754,
|
|
"step": 1265,
|
|
"valid_targets_mean": 6372.9,
|
|
"valid_targets_min": 2633
|
|
},
|
|
{
|
|
"epoch": 2.0450885668276975,
|
|
"grad_norm": 0.47363097381646996,
|
|
"learning_rate": 3.567945437243987e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3237455487251282,
|
|
"step": 1270,
|
|
"valid_targets_mean": 6667.6,
|
|
"valid_targets_min": 3230
|
|
},
|
|
{
|
|
"epoch": 2.0531400966183573,
|
|
"grad_norm": 0.6121579597876319,
|
|
"learning_rate": 3.5629474174144564e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3143073320388794,
|
|
"step": 1275,
|
|
"valid_targets_mean": 5952.1,
|
|
"valid_targets_min": 3816
|
|
},
|
|
{
|
|
"epoch": 2.0611916264090175,
|
|
"grad_norm": 0.5122726834409544,
|
|
"learning_rate": 3.5579241984376065e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2792460322380066,
|
|
"step": 1280,
|
|
"valid_targets_mean": 6181.7,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 2.069243156199678,
|
|
"grad_norm": 0.5279318998363034,
|
|
"learning_rate": 3.5528758613019804e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2636644244194031,
|
|
"step": 1285,
|
|
"valid_targets_mean": 5198.9,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 2.077294685990338,
|
|
"grad_norm": 0.4637806256747439,
|
|
"learning_rate": 3.547802487401097e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678183317184448,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5936.2,
|
|
"valid_targets_min": 2526
|
|
},
|
|
{
|
|
"epoch": 2.0853462157809983,
|
|
"grad_norm": 0.44332589750107415,
|
|
"learning_rate": 3.54270415853214e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518324851989746,
|
|
"step": 1295,
|
|
"valid_targets_mean": 6524.1,
|
|
"valid_targets_min": 3439
|
|
},
|
|
{
|
|
"epoch": 2.0933977455716586,
|
|
"grad_norm": 0.5070450536840013,
|
|
"learning_rate": 3.537580956894638e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27724772691726685,
|
|
"step": 1300,
|
|
"valid_targets_mean": 6404.5,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 2.101449275362319,
|
|
"grad_norm": 0.6584385593174998,
|
|
"learning_rate": 3.532432965089138e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2892034649848938,
|
|
"step": 1305,
|
|
"valid_targets_mean": 5932.1,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 2.109500805152979,
|
|
"grad_norm": 0.4944188614557695,
|
|
"learning_rate": 3.527260266115876e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32951074838638306,
|
|
"step": 1310,
|
|
"valid_targets_mean": 6782.6,
|
|
"valid_targets_min": 3784
|
|
},
|
|
{
|
|
"epoch": 2.1175523349436394,
|
|
"grad_norm": 0.4980617413022126,
|
|
"learning_rate": 3.522062943373438e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2504119575023651,
|
|
"step": 1315,
|
|
"valid_targets_mean": 6065.6,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 2.1256038647342996,
|
|
"grad_norm": 0.5112205537054673,
|
|
"learning_rate": 3.516841080657413e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678036689758301,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4948.5,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 2.13365539452496,
|
|
"grad_norm": 0.9291687075910791,
|
|
"learning_rate": 3.511594762159046e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42405465245246887,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2751.3,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 2.14170692431562,
|
|
"grad_norm": 0.8796559274131959,
|
|
"learning_rate": 3.506324072463878e-05,
|
|
"loss": 0.4501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4509429931640625,
|
|
"step": 1330,
|
|
"valid_targets_mean": 2481.8,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 2.14975845410628,
|
|
"grad_norm": 0.7338052792870016,
|
|
"learning_rate": 3.5010290965503826e-05,
|
|
"loss": 0.4473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42610806226730347,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3228.8,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 2.1578099838969402,
|
|
"grad_norm": 0.9099556688067492,
|
|
"learning_rate": 3.495709919788597e-05,
|
|
"loss": 0.4364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4756355881690979,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3290.7,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 2.1658615136876005,
|
|
"grad_norm": 0.7280295738870289,
|
|
"learning_rate": 3.490366627938742e-05,
|
|
"loss": 0.4281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4415823817253113,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3735.3,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 2.1739130434782608,
|
|
"grad_norm": 0.8552032601411625,
|
|
"learning_rate": 3.484999307149846e-05,
|
|
"loss": 0.4093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4188133478164673,
|
|
"step": 1350,
|
|
"valid_targets_mean": 2806.2,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 2.181964573268921,
|
|
"grad_norm": 0.7699287073880586,
|
|
"learning_rate": 3.47960804395835e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38652926683425903,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2498.2,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 2.1900161030595813,
|
|
"grad_norm": 0.6914816395930312,
|
|
"learning_rate": 3.474192925286714e-05,
|
|
"loss": 0.4313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39671099185943604,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3014.8,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 2.1980676328502415,
|
|
"grad_norm": 0.8257531389708893,
|
|
"learning_rate": 3.468754038442017e-05,
|
|
"loss": 0.4392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4281843304634094,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2247.2,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 2.206119162640902,
|
|
"grad_norm": 0.7268372945505573,
|
|
"learning_rate": 3.463291471114548e-05,
|
|
"loss": 0.4095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4062803387641907,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3460.9,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 2.214170692431562,
|
|
"grad_norm": 0.8070422587353386,
|
|
"learning_rate": 3.4578053113763936e-05,
|
|
"loss": 0.4513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4336710572242737,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2609.9,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 1.67186958813965,
|
|
"learning_rate": 3.452295647680014e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40693116188049316,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3187.6,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 2.2302737520128826,
|
|
"grad_norm": 0.7010798669064308,
|
|
"learning_rate": 3.4467625688568245e-05,
|
|
"loss": 0.4279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40231195092201233,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3980.8,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 2.238325281803543,
|
|
"grad_norm": 0.9239851489046476,
|
|
"learning_rate": 3.4412061641157546e-05,
|
|
"loss": 0.4443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.434467613697052,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2712.9,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 2.246376811594203,
|
|
"grad_norm": 0.7442921098581603,
|
|
"learning_rate": 3.435626523041815e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4101715683937073,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3055.6,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 2.2544283413848634,
|
|
"grad_norm": 0.7796509221772335,
|
|
"learning_rate": 3.430023735594653e-05,
|
|
"loss": 0.4248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43132680654525757,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3022.4,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 2.262479871175523,
|
|
"grad_norm": 0.8845650683341554,
|
|
"learning_rate": 3.4243978921071005e-05,
|
|
"loss": 0.4115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4295615553855896,
|
|
"step": 1405,
|
|
"valid_targets_mean": 2932.4,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.2705314009661834,
|
|
"grad_norm": 0.7654694935136631,
|
|
"learning_rate": 3.418749083283719e-05,
|
|
"loss": 0.4139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41558438539505005,
|
|
"step": 1410,
|
|
"valid_targets_mean": 2893.2,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.2785829307568437,
|
|
"grad_norm": 0.7620855439193758,
|
|
"learning_rate": 3.413077400199334e-05,
|
|
"loss": 0.4227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3939434885978699,
|
|
"step": 1415,
|
|
"valid_targets_mean": 2413.7,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 2.286634460547504,
|
|
"grad_norm": 0.6909091800489532,
|
|
"learning_rate": 3.407382934297571e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3776891231536865,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3168.1,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 2.2946859903381642,
|
|
"grad_norm": 0.7978903184807417,
|
|
"learning_rate": 3.4016657773893785e-05,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46548742055892944,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3087.7,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 2.3027375201288245,
|
|
"grad_norm": 0.8248703939493464,
|
|
"learning_rate": 3.3959260216515495e-05,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41521650552749634,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2646.4,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 2.3107890499194848,
|
|
"grad_norm": 0.7311952260051283,
|
|
"learning_rate": 3.3901637596252325e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36918574571609497,
|
|
"step": 1435,
|
|
"valid_targets_mean": 2969.6,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 2.318840579710145,
|
|
"grad_norm": 0.7976719684765547,
|
|
"learning_rate": 3.384379084214443e-05,
|
|
"loss": 0.432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4152054190635681,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3140.2,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 2.3268921095008053,
|
|
"grad_norm": 0.7964318060668145,
|
|
"learning_rate": 3.378572088684562e-05,
|
|
"loss": 0.4245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45041656494140625,
|
|
"step": 1445,
|
|
"valid_targets_mean": 2786.8,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.3349436392914655,
|
|
"grad_norm": 0.779177762219085,
|
|
"learning_rate": 3.372742866660836e-05,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42900824546813965,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3073.4,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 2.342995169082126,
|
|
"grad_norm": 0.8252841045839578,
|
|
"learning_rate": 3.3668915121268636e-05,
|
|
"loss": 0.4356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42234331369400024,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2718.1,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 2.3510466988727856,
|
|
"grad_norm": 0.7559789239050331,
|
|
"learning_rate": 3.361018119423085e-05,
|
|
"loss": 0.4107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3902217745780945,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3051.3,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 2.359098228663446,
|
|
"grad_norm": 0.7118532524697113,
|
|
"learning_rate": 3.3551227832452555e-05,
|
|
"loss": 0.4167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4386996924877167,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3645.1,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.367149758454106,
|
|
"grad_norm": 0.7314007089106499,
|
|
"learning_rate": 3.3492055986429235e-05,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3893301486968994,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3020.9,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 2.3752012882447664,
|
|
"grad_norm": 0.7883519271357176,
|
|
"learning_rate": 3.3432666610178936e-05,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3778621554374695,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2636.9,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 2.3832528180354267,
|
|
"grad_norm": 0.5950705396292623,
|
|
"learning_rate": 3.3373060661226944e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3533982038497925,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4681.1,
|
|
"valid_targets_min": 1703
|
|
},
|
|
{
|
|
"epoch": 2.391304347826087,
|
|
"grad_norm": 0.6284697509500937,
|
|
"learning_rate": 3.331323910059027e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36771711707115173,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4288.2,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 2.399355877616747,
|
|
"grad_norm": 0.6241002825603361,
|
|
"learning_rate": 3.3253202892762244e-05,
|
|
"loss": 0.3774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35911619663238525,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4145.6,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 2.4074074074074074,
|
|
"grad_norm": 0.7195975803486768,
|
|
"learning_rate": 3.319295300569686e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3680953085422516,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3169.7,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 2.4154589371980677,
|
|
"grad_norm": 0.6678894669137365,
|
|
"learning_rate": 3.3132490410793294e-05,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40425896644592285,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3801.9,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 2.423510466988728,
|
|
"grad_norm": 0.67381554336476,
|
|
"learning_rate": 3.3071816082880115e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3536141514778137,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3517.1,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 2.4315619967793882,
|
|
"grad_norm": 0.5844843611773384,
|
|
"learning_rate": 3.3010931000199674e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3546409606933594,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4191.2,
|
|
"valid_targets_min": 2286
|
|
},
|
|
{
|
|
"epoch": 2.4396135265700485,
|
|
"grad_norm": 0.6086706004595346,
|
|
"learning_rate": 3.2949836144392256e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32763075828552246,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4189.8,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 2.4476650563607087,
|
|
"grad_norm": 0.8158802293533742,
|
|
"learning_rate": 3.28885325004803e-05,
|
|
"loss": 0.3678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35021084547042847,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3081.1,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 2.455716586151369,
|
|
"grad_norm": 0.6124714609883762,
|
|
"learning_rate": 3.282702105685251e-05,
|
|
"loss": 0.3543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33643293380737305,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3844.6,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 2.463768115942029,
|
|
"grad_norm": 0.6419535517935,
|
|
"learning_rate": 3.2765302805247885e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.324346661567688,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3268.8,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 2.471819645732689,
|
|
"grad_norm": 0.707763033567243,
|
|
"learning_rate": 3.270337874073977e-05,
|
|
"loss": 0.3842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39264971017837524,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3251.4,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 2.4798711755233493,
|
|
"grad_norm": 0.6517860581223903,
|
|
"learning_rate": 3.264124986171981e-05,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30974501371383667,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3231.7,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 2.4879227053140096,
|
|
"grad_norm": 0.6861932870077228,
|
|
"learning_rate": 3.2578917169881816e-05,
|
|
"loss": 0.3679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3639856278896332,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3158.2,
|
|
"valid_targets_min": 373
|
|
},
|
|
{
|
|
"epoch": 2.49597423510467,
|
|
"grad_norm": 0.5800378877703327,
|
|
"learning_rate": 3.2516381670205665e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33077770471572876,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4750.0,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 2.50402576489533,
|
|
"grad_norm": 0.6790879537056581,
|
|
"learning_rate": 3.245364437094105e-05,
|
|
"loss": 0.3436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36615103483200073,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3425.2,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 2.5120772946859904,
|
|
"grad_norm": 0.7532708248786624,
|
|
"learning_rate": 3.239070628359126e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3348079025745392,
|
|
"step": 1560,
|
|
"valid_targets_mean": 2773.3,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 2.5201288244766507,
|
|
"grad_norm": 0.6602490038924721,
|
|
"learning_rate": 3.232756842289685e-05,
|
|
"loss": 0.3609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3299705982208252,
|
|
"step": 1565,
|
|
"valid_targets_mean": 4059.8,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 2.528180354267311,
|
|
"grad_norm": 0.635105782473463,
|
|
"learning_rate": 3.2264231806819286e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3414621353149414,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3796.5,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 2.536231884057971,
|
|
"grad_norm": 0.6634194373750147,
|
|
"learning_rate": 3.220069745652456e-05,
|
|
"loss": 0.3435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3682880997657776,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3359.2,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 2.544283413848631,
|
|
"grad_norm": 0.6748832386881813,
|
|
"learning_rate": 3.213696639636666e-05,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37244677543640137,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3234.9,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 2.5523349436392913,
|
|
"grad_norm": 0.6147417333490566,
|
|
"learning_rate": 3.207303965387114e-05,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844334840774536,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3613.4,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 2.5603864734299515,
|
|
"grad_norm": 0.6644115534295828,
|
|
"learning_rate": 3.200891825971846e-05,
|
|
"loss": 0.341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34174948930740356,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3520.8,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 2.5684380032206118,
|
|
"grad_norm": 0.6492076929001203,
|
|
"learning_rate": 3.194460324772746e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34135136008262634,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3454.2,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 2.576489533011272,
|
|
"grad_norm": 0.6262401028348537,
|
|
"learning_rate": 3.188009565483861e-05,
|
|
"loss": 0.3389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37102776765823364,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3877.2,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 2.5845410628019323,
|
|
"grad_norm": 0.6690368914483794,
|
|
"learning_rate": 3.1815396521097376e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30660367012023926,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3103.3,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 2.5925925925925926,
|
|
"grad_norm": 0.6782490130071587,
|
|
"learning_rate": 3.1750506889637366e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212292492389679,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3301.8,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 2.600644122383253,
|
|
"grad_norm": 0.6136219154267422,
|
|
"learning_rate": 3.1685427806663574e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3197278678417206,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3827.2,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 2.608695652173913,
|
|
"grad_norm": 0.7590245604099114,
|
|
"learning_rate": 3.1620160321435475e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41536277532577515,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3155.0,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 2.6167471819645733,
|
|
"grad_norm": 0.6668069970888741,
|
|
"learning_rate": 3.155470548625014e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34977424144744873,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3495.8,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 2.6247987117552336,
|
|
"grad_norm": 0.6443781380528575,
|
|
"learning_rate": 3.1489064356425235e-05,
|
|
"loss": 0.3452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.348181813955307,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3598.6,
|
|
"valid_targets_min": 1663
|
|
},
|
|
{
|
|
"epoch": 2.632850241545894,
|
|
"grad_norm": 0.6477230031259141,
|
|
"learning_rate": 3.142323799028204e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34765568375587463,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3781.5,
|
|
"valid_targets_min": 1585
|
|
},
|
|
{
|
|
"epoch": 2.640901771336554,
|
|
"grad_norm": 0.6757898090196188,
|
|
"learning_rate": 3.135722744912836e-05,
|
|
"loss": 0.3501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38952842354774475,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4125.3,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 2.6489533011272144,
|
|
"grad_norm": 0.6699916964095252,
|
|
"learning_rate": 3.129103379724143e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37032926082611084,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3489.9,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 2.6570048309178746,
|
|
"grad_norm": 0.639493421870476,
|
|
"learning_rate": 3.122465810185075e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33100080490112305,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3673.7,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 2.6650563607085345,
|
|
"grad_norm": 0.5871950095800859,
|
|
"learning_rate": 3.1158101433120863e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34115904569625854,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4327.2,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 2.6731078904991947,
|
|
"grad_norm": 0.6666026792346776,
|
|
"learning_rate": 3.1091364864134136e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39104804396629333,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3398.6,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 2.681159420289855,
|
|
"grad_norm": 0.6426036994837567,
|
|
"learning_rate": 3.102444947087342e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34018710255622864,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3618.1,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 2.6892109500805152,
|
|
"grad_norm": 0.7200889886232774,
|
|
"learning_rate": 3.0957356332204745e-05,
|
|
"loss": 0.3624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4483473300933838,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3554.4,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 2.6972624798711755,
|
|
"grad_norm": 0.6807458182499385,
|
|
"learning_rate": 3.089008652985989e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31604230403900146,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3257.0,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 2.7053140096618358,
|
|
"grad_norm": 0.7052899877260982,
|
|
"learning_rate": 3.082264114841892e-05,
|
|
"loss": 0.3374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3132331371307373,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2975.9,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 2.713365539452496,
|
|
"grad_norm": 0.6354810637538505,
|
|
"learning_rate": 3.07550212752928e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128201365470886,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3791.3,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 2.7214170692431563,
|
|
"grad_norm": 0.5906889884547679,
|
|
"learning_rate": 3.068722800070574e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894616723060608,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3958.0,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 2.7294685990338166,
|
|
"grad_norm": 0.643609035593482,
|
|
"learning_rate": 3.0619262417677695e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34073030948638916,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3950.0,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.7375201288244764,
|
|
"grad_norm": 0.7282369023618399,
|
|
"learning_rate": 3.055112562200673e-05,
|
|
"loss": 0.3436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3716239631175995,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3736.1,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 2.7455716586151366,
|
|
"grad_norm": 0.7474926314066599,
|
|
"learning_rate": 3.0482818712251318e-05,
|
|
"loss": 0.3402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35656508803367615,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3098.4,
|
|
"valid_targets_min": 1529
|
|
},
|
|
{
|
|
"epoch": 2.753623188405797,
|
|
"grad_norm": 0.5882343918140231,
|
|
"learning_rate": 3.0414342789712675e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22480350732803345,
|
|
"step": 1710,
|
|
"valid_targets_mean": 6031.0,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 2.761674718196457,
|
|
"grad_norm": 0.7860705058373512,
|
|
"learning_rate": 3.034569895841699e-05,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21748200058937073,
|
|
"step": 1715,
|
|
"valid_targets_mean": 5359.5,
|
|
"valid_targets_min": 3428
|
|
},
|
|
{
|
|
"epoch": 2.7697262479871174,
|
|
"grad_norm": 0.46882998249925517,
|
|
"learning_rate": 3.0276888325097583e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.211460679769516,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5453.4,
|
|
"valid_targets_min": 2625
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 0.5396421101555618,
|
|
"learning_rate": 3.020791199917713e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22912117838859558,
|
|
"step": 1725,
|
|
"valid_targets_mean": 6407.3,
|
|
"valid_targets_min": 4226
|
|
},
|
|
{
|
|
"epoch": 2.785829307568438,
|
|
"grad_norm": 0.44331928629182393,
|
|
"learning_rate": 3.0138771092749722e-05,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19119247794151306,
|
|
"step": 1730,
|
|
"valid_targets_mean": 6119.8,
|
|
"valid_targets_min": 2891
|
|
},
|
|
{
|
|
"epoch": 2.793880837359098,
|
|
"grad_norm": 0.4549848502323358,
|
|
"learning_rate": 3.006946672056297e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18082711100578308,
|
|
"step": 1735,
|
|
"valid_targets_mean": 5309.0,
|
|
"valid_targets_min": 3122
|
|
},
|
|
{
|
|
"epoch": 2.8019323671497585,
|
|
"grad_norm": 0.5088412843130042,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18192598223686218,
|
|
"step": 1740,
|
|
"valid_targets_mean": 5206.8,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 2.8099838969404187,
|
|
"grad_norm": 0.4533466677445726,
|
|
"learning_rate": 2.993037205106147e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22841483354568481,
|
|
"step": 1745,
|
|
"valid_targets_mean": 6110.2,
|
|
"valid_targets_min": 3433
|
|
},
|
|
{
|
|
"epoch": 2.818035426731079,
|
|
"grad_norm": 0.42286915041535666,
|
|
"learning_rate": 2.9860583996347495e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16705384850502014,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5580.4,
|
|
"valid_targets_min": 2346
|
|
},
|
|
{
|
|
"epoch": 2.8260869565217392,
|
|
"grad_norm": 0.46308406661472623,
|
|
"learning_rate": 2.9790636961039524e-05,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20052894949913025,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5809.4,
|
|
"valid_targets_min": 2808
|
|
},
|
|
{
|
|
"epoch": 2.8341384863123995,
|
|
"grad_norm": 0.4973033226604493,
|
|
"learning_rate": 2.9720532072882268e-05,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19987952709197998,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5466.4,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 2.8421900161030598,
|
|
"grad_norm": 0.49364706625593213,
|
|
"learning_rate": 2.965027046216544e-05,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24134349822998047,
|
|
"step": 1765,
|
|
"valid_targets_mean": 6193.4,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 2.85024154589372,
|
|
"grad_norm": 0.44774305114424784,
|
|
"learning_rate": 2.9579853261705573e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1811220645904541,
|
|
"step": 1770,
|
|
"valid_targets_mean": 5857.2,
|
|
"valid_targets_min": 3369
|
|
},
|
|
{
|
|
"epoch": 2.8582930756843803,
|
|
"grad_norm": 0.5084367346859436,
|
|
"learning_rate": 2.950928160682775e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16717293858528137,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4669.6,
|
|
"valid_targets_min": 2984
|
|
},
|
|
{
|
|
"epoch": 2.86634460547504,
|
|
"grad_norm": 0.5258355536138191,
|
|
"learning_rate": 2.943855663534731e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21475860476493835,
|
|
"step": 1780,
|
|
"valid_targets_mean": 5739.6,
|
|
"valid_targets_min": 3163
|
|
},
|
|
{
|
|
"epoch": 2.8743961352657004,
|
|
"grad_norm": 0.624047290830488,
|
|
"learning_rate": 2.9367679487551473e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17102691531181335,
|
|
"step": 1785,
|
|
"valid_targets_mean": 5951.2,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 2.8824476650563606,
|
|
"grad_norm": 0.48242204698030056,
|
|
"learning_rate": 2.929665130618098e-05,
|
|
"loss": 0.2024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20739901065826416,
|
|
"step": 1790,
|
|
"valid_targets_mean": 5372.6,
|
|
"valid_targets_min": 2692
|
|
},
|
|
{
|
|
"epoch": 2.890499194847021,
|
|
"grad_norm": 0.4326608763924011,
|
|
"learning_rate": 2.9225473236411655e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17670348286628723,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5611.9,
|
|
"valid_targets_min": 3302
|
|
},
|
|
{
|
|
"epoch": 2.898550724637681,
|
|
"grad_norm": 0.4674400578356549,
|
|
"learning_rate": 2.915414642583596e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.192458838224411,
|
|
"step": 1800,
|
|
"valid_targets_mean": 5552.4,
|
|
"valid_targets_min": 3435
|
|
},
|
|
{
|
|
"epoch": 2.9066022544283414,
|
|
"grad_norm": 0.46590415009442393,
|
|
"learning_rate": 2.9082672024444485e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20876693725585938,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5709.1,
|
|
"valid_targets_min": 3713
|
|
},
|
|
{
|
|
"epoch": 2.9146537842190017,
|
|
"grad_norm": 0.55521296635976,
|
|
"learning_rate": 2.901105118460737e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20165735483169556,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5800.1,
|
|
"valid_targets_min": 3378
|
|
},
|
|
{
|
|
"epoch": 2.922705314009662,
|
|
"grad_norm": 0.44671622462161964,
|
|
"learning_rate": 2.8939285061055807e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20931756496429443,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5923.8,
|
|
"valid_targets_min": 3662
|
|
},
|
|
{
|
|
"epoch": 2.930756843800322,
|
|
"grad_norm": 0.45724838211116503,
|
|
"learning_rate": 2.8867374810863325e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18910741806030273,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5651.0,
|
|
"valid_targets_min": 3365
|
|
},
|
|
{
|
|
"epoch": 2.938808373590982,
|
|
"grad_norm": 0.45924176560852137,
|
|
"learning_rate": 2.8795321593427227e-05,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2153066247701645,
|
|
"step": 1825,
|
|
"valid_targets_mean": 6766.6,
|
|
"valid_targets_min": 3372
|
|
},
|
|
{
|
|
"epoch": 2.9468599033816423,
|
|
"grad_norm": 0.4602179071328878,
|
|
"learning_rate": 2.8723126570449813e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17615215480327606,
|
|
"step": 1830,
|
|
"valid_targets_mean": 5546.4,
|
|
"valid_targets_min": 3374
|
|
},
|
|
{
|
|
"epoch": 2.9549114331723025,
|
|
"grad_norm": 0.4757535168677279,
|
|
"learning_rate": 2.8650790905919724e-05,
|
|
"loss": 0.1903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18606898188591003,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5870.6,
|
|
"valid_targets_min": 3626
|
|
},
|
|
{
|
|
"epoch": 2.962962962962963,
|
|
"grad_norm": 0.4550197511497243,
|
|
"learning_rate": 2.8578315766093133e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20954301953315735,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5978.6,
|
|
"valid_targets_min": 3695
|
|
},
|
|
{
|
|
"epoch": 2.971014492753623,
|
|
"grad_norm": 0.48815632704334255,
|
|
"learning_rate": 2.850570231947493e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20100757479667664,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5347.4,
|
|
"valid_targets_min": 3463
|
|
},
|
|
{
|
|
"epoch": 2.9790660225442833,
|
|
"grad_norm": 0.45140508763909315,
|
|
"learning_rate": 2.8432951736799933e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1854209005832672,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5264.8,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 2.9871175523349436,
|
|
"grad_norm": 0.47657839693030885,
|
|
"learning_rate": 2.8360065191013967e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1967577189207077,
|
|
"step": 1855,
|
|
"valid_targets_mean": 5723.9,
|
|
"valid_targets_min": 3017
|
|
},
|
|
{
|
|
"epoch": 2.995169082125604,
|
|
"grad_norm": 0.4892874249873267,
|
|
"learning_rate": 2.8287043857254957e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1871507167816162,
|
|
"step": 1860,
|
|
"valid_targets_mean": 6460.0,
|
|
"valid_targets_min": 2935
|
|
},
|
|
{
|
|
"epoch": 3.003220611916264,
|
|
"grad_norm": 0.5527497946640381,
|
|
"learning_rate": 2.8213888912834026e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.258331298828125,
|
|
"step": 1865,
|
|
"valid_targets_mean": 5212.3,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 3.0112721417069244,
|
|
"grad_norm": 0.5830002451186027,
|
|
"learning_rate": 2.814060153721644e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2999268174171448,
|
|
"step": 1870,
|
|
"valid_targets_mean": 6001.2,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 3.0193236714975846,
|
|
"grad_norm": 0.4500521340884674,
|
|
"learning_rate": 2.8067182912002663e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2809280753135681,
|
|
"step": 1875,
|
|
"valid_targets_mean": 6695.0,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 3.027375201288245,
|
|
"grad_norm": 0.4652104269522271,
|
|
"learning_rate": 2.7993634220909254e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22316525876522064,
|
|
"step": 1880,
|
|
"valid_targets_mean": 5460.7,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 3.035426731078905,
|
|
"grad_norm": 0.46090061714203895,
|
|
"learning_rate": 2.7919956649749826e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2710314095020294,
|
|
"step": 1885,
|
|
"valid_targets_mean": 6763.2,
|
|
"valid_targets_min": 3435
|
|
},
|
|
{
|
|
"epoch": 3.0434782608695654,
|
|
"grad_norm": 0.4611138651345071,
|
|
"learning_rate": 2.784615138641588e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.242034912109375,
|
|
"step": 1890,
|
|
"valid_targets_mean": 6750.6,
|
|
"valid_targets_min": 3554
|
|
},
|
|
{
|
|
"epoch": 3.0515297906602252,
|
|
"grad_norm": 0.49645873562039455,
|
|
"learning_rate": 2.7772219620857685e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28464362025260925,
|
|
"step": 1895,
|
|
"valid_targets_mean": 6853.2,
|
|
"valid_targets_min": 4715
|
|
},
|
|
{
|
|
"epoch": 3.0595813204508855,
|
|
"grad_norm": 0.4950622875309601,
|
|
"learning_rate": 2.769816254506509e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540530562400818,
|
|
"step": 1900,
|
|
"valid_targets_mean": 5465.6,
|
|
"valid_targets_min": 2485
|
|
},
|
|
{
|
|
"epoch": 3.0676328502415457,
|
|
"grad_norm": 0.4737619492864805,
|
|
"learning_rate": 2.76239813530483e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2536058723926544,
|
|
"step": 1905,
|
|
"valid_targets_mean": 7040.4,
|
|
"valid_targets_min": 3642
|
|
},
|
|
{
|
|
"epoch": 3.075684380032206,
|
|
"grad_norm": 0.43563346257793917,
|
|
"learning_rate": 2.7549677240818628e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25332552194595337,
|
|
"step": 1910,
|
|
"valid_targets_mean": 6890.4,
|
|
"valid_targets_min": 3408
|
|
},
|
|
{
|
|
"epoch": 3.0837359098228663,
|
|
"grad_norm": 0.5538637159995918,
|
|
"learning_rate": 2.7475251406369197e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27943116426467896,
|
|
"step": 1915,
|
|
"valid_targets_mean": 5696.9,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 3.0917874396135265,
|
|
"grad_norm": 0.502755804416658,
|
|
"learning_rate": 2.740070504965565e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2628275156021118,
|
|
"step": 1920,
|
|
"valid_targets_mean": 5780.6,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 3.099838969404187,
|
|
"grad_norm": 0.46642134348041764,
|
|
"learning_rate": 2.7326039372576782e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26721107959747314,
|
|
"step": 1925,
|
|
"valid_targets_mean": 5684.6,
|
|
"valid_targets_min": 2267
|
|
},
|
|
{
|
|
"epoch": 3.107890499194847,
|
|
"grad_norm": 0.4761056162555204,
|
|
"learning_rate": 2.7251255578955186e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24499773979187012,
|
|
"step": 1930,
|
|
"valid_targets_mean": 6097.4,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 3.1159420289855073,
|
|
"grad_norm": 0.4574904350207186,
|
|
"learning_rate": 2.7176354874517805e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528035044670105,
|
|
"step": 1935,
|
|
"valid_targets_mean": 5954.2,
|
|
"valid_targets_min": 3071
|
|
},
|
|
{
|
|
"epoch": 3.1239935587761676,
|
|
"grad_norm": 0.5257344524597956,
|
|
"learning_rate": 2.7101338466876542e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25541383028030396,
|
|
"step": 1940,
|
|
"valid_targets_mean": 5115.2,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 3.132045088566828,
|
|
"grad_norm": 0.9148021670271776,
|
|
"learning_rate": 2.702620756550874e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4044179320335388,
|
|
"step": 1945,
|
|
"valid_targets_mean": 2789.6,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 3.140096618357488,
|
|
"grad_norm": 0.8933463865740449,
|
|
"learning_rate": 2.6950963381737728e-05,
|
|
"loss": 0.4008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43537747859954834,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3042.1,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 3.148148148148148,
|
|
"grad_norm": 0.7586492212375334,
|
|
"learning_rate": 2.687560712871325e-05,
|
|
"loss": 0.4051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40004706382751465,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3377.2,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 3.156199677938808,
|
|
"grad_norm": 0.8200189711325815,
|
|
"learning_rate": 2.6800140021391933e-05,
|
|
"loss": 0.3918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42270979285240173,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3314.4,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 3.1642512077294684,
|
|
"grad_norm": 0.7884155012819621,
|
|
"learning_rate": 2.6724563276517697e-05,
|
|
"loss": 0.3936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36296790838241577,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3112.6,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 3.1723027375201287,
|
|
"grad_norm": 0.6764869788258896,
|
|
"learning_rate": 2.6648878112602115e-05,
|
|
"loss": 0.3728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35345786809921265,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3572.9,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 3.180354267310789,
|
|
"grad_norm": 0.7225388514917793,
|
|
"learning_rate": 2.6573085749904784e-05,
|
|
"loss": 0.3774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39635008573532104,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3774.3,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 3.1884057971014492,
|
|
"grad_norm": 0.8231423436666249,
|
|
"learning_rate": 2.6497187410413676e-05,
|
|
"loss": 0.3828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3863440752029419,
|
|
"step": 1980,
|
|
"valid_targets_mean": 2317.6,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 3.1964573268921095,
|
|
"grad_norm": 0.7285170435595408,
|
|
"learning_rate": 2.642118431782537e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3951292335987091,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3185.3,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 3.2045088566827697,
|
|
"grad_norm": 0.7017080619872773,
|
|
"learning_rate": 2.6345077697525394e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.347123384475708,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3075.4,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 3.21256038647343,
|
|
"grad_norm": 0.8231794679729749,
|
|
"learning_rate": 2.6268868776568416e-05,
|
|
"loss": 0.4001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35584574937820435,
|
|
"step": 1995,
|
|
"valid_targets_mean": 2823.5,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 3.2206119162640903,
|
|
"grad_norm": 0.7683051537663695,
|
|
"learning_rate": 2.619255878365849e-05,
|
|
"loss": 0.3626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33899423480033875,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3121.4,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 3.2286634460547505,
|
|
"grad_norm": 1.0228623510526746,
|
|
"learning_rate": 2.6116148949129237e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3965111970901489,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3859.0,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 3.236714975845411,
|
|
"grad_norm": 0.8910998328193267,
|
|
"learning_rate": 2.603964050492401e-05,
|
|
"loss": 0.3947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4256768822669983,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2689.9,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 3.244766505636071,
|
|
"grad_norm": 0.7345012383668745,
|
|
"learning_rate": 2.5963034684576024e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3804903030395508,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3814.0,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 3.2528180354267313,
|
|
"grad_norm": 0.7678805707006796,
|
|
"learning_rate": 2.5886332723188484e-05,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36300283670425415,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3139.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 3.260869565217391,
|
|
"grad_norm": 0.8103254411230125,
|
|
"learning_rate": 2.5809535857414637e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3861234784126282,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3032.6,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 3.2689210950080514,
|
|
"grad_norm": 0.8485120319760017,
|
|
"learning_rate": 2.573264532543788e-05,
|
|
"loss": 0.3716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.399763822555542,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2960.4,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 3.2769726247987117,
|
|
"grad_norm": 0.7941028816750529,
|
|
"learning_rate": 2.5655662366951778e-05,
|
|
"loss": 0.3836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38156741857528687,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3008.5,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 3.285024154589372,
|
|
"grad_norm": 0.7517179718878059,
|
|
"learning_rate": 2.557858822314007e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3794868588447571,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2940.0,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 3.293075684380032,
|
|
"grad_norm": 0.7474671950243801,
|
|
"learning_rate": 2.5501424136656635e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3578556180000305,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2994.5,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 3.3011272141706924,
|
|
"grad_norm": 0.8073459873948605,
|
|
"learning_rate": 2.5424171351605518e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35137873888015747,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2981.6,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 3.3091787439613527,
|
|
"grad_norm": 0.7363545984797931,
|
|
"learning_rate": 2.5346831113520827e-05,
|
|
"loss": 0.3558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33045268058776855,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2965.2,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 3.317230273752013,
|
|
"grad_norm": 0.8551622649502074,
|
|
"learning_rate": 2.526940466934664e-05,
|
|
"loss": 0.3791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4106612801551819,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2819.8,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 3.325281803542673,
|
|
"grad_norm": 0.8197438218947848,
|
|
"learning_rate": 2.5191893267416964e-05,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35730135440826416,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3237.6,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.8675787777451592,
|
|
"learning_rate": 2.5114298157435526e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3833295404911041,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3031.8,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 3.3413848631239937,
|
|
"grad_norm": 0.7989238817393547,
|
|
"learning_rate": 2.503662059045568e-05,
|
|
"loss": 0.3959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39606934785842896,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2751.6,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 3.3494363929146536,
|
|
"grad_norm": 0.7649931205299405,
|
|
"learning_rate": 2.4958861818860217e-05,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3948572874069214,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2998.9,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 3.357487922705314,
|
|
"grad_norm": 0.6957699032853977,
|
|
"learning_rate": 2.488102309634119e-05,
|
|
"loss": 0.3733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36002251505851746,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3551.8,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 3.365539452495974,
|
|
"grad_norm": 0.8172302341279781,
|
|
"learning_rate": 2.480310567787967e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3608362078666687,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2457.2,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 3.3735909822866343,
|
|
"grad_norm": 0.6603911880036989,
|
|
"learning_rate": 2.4725110819725542e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34838154911994934,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4338.5,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 3.3816425120772946,
|
|
"grad_norm": 0.7013111479678409,
|
|
"learning_rate": 2.464703977937723e-05,
|
|
"loss": 0.339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3159921169281006,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3443.4,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 3.389694041867955,
|
|
"grad_norm": 0.7271690127494458,
|
|
"learning_rate": 2.456889381556144e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3470936417579651,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3482.4,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 3.397745571658615,
|
|
"grad_norm": 0.6846909480553407,
|
|
"learning_rate": 2.449067418821285e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3760640025138855,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3535.4,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 3.4057971014492754,
|
|
"grad_norm": 0.6498807742334415,
|
|
"learning_rate": 2.4412382158453807e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31845319271087646,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3810.8,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 3.4138486312399356,
|
|
"grad_norm": 0.7280328109327153,
|
|
"learning_rate": 2.4334018988573983e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33589398860931396,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3309.2,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 3.421900161030596,
|
|
"grad_norm": 0.5931265791786551,
|
|
"learning_rate": 2.425558594201004e-05,
|
|
"loss": 0.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26291149854660034,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4325.1,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 3.429951690821256,
|
|
"grad_norm": 0.7111318648584927,
|
|
"learning_rate": 2.417708428332525e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34463876485824585,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3129.6,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 3.4380032206119164,
|
|
"grad_norm": 0.6250487281091418,
|
|
"learning_rate": 2.4098515278189097e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3047589063644409,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4161.9,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 3.4460547504025767,
|
|
"grad_norm": 0.626198180719724,
|
|
"learning_rate": 2.4019880193356902e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3424261808395386,
|
|
"step": 2140,
|
|
"valid_targets_mean": 3786.2,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 3.454106280193237,
|
|
"grad_norm": 0.6155081722038326,
|
|
"learning_rate": 2.3941180296649348e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31505197286605835,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4313.6,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 3.4621578099838968,
|
|
"grad_norm": 0.6956667846707119,
|
|
"learning_rate": 2.3862416856932087e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32704436779022217,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3571.6,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 3.470209339774557,
|
|
"grad_norm": 0.7061438709341965,
|
|
"learning_rate": 2.378359114409527e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3402191996574402,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3576.9,
|
|
"valid_targets_min": 1717
|
|
},
|
|
{
|
|
"epoch": 3.4782608695652173,
|
|
"grad_norm": 0.6733367985439651,
|
|
"learning_rate": 2.370470442903306e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29829442501068115,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3445.9,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 3.4863123993558776,
|
|
"grad_norm": 0.7199769182800366,
|
|
"learning_rate": 2.362575798362315e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3266036808490753,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3381.9,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 3.494363929146538,
|
|
"grad_norm": 0.762977409147056,
|
|
"learning_rate": 2.3546753080706242e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32624316215515137,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3398.4,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 3.502415458937198,
|
|
"grad_norm": 0.7356969210292927,
|
|
"learning_rate": 2.346769099406557e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3401956260204315,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3281.4,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 3.5104669887278583,
|
|
"grad_norm": 0.6884173861250173,
|
|
"learning_rate": 2.33885729984063e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33478766679763794,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3259.2,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 3.5185185185185186,
|
|
"grad_norm": 0.6833192926719988,
|
|
"learning_rate": 2.3309400369335033e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29600557684898376,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3833.4,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 3.526570048309179,
|
|
"grad_norm": 0.6951855497662062,
|
|
"learning_rate": 2.3230174383339196e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3087206780910492,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3652.0,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 3.534621578099839,
|
|
"grad_norm": 0.6613560062519789,
|
|
"learning_rate": 2.3150896317766505e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983492612838745,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3762.8,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 3.542673107890499,
|
|
"grad_norm": 0.68550903113913,
|
|
"learning_rate": 2.3071567450804325e-05,
|
|
"loss": 0.3477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35058295726776123,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3278.4,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 3.550724637681159,
|
|
"grad_norm": 0.8603392885960655,
|
|
"learning_rate": 2.299218906145909e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34910333156585693,
|
|
"step": 2205,
|
|
"valid_targets_mean": 2441.1,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 3.5587761674718195,
|
|
"grad_norm": 0.6801260592469724,
|
|
"learning_rate": 2.2912762429535684e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2981488108634949,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3300.4,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 3.5668276972624797,
|
|
"grad_norm": 0.7979499575329786,
|
|
"learning_rate": 2.2833288835616784e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3507813513278961,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3180.9,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 3.57487922705314,
|
|
"grad_norm": 0.7589237518218688,
|
|
"learning_rate": 2.2753769561042235e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2893664240837097,
|
|
"step": 2220,
|
|
"valid_targets_mean": 2736.6,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 3.5829307568438002,
|
|
"grad_norm": 0.7842484155012712,
|
|
"learning_rate": 2.2674205887888386e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34598594903945923,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2817.1,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 3.5909822866344605,
|
|
"grad_norm": 0.7553985888978503,
|
|
"learning_rate": 2.259459909894742e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3397219777107239,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3038.3,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 3.5990338164251208,
|
|
"grad_norm": 0.6631101299213469,
|
|
"learning_rate": 2.2514950477706657e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2631319761276245,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3378.8,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 3.607085346215781,
|
|
"grad_norm": 0.6917879399712983,
|
|
"learning_rate": 2.2435261308327875e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2921825051307678,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3375.1,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 3.6151368760064413,
|
|
"grad_norm": 0.6646988908267994,
|
|
"learning_rate": 2.2355532875626612e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28387004137039185,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3651.2,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 3.6231884057971016,
|
|
"grad_norm": 0.7566641430033721,
|
|
"learning_rate": 2.2275766465051444e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3653978705406189,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 3.631239935587762,
|
|
"grad_norm": 0.640762301083058,
|
|
"learning_rate": 2.2195963362663236e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885698080062866,
|
|
"step": 2255,
|
|
"valid_targets_mean": 4016.1,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 3.639291465378422,
|
|
"grad_norm": 0.7312405714684358,
|
|
"learning_rate": 2.211612485511446e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3239280581474304,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3113.6,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 3.6473429951690823,
|
|
"grad_norm": 0.6797552789086924,
|
|
"learning_rate": 2.2036252229628392e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30023393034935,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3113.1,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 3.6553945249597426,
|
|
"grad_norm": 0.7033890984196507,
|
|
"learning_rate": 2.19563467739784e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33096373081207275,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3861.4,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 3.6634460547504024,
|
|
"grad_norm": 0.693299418919331,
|
|
"learning_rate": 2.1876409776467165e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31812596321105957,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3224.8,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 3.6714975845410627,
|
|
"grad_norm": 0.7050830766977225,
|
|
"learning_rate": 2.1796442525905923e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968364953994751,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3072.1,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 3.679549114331723,
|
|
"grad_norm": 0.7092053234510587,
|
|
"learning_rate": 2.171644631159366e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33621740341186523,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3552.6,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 3.687600644122383,
|
|
"grad_norm": 0.7327779149808457,
|
|
"learning_rate": 2.163642242329633e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3615042567253113,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3199.9,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 3.6956521739130435,
|
|
"grad_norm": 0.6747000049468731,
|
|
"learning_rate": 2.1556372151226097e-05,
|
|
"loss": 0.3444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30473870038986206,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3625.8,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 3.7037037037037037,
|
|
"grad_norm": 0.6905439473556321,
|
|
"learning_rate": 2.1476296786020502e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3153340816497803,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3234.1,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 3.711755233494364,
|
|
"grad_norm": 0.757410046151389,
|
|
"learning_rate": 2.139619761872163e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32494598627090454,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3108.1,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 3.7198067632850242,
|
|
"grad_norm": 0.6333814485490104,
|
|
"learning_rate": 2.1316075940755363e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30045998096466064,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4108.0,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 3.7278582930756845,
|
|
"grad_norm": 0.8569424628003814,
|
|
"learning_rate": 2.1235933043910488e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3410085439682007,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2746.4,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.7359098228663448,
|
|
"grad_norm": 0.7003250678607069,
|
|
"learning_rate": 2.1155770220317918e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29915452003479004,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3250.2,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 3.7439613526570046,
|
|
"grad_norm": 0.731064940807403,
|
|
"learning_rate": 2.107558876242983e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.330859899520874,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3688.2,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.752012882447665,
|
|
"grad_norm": 0.6231472245606579,
|
|
"learning_rate": 2.0995389962998845e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20000654458999634,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4773.8,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 3.760064412238325,
|
|
"grad_norm": 1.1067173824667762,
|
|
"learning_rate": 2.091517511505719e-05,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1921842396259308,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5335.0,
|
|
"valid_targets_min": 2383
|
|
},
|
|
{
|
|
"epoch": 3.7681159420289854,
|
|
"grad_norm": 0.4630108390914341,
|
|
"learning_rate": 2.0834945511895816e-05,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1726524531841278,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5678.3,
|
|
"valid_targets_min": 2928
|
|
},
|
|
{
|
|
"epoch": 3.7761674718196456,
|
|
"grad_norm": 0.5122162205986636,
|
|
"learning_rate": 2.0754702447043585e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21546904742717743,
|
|
"step": 2345,
|
|
"valid_targets_mean": 6460.8,
|
|
"valid_targets_min": 2335
|
|
},
|
|
{
|
|
"epoch": 3.784219001610306,
|
|
"grad_norm": 0.43207305254475,
|
|
"learning_rate": 2.0674447214246394e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15075618028640747,
|
|
"step": 2350,
|
|
"valid_targets_mean": 6145.8,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 3.792270531400966,
|
|
"grad_norm": 0.4774154606281994,
|
|
"learning_rate": 2.059418110744633e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17323094606399536,
|
|
"step": 2355,
|
|
"valid_targets_mean": 5129.8,
|
|
"valid_targets_min": 3650
|
|
},
|
|
{
|
|
"epoch": 3.8003220611916264,
|
|
"grad_norm": 0.46983051174721563,
|
|
"learning_rate": 2.0513905420760798e-05,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1917821168899536,
|
|
"step": 2360,
|
|
"valid_targets_mean": 6283.1,
|
|
"valid_targets_min": 3367
|
|
},
|
|
{
|
|
"epoch": 3.8083735909822867,
|
|
"grad_norm": 0.4902336033194191,
|
|
"learning_rate": 2.043362144846164e-05,
|
|
"loss": 0.1973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25147587060928345,
|
|
"step": 2365,
|
|
"valid_targets_mean": 5965.1,
|
|
"valid_targets_min": 3021
|
|
},
|
|
{
|
|
"epoch": 3.816425120772947,
|
|
"grad_norm": 0.42280694124833545,
|
|
"learning_rate": 2.035333048495431e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20615729689598083,
|
|
"step": 2370,
|
|
"valid_targets_mean": 7093.3,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 3.824476650563607,
|
|
"grad_norm": 0.4889232418853953,
|
|
"learning_rate": 2.0273033824756964e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2106115221977234,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5944.9,
|
|
"valid_targets_min": 3130
|
|
},
|
|
{
|
|
"epoch": 3.8325281803542675,
|
|
"grad_norm": 0.4486166262691516,
|
|
"learning_rate": 2.0192732762479616e-05,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20369920134544373,
|
|
"step": 2380,
|
|
"valid_targets_mean": 6768.1,
|
|
"valid_targets_min": 2346
|
|
},
|
|
{
|
|
"epoch": 3.8405797101449277,
|
|
"grad_norm": 0.5052893828675203,
|
|
"learning_rate": 2.011242859280325e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20773877203464508,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5635.4,
|
|
"valid_targets_min": 3421
|
|
},
|
|
{
|
|
"epoch": 3.848631239935588,
|
|
"grad_norm": 0.552069432523759,
|
|
"learning_rate": 2.0032122610458947e-05,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16113221645355225,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4650.4,
|
|
"valid_targets_min": 2970
|
|
},
|
|
{
|
|
"epoch": 3.8566827697262482,
|
|
"grad_norm": 0.524046615447888,
|
|
"learning_rate": 1.9951816110207004e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20094186067581177,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5519.9,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 3.864734299516908,
|
|
"grad_norm": 0.48693523721557375,
|
|
"learning_rate": 1.9871510386816103e-05,
|
|
"loss": 0.1811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18589508533477783,
|
|
"step": 2400,
|
|
"valid_targets_mean": 6197.9,
|
|
"valid_targets_min": 3424
|
|
},
|
|
{
|
|
"epoch": 3.8727858293075683,
|
|
"grad_norm": 0.49674650842025453,
|
|
"learning_rate": 1.979120673504235e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16874763369560242,
|
|
"step": 2405,
|
|
"valid_targets_mean": 5672.4,
|
|
"valid_targets_min": 3860
|
|
},
|
|
{
|
|
"epoch": 3.8808373590982286,
|
|
"grad_norm": 0.5935011533211295,
|
|
"learning_rate": 1.9710906449608498e-05,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16884219646453857,
|
|
"step": 2410,
|
|
"valid_targets_mean": 5892.7,
|
|
"valid_targets_min": 2898
|
|
},
|
|
{
|
|
"epoch": 3.888888888888889,
|
|
"grad_norm": 0.4678758596506749,
|
|
"learning_rate": 1.9630610825182992e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1843390017747879,
|
|
"step": 2415,
|
|
"valid_targets_mean": 5762.8,
|
|
"valid_targets_min": 3670
|
|
},
|
|
{
|
|
"epoch": 3.896940418679549,
|
|
"grad_norm": 0.466433159436347,
|
|
"learning_rate": 1.955032115635915e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16634991765022278,
|
|
"step": 2420,
|
|
"valid_targets_mean": 6079.9,
|
|
"valid_targets_min": 3135
|
|
},
|
|
{
|
|
"epoch": 3.9049919484702094,
|
|
"grad_norm": 0.4703450305531825,
|
|
"learning_rate": 1.9470038737634257e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18430741131305695,
|
|
"step": 2425,
|
|
"valid_targets_mean": 5954.4,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 3.9130434782608696,
|
|
"grad_norm": 0.4988164707549782,
|
|
"learning_rate": 1.9389764863388706e-05,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18756403028964996,
|
|
"step": 2430,
|
|
"valid_targets_mean": 5311.7,
|
|
"valid_targets_min": 3278
|
|
},
|
|
{
|
|
"epoch": 3.92109500805153,
|
|
"grad_norm": 0.4630341885874764,
|
|
"learning_rate": 1.9309500827865136e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15896305441856384,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5995.9,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 3.92914653784219,
|
|
"grad_norm": 0.4913791539120621,
|
|
"learning_rate": 1.9229247925147553e-05,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16495642066001892,
|
|
"step": 2440,
|
|
"valid_targets_mean": 5220.8,
|
|
"valid_targets_min": 3358
|
|
},
|
|
{
|
|
"epoch": 3.9371980676328504,
|
|
"grad_norm": 0.46392543060132874,
|
|
"learning_rate": 1.9149007449140462e-05,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17253006994724274,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4946.7,
|
|
"valid_targets_min": 2665
|
|
},
|
|
{
|
|
"epoch": 3.9452495974235102,
|
|
"grad_norm": 0.4222579875698941,
|
|
"learning_rate": 1.906878069354804e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16640131175518036,
|
|
"step": 2450,
|
|
"valid_targets_mean": 6109.9,
|
|
"valid_targets_min": 2762
|
|
},
|
|
{
|
|
"epoch": 3.9533011272141705,
|
|
"grad_norm": 0.5936694712109022,
|
|
"learning_rate": 1.898856895185322e-05,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1677541732788086,
|
|
"step": 2455,
|
|
"valid_targets_mean": 5266.1,
|
|
"valid_targets_min": 3731
|
|
},
|
|
{
|
|
"epoch": 3.9613526570048307,
|
|
"grad_norm": 0.5089530035696851,
|
|
"learning_rate": 1.8908373517296888e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17881962656974792,
|
|
"step": 2460,
|
|
"valid_targets_mean": 6246.1,
|
|
"valid_targets_min": 2970
|
|
},
|
|
{
|
|
"epoch": 3.969404186795491,
|
|
"grad_norm": 0.4163908705213259,
|
|
"learning_rate": 1.882819568285701e-05,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16383564472198486,
|
|
"step": 2465,
|
|
"valid_targets_mean": 6126.9,
|
|
"valid_targets_min": 3345
|
|
},
|
|
{
|
|
"epoch": 3.9774557165861513,
|
|
"grad_norm": 0.5352174387805062,
|
|
"learning_rate": 1.874803674122778e-05,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1686192899942398,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5409.5,
|
|
"valid_targets_min": 2785
|
|
},
|
|
{
|
|
"epoch": 3.9855072463768115,
|
|
"grad_norm": 0.5174379593558874,
|
|
"learning_rate": 1.8667897984798804e-05,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20292040705680847,
|
|
"step": 2475,
|
|
"valid_targets_mean": 6526.0,
|
|
"valid_targets_min": 3724
|
|
},
|
|
{
|
|
"epoch": 3.993558776167472,
|
|
"grad_norm": 0.46174047001815965,
|
|
"learning_rate": 1.858778070563422e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856258511543274,
|
|
"step": 2480,
|
|
"valid_targets_mean": 5751.2,
|
|
"valid_targets_min": 3683
|
|
},
|
|
{
|
|
"epoch": 4.001610305958132,
|
|
"grad_norm": 0.486458689377258,
|
|
"learning_rate": 1.8507686195451918e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22269584238529205,
|
|
"step": 2485,
|
|
"valid_targets_mean": 6743.1,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 4.009661835748792,
|
|
"grad_norm": 0.49067053864038795,
|
|
"learning_rate": 1.8427615745602667e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24867583811283112,
|
|
"step": 2490,
|
|
"valid_targets_mean": 6679.0,
|
|
"valid_targets_min": 2802
|
|
},
|
|
{
|
|
"epoch": 4.017713365539453,
|
|
"grad_norm": 0.48082700217324775,
|
|
"learning_rate": 1.834757064704933e-05,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587515413761139,
|
|
"step": 2495,
|
|
"valid_targets_mean": 6534.6,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 4.025764895330113,
|
|
"grad_norm": 0.463238335944571,
|
|
"learning_rate": 1.826755219034603e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24451354146003723,
|
|
"step": 2500,
|
|
"valid_targets_mean": 6190.5,
|
|
"valid_targets_min": 2430
|
|
},
|
|
{
|
|
"epoch": 4.033816425120773,
|
|
"grad_norm": 0.47115946029794054,
|
|
"learning_rate": 1.818756166561733e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2353125512599945,
|
|
"step": 2505,
|
|
"valid_targets_mean": 6432.6,
|
|
"valid_targets_min": 1969
|
|
},
|
|
{
|
|
"epoch": 4.041867954911433,
|
|
"grad_norm": 0.5556347094263723,
|
|
"learning_rate": 1.8107600362537473e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25839778780937195,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4996.2,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 4.049919484702094,
|
|
"grad_norm": 0.4828357803593343,
|
|
"learning_rate": 1.8027669570309572e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28702059388160706,
|
|
"step": 2515,
|
|
"valid_targets_mean": 7438.4,
|
|
"valid_targets_min": 3650
|
|
},
|
|
{
|
|
"epoch": 4.057971014492754,
|
|
"grad_norm": 0.4732689439450728,
|
|
"learning_rate": 1.7947770577644787e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24621464312076569,
|
|
"step": 2520,
|
|
"valid_targets_mean": 5771.1,
|
|
"valid_targets_min": 2778
|
|
},
|
|
{
|
|
"epoch": 4.066022544283414,
|
|
"grad_norm": 0.51704075854913,
|
|
"learning_rate": 1.786790467274161e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27112066745758057,
|
|
"step": 2525,
|
|
"valid_targets_mean": 6732.4,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 4.074074074074074,
|
|
"grad_norm": 0.4888484890427802,
|
|
"learning_rate": 1.778807314326505e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849244475364685,
|
|
"step": 2530,
|
|
"valid_targets_mean": 6322.0,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 4.082125603864735,
|
|
"grad_norm": 0.5118382404505906,
|
|
"learning_rate": 1.7708277276325886e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2283010631799698,
|
|
"step": 2535,
|
|
"valid_targets_mean": 6036.7,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 4.090177133655395,
|
|
"grad_norm": 0.47873074069899385,
|
|
"learning_rate": 1.762851835845992e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20961812138557434,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5966.9,
|
|
"valid_targets_min": 3109
|
|
},
|
|
{
|
|
"epoch": 4.098228663446055,
|
|
"grad_norm": 0.5079150029764018,
|
|
"learning_rate": 1.754879767560723e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23400455713272095,
|
|
"step": 2545,
|
|
"valid_targets_mean": 5874.7,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 4.106280193236715,
|
|
"grad_norm": 0.4709937790052201,
|
|
"learning_rate": 1.746911651309144e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23563805222511292,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5984.3,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 4.114331723027375,
|
|
"grad_norm": 0.5582460016862327,
|
|
"learning_rate": 1.7389476155598974e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261229932308197,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4817.7,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 4.122383252818035,
|
|
"grad_norm": 0.4613472981940905,
|
|
"learning_rate": 1.7309877887158388e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22766956686973572,
|
|
"step": 2560,
|
|
"valid_targets_mean": 6135.9,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 4.130434782608695,
|
|
"grad_norm": 0.5809580379347714,
|
|
"learning_rate": 1.723032299111964e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500440180301666,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5818.6,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 4.138486312399356,
|
|
"grad_norm": 0.9151207208782325,
|
|
"learning_rate": 1.7150812750133382e-05,
|
|
"loss": 0.3601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3996202051639557,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3210.2,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 4.146537842190016,
|
|
"grad_norm": 0.8283372169743392,
|
|
"learning_rate": 1.707134844613032e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34221383929252625,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2571.2,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 4.154589371980676,
|
|
"grad_norm": 0.7100117027673157,
|
|
"learning_rate": 1.699193136030052e-05,
|
|
"loss": 0.3546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34208422899246216,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3851.6,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 4.162640901771336,
|
|
"grad_norm": 0.7684487763500514,
|
|
"learning_rate": 1.6912562773072765e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3207693099975586,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3207.0,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 4.170692431561997,
|
|
"grad_norm": 0.8557363619696622,
|
|
"learning_rate": 1.6833243964093877e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3694392144680023,
|
|
"step": 2590,
|
|
"valid_targets_mean": 2929.4,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 4.178743961352657,
|
|
"grad_norm": 0.8161354731224225,
|
|
"learning_rate": 1.6753976212208137e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3618141710758209,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3058.9,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 4.186795491143317,
|
|
"grad_norm": 0.7945413438871889,
|
|
"learning_rate": 1.667476079543664e-05,
|
|
"loss": 0.3499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3769470155239105,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3233.4,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 4.194847020933977,
|
|
"grad_norm": 0.7506914826900978,
|
|
"learning_rate": 1.659559899095667e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3336166739463806,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3071.4,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 4.202898550724638,
|
|
"grad_norm": 0.8680474470245183,
|
|
"learning_rate": 1.651649207508114e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3558621406555176,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2435.2,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 4.210950080515298,
|
|
"grad_norm": 0.850673248513953,
|
|
"learning_rate": 1.643744132323801e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3782421946525574,
|
|
"step": 2615,
|
|
"valid_targets_mean": 2803.0,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 4.219001610305958,
|
|
"grad_norm": 0.9623854143221985,
|
|
"learning_rate": 1.6358448009949714e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3601483106613159,
|
|
"step": 2620,
|
|
"valid_targets_mean": 2628.6,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 4.2270531400966185,
|
|
"grad_norm": 0.7298752703256921,
|
|
"learning_rate": 1.6279513408812603e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3530423641204834,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3935.0,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 4.235104669887279,
|
|
"grad_norm": 0.7922927416603631,
|
|
"learning_rate": 1.620063879247643e-05,
|
|
"loss": 0.357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33408689498901367,
|
|
"step": 2630,
|
|
"valid_targets_mean": 2861.8,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 4.243156199677939,
|
|
"grad_norm": 0.8225640040752034,
|
|
"learning_rate": 1.6121825432623827e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34123361110687256,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2913.5,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 4.251207729468599,
|
|
"grad_norm": 0.862014730987376,
|
|
"learning_rate": 1.6043074599949785e-05,
|
|
"loss": 0.3476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35428357124328613,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2604.4,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 4.2592592592592595,
|
|
"grad_norm": 0.8556648106440016,
|
|
"learning_rate": 1.5964387564141192e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.339069128036499,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3359.8,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 4.26731078904992,
|
|
"grad_norm": 0.849595092751884,
|
|
"learning_rate": 1.588576559385635e-05,
|
|
"loss": 0.3321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3184316158294678,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2601.7,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 4.27536231884058,
|
|
"grad_norm": 0.7834684766024429,
|
|
"learning_rate": 1.5807209956704505e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305340111255646,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3171.8,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 4.28341384863124,
|
|
"grad_norm": 0.8658423398271352,
|
|
"learning_rate": 1.5728721919225428e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35852861404418945,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3561.2,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 4.291465378421901,
|
|
"grad_norm": 0.7999552824406122,
|
|
"learning_rate": 1.5650302746869004e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3435271978378296,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3375.6,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 4.29951690821256,
|
|
"grad_norm": 0.8649636583204158,
|
|
"learning_rate": 1.5571953703974813e-05,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37238696217536926,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2952.2,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 4.30756843800322,
|
|
"grad_norm": 0.8745862341667207,
|
|
"learning_rate": 1.5493676053751747e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36845505237579346,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3117.5,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 4.3156199677938805,
|
|
"grad_norm": 0.9341881778545614,
|
|
"learning_rate": 1.5415471058257638e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37140941619873047,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2730.5,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 4.323671497584541,
|
|
"grad_norm": 0.9405657665431416,
|
|
"learning_rate": 1.533733997837893e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3740704655647278,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2825.3,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 4.331723027375201,
|
|
"grad_norm": 0.7710353538662376,
|
|
"learning_rate": 1.5259284073810333e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32604238390922546,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3510.8,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 4.339774557165861,
|
|
"grad_norm": 0.781426537140954,
|
|
"learning_rate": 1.5181304603034513e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37608543038368225,
|
|
"step": 2695,
|
|
"valid_targets_mean": 3150.6,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 4.3478260869565215,
|
|
"grad_norm": 0.7237047762120449,
|
|
"learning_rate": 1.5103402823301819e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33800530433654785,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3118.8,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 4.355877616747182,
|
|
"grad_norm": 0.7630896900796991,
|
|
"learning_rate": 1.5025579990609973e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3303137421607971,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3119.9,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 4.363929146537842,
|
|
"grad_norm": 0.8601788048234646,
|
|
"learning_rate": 1.4947837359683882e-05,
|
|
"loss": 0.3547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3552572727203369,
|
|
"step": 2710,
|
|
"valid_targets_mean": 2656.8,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 4.371980676328502,
|
|
"grad_norm": 0.7820969944556317,
|
|
"learning_rate": 1.487017618395534e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.309645414352417,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3481.2,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 4.3800322061191626,
|
|
"grad_norm": 0.7637150424395313,
|
|
"learning_rate": 1.479259771554288e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34639042615890503,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3636.6,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 4.388083735909823,
|
|
"grad_norm": 0.6908525532816608,
|
|
"learning_rate": 1.4715103205231545e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.327004075050354,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4069.1,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 4.396135265700483,
|
|
"grad_norm": 0.6634760061573296,
|
|
"learning_rate": 1.463769390245273e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30868253111839294,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4105.0,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 4.404186795491143,
|
|
"grad_norm": 0.6687824762796025,
|
|
"learning_rate": 1.4560371055264052e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3181726336479187,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4012.6,
|
|
"valid_targets_min": 2398
|
|
},
|
|
{
|
|
"epoch": 4.412238325281804,
|
|
"grad_norm": 0.7648132029162774,
|
|
"learning_rate": 1.448313591032922e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30312180519104004,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3146.2,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 4.420289855072464,
|
|
"grad_norm": 0.6458192190255184,
|
|
"learning_rate": 1.4405989712897923e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30585235357284546,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4329.4,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 4.428341384863124,
|
|
"grad_norm": 0.6550907118099679,
|
|
"learning_rate": 1.4328933706785782e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2688300609588623,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3632.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 4.436392914653784,
|
|
"grad_norm": 0.7080323021524921,
|
|
"learning_rate": 1.4251969134354247e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29432380199432373,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3553.6,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 4.444444444444445,
|
|
"grad_norm": 0.7541689777697117,
|
|
"learning_rate": 1.4175097236490627e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3119310438632965,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3095.5,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 4.452495974235105,
|
|
"grad_norm": 0.7223929430673088,
|
|
"learning_rate": 1.409831925258805e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738376259803772,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3082.8,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 4.460547504025765,
|
|
"grad_norm": 0.7341114060579149,
|
|
"learning_rate": 1.4021636420525466e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2992332875728607,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2982.9,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 4.468599033816425,
|
|
"grad_norm": 0.7930767785136432,
|
|
"learning_rate": 1.3945049976647726e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3382141590118408,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3224.7,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 4.476650563607086,
|
|
"grad_norm": 0.7051589780334897,
|
|
"learning_rate": 1.3868561155745628e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302938848733902,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4591.2,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 4.484702093397746,
|
|
"grad_norm": 0.776963321434189,
|
|
"learning_rate": 1.3792171191036001e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32361382246017456,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3124.8,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 4.492753623188406,
|
|
"grad_norm": 0.8112701248746712,
|
|
"learning_rate": 1.3715881314141835e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2923651337623596,
|
|
"step": 2790,
|
|
"valid_targets_mean": 2744.4,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 4.500805152979066,
|
|
"grad_norm": 0.7331706558443012,
|
|
"learning_rate": 1.3639692755072429e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3007197380065918,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3716.5,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 4.508856682769727,
|
|
"grad_norm": 0.647744700371841,
|
|
"learning_rate": 1.3563606742203548e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2892110347747803,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4289.1,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 4.516908212560386,
|
|
"grad_norm": 0.9421850377075999,
|
|
"learning_rate": 1.3487624502257598e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3276273310184479,
|
|
"step": 2805,
|
|
"valid_targets_mean": 2789.2,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 4.524959742351046,
|
|
"grad_norm": 0.7188700197789585,
|
|
"learning_rate": 1.3411747260283905e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2845657765865326,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3436.0,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 4.533011272141707,
|
|
"grad_norm": 0.7781869831067078,
|
|
"learning_rate": 1.333597623963892e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29683226346969604,
|
|
"step": 2815,
|
|
"valid_targets_mean": 2580.9,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 4.541062801932367,
|
|
"grad_norm": 0.7012527244821464,
|
|
"learning_rate": 1.3260312661966487e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30411043763160706,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4231.2,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 4.549114331723027,
|
|
"grad_norm": 0.6896904130191109,
|
|
"learning_rate": 1.3184757747178187e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29390496015548706,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3503.9,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 4.557165861513687,
|
|
"grad_norm": 0.696983130694775,
|
|
"learning_rate": 1.3109312713433642e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31736573576927185,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3569.1,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 4.565217391304348,
|
|
"grad_norm": 0.7902589844756526,
|
|
"learning_rate": 1.3033978777120861e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30584877729415894,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2826.0,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 4.573268921095008,
|
|
"grad_norm": 0.7274888924433361,
|
|
"learning_rate": 1.2958757152836671e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25177866220474243,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3450.6,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 4.581320450885668,
|
|
"grad_norm": 0.725180749478288,
|
|
"learning_rate": 1.2883649053367106e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2852326035499573,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3446.2,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 4.5893719806763285,
|
|
"grad_norm": 0.8999719879391306,
|
|
"learning_rate": 1.2808655689667846e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.386888325214386,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3519.2,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 4.597423510466989,
|
|
"grad_norm": 0.7442219129910428,
|
|
"learning_rate": 1.2733778270844712e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3202979862689972,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3581.2,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 4.605475040257649,
|
|
"grad_norm": 0.7961958367210087,
|
|
"learning_rate": 1.265901800413416e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289141982793808,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3373.2,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 4.613526570048309,
|
|
"grad_norm": 0.7594348180961747,
|
|
"learning_rate": 1.2584376094883832e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29926833510398865,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3217.2,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 4.6215780998389695,
|
|
"grad_norm": 0.7903300067618602,
|
|
"learning_rate": 1.250985374653311e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627432942390442,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2827.8,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 4.62962962962963,
|
|
"grad_norm": 0.746273737284161,
|
|
"learning_rate": 1.2435452160593698e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513338625431061,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3353.4,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 4.63768115942029,
|
|
"grad_norm": 0.7106900237367672,
|
|
"learning_rate": 1.2361172536630288e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2939052879810333,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4223.3,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 4.64573268921095,
|
|
"grad_norm": 0.6667954441747274,
|
|
"learning_rate": 1.2287016072241195e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683846652507782,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3858.1,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 4.6537842190016105,
|
|
"grad_norm": 0.9490652913436398,
|
|
"learning_rate": 1.221298396303904e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2613232135772705,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2620.6,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.661835748792271,
|
|
"grad_norm": 0.7333762476710057,
|
|
"learning_rate": 1.2139077402631495e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2822628617286682,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3479.4,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 4.669887278582931,
|
|
"grad_norm": 0.6515841944617482,
|
|
"learning_rate": 1.2065297582602037e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25587737560272217,
|
|
"step": 2900,
|
|
"valid_targets_mean": 3427.9,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 4.677938808373591,
|
|
"grad_norm": 0.8050654547421896,
|
|
"learning_rate": 1.199164569249071e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.299319863319397,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2845.1,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 4.685990338164252,
|
|
"grad_norm": 0.6708735466300162,
|
|
"learning_rate": 1.191812291977497e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2744864225387573,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4298.4,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 4.694041867954912,
|
|
"grad_norm": 0.7514813752903359,
|
|
"learning_rate": 1.1844730449850546e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3047374486923218,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3738.6,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 4.702093397745571,
|
|
"grad_norm": 0.8585820063404703,
|
|
"learning_rate": 1.1771469466012309e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263333261013031,
|
|
"step": 2920,
|
|
"valid_targets_mean": 2371.1,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 4.710144927536232,
|
|
"grad_norm": 0.9084047054321588,
|
|
"learning_rate": 1.1698341149435196e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29706811904907227,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3284.2,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 4.718196457326892,
|
|
"grad_norm": 0.7335312030781107,
|
|
"learning_rate": 1.1625346679155179e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2993118166923523,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3596.9,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 4.726247987117552,
|
|
"grad_norm": 0.7645383662240981,
|
|
"learning_rate": 1.1552487232050242e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27383461594581604,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3022.3,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 4.734299516908212,
|
|
"grad_norm": 0.7045343410442323,
|
|
"learning_rate": 1.1479763982821414e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30053383111953735,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3824.2,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 4.7423510466988725,
|
|
"grad_norm": 0.7631623179378855,
|
|
"learning_rate": 1.1407178103973834e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2785217761993408,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3441.6,
|
|
"valid_targets_min": 1594
|
|
},
|
|
{
|
|
"epoch": 4.750402576489533,
|
|
"grad_norm": 0.6749936353133249,
|
|
"learning_rate": 1.1334730765797843e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28506165742874146,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3849.7,
|
|
"valid_targets_min": 1650
|
|
},
|
|
{
|
|
"epoch": 4.758454106280193,
|
|
"grad_norm": 0.737322666574153,
|
|
"learning_rate": 1.1262423136350087e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.161736398935318,
|
|
"step": 2955,
|
|
"valid_targets_mean": 5966.6,
|
|
"valid_targets_min": 3184
|
|
},
|
|
{
|
|
"epoch": 4.766505636070853,
|
|
"grad_norm": 0.5423893032678805,
|
|
"learning_rate": 1.1190256381434738e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1538485288619995,
|
|
"step": 2960,
|
|
"valid_targets_mean": 5159.4,
|
|
"valid_targets_min": 3589
|
|
},
|
|
{
|
|
"epoch": 4.774557165861514,
|
|
"grad_norm": 0.5128933204704713,
|
|
"learning_rate": 1.1118231664584674e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17879360914230347,
|
|
"step": 2965,
|
|
"valid_targets_mean": 6292.2,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 4.782608695652174,
|
|
"grad_norm": 0.5098998582752253,
|
|
"learning_rate": 1.1046350147042681e-05,
|
|
"loss": 0.1995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15875032544136047,
|
|
"step": 2970,
|
|
"valid_targets_mean": 5304.0,
|
|
"valid_targets_min": 3117
|
|
},
|
|
{
|
|
"epoch": 4.790660225442834,
|
|
"grad_norm": 0.5044753740910969,
|
|
"learning_rate": 1.0974612987742807e-05,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18598923087120056,
|
|
"step": 2975,
|
|
"valid_targets_mean": 6316.2,
|
|
"valid_targets_min": 3455
|
|
},
|
|
{
|
|
"epoch": 4.798711755233494,
|
|
"grad_norm": 0.4536792935375198,
|
|
"learning_rate": 1.0903021343291613e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1645420491695404,
|
|
"step": 2980,
|
|
"valid_targets_mean": 6636.1,
|
|
"valid_targets_min": 3364
|
|
},
|
|
{
|
|
"epoch": 4.806763285024155,
|
|
"grad_norm": 1.0919493102135451,
|
|
"learning_rate": 1.0831576367949555e-05,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1736430525779724,
|
|
"step": 2985,
|
|
"valid_targets_mean": 5804.9,
|
|
"valid_targets_min": 2579
|
|
},
|
|
{
|
|
"epoch": 4.814814814814815,
|
|
"grad_norm": 0.44890465909191923,
|
|
"learning_rate": 1.0760279213612362e-05,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23404830694198608,
|
|
"step": 2990,
|
|
"valid_targets_mean": 6861.9,
|
|
"valid_targets_min": 2907
|
|
},
|
|
{
|
|
"epoch": 4.822866344605475,
|
|
"grad_norm": 0.5343037492936906,
|
|
"learning_rate": 1.068913102979248e-05,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1848660707473755,
|
|
"step": 2995,
|
|
"valid_targets_mean": 6218.1,
|
|
"valid_targets_min": 2748
|
|
},
|
|
{
|
|
"epoch": 4.830917874396135,
|
|
"grad_norm": 0.4771916103447067,
|
|
"learning_rate": 1.0618132963600507e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1645662933588028,
|
|
"step": 3000,
|
|
"valid_targets_mean": 5813.8,
|
|
"valid_targets_min": 3623
|
|
},
|
|
{
|
|
"epoch": 4.838969404186796,
|
|
"grad_norm": 0.5572712759897953,
|
|
"learning_rate": 1.0547286159726743e-05,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15293219685554504,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5563.2,
|
|
"valid_targets_min": 2843
|
|
},
|
|
{
|
|
"epoch": 4.847020933977456,
|
|
"grad_norm": 0.5223306827253792,
|
|
"learning_rate": 1.047659176042268e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16464412212371826,
|
|
"step": 3010,
|
|
"valid_targets_mean": 6044.7,
|
|
"valid_targets_min": 3602
|
|
},
|
|
{
|
|
"epoch": 4.855072463768116,
|
|
"grad_norm": 0.47001363201449686,
|
|
"learning_rate": 1.0406050905482647e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.183122456073761,
|
|
"step": 3015,
|
|
"valid_targets_mean": 6674.1,
|
|
"valid_targets_min": 3783
|
|
},
|
|
{
|
|
"epoch": 4.8631239935587764,
|
|
"grad_norm": 0.5165001888223757,
|
|
"learning_rate": 1.033566473222539e-05,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18210646510124207,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5845.1,
|
|
"valid_targets_min": 2402
|
|
},
|
|
{
|
|
"epoch": 4.871175523349437,
|
|
"grad_norm": 0.5031892171999971,
|
|
"learning_rate": 1.0265434375475744e-05,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17503511905670166,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5448.2,
|
|
"valid_targets_min": 3720
|
|
},
|
|
{
|
|
"epoch": 4.879227053140097,
|
|
"grad_norm": 0.49986928711541884,
|
|
"learning_rate": 1.0195360967546342e-05,
|
|
"loss": 0.165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18064576387405396,
|
|
"step": 3030,
|
|
"valid_targets_mean": 5979.9,
|
|
"valid_targets_min": 2553
|
|
},
|
|
{
|
|
"epoch": 4.887278582930757,
|
|
"grad_norm": 0.5125587155597235,
|
|
"learning_rate": 1.0125445638219369e-05,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16748838126659393,
|
|
"step": 3035,
|
|
"valid_targets_mean": 6232.3,
|
|
"valid_targets_min": 2860
|
|
},
|
|
{
|
|
"epoch": 4.8953301127214175,
|
|
"grad_norm": 0.4616045885291346,
|
|
"learning_rate": 1.00556895147283e-05,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.162489652633667,
|
|
"step": 3040,
|
|
"valid_targets_mean": 5831.9,
|
|
"valid_targets_min": 3564
|
|
},
|
|
{
|
|
"epoch": 4.903381642512077,
|
|
"grad_norm": 0.46322600369785283,
|
|
"learning_rate": 9.986093721739793e-06,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18634885549545288,
|
|
"step": 3045,
|
|
"valid_targets_mean": 6287.4,
|
|
"valid_targets_min": 2925
|
|
},
|
|
{
|
|
"epoch": 4.911433172302738,
|
|
"grad_norm": 0.46732093070548425,
|
|
"learning_rate": 9.916659381335524e-06,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14257997274398804,
|
|
"step": 3050,
|
|
"valid_targets_mean": 5975.3,
|
|
"valid_targets_min": 2785
|
|
},
|
|
{
|
|
"epoch": 4.919484702093397,
|
|
"grad_norm": 0.49090695641865617,
|
|
"learning_rate": 9.847387612994065e-06,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1775178611278534,
|
|
"step": 3055,
|
|
"valid_targets_mean": 5815.7,
|
|
"valid_targets_min": 2991
|
|
},
|
|
{
|
|
"epoch": 4.927536231884058,
|
|
"grad_norm": 0.4936584974779153,
|
|
"learning_rate": 9.778279533572894e-06,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16541126370429993,
|
|
"step": 3060,
|
|
"valid_targets_mean": 6023.0,
|
|
"valid_targets_min": 3528
|
|
},
|
|
{
|
|
"epoch": 4.935587761674718,
|
|
"grad_norm": 0.4843782424726677,
|
|
"learning_rate": 9.70933625729035e-06,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17926865816116333,
|
|
"step": 3065,
|
|
"valid_targets_mean": 6609.0,
|
|
"valid_targets_min": 3348
|
|
},
|
|
{
|
|
"epoch": 4.943639291465378,
|
|
"grad_norm": 0.6642579622564159,
|
|
"learning_rate": 9.640558895707681e-06,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1745370775461197,
|
|
"step": 3070,
|
|
"valid_targets_mean": 5392.2,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 4.951690821256038,
|
|
"grad_norm": 0.5815282039563188,
|
|
"learning_rate": 9.571948557711104e-06,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16913628578186035,
|
|
"step": 3075,
|
|
"valid_targets_mean": 5687.4,
|
|
"valid_targets_min": 3662
|
|
},
|
|
{
|
|
"epoch": 4.959742351046699,
|
|
"grad_norm": 0.4644634261413639,
|
|
"learning_rate": 9.503506349493959e-06,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1633993536233902,
|
|
"step": 3080,
|
|
"valid_targets_mean": 5669.5,
|
|
"valid_targets_min": 2991
|
|
},
|
|
{
|
|
"epoch": 4.967793880837359,
|
|
"grad_norm": 0.5022295538466749,
|
|
"learning_rate": 9.435233374538848e-06,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15409933030605316,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5877.1,
|
|
"valid_targets_min": 3272
|
|
},
|
|
{
|
|
"epoch": 4.975845410628019,
|
|
"grad_norm": 0.5467333590771565,
|
|
"learning_rate": 9.367130733599863e-06,
|
|
"loss": 0.1573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819993257522583,
|
|
"step": 3090,
|
|
"valid_targets_mean": 5579.3,
|
|
"valid_targets_min": 2743
|
|
},
|
|
{
|
|
"epoch": 4.9838969404186795,
|
|
"grad_norm": 0.47589633784217944,
|
|
"learning_rate": 9.299199524684815e-06,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16169019043445587,
|
|
"step": 3095,
|
|
"valid_targets_mean": 6154.2,
|
|
"valid_targets_min": 3288
|
|
},
|
|
{
|
|
"epoch": 4.99194847020934,
|
|
"grad_norm": 0.4987072791545887,
|
|
"learning_rate": 9.23144084303756e-06,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14372682571411133,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5617.6,
|
|
"valid_targets_min": 2206
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.5420052690244692,
|
|
"learning_rate": 9.163855781120302e-06,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1929244101047516,
|
|
"step": 3105,
|
|
"valid_targets_mean": 6658.9,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 5.00805152979066,
|
|
"grad_norm": 0.5838464125620735,
|
|
"learning_rate": 9.096445428596026e-06,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26280146837234497,
|
|
"step": 3110,
|
|
"valid_targets_mean": 6454.3,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 5.0161030595813205,
|
|
"grad_norm": 0.5544032083369451,
|
|
"learning_rate": 9.029210872310884e-06,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2069273293018341,
|
|
"step": 3115,
|
|
"valid_targets_mean": 6041.3,
|
|
"valid_targets_min": 3050
|
|
},
|
|
{
|
|
"epoch": 5.024154589371981,
|
|
"grad_norm": 0.5060239859737673,
|
|
"learning_rate": 8.962153196276713e-06,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2327526956796646,
|
|
"step": 3120,
|
|
"valid_targets_mean": 6337.3,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 5.032206119162641,
|
|
"grad_norm": 0.5024924584087652,
|
|
"learning_rate": 8.895273481653527e-06,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556541860103607,
|
|
"step": 3125,
|
|
"valid_targets_mean": 6536.2,
|
|
"valid_targets_min": 3943
|
|
},
|
|
{
|
|
"epoch": 5.040257648953301,
|
|
"grad_norm": 0.5532271429624148,
|
|
"learning_rate": 8.828572806732103e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23633992671966553,
|
|
"step": 3130,
|
|
"valid_targets_mean": 6144.0,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 5.048309178743962,
|
|
"grad_norm": 0.5020434087339521,
|
|
"learning_rate": 8.76205224691659e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.220601886510849,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5658.6,
|
|
"valid_targets_min": 3195
|
|
},
|
|
{
|
|
"epoch": 5.056360708534622,
|
|
"grad_norm": 0.5004024186733302,
|
|
"learning_rate": 8.695712874707169e-06,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531841993331909,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5992.8,
|
|
"valid_targets_min": 3207
|
|
},
|
|
{
|
|
"epoch": 5.064412238325282,
|
|
"grad_norm": 0.5475537882143925,
|
|
"learning_rate": 8.629555759682756e-06,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2494160532951355,
|
|
"step": 3145,
|
|
"valid_targets_mean": 5498.4,
|
|
"valid_targets_min": 2385
|
|
},
|
|
{
|
|
"epoch": 5.072463768115942,
|
|
"grad_norm": 0.5809278130638802,
|
|
"learning_rate": 8.563581968483774e-06,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22677326202392578,
|
|
"step": 3150,
|
|
"valid_targets_mean": 6101.3,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 5.080515297906603,
|
|
"grad_norm": 0.5328987070977548,
|
|
"learning_rate": 8.497792564794935e-06,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23735421895980835,
|
|
"step": 3155,
|
|
"valid_targets_mean": 5591.6,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 5.088566827697263,
|
|
"grad_norm": 0.4993503923966659,
|
|
"learning_rate": 8.432188609328112e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28377091884613037,
|
|
"step": 3160,
|
|
"valid_targets_mean": 7175.1,
|
|
"valid_targets_min": 3309
|
|
},
|
|
{
|
|
"epoch": 5.096618357487923,
|
|
"grad_norm": 0.5672470270119984,
|
|
"learning_rate": 8.366771159805222e-06,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22815349698066711,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5700.9,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 5.1046698872785825,
|
|
"grad_norm": 0.5110748498614881,
|
|
"learning_rate": 8.301541270941178e-06,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25175783038139343,
|
|
"step": 3170,
|
|
"valid_targets_mean": 6462.1,
|
|
"valid_targets_min": 3176
|
|
},
|
|
{
|
|
"epoch": 5.112721417069243,
|
|
"grad_norm": 0.5146196376849012,
|
|
"learning_rate": 8.236499994426886e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2239510416984558,
|
|
"step": 3175,
|
|
"valid_targets_mean": 6671.4,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 5.120772946859903,
|
|
"grad_norm": 0.5276651861060871,
|
|
"learning_rate": 8.171648378912272e-06,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24736565351486206,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5441.8,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 5.128824476650563,
|
|
"grad_norm": 0.5713617059940914,
|
|
"learning_rate": 8.1069874699894e-06,
|
|
"loss": 0.2113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21684984862804413,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5118.4,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 5.1368760064412236,
|
|
"grad_norm": 0.9446935557315936,
|
|
"learning_rate": 8.042518310175607e-06,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3164033889770508,
|
|
"step": 3190,
|
|
"valid_targets_mean": 2431.5,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 5.144927536231884,
|
|
"grad_norm": 0.8102422471310984,
|
|
"learning_rate": 7.978241938896679e-06,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3511773347854614,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3045.8,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 5.152979066022544,
|
|
"grad_norm": 0.8229058252512985,
|
|
"learning_rate": 7.914159392470118e-06,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3436507284641266,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3644.5,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 5.161030595813204,
|
|
"grad_norm": 0.7911041242061355,
|
|
"learning_rate": 7.850271704088396e-06,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3280431628227234,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2985.6,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 5.169082125603865,
|
|
"grad_norm": 0.7417281880292826,
|
|
"learning_rate": 7.786579903802342e-06,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3037336766719818,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4235.5,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 5.177133655394525,
|
|
"grad_norm": 0.8043169878590806,
|
|
"learning_rate": 7.723085018504512e-06,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31899648904800415,
|
|
"step": 3215,
|
|
"valid_targets_mean": 3142.8,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 5.185185185185185,
|
|
"grad_norm": 0.7681103865645257,
|
|
"learning_rate": 7.659788071912612e-06,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006795644760132,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3279.6,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 5.193236714975845,
|
|
"grad_norm": 0.9279631678919613,
|
|
"learning_rate": 7.59669008455304e-06,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3557708263397217,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2693.6,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 5.201288244766506,
|
|
"grad_norm": 0.8325517861916301,
|
|
"learning_rate": 7.533792073744395e-06,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29200559854507446,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2785.3,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 5.209339774557166,
|
|
"grad_norm": 0.9000004067843751,
|
|
"learning_rate": 7.471095053581086e-06,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33962708711624146,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2680.9,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 5.217391304347826,
|
|
"grad_norm": 0.7351616652174657,
|
|
"learning_rate": 7.4086000349169864e-06,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27437877655029297,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3356.8,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 5.225442834138486,
|
|
"grad_norm": 0.8649092507616926,
|
|
"learning_rate": 7.346308025349138e-06,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3227480947971344,
|
|
"step": 3245,
|
|
"valid_targets_mean": 2648.8,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 5.233494363929147,
|
|
"grad_norm": 0.7894068022798766,
|
|
"learning_rate": 7.2842200292014805e-06,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977900505065918,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3610.2,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 5.241545893719807,
|
|
"grad_norm": 0.8348958744102947,
|
|
"learning_rate": 7.2223370475086896e-06,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31349635124206543,
|
|
"step": 3255,
|
|
"valid_targets_mean": 2815.4,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 5.249597423510467,
|
|
"grad_norm": 0.783279282550219,
|
|
"learning_rate": 7.160660078000028e-06,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31384730339050293,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3536.7,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 5.2576489533011275,
|
|
"grad_norm": 0.8474474930539333,
|
|
"learning_rate": 7.099190115083259e-06,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967156171798706,
|
|
"step": 3265,
|
|
"valid_targets_mean": 2561.4,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 5.265700483091788,
|
|
"grad_norm": 0.808644026983678,
|
|
"learning_rate": 7.037928149828608e-06,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2750012278556824,
|
|
"step": 3270,
|
|
"valid_targets_mean": 2889.4,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 5.273752012882448,
|
|
"grad_norm": 0.9194627850557411,
|
|
"learning_rate": 6.97687516995279e-06,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3373414874076843,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2557.4,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 5.281803542673108,
|
|
"grad_norm": 0.822826899306057,
|
|
"learning_rate": 6.916032159803088e-06,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3265712857246399,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3530.8,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 5.2898550724637685,
|
|
"grad_norm": 1.0801701198259575,
|
|
"learning_rate": 6.855400100341458e-06,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31815803050994873,
|
|
"step": 3285,
|
|
"valid_targets_mean": 2579.7,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 5.297906602254429,
|
|
"grad_norm": 0.9189668274655115,
|
|
"learning_rate": 6.794979969128755e-06,
|
|
"loss": 0.3284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35337209701538086,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3024.5,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 5.305958132045088,
|
|
"grad_norm": 0.7830019148830716,
|
|
"learning_rate": 6.7347727403089325e-06,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025501072406769,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3523.4,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 5.314009661835748,
|
|
"grad_norm": 0.7973020737314889,
|
|
"learning_rate": 6.674779384593373e-06,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3119211196899414,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3317.8,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 5.322061191626409,
|
|
"grad_norm": 0.8741442602989578,
|
|
"learning_rate": 6.61500086924519e-06,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32590872049331665,
|
|
"step": 3305,
|
|
"valid_targets_mean": 2728.8,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 5.330112721417069,
|
|
"grad_norm": 0.7121746609134123,
|
|
"learning_rate": 6.555438158063683e-06,
|
|
"loss": 0.3176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29646340012550354,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3474.4,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 5.338164251207729,
|
|
"grad_norm": 0.8449323115888453,
|
|
"learning_rate": 6.4960922113687695e-06,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3494471311569214,
|
|
"step": 3315,
|
|
"valid_targets_mean": 2857.8,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 5.3462157809983895,
|
|
"grad_norm": 0.885312187387555,
|
|
"learning_rate": 6.4369639859855115e-06,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813508212566376,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2419.3,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 5.35426731078905,
|
|
"grad_norm": 0.9927190814901663,
|
|
"learning_rate": 6.378054435228671e-06,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34994691610336304,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2817.8,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 5.36231884057971,
|
|
"grad_norm": 0.8385621560859085,
|
|
"learning_rate": 6.319364508887371e-06,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212810456752777,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2906.6,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 5.37037037037037,
|
|
"grad_norm": 0.9132443618507105,
|
|
"learning_rate": 6.260895153209763e-06,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3039951026439667,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2699.4,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 5.3784219001610305,
|
|
"grad_norm": 0.7695295851549366,
|
|
"learning_rate": 6.202647310887764e-06,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2774300277233124,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3474.1,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 5.386473429951691,
|
|
"grad_norm": 0.6668937071959811,
|
|
"learning_rate": 6.14462192104188e-06,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27856311202049255,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4942.6,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 5.394524959742351,
|
|
"grad_norm": 0.8455706950790055,
|
|
"learning_rate": 6.086819919206051e-06,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30070313811302185,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3203.9,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 5.402576489533011,
|
|
"grad_norm": 0.836900372814081,
|
|
"learning_rate": 6.029242237312554e-06,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704571485519409,
|
|
"step": 3355,
|
|
"valid_targets_mean": 2741.4,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 5.4106280193236715,
|
|
"grad_norm": 1.0931862207345193,
|
|
"learning_rate": 5.971889803676996e-06,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953025698661804,
|
|
"step": 3360,
|
|
"valid_targets_mean": 2505.7,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 5.418679549114332,
|
|
"grad_norm": 0.7654117324257275,
|
|
"learning_rate": 5.914763542983355e-06,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26454564929008484,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2908.2,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 5.426731078904992,
|
|
"grad_norm": 0.7330273533335651,
|
|
"learning_rate": 5.857864376269051e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26756301522254944,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3492.0,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 5.434782608695652,
|
|
"grad_norm": 0.8733408409541297,
|
|
"learning_rate": 5.801193220910108e-06,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26825398206710815,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2432.0,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 5.442834138486313,
|
|
"grad_norm": 0.7169076864712473,
|
|
"learning_rate": 5.744750990606356e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24838054180145264,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3069.9,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 5.450885668276973,
|
|
"grad_norm": 0.7502178650889314,
|
|
"learning_rate": 5.688538595366706e-06,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31911134719848633,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3644.1,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 5.458937198067633,
|
|
"grad_norm": 1.414216146795669,
|
|
"learning_rate": 5.632556941494482e-06,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25184541940689087,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3538.7,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 5.466988727858293,
|
|
"grad_norm": 0.8372271865966336,
|
|
"learning_rate": 5.5768069315727895e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27714282274246216,
|
|
"step": 3395,
|
|
"valid_targets_mean": 2789.2,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 5.475040257648954,
|
|
"grad_norm": 0.8184499988808632,
|
|
"learning_rate": 5.521289464449975e-06,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645953595638275,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2663.7,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 5.483091787439614,
|
|
"grad_norm": 0.7185289726192565,
|
|
"learning_rate": 5.46600543522515e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30967646837234497,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3865.0,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 5.491143317230274,
|
|
"grad_norm": 0.705180498725472,
|
|
"learning_rate": 5.410955735233736e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706761658191681,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4117.5,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 5.499194847020934,
|
|
"grad_norm": 0.6957397872406959,
|
|
"learning_rate": 5.3561412520331025e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23413681983947754,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3565.3,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 5.507246376811594,
|
|
"grad_norm": 0.7323699706744174,
|
|
"learning_rate": 5.30156286938826e-06,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27193206548690796,
|
|
"step": 3420,
|
|
"valid_targets_mean": 3507.9,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 5.515297906602254,
|
|
"grad_norm": 0.7948157490925247,
|
|
"learning_rate": 5.24722146725761e-06,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30066072940826416,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3822.9,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 5.523349436392914,
|
|
"grad_norm": 0.7330705322398546,
|
|
"learning_rate": 5.193117921778743e-06,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574806809425354,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2932.9,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 5.531400966183575,
|
|
"grad_norm": 0.737623162777766,
|
|
"learning_rate": 5.139253105254336e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2380131632089615,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3473.2,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 5.539452495974235,
|
|
"grad_norm": 0.872809824307193,
|
|
"learning_rate": 5.085627886138078e-06,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3099297881126404,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2707.5,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 5.547504025764895,
|
|
"grad_norm": 0.698343107099745,
|
|
"learning_rate": 5.032243129020671e-06,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263833224773407,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3770.1,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 0.7253845641200191,
|
|
"learning_rate": 4.9790996946158695e-06,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566887438297272,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3687.9,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 5.563607085346216,
|
|
"grad_norm": 0.7712918784806847,
|
|
"learning_rate": 4.926198439746641e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26648885011672974,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3580.0,
|
|
"valid_targets_min": 1778
|
|
},
|
|
{
|
|
"epoch": 5.571658615136876,
|
|
"grad_norm": 0.856414024782782,
|
|
"learning_rate": 4.873540217331325e-06,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2561458945274353,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3154.2,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 5.579710144927536,
|
|
"grad_norm": 0.7683716866413719,
|
|
"learning_rate": 4.82112587636989e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730039358139038,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3442.1,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 5.587761674718196,
|
|
"grad_norm": 0.8230430968296925,
|
|
"learning_rate": 4.768956261930233e-06,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27767133712768555,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2991.2,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 5.595813204508857,
|
|
"grad_norm": 0.8004415970586688,
|
|
"learning_rate": 4.717032215134576e-06,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605861723423004,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3584.1,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 5.603864734299517,
|
|
"grad_norm": 0.7353749562835018,
|
|
"learning_rate": 4.66535457314589e-06,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24795982241630554,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3283.6,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 5.611916264090177,
|
|
"grad_norm": 0.7934788767299171,
|
|
"learning_rate": 4.613924169154406e-06,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2561984658241272,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3267.1,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 5.6199677938808374,
|
|
"grad_norm": 0.6451673243838487,
|
|
"learning_rate": 4.5627418323641705e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23117291927337646,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4220.8,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 5.628019323671498,
|
|
"grad_norm": 0.7547471899894796,
|
|
"learning_rate": 4.51180838797969e-06,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534325420856476,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3403.3,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 5.636070853462158,
|
|
"grad_norm": 0.773278558835617,
|
|
"learning_rate": 4.461124657192612e-06,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26870912313461304,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3994.0,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 5.644122383252818,
|
|
"grad_norm": 0.810555778973838,
|
|
"learning_rate": 4.410691457168488e-06,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28086018562316895,
|
|
"step": 3505,
|
|
"valid_targets_mean": 2728.6,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 5.6521739130434785,
|
|
"grad_norm": 0.8120077352409444,
|
|
"learning_rate": 4.3605096010336115e-06,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24294914305210114,
|
|
"step": 3510,
|
|
"valid_targets_mean": 2805.7,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 5.660225442834139,
|
|
"grad_norm": 0.8130325924281421,
|
|
"learning_rate": 4.310579897861902e-06,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28100401163101196,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3165.5,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 5.668276972624799,
|
|
"grad_norm": 0.8162870406632708,
|
|
"learning_rate": 4.26090315266185e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26572561264038086,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2954.1,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 5.676328502415459,
|
|
"grad_norm": 0.6514530451354953,
|
|
"learning_rate": 4.2114801663635504e-06,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2698674201965332,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4536.0,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 5.6843800322061195,
|
|
"grad_norm": 0.7583758417148468,
|
|
"learning_rate": 4.1623117358057865e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24608030915260315,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3440.9,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 5.692431561996779,
|
|
"grad_norm": 0.8068612199962942,
|
|
"learning_rate": 4.113398653723168e-06,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30153918266296387,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3381.9,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 5.70048309178744,
|
|
"grad_norm": 0.6753881803646461,
|
|
"learning_rate": 4.0647417087333776e-06,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815188765525818,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4124.1,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 5.708534621578099,
|
|
"grad_norm": 0.7538186575725058,
|
|
"learning_rate": 4.0163416853244385e-06,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3041001558303833,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3666.5,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 5.71658615136876,
|
|
"grad_norm": 0.7597861294821214,
|
|
"learning_rate": 3.968199363842056e-06,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28277167677879333,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3813.6,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 5.72463768115942,
|
|
"grad_norm": 0.8692552896889315,
|
|
"learning_rate": 3.920315520477065e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290031373500824,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3184.7,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 5.73268921095008,
|
|
"grad_norm": 0.725354422712945,
|
|
"learning_rate": 3.872690927252891e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2602255642414093,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3734.6,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 5.7407407407407405,
|
|
"grad_norm": 0.7906077868844616,
|
|
"learning_rate": 3.825326352013119e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2873994708061218,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3706.6,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 5.748792270531401,
|
|
"grad_norm": 0.7271200769973074,
|
|
"learning_rate": 3.7782225584091016e-06,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29486900568008423,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3998.4,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 5.756843800322061,
|
|
"grad_norm": 0.6627264051632484,
|
|
"learning_rate": 3.731380305887644e-06,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17932528257369995,
|
|
"step": 3575,
|
|
"valid_targets_mean": 6379.5,
|
|
"valid_targets_min": 3100
|
|
},
|
|
{
|
|
"epoch": 5.764895330112721,
|
|
"grad_norm": 0.5856709563027361,
|
|
"learning_rate": 3.684800349678781e-06,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15101756155490875,
|
|
"step": 3580,
|
|
"valid_targets_mean": 5043.8,
|
|
"valid_targets_min": 2708
|
|
},
|
|
{
|
|
"epoch": 5.7729468599033815,
|
|
"grad_norm": 0.5383802419402726,
|
|
"learning_rate": 3.638483440783576e-06,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1619020700454712,
|
|
"step": 3585,
|
|
"valid_targets_mean": 5717.3,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 5.780998389694042,
|
|
"grad_norm": 0.5400164906310356,
|
|
"learning_rate": 3.5924303259620307e-06,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22362828254699707,
|
|
"step": 3590,
|
|
"valid_targets_mean": 6198.8,
|
|
"valid_targets_min": 2599
|
|
},
|
|
{
|
|
"epoch": 5.789049919484702,
|
|
"grad_norm": 0.49147798358688005,
|
|
"learning_rate": 3.546641747721036e-06,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16704602539539337,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5880.9,
|
|
"valid_targets_min": 3427
|
|
},
|
|
{
|
|
"epoch": 5.797101449275362,
|
|
"grad_norm": 0.5344086768922587,
|
|
"learning_rate": 3.501118444302394e-06,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16091877222061157,
|
|
"step": 3600,
|
|
"valid_targets_mean": 5429.1,
|
|
"valid_targets_min": 3331
|
|
},
|
|
{
|
|
"epoch": 5.805152979066023,
|
|
"grad_norm": 0.5393969729072521,
|
|
"learning_rate": 3.4558611496709384e-06,
|
|
"loss": 0.1611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1679922491312027,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5318.9,
|
|
"valid_targets_min": 3176
|
|
},
|
|
{
|
|
"epoch": 5.813204508856683,
|
|
"grad_norm": 0.5476815168907639,
|
|
"learning_rate": 3.4108705935026685e-06,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21377882361412048,
|
|
"step": 3610,
|
|
"valid_targets_mean": 6334.5,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 5.821256038647343,
|
|
"grad_norm": 0.5454273677417246,
|
|
"learning_rate": 3.3661475011730206e-06,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15195472538471222,
|
|
"step": 3615,
|
|
"valid_targets_mean": 5575.2,
|
|
"valid_targets_min": 2982
|
|
},
|
|
{
|
|
"epoch": 5.829307568438003,
|
|
"grad_norm": 0.487579490900084,
|
|
"learning_rate": 3.321692593745147e-06,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14653947949409485,
|
|
"step": 3620,
|
|
"valid_targets_mean": 5045.2,
|
|
"valid_targets_min": 2662
|
|
},
|
|
{
|
|
"epoch": 5.837359098228664,
|
|
"grad_norm": 0.46110914845574164,
|
|
"learning_rate": 3.2775065879582948e-06,
|
|
"loss": 0.1594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14655046164989471,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5929.9,
|
|
"valid_targets_min": 3640
|
|
},
|
|
{
|
|
"epoch": 5.845410628019324,
|
|
"grad_norm": 0.5371793623726028,
|
|
"learning_rate": 3.233590196216263e-06,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15916280448436737,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5560.6,
|
|
"valid_targets_min": 3146
|
|
},
|
|
{
|
|
"epoch": 5.853462157809984,
|
|
"grad_norm": 0.5336623676265232,
|
|
"learning_rate": 3.1899441265759036e-06,
|
|
"loss": 0.1519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17341580986976624,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5438.9,
|
|
"valid_targets_min": 3095
|
|
},
|
|
{
|
|
"epoch": 5.861513687600644,
|
|
"grad_norm": 0.506978820179007,
|
|
"learning_rate": 3.1465690827356955e-06,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16827458143234253,
|
|
"step": 3640,
|
|
"valid_targets_mean": 6153.6,
|
|
"valid_targets_min": 3325
|
|
},
|
|
{
|
|
"epoch": 5.869565217391305,
|
|
"grad_norm": 0.48238153810687057,
|
|
"learning_rate": 3.103465764024438e-06,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1539824903011322,
|
|
"step": 3645,
|
|
"valid_targets_mean": 6176.7,
|
|
"valid_targets_min": 3690
|
|
},
|
|
{
|
|
"epoch": 5.877616747181965,
|
|
"grad_norm": 0.6316647612921683,
|
|
"learning_rate": 3.0606348653899288e-06,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14262428879737854,
|
|
"step": 3650,
|
|
"valid_targets_mean": 5253.8,
|
|
"valid_targets_min": 2926
|
|
},
|
|
{
|
|
"epoch": 5.885668276972625,
|
|
"grad_norm": 0.5087670946379775,
|
|
"learning_rate": 3.0180770773877866e-06,
|
|
"loss": 0.1575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334628015756607,
|
|
"step": 3655,
|
|
"valid_targets_mean": 6097.0,
|
|
"valid_targets_min": 3707
|
|
},
|
|
{
|
|
"epoch": 5.8937198067632846,
|
|
"grad_norm": 0.5000631591042055,
|
|
"learning_rate": 2.9757930861703223e-06,
|
|
"loss": 0.1567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16042768955230713,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5931.1,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 5.901771336553946,
|
|
"grad_norm": 0.5448493291259932,
|
|
"learning_rate": 2.9337835734754504e-06,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15106025338172913,
|
|
"step": 3665,
|
|
"valid_targets_mean": 6370.6,
|
|
"valid_targets_min": 3665
|
|
},
|
|
{
|
|
"epoch": 5.909822866344605,
|
|
"grad_norm": 0.4904247806452323,
|
|
"learning_rate": 2.892049216615724e-06,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17938336730003357,
|
|
"step": 3670,
|
|
"valid_targets_mean": 6119.8,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 5.917874396135265,
|
|
"grad_norm": 0.5176054978636475,
|
|
"learning_rate": 2.850590688467405e-06,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1513424813747406,
|
|
"step": 3675,
|
|
"valid_targets_mean": 5352.8,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 5.925925925925926,
|
|
"grad_norm": 0.5014302674357601,
|
|
"learning_rate": 2.8094086574595934e-06,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16385960578918457,
|
|
"step": 3680,
|
|
"valid_targets_mean": 6064.1,
|
|
"valid_targets_min": 2800
|
|
},
|
|
{
|
|
"epoch": 5.933977455716586,
|
|
"grad_norm": 0.4897422253909219,
|
|
"learning_rate": 2.768503787563497e-06,
|
|
"loss": 0.165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14840051531791687,
|
|
"step": 3685,
|
|
"valid_targets_mean": 6725.1,
|
|
"valid_targets_min": 4014
|
|
},
|
|
{
|
|
"epoch": 5.942028985507246,
|
|
"grad_norm": 0.527797846005594,
|
|
"learning_rate": 2.7278767382816828e-06,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15732444822788239,
|
|
"step": 3690,
|
|
"valid_targets_mean": 5340.4,
|
|
"valid_targets_min": 3261
|
|
},
|
|
{
|
|
"epoch": 5.950080515297906,
|
|
"grad_norm": 0.4537750452077363,
|
|
"learning_rate": 2.687528164637474e-06,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15537717938423157,
|
|
"step": 3695,
|
|
"valid_targets_mean": 6194.7,
|
|
"valid_targets_min": 3579
|
|
},
|
|
{
|
|
"epoch": 5.958132045088567,
|
|
"grad_norm": 0.481130982772452,
|
|
"learning_rate": 2.647458717164357e-06,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1415160596370697,
|
|
"step": 3700,
|
|
"valid_targets_mean": 6527.8,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 5.966183574879227,
|
|
"grad_norm": 0.4971167453343456,
|
|
"learning_rate": 2.607669041895535e-06,
|
|
"loss": 0.1627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16012218594551086,
|
|
"step": 3705,
|
|
"valid_targets_mean": 6887.7,
|
|
"valid_targets_min": 3569
|
|
},
|
|
{
|
|
"epoch": 5.974235104669887,
|
|
"grad_norm": 0.47404245776286746,
|
|
"learning_rate": 2.568159780353476e-06,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134698748588562,
|
|
"step": 3710,
|
|
"valid_targets_mean": 5398.8,
|
|
"valid_targets_min": 2580
|
|
},
|
|
{
|
|
"epoch": 5.982286634460547,
|
|
"grad_norm": 0.5394680793747297,
|
|
"learning_rate": 2.5289315695395834e-06,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1926458775997162,
|
|
"step": 3715,
|
|
"valid_targets_mean": 6136.1,
|
|
"valid_targets_min": 2610
|
|
},
|
|
{
|
|
"epoch": 5.990338164251208,
|
|
"grad_norm": 0.5073583213947698,
|
|
"learning_rate": 2.489985041923928e-06,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1339704990386963,
|
|
"step": 3720,
|
|
"valid_targets_mean": 5957.4,
|
|
"valid_targets_min": 2562
|
|
},
|
|
{
|
|
"epoch": 5.998389694041868,
|
|
"grad_norm": 0.5506968197613669,
|
|
"learning_rate": 2.4513208254350486e-06,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555670201778412,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5860.3,
|
|
"valid_targets_min": 2833
|
|
},
|
|
{
|
|
"epoch": 6.006441223832528,
|
|
"grad_norm": 0.6724494776596883,
|
|
"learning_rate": 2.412939543449828e-06,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651374936103821,
|
|
"step": 3730,
|
|
"valid_targets_mean": 5898.7,
|
|
"valid_targets_min": 2769
|
|
},
|
|
{
|
|
"epoch": 6.0144927536231885,
|
|
"grad_norm": 0.5836006033084224,
|
|
"learning_rate": 2.3748418147834394e-06,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2323194146156311,
|
|
"step": 3735,
|
|
"valid_targets_mean": 6068.6,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 6.022544283413849,
|
|
"grad_norm": 0.5642017551180925,
|
|
"learning_rate": 2.337028253679381e-06,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26346156001091003,
|
|
"step": 3740,
|
|
"valid_targets_mean": 6519.8,
|
|
"valid_targets_min": 3314
|
|
},
|
|
{
|
|
"epoch": 6.030595813204509,
|
|
"grad_norm": 0.7066259408147071,
|
|
"learning_rate": 2.299499469799542e-06,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2269531935453415,
|
|
"step": 3745,
|
|
"valid_targets_mean": 6028.6,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 6.038647342995169,
|
|
"grad_norm": 0.5632095775983418,
|
|
"learning_rate": 2.262256068214421e-06,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706316113471985,
|
|
"step": 3750,
|
|
"valid_targets_mean": 6266.9,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 6.0466988727858295,
|
|
"grad_norm": 0.5348831735859538,
|
|
"learning_rate": 2.2252986493933237e-06,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21734777092933655,
|
|
"step": 3755,
|
|
"valid_targets_mean": 5759.1,
|
|
"valid_targets_min": 2568
|
|
},
|
|
{
|
|
"epoch": 6.05475040257649,
|
|
"grad_norm": 0.5569500892452331,
|
|
"learning_rate": 2.18862780919471e-06,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21391534805297852,
|
|
"step": 3760,
|
|
"valid_targets_mean": 5530.2,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 6.06280193236715,
|
|
"grad_norm": 0.5382181428705697,
|
|
"learning_rate": 2.152244138856585e-06,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20388023555278778,
|
|
"step": 3765,
|
|
"valid_targets_mean": 5696.9,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 6.07085346215781,
|
|
"grad_norm": 0.6463190792326495,
|
|
"learning_rate": 2.1161482249869513e-06,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21541935205459595,
|
|
"step": 3770,
|
|
"valid_targets_mean": 5683.9,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 6.078904991948471,
|
|
"grad_norm": 0.4769019487796991,
|
|
"learning_rate": 2.080340649554369e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1947912871837616,
|
|
"step": 3775,
|
|
"valid_targets_mean": 6331.4,
|
|
"valid_targets_min": 3356
|
|
},
|
|
{
|
|
"epoch": 6.086956521739131,
|
|
"grad_norm": 0.5260688671936042,
|
|
"learning_rate": 2.044821989878558e-06,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1972348690032959,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5326.1,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 6.095008051529791,
|
|
"grad_norm": 0.5385684220997624,
|
|
"learning_rate": 2.0095928186210956e-06,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24432078003883362,
|
|
"step": 3785,
|
|
"valid_targets_mean": 6448.9,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 6.1030595813204505,
|
|
"grad_norm": 0.5134775076305466,
|
|
"learning_rate": 1.974653703776188e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21769499778747559,
|
|
"step": 3790,
|
|
"valid_targets_mean": 5910.2,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 6.111111111111111,
|
|
"grad_norm": 0.500956162314828,
|
|
"learning_rate": 1.9400052086615153e-06,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26993608474731445,
|
|
"step": 3795,
|
|
"valid_targets_mean": 7141.1,
|
|
"valid_targets_min": 3571
|
|
},
|
|
{
|
|
"epoch": 6.119162640901771,
|
|
"grad_norm": 0.5451480445857494,
|
|
"learning_rate": 1.9056478919091236e-06,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23594680428504944,
|
|
"step": 3800,
|
|
"valid_targets_mean": 6492.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 6.127214170692431,
|
|
"grad_norm": 0.4970674059635784,
|
|
"learning_rate": 1.8715823074564587e-06,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18086868524551392,
|
|
"step": 3805,
|
|
"valid_targets_mean": 5447.6,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 6.1352657004830915,
|
|
"grad_norm": 0.8407143099623481,
|
|
"learning_rate": 1.837809004537401e-06,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31480419635772705,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3657.8,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 6.143317230273752,
|
|
"grad_norm": 0.9619594700485488,
|
|
"learning_rate": 1.8043285276734334e-06,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3486429452896118,
|
|
"step": 3815,
|
|
"valid_targets_mean": 2673.1,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 6.151368760064412,
|
|
"grad_norm": 0.8500263557004369,
|
|
"learning_rate": 1.7711414166648365e-06,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212417662143707,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3209.7,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 6.159420289855072,
|
|
"grad_norm": 0.9189308209039743,
|
|
"learning_rate": 1.7382482065820138e-06,
|
|
"loss": 0.3419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35360682010650635,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3054.8,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 6.1674718196457325,
|
|
"grad_norm": 0.843417341220929,
|
|
"learning_rate": 1.7056494277568503e-06,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28352683782577515,
|
|
"step": 3830,
|
|
"valid_targets_mean": 2978.8,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 6.175523349436393,
|
|
"grad_norm": 0.8163445646433484,
|
|
"learning_rate": 1.6733456057741592e-06,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696455121040344,
|
|
"step": 3835,
|
|
"valid_targets_mean": 2849.6,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 6.183574879227053,
|
|
"grad_norm": 0.9881052112221266,
|
|
"learning_rate": 1.641337261463216e-06,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.332567423582077,
|
|
"step": 3840,
|
|
"valid_targets_mean": 2615.8,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 6.191626409017713,
|
|
"grad_norm": 0.8302563038314946,
|
|
"learning_rate": 1.6096249108893602e-06,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33189308643341064,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3296.4,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 6.199677938808374,
|
|
"grad_norm": 0.8685755282410109,
|
|
"learning_rate": 1.5782090653456616e-06,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31094080209732056,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3968.6,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 6.207729468599034,
|
|
"grad_norm": 0.9006410427832183,
|
|
"learning_rate": 1.547090231344699e-06,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37766462564468384,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3078.5,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 6.215780998389694,
|
|
"grad_norm": 0.8145924540125793,
|
|
"learning_rate": 1.5162689106103746e-06,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283829003572464,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3417.5,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 6.223832528180354,
|
|
"grad_norm": 0.8259786070193814,
|
|
"learning_rate": 1.4857456000698366e-06,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3424111008644104,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3427.6,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 6.231884057971015,
|
|
"grad_norm": 0.9647531902604716,
|
|
"learning_rate": 1.4555207918454662e-06,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37204068899154663,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2875.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 6.239935587761675,
|
|
"grad_norm": 0.878124106513052,
|
|
"learning_rate": 1.4255949732469309e-06,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.326835960149765,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2933.1,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 6.247987117552335,
|
|
"grad_norm": 0.9256224608559495,
|
|
"learning_rate": 1.3959686267633488e-06,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32315731048583984,
|
|
"step": 3880,
|
|
"valid_targets_mean": 2699.5,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 6.256038647342995,
|
|
"grad_norm": 0.7806676732625024,
|
|
"learning_rate": 1.3666422300554905e-06,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884427607059479,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3932.2,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 6.264090177133656,
|
|
"grad_norm": 0.8683190375235182,
|
|
"learning_rate": 1.3376162559480822e-06,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3049579858779907,
|
|
"step": 3890,
|
|
"valid_targets_mean": 2747.4,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 6.272141706924316,
|
|
"grad_norm": 0.8710255989708383,
|
|
"learning_rate": 1.308891172422193e-06,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33199816942214966,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3029.8,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 6.280193236714976,
|
|
"grad_norm": 0.8477552990968392,
|
|
"learning_rate": 1.2804674426076757e-06,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325872540473938,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3227.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 6.2882447665056365,
|
|
"grad_norm": 0.7965348453218628,
|
|
"learning_rate": 1.2523455247757088e-06,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30131930112838745,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3224.4,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 6.296296296296296,
|
|
"grad_norm": 0.7395568722630161,
|
|
"learning_rate": 1.224525872331408e-06,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3066796660423279,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4021.2,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 6.304347826086957,
|
|
"grad_norm": 0.8162687764572345,
|
|
"learning_rate": 1.1970089338065071e-06,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592276334762573,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3145.9,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 6.312399355877616,
|
|
"grad_norm": 0.8944188897503945,
|
|
"learning_rate": 1.1697951528521422e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3102479875087738,
|
|
"step": 3920,
|
|
"valid_targets_mean": 2583.8,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 6.320450885668277,
|
|
"grad_norm": 0.8873873245958489,
|
|
"learning_rate": 1.1428849682316766e-06,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901550829410553,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3109.6,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 6.328502415458937,
|
|
"grad_norm": 0.8886136259904959,
|
|
"learning_rate": 1.116278813813647e-06,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849825918674469,
|
|
"step": 3930,
|
|
"valid_targets_mean": 2516.6,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 6.336553945249597,
|
|
"grad_norm": 0.8340658998428512,
|
|
"learning_rate": 1.08997711856476e-06,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31567925214767456,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4016.8,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 6.344605475040257,
|
|
"grad_norm": 0.8397362085202228,
|
|
"learning_rate": 1.0639803065429755e-06,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3202289938926697,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3258.7,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 6.352657004830918,
|
|
"grad_norm": 0.8392706087036921,
|
|
"learning_rate": 1.0382887968906718e-06,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3209483027458191,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3767.5,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 6.360708534621578,
|
|
"grad_norm": 0.8201489913983286,
|
|
"learning_rate": 1.012903003827883e-06,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3404323160648346,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3205.4,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 6.368760064412238,
|
|
"grad_norm": 0.811177676991476,
|
|
"learning_rate": 9.87823336645628e-07,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27225881814956665,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3383.2,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 6.3768115942028984,
|
|
"grad_norm": 0.858309663982121,
|
|
"learning_rate": 9.630501996993091e-07,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884863018989563,
|
|
"step": 3960,
|
|
"valid_targets_mean": 2897.8,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 6.384863123993559,
|
|
"grad_norm": 0.9879313892597844,
|
|
"learning_rate": 9.385839924021844e-07,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3102876842021942,
|
|
"step": 3965,
|
|
"valid_targets_mean": 2376.7,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 6.392914653784219,
|
|
"grad_norm": 0.8341622182276683,
|
|
"learning_rate": 9.144251092189416e-07,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29059672355651855,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3869.4,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 6.400966183574879,
|
|
"grad_norm": 0.6305412459076566,
|
|
"learning_rate": 8.905739396593316e-07,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2593904137611389,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4852.6,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 6.4090177133655395,
|
|
"grad_norm": 0.8415630056656381,
|
|
"learning_rate": 8.670308682718853e-07,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30260005593299866,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3675.9,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 6.4170692431562,
|
|
"grad_norm": 0.7289917763669883,
|
|
"learning_rate": 8.437962746377204e-07,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25300300121307373,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3460.8,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 6.42512077294686,
|
|
"grad_norm": 0.8183662125493177,
|
|
"learning_rate": 8.208705333644129e-07,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27502337098121643,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4104.0,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 6.43317230273752,
|
|
"grad_norm": 0.8025243264792566,
|
|
"learning_rate": 7.982540140799688e-07,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634972333908081,
|
|
"step": 3995,
|
|
"valid_targets_mean": 3000.4,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 6.4412238325281805,
|
|
"grad_norm": 0.8460006631505339,
|
|
"learning_rate": 7.759470814268489e-07,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3205861449241638,
|
|
"step": 4000,
|
|
"valid_targets_mean": 3155.8,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 6.449275362318841,
|
|
"grad_norm": 0.8293418081000622,
|
|
"learning_rate": 7.539500950561063e-07,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261262983083725,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2876.1,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 6.457326892109501,
|
|
"grad_norm": 0.8476655987165361,
|
|
"learning_rate": 7.322634096215831e-07,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2980925440788269,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3099.4,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 6.465378421900161,
|
|
"grad_norm": 0.8876078091742649,
|
|
"learning_rate": 7.108873747741807e-07,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28537583351135254,
|
|
"step": 4015,
|
|
"valid_targets_mean": 2574.1,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 6.473429951690822,
|
|
"grad_norm": 0.8957582444852635,
|
|
"learning_rate": 6.898223351562405e-07,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32109585404396057,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3223.3,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 6.481481481481482,
|
|
"grad_norm": 0.764077364410412,
|
|
"learning_rate": 6.690686303959748e-07,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2634509801864624,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3132.0,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 6.489533011272142,
|
|
"grad_norm": 0.7443505087806759,
|
|
"learning_rate": 6.48626595101991e-07,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569301724433899,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3755.3,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 6.4975845410628015,
|
|
"grad_norm": 0.7930493772415029,
|
|
"learning_rate": 6.284965588579028e-07,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24235209822654724,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3029.9,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 6.505636070853463,
|
|
"grad_norm": 0.7138369856169103,
|
|
"learning_rate": 6.08678846217019e-07,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27324530482292175,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3930.0,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 6.513687600644122,
|
|
"grad_norm": 0.7841606768019419,
|
|
"learning_rate": 5.891737766970984e-07,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32244470715522766,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3632.3,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 6.521739130434782,
|
|
"grad_norm": 0.761125195078748,
|
|
"learning_rate": 5.699816647752077e-07,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867166996002197,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3690.8,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 6.5297906602254425,
|
|
"grad_norm": 0.9595718427005088,
|
|
"learning_rate": 5.511028198826496e-07,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752280831336975,
|
|
"step": 4055,
|
|
"valid_targets_mean": 2800.6,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 6.537842190016103,
|
|
"grad_norm": 0.7724065995439069,
|
|
"learning_rate": 5.32537546399976e-07,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3199930787086487,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3744.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 6.545893719806763,
|
|
"grad_norm": 0.7955896348465847,
|
|
"learning_rate": 5.142861436520763e-07,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26323774456977844,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3365.1,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 6.553945249597423,
|
|
"grad_norm": 0.7123095433809674,
|
|
"learning_rate": 4.963489059033477e-07,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2584189176559448,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3473.1,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 6.561996779388084,
|
|
"grad_norm": 0.8106421482681555,
|
|
"learning_rate": 4.787261223529616e-07,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26411914825439453,
|
|
"step": 4075,
|
|
"valid_targets_mean": 2840.8,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 6.570048309178744,
|
|
"grad_norm": 0.7944422290785246,
|
|
"learning_rate": 4.6141807713019793e-07,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28050708770751953,
|
|
"step": 4080,
|
|
"valid_targets_mean": 2815.1,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 6.578099838969404,
|
|
"grad_norm": 0.7829124652037258,
|
|
"learning_rate": 4.444250492898539e-07,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2613917589187622,
|
|
"step": 4085,
|
|
"valid_targets_mean": 3228.2,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 6.586151368760064,
|
|
"grad_norm": 0.785576038884685,
|
|
"learning_rate": 4.277473128077625e-07,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2885136008262634,
|
|
"step": 4090,
|
|
"valid_targets_mean": 3174.3,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 6.594202898550725,
|
|
"grad_norm": 0.7531252267510662,
|
|
"learning_rate": 4.113851365763544e-07,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605210542678833,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3438.4,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 6.602254428341385,
|
|
"grad_norm": 0.8483462893898233,
|
|
"learning_rate": 3.953387844003431e-07,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24956530332565308,
|
|
"step": 4100,
|
|
"valid_targets_mean": 2587.6,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 6.610305958132045,
|
|
"grad_norm": 0.8250120431362822,
|
|
"learning_rate": 3.7960851499245554e-07,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679558992385864,
|
|
"step": 4105,
|
|
"valid_targets_mean": 2969.8,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 6.618357487922705,
|
|
"grad_norm": 0.9900756494723584,
|
|
"learning_rate": 3.6419458196926825e-07,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.276964396238327,
|
|
"step": 4110,
|
|
"valid_targets_mean": 3475.1,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 6.626409017713366,
|
|
"grad_norm": 0.7872199755262294,
|
|
"learning_rate": 3.4909723384712436e-07,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2374078333377838,
|
|
"step": 4115,
|
|
"valid_targets_mean": 2925.9,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 6.634460547504026,
|
|
"grad_norm": 0.7593732626595731,
|
|
"learning_rate": 3.3431671403811207e-07,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528577148914337,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3369.0,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 6.642512077294686,
|
|
"grad_norm": 0.7712701520481997,
|
|
"learning_rate": 3.198532608461524e-07,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27561864256858826,
|
|
"step": 4125,
|
|
"valid_targets_mean": 3307.7,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 6.650563607085346,
|
|
"grad_norm": 0.8293254928497902,
|
|
"learning_rate": 3.0570710746314903e-07,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669210433959961,
|
|
"step": 4130,
|
|
"valid_targets_mean": 2625.4,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 6.658615136876007,
|
|
"grad_norm": 0.9102984688640577,
|
|
"learning_rate": 2.9187848196524205e-07,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29154983162879944,
|
|
"step": 4135,
|
|
"valid_targets_mean": 3028.0,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.7950386744860756,
|
|
"learning_rate": 2.7836760730910464e-07,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758431136608124,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3680.5,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 6.674718196457327,
|
|
"grad_norm": 0.77773549922398,
|
|
"learning_rate": 2.6517470132838117e-07,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587966322898865,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3472.8,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 6.6827697262479875,
|
|
"grad_norm": 0.8232297238523694,
|
|
"learning_rate": 2.522999767301482e-07,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.251497745513916,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3390.6,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 6.690821256038648,
|
|
"grad_norm": 0.8278987027552264,
|
|
"learning_rate": 2.3974364109149886e-07,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831830382347107,
|
|
"step": 4155,
|
|
"valid_targets_mean": 3282.8,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 6.698872785829307,
|
|
"grad_norm": 0.7184574441839767,
|
|
"learning_rate": 2.2750589685619495e-07,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29470086097717285,
|
|
"step": 4160,
|
|
"valid_targets_mean": 4544.4,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 6.706924315619968,
|
|
"grad_norm": 0.6991872186724033,
|
|
"learning_rate": 2.1558694133139823e-07,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618224620819092,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3384.4,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 6.714975845410628,
|
|
"grad_norm": 0.6730952845085181,
|
|
"learning_rate": 2.039869666844929e-07,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2868538200855255,
|
|
"step": 4170,
|
|
"valid_targets_mean": 4230.6,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 6.723027375201288,
|
|
"grad_norm": 0.8142353289352747,
|
|
"learning_rate": 1.9270615993998375e-07,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27020856738090515,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3359.7,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 6.731078904991948,
|
|
"grad_norm": 0.6894366977045292,
|
|
"learning_rate": 1.817447029764874e-07,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708439230918884,
|
|
"step": 4180,
|
|
"valid_targets_mean": 4327.3,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 6.739130434782608,
|
|
"grad_norm": 0.6974543175965264,
|
|
"learning_rate": 1.7110277252379238e-07,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23700222373008728,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3812.0,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 6.747181964573269,
|
|
"grad_norm": 0.902579622282395,
|
|
"learning_rate": 1.607805401600149e-07,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28798216581344604,
|
|
"step": 4190,
|
|
"valid_targets_mean": 2473.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 6.755233494363929,
|
|
"grad_norm": 1.1198738028974389,
|
|
"learning_rate": 1.5077817230883419e-07,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24988670647144318,
|
|
"step": 4195,
|
|
"valid_targets_mean": 7490.2,
|
|
"valid_targets_min": 2721
|
|
},
|
|
{
|
|
"epoch": 6.763285024154589,
|
|
"grad_norm": 0.6445241702724612,
|
|
"learning_rate": 1.4109583023679706e-07,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19134119153022766,
|
|
"step": 4200,
|
|
"valid_targets_mean": 6344.0,
|
|
"valid_targets_min": 3073
|
|
},
|
|
{
|
|
"epoch": 6.7713365539452495,
|
|
"grad_norm": 0.6558044755460182,
|
|
"learning_rate": 1.3173367005073545e-07,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14993247389793396,
|
|
"step": 4205,
|
|
"valid_targets_mean": 5062.1,
|
|
"valid_targets_min": 2985
|
|
},
|
|
{
|
|
"epoch": 6.77938808373591,
|
|
"grad_norm": 0.6428349765980677,
|
|
"learning_rate": 1.2269184269523282e-07,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19283941388130188,
|
|
"step": 4210,
|
|
"valid_targets_mean": 5613.6,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 6.78743961352657,
|
|
"grad_norm": 0.6404100414290556,
|
|
"learning_rate": 1.1397049395020842e-07,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16561368107795715,
|
|
"step": 4215,
|
|
"valid_targets_mean": 6100.1,
|
|
"valid_targets_min": 3494
|
|
},
|
|
{
|
|
"epoch": 6.79549114331723,
|
|
"grad_norm": 0.5799011128340407,
|
|
"learning_rate": 1.0556976442854805e-07,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338614821434021,
|
|
"step": 4220,
|
|
"valid_targets_mean": 5711.6,
|
|
"valid_targets_min": 3761
|
|
},
|
|
{
|
|
"epoch": 6.8035426731078905,
|
|
"grad_norm": 0.5831529495090866,
|
|
"learning_rate": 9.748978957385025e-08,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16792958974838257,
|
|
"step": 4225,
|
|
"valid_targets_mean": 5863.1,
|
|
"valid_targets_min": 3178
|
|
},
|
|
{
|
|
"epoch": 6.811594202898551,
|
|
"grad_norm": 0.5618368648500537,
|
|
"learning_rate": 8.9730699658237e-08,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15672558546066284,
|
|
"step": 4230,
|
|
"valid_targets_mean": 6090.8,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 6.819645732689211,
|
|
"grad_norm": 0.5753023449670881,
|
|
"learning_rate": 8.229261978025316e-08,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14445531368255615,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5746.5,
|
|
"valid_targets_min": 3334
|
|
},
|
|
{
|
|
"epoch": 6.827697262479871,
|
|
"grad_norm": 0.6046612188056499,
|
|
"learning_rate": 7.517566986285474e-08,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16920645534992218,
|
|
"step": 4240,
|
|
"valid_targets_mean": 5464.9,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 6.835748792270532,
|
|
"grad_norm": 0.566160926164469,
|
|
"learning_rate": 6.837996465146823e-08,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1559247374534607,
|
|
"step": 4245,
|
|
"valid_targets_mean": 5526.6,
|
|
"valid_targets_min": 2793
|
|
},
|
|
{
|
|
"epoch": 6.843800322061192,
|
|
"grad_norm": 0.5934842203255924,
|
|
"learning_rate": 6.190561371214321e-08,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17836138606071472,
|
|
"step": 4250,
|
|
"valid_targets_mean": 6074.9,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 6.851851851851852,
|
|
"grad_norm": 0.5989111745239429,
|
|
"learning_rate": 5.575272142978927e-08,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14026236534118652,
|
|
"step": 4255,
|
|
"valid_targets_mean": 5260.6,
|
|
"valid_targets_min": 2292
|
|
},
|
|
{
|
|
"epoch": 6.859903381642512,
|
|
"grad_norm": 0.5852529999276449,
|
|
"learning_rate": 4.992138700649074e-08,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16409310698509216,
|
|
"step": 4260,
|
|
"valid_targets_mean": 5752.9,
|
|
"valid_targets_min": 4204
|
|
},
|
|
{
|
|
"epoch": 6.867954911433173,
|
|
"grad_norm": 0.6115217698676657,
|
|
"learning_rate": 4.4411704459903506e-08,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1484546661376953,
|
|
"step": 4265,
|
|
"valid_targets_mean": 4836.7,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 6.876006441223833,
|
|
"grad_norm": 0.6316569084859304,
|
|
"learning_rate": 3.92237626217451e-08,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18417194485664368,
|
|
"step": 4270,
|
|
"valid_targets_mean": 6021.0,
|
|
"valid_targets_min": 4439
|
|
},
|
|
{
|
|
"epoch": 6.884057971014493,
|
|
"grad_norm": 0.5659170506134107,
|
|
"learning_rate": 3.435764513635809e-08,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16125309467315674,
|
|
"step": 4275,
|
|
"valid_targets_mean": 5661.9,
|
|
"valid_targets_min": 3607
|
|
},
|
|
{
|
|
"epoch": 6.892109500805153,
|
|
"grad_norm": 0.6183161652818728,
|
|
"learning_rate": 2.9813430459364465e-08,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16375675797462463,
|
|
"step": 4280,
|
|
"valid_targets_mean": 5420.4,
|
|
"valid_targets_min": 3078
|
|
},
|
|
{
|
|
"epoch": 6.900161030595813,
|
|
"grad_norm": 0.598243453584514,
|
|
"learning_rate": 2.5591191856397802e-08,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15643054246902466,
|
|
"step": 4285,
|
|
"valid_targets_mean": 5750.4,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 6.908212560386474,
|
|
"grad_norm": 0.5759364478429377,
|
|
"learning_rate": 2.1690997401928593e-08,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14383944869041443,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5996.6,
|
|
"valid_targets_min": 4004
|
|
},
|
|
{
|
|
"epoch": 6.916264090177133,
|
|
"grad_norm": 0.5552887684264204,
|
|
"learning_rate": 1.811290997815851e-08,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14651936292648315,
|
|
"step": 4295,
|
|
"valid_targets_mean": 6200.9,
|
|
"valid_targets_min": 3148
|
|
},
|
|
{
|
|
"epoch": 6.9243156199677935,
|
|
"grad_norm": 0.49927201422387957,
|
|
"learning_rate": 1.485698727400564e-08,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13894528150558472,
|
|
"step": 4300,
|
|
"valid_targets_mean": 6472.3,
|
|
"valid_targets_min": 3581
|
|
},
|
|
{
|
|
"epoch": 6.932367149758454,
|
|
"grad_norm": 0.6237894773842607,
|
|
"learning_rate": 1.1923281784185226e-08,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22623667120933533,
|
|
"step": 4305,
|
|
"valid_targets_mean": 7532.8,
|
|
"valid_targets_min": 3206
|
|
},
|
|
{
|
|
"epoch": 6.940418679549114,
|
|
"grad_norm": 0.5964161951339775,
|
|
"learning_rate": 9.311840808357009e-09,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1940787136554718,
|
|
"step": 4310,
|
|
"valid_targets_mean": 5952.9,
|
|
"valid_targets_min": 3304
|
|
},
|
|
{
|
|
"epoch": 6.948470209339774,
|
|
"grad_norm": 0.5723446720622757,
|
|
"learning_rate": 7.022706450354744e-09,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15612125396728516,
|
|
"step": 4315,
|
|
"valid_targets_mean": 5735.1,
|
|
"valid_targets_min": 3368
|
|
},
|
|
{
|
|
"epoch": 6.956521739130435,
|
|
"grad_norm": 0.6247370179630106,
|
|
"learning_rate": 5.055915617522278e-09,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16278734803199768,
|
|
"step": 4320,
|
|
"valid_targets_mean": 5506.7,
|
|
"valid_targets_min": 2908
|
|
},
|
|
{
|
|
"epoch": 6.964573268921095,
|
|
"grad_norm": 0.6002549871414108,
|
|
"learning_rate": 3.411500020109593e-09,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16616703569889069,
|
|
"step": 4325,
|
|
"valid_targets_mean": 5777.9,
|
|
"valid_targets_min": 3451
|
|
},
|
|
{
|
|
"epoch": 6.972624798711755,
|
|
"grad_norm": 0.4927861563779905,
|
|
"learning_rate": 2.08948617075988e-09,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13573740422725677,
|
|
"step": 4330,
|
|
"valid_targets_mean": 6045.9,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 6.980676328502415,
|
|
"grad_norm": 0.5443175862615581,
|
|
"learning_rate": 1.0898953840898786e-09,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1544107049703598,
|
|
"step": 4335,
|
|
"valid_targets_mean": 6113.4,
|
|
"valid_targets_min": 3912
|
|
},
|
|
{
|
|
"epoch": 6.988727858293076,
|
|
"grad_norm": 0.5346570746956858,
|
|
"learning_rate": 4.127437763390418e-10,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1397796869277954,
|
|
"step": 4340,
|
|
"valid_targets_mean": 5837.0,
|
|
"valid_targets_min": 3528
|
|
},
|
|
{
|
|
"epoch": 6.996779388083736,
|
|
"grad_norm": 0.5722567869143517,
|
|
"learning_rate": 5.804226511196831e-11,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16464683413505554,
|
|
"step": 4345,
|
|
"valid_targets_mean": 6356.6,
|
|
"valid_targets_min": 3725
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18215620517730713,
|
|
"step": 4347,
|
|
"total_flos": 1416797650550784.0,
|
|
"train_loss": 0.3156141554908915,
|
|
"train_runtime": 23294.0349,
|
|
"train_samples_per_second": 2.983,
|
|
"train_steps_per_second": 0.187,
|
|
"valid_targets_mean": 6658.9,
|
|
"valid_targets_min": 2494
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4347,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1416797650550784.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|