Model: laion/exp-syh-r2egym-askllm-hardened_glm_4_7_traces_jupiter Source: Original Platform
9519 lines
264 KiB
JSON
9519 lines
264 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4305,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008136696501220505,
|
|
"grad_norm": 16.090431037903322,
|
|
"learning_rate": 3.7122969837587006e-07,
|
|
"loss": 0.7715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875712513923645,
|
|
"step": 5,
|
|
"valid_targets_mean": 4549.8,
|
|
"valid_targets_min": 252
|
|
},
|
|
{
|
|
"epoch": 0.01627339300244101,
|
|
"grad_norm": 13.600731709495204,
|
|
"learning_rate": 8.352668213457077e-07,
|
|
"loss": 0.7117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35614049434661865,
|
|
"step": 10,
|
|
"valid_targets_mean": 8209.8,
|
|
"valid_targets_min": 4066
|
|
},
|
|
{
|
|
"epoch": 0.024410089503661515,
|
|
"grad_norm": 14.14836434723303,
|
|
"learning_rate": 1.2993039443155453e-06,
|
|
"loss": 0.694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.250732421875,
|
|
"step": 15,
|
|
"valid_targets_mean": 4782.0,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 0.03254678600488202,
|
|
"grad_norm": 7.548954071846173,
|
|
"learning_rate": 1.7633410672853829e-06,
|
|
"loss": 0.6943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22547389566898346,
|
|
"step": 20,
|
|
"valid_targets_mean": 5292.1,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 0.04068348250610252,
|
|
"grad_norm": 6.231235771802954,
|
|
"learning_rate": 2.2273781902552207e-06,
|
|
"loss": 0.6218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33196526765823364,
|
|
"step": 25,
|
|
"valid_targets_mean": 5366.5,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 0.04882017900732303,
|
|
"grad_norm": 3.2220494398584676,
|
|
"learning_rate": 2.691415313225058e-06,
|
|
"loss": 0.5883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2818257212638855,
|
|
"step": 30,
|
|
"valid_targets_mean": 5397.9,
|
|
"valid_targets_min": 4217
|
|
},
|
|
{
|
|
"epoch": 0.05695687550854353,
|
|
"grad_norm": 1.9906473871157997,
|
|
"learning_rate": 3.155452436194896e-06,
|
|
"loss": 0.5962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29219943284988403,
|
|
"step": 35,
|
|
"valid_targets_mean": 6485.5,
|
|
"valid_targets_min": 4178
|
|
},
|
|
{
|
|
"epoch": 0.06509357200976404,
|
|
"grad_norm": 1.566586623976585,
|
|
"learning_rate": 3.6194895591647333e-06,
|
|
"loss": 0.5804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31874755024909973,
|
|
"step": 40,
|
|
"valid_targets_mean": 5538.4,
|
|
"valid_targets_min": 2206
|
|
},
|
|
{
|
|
"epoch": 0.07323026851098453,
|
|
"grad_norm": 0.8989747000335192,
|
|
"learning_rate": 4.083526682134571e-06,
|
|
"loss": 0.5337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26823461055755615,
|
|
"step": 45,
|
|
"valid_targets_mean": 8599.2,
|
|
"valid_targets_min": 2163
|
|
},
|
|
{
|
|
"epoch": 0.08136696501220504,
|
|
"grad_norm": 0.9319478495784349,
|
|
"learning_rate": 4.547563805104409e-06,
|
|
"loss": 0.5084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24531835317611694,
|
|
"step": 50,
|
|
"valid_targets_mean": 5544.0,
|
|
"valid_targets_min": 3675
|
|
},
|
|
{
|
|
"epoch": 0.08950366151342555,
|
|
"grad_norm": 0.8445321573859387,
|
|
"learning_rate": 5.011600928074246e-06,
|
|
"loss": 0.5385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24889877438545227,
|
|
"step": 55,
|
|
"valid_targets_mean": 4860.2,
|
|
"valid_targets_min": 2473
|
|
},
|
|
{
|
|
"epoch": 0.09764035801464606,
|
|
"grad_norm": 0.5631526521452107,
|
|
"learning_rate": 5.4756380510440845e-06,
|
|
"loss": 0.5077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1919834017753601,
|
|
"step": 60,
|
|
"valid_targets_mean": 8965.6,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 0.10577705451586655,
|
|
"grad_norm": 0.723751948401996,
|
|
"learning_rate": 5.939675174013921e-06,
|
|
"loss": 0.5264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24049162864685059,
|
|
"step": 65,
|
|
"valid_targets_mean": 5572.6,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 0.11391375101708706,
|
|
"grad_norm": 0.6686399938539643,
|
|
"learning_rate": 6.403712296983759e-06,
|
|
"loss": 0.525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604122459888458,
|
|
"step": 70,
|
|
"valid_targets_mean": 5024.8,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 0.12205044751830757,
|
|
"grad_norm": 0.6374560834798504,
|
|
"learning_rate": 6.867749419953597e-06,
|
|
"loss": 0.4679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20338362455368042,
|
|
"step": 75,
|
|
"valid_targets_mean": 4254.9,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 0.13018714401952808,
|
|
"grad_norm": 0.5787585870311817,
|
|
"learning_rate": 7.331786542923435e-06,
|
|
"loss": 0.4843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21182265877723694,
|
|
"step": 80,
|
|
"valid_targets_mean": 6022.1,
|
|
"valid_targets_min": 2763
|
|
},
|
|
{
|
|
"epoch": 0.1383238405207486,
|
|
"grad_norm": 0.67321565497922,
|
|
"learning_rate": 7.795823665893271e-06,
|
|
"loss": 0.4312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23013588786125183,
|
|
"step": 85,
|
|
"valid_targets_mean": 5411.8,
|
|
"valid_targets_min": 3173
|
|
},
|
|
{
|
|
"epoch": 0.14646053702196907,
|
|
"grad_norm": 0.6071366032342709,
|
|
"learning_rate": 8.25986078886311e-06,
|
|
"loss": 0.4646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1693999469280243,
|
|
"step": 90,
|
|
"valid_targets_mean": 4955.9,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 0.15459723352318958,
|
|
"grad_norm": 0.5269887432627762,
|
|
"learning_rate": 8.723897911832948e-06,
|
|
"loss": 0.456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1908610761165619,
|
|
"step": 95,
|
|
"valid_targets_mean": 5783.5,
|
|
"valid_targets_min": 2568
|
|
},
|
|
{
|
|
"epoch": 0.16273393002441008,
|
|
"grad_norm": 0.49470856135363345,
|
|
"learning_rate": 9.187935034802784e-06,
|
|
"loss": 0.4707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16176165640354156,
|
|
"step": 100,
|
|
"valid_targets_mean": 5736.5,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 0.1708706265256306,
|
|
"grad_norm": 0.6634219296905246,
|
|
"learning_rate": 9.651972157772623e-06,
|
|
"loss": 0.4261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21228602528572083,
|
|
"step": 105,
|
|
"valid_targets_mean": 4824.8,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 0.1790073230268511,
|
|
"grad_norm": 0.5503232838963362,
|
|
"learning_rate": 1.011600928074246e-05,
|
|
"loss": 0.4521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21243637800216675,
|
|
"step": 110,
|
|
"valid_targets_mean": 4871.6,
|
|
"valid_targets_min": 3180
|
|
},
|
|
{
|
|
"epoch": 0.1871440195280716,
|
|
"grad_norm": 0.5371175803438996,
|
|
"learning_rate": 1.0580046403712299e-05,
|
|
"loss": 0.4236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18787938356399536,
|
|
"step": 115,
|
|
"valid_targets_mean": 6630.4,
|
|
"valid_targets_min": 2886
|
|
},
|
|
{
|
|
"epoch": 0.19528071602929212,
|
|
"grad_norm": 0.592957013748528,
|
|
"learning_rate": 1.1044083526682134e-05,
|
|
"loss": 0.423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21313674747943878,
|
|
"step": 120,
|
|
"valid_targets_mean": 4635.9,
|
|
"valid_targets_min": 1984
|
|
},
|
|
{
|
|
"epoch": 0.20341741253051263,
|
|
"grad_norm": 0.5850643813081216,
|
|
"learning_rate": 1.1508120649651972e-05,
|
|
"loss": 0.441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18926499783992767,
|
|
"step": 125,
|
|
"valid_targets_mean": 3976.4,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 0.2115541090317331,
|
|
"grad_norm": 0.6154983784285487,
|
|
"learning_rate": 1.197215777262181e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32481032609939575,
|
|
"step": 130,
|
|
"valid_targets_mean": 7336.1,
|
|
"valid_targets_min": 2261
|
|
},
|
|
{
|
|
"epoch": 0.21969080553295361,
|
|
"grad_norm": 0.5811910852740185,
|
|
"learning_rate": 1.2436194895591649e-05,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19883839786052704,
|
|
"step": 135,
|
|
"valid_targets_mean": 4926.2,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 0.22782750203417412,
|
|
"grad_norm": 0.562023919355631,
|
|
"learning_rate": 1.2900232018561485e-05,
|
|
"loss": 0.4539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1847591996192932,
|
|
"step": 140,
|
|
"valid_targets_mean": 4817.6,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 0.23596419853539463,
|
|
"grad_norm": 0.72271568902011,
|
|
"learning_rate": 1.3364269141531323e-05,
|
|
"loss": 0.4195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751965820789337,
|
|
"step": 145,
|
|
"valid_targets_mean": 4217.8,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 0.24410089503661514,
|
|
"grad_norm": 0.3938229955385366,
|
|
"learning_rate": 1.3828306264501162e-05,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14872416853904724,
|
|
"step": 150,
|
|
"valid_targets_mean": 9482.1,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 0.25223759153783565,
|
|
"grad_norm": 0.49221594968351173,
|
|
"learning_rate": 1.4292343387471e-05,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20908868312835693,
|
|
"step": 155,
|
|
"valid_targets_mean": 6888.2,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.26037428803905616,
|
|
"grad_norm": 0.5545201308696927,
|
|
"learning_rate": 1.4756380510440838e-05,
|
|
"loss": 0.4102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789374887943268,
|
|
"step": 160,
|
|
"valid_targets_mean": 6530.2,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 0.26851098454027666,
|
|
"grad_norm": 0.4586839023597891,
|
|
"learning_rate": 1.5220417633410673e-05,
|
|
"loss": 0.3853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15098837018013,
|
|
"step": 165,
|
|
"valid_targets_mean": 6107.5,
|
|
"valid_targets_min": 3366
|
|
},
|
|
{
|
|
"epoch": 0.2766476810414972,
|
|
"grad_norm": 0.5466087287399158,
|
|
"learning_rate": 1.5684454756380513e-05,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18119701743125916,
|
|
"step": 170,
|
|
"valid_targets_mean": 5958.1,
|
|
"valid_targets_min": 2636
|
|
},
|
|
{
|
|
"epoch": 0.2847843775427177,
|
|
"grad_norm": 0.7246628057096877,
|
|
"learning_rate": 1.614849187935035e-05,
|
|
"loss": 0.3878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18207889795303345,
|
|
"step": 175,
|
|
"valid_targets_mean": 3833.5,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.29292107404393813,
|
|
"grad_norm": 0.538445461461904,
|
|
"learning_rate": 1.661252900232019e-05,
|
|
"loss": 0.3899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20349320769309998,
|
|
"step": 180,
|
|
"valid_targets_mean": 5779.9,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 0.30105777054515864,
|
|
"grad_norm": 0.5154736408446468,
|
|
"learning_rate": 1.7076566125290022e-05,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14643463492393494,
|
|
"step": 185,
|
|
"valid_targets_mean": 5368.9,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 0.30919446704637915,
|
|
"grad_norm": 0.5537361180002401,
|
|
"learning_rate": 1.7540603248259862e-05,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24436548352241516,
|
|
"step": 190,
|
|
"valid_targets_mean": 6028.6,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 0.31733116354759966,
|
|
"grad_norm": 0.5029342180508779,
|
|
"learning_rate": 1.80046403712297e-05,
|
|
"loss": 0.4093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1840636283159256,
|
|
"step": 195,
|
|
"valid_targets_mean": 5353.9,
|
|
"valid_targets_min": 2892
|
|
},
|
|
{
|
|
"epoch": 0.32546786004882017,
|
|
"grad_norm": 0.665787256003558,
|
|
"learning_rate": 1.846867749419954e-05,
|
|
"loss": 0.4148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20056715607643127,
|
|
"step": 200,
|
|
"valid_targets_mean": 4300.5,
|
|
"valid_targets_min": 2125
|
|
},
|
|
{
|
|
"epoch": 0.3336045565500407,
|
|
"grad_norm": 0.6052671481751055,
|
|
"learning_rate": 1.8932714617169375e-05,
|
|
"loss": 0.4007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18301618099212646,
|
|
"step": 205,
|
|
"valid_targets_mean": 4318.6,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 0.3417412530512612,
|
|
"grad_norm": 0.5974300039254363,
|
|
"learning_rate": 1.9396751740139212e-05,
|
|
"loss": 0.359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1635710895061493,
|
|
"step": 210,
|
|
"valid_targets_mean": 4149.1,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 0.3498779495524817,
|
|
"grad_norm": 0.5162532888002979,
|
|
"learning_rate": 1.986078886310905e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1892920732498169,
|
|
"step": 215,
|
|
"valid_targets_mean": 5807.4,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 0.3580146460537022,
|
|
"grad_norm": 0.6211875321766217,
|
|
"learning_rate": 2.0324825986078888e-05,
|
|
"loss": 0.3707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2048984169960022,
|
|
"step": 220,
|
|
"valid_targets_mean": 5146.1,
|
|
"valid_targets_min": 3448
|
|
},
|
|
{
|
|
"epoch": 0.3661513425549227,
|
|
"grad_norm": 0.5629734837453833,
|
|
"learning_rate": 2.0788863109048725e-05,
|
|
"loss": 0.4264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20716804265975952,
|
|
"step": 225,
|
|
"valid_targets_mean": 6232.1,
|
|
"valid_targets_min": 3504
|
|
},
|
|
{
|
|
"epoch": 0.3742880390561432,
|
|
"grad_norm": 0.5754882541364582,
|
|
"learning_rate": 2.125290023201856e-05,
|
|
"loss": 0.3609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17893140017986298,
|
|
"step": 230,
|
|
"valid_targets_mean": 5060.4,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 0.3824247355573637,
|
|
"grad_norm": 0.4803589387079019,
|
|
"learning_rate": 2.17169373549884e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.169406920671463,
|
|
"step": 235,
|
|
"valid_targets_mean": 10265.1,
|
|
"valid_targets_min": 3654
|
|
},
|
|
{
|
|
"epoch": 0.39056143205858423,
|
|
"grad_norm": 0.5684141070472157,
|
|
"learning_rate": 2.2180974477958238e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18647357821464539,
|
|
"step": 240,
|
|
"valid_targets_mean": 6180.6,
|
|
"valid_targets_min": 4034
|
|
},
|
|
{
|
|
"epoch": 0.39869812855980474,
|
|
"grad_norm": 2.1822397438079113,
|
|
"learning_rate": 2.2645011600928078e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13844701647758484,
|
|
"step": 245,
|
|
"valid_targets_mean": 3962.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 0.40683482506102525,
|
|
"grad_norm": 0.5269077214375306,
|
|
"learning_rate": 2.3109048723897914e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22004611790180206,
|
|
"step": 250,
|
|
"valid_targets_mean": 7061.2,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 0.4149715215622457,
|
|
"grad_norm": 0.5440786293579026,
|
|
"learning_rate": 2.357308584686775e-05,
|
|
"loss": 0.3884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17783966660499573,
|
|
"step": 255,
|
|
"valid_targets_mean": 6231.8,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 0.4231082180634662,
|
|
"grad_norm": 0.5948755408238603,
|
|
"learning_rate": 2.4037122969837587e-05,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17008402943611145,
|
|
"step": 260,
|
|
"valid_targets_mean": 5944.2,
|
|
"valid_targets_min": 3162
|
|
},
|
|
{
|
|
"epoch": 0.4312449145646867,
|
|
"grad_norm": 0.6242241542196177,
|
|
"learning_rate": 2.4501160092807427e-05,
|
|
"loss": 0.3741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24811501801013947,
|
|
"step": 265,
|
|
"valid_targets_mean": 6068.0,
|
|
"valid_targets_min": 3582
|
|
},
|
|
{
|
|
"epoch": 0.43938161106590723,
|
|
"grad_norm": 0.6339502257062204,
|
|
"learning_rate": 2.4965197215777264e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23643383383750916,
|
|
"step": 270,
|
|
"valid_targets_mean": 6240.1,
|
|
"valid_targets_min": 3166
|
|
},
|
|
{
|
|
"epoch": 0.44751830756712774,
|
|
"grad_norm": 0.620013916375109,
|
|
"learning_rate": 2.54292343387471e-05,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20989033579826355,
|
|
"step": 275,
|
|
"valid_targets_mean": 4992.1,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 0.45565500406834825,
|
|
"grad_norm": 0.6062383822505677,
|
|
"learning_rate": 2.589327146171694e-05,
|
|
"loss": 0.3909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.165325328707695,
|
|
"step": 280,
|
|
"valid_targets_mean": 5992.2,
|
|
"valid_targets_min": 2286
|
|
},
|
|
{
|
|
"epoch": 0.46379170056956875,
|
|
"grad_norm": 0.5313593137715071,
|
|
"learning_rate": 2.6357308584686777e-05,
|
|
"loss": 0.3599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.211288183927536,
|
|
"step": 285,
|
|
"valid_targets_mean": 8111.2,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 0.47192839707078926,
|
|
"grad_norm": 0.6295506563199366,
|
|
"learning_rate": 2.6821345707656617e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2054595947265625,
|
|
"step": 290,
|
|
"valid_targets_mean": 5166.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 0.48006509357200977,
|
|
"grad_norm": 0.49592526684619054,
|
|
"learning_rate": 2.7285382830626453e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16820460557937622,
|
|
"step": 295,
|
|
"valid_targets_mean": 7745.8,
|
|
"valid_targets_min": 4466
|
|
},
|
|
{
|
|
"epoch": 0.4882017900732303,
|
|
"grad_norm": 0.567789284281097,
|
|
"learning_rate": 2.774941995359629e-05,
|
|
"loss": 0.3658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20403246581554413,
|
|
"step": 300,
|
|
"valid_targets_mean": 5828.9,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 0.4963384865744508,
|
|
"grad_norm": 0.5994970137381144,
|
|
"learning_rate": 2.8213457076566126e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17800754308700562,
|
|
"step": 305,
|
|
"valid_targets_mean": 6173.2,
|
|
"valid_targets_min": 2727
|
|
},
|
|
{
|
|
"epoch": 0.5044751830756713,
|
|
"grad_norm": 0.4507815805396529,
|
|
"learning_rate": 2.8677494199535966e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.163581982254982,
|
|
"step": 310,
|
|
"valid_targets_mean": 7815.4,
|
|
"valid_targets_min": 2591
|
|
},
|
|
{
|
|
"epoch": 0.5126118795768918,
|
|
"grad_norm": 0.5239390399565743,
|
|
"learning_rate": 2.9141531322505803e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17151904106140137,
|
|
"step": 315,
|
|
"valid_targets_mean": 6634.5,
|
|
"valid_targets_min": 2709
|
|
},
|
|
{
|
|
"epoch": 0.5207485760781123,
|
|
"grad_norm": 0.5369612515514515,
|
|
"learning_rate": 2.9605568445475643e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18754036724567413,
|
|
"step": 320,
|
|
"valid_targets_mean": 7942.8,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 0.5288852725793328,
|
|
"grad_norm": 0.5793782711405452,
|
|
"learning_rate": 3.006960556844548e-05,
|
|
"loss": 0.3665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19467616081237793,
|
|
"step": 325,
|
|
"valid_targets_mean": 7150.4,
|
|
"valid_targets_min": 3200
|
|
},
|
|
{
|
|
"epoch": 0.5370219690805533,
|
|
"grad_norm": 0.6555376050209063,
|
|
"learning_rate": 3.053364269141532e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22380053997039795,
|
|
"step": 330,
|
|
"valid_targets_mean": 5470.0,
|
|
"valid_targets_min": 3328
|
|
},
|
|
{
|
|
"epoch": 0.5451586655817738,
|
|
"grad_norm": 0.47280019324486067,
|
|
"learning_rate": 3.099767981438515e-05,
|
|
"loss": 0.353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1600561887025833,
|
|
"step": 335,
|
|
"valid_targets_mean": 7268.8,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 0.5532953620829943,
|
|
"grad_norm": 0.5881598770087844,
|
|
"learning_rate": 3.146171693735499e-05,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24715545773506165,
|
|
"step": 340,
|
|
"valid_targets_mean": 6853.9,
|
|
"valid_targets_min": 3075
|
|
},
|
|
{
|
|
"epoch": 0.5614320585842149,
|
|
"grad_norm": 0.6391589883331622,
|
|
"learning_rate": 3.1925754060324825e-05,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1951560229063034,
|
|
"step": 345,
|
|
"valid_targets_mean": 5299.4,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 0.5695687550854354,
|
|
"grad_norm": 0.5623585142837345,
|
|
"learning_rate": 3.2389791183294665e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16995632648468018,
|
|
"step": 350,
|
|
"valid_targets_mean": 5440.2,
|
|
"valid_targets_min": 2508
|
|
},
|
|
{
|
|
"epoch": 0.5777054515866559,
|
|
"grad_norm": 0.6219299756731129,
|
|
"learning_rate": 3.2853828306264505e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1758767068386078,
|
|
"step": 355,
|
|
"valid_targets_mean": 5666.1,
|
|
"valid_targets_min": 4068
|
|
},
|
|
{
|
|
"epoch": 0.5858421480878763,
|
|
"grad_norm": 0.5685036390421684,
|
|
"learning_rate": 3.3317865429234345e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16862249374389648,
|
|
"step": 360,
|
|
"valid_targets_mean": 4828.4,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 0.5939788445890968,
|
|
"grad_norm": 0.4625422720060745,
|
|
"learning_rate": 3.378190255220418e-05,
|
|
"loss": 0.3712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1963481456041336,
|
|
"step": 365,
|
|
"valid_targets_mean": 7321.2,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 0.6021155410903173,
|
|
"grad_norm": 0.5470733880761669,
|
|
"learning_rate": 3.424593967517402e-05,
|
|
"loss": 0.3691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.210045725107193,
|
|
"step": 370,
|
|
"valid_targets_mean": 6877.0,
|
|
"valid_targets_min": 3491
|
|
},
|
|
{
|
|
"epoch": 0.6102522375915378,
|
|
"grad_norm": 0.6202487972326748,
|
|
"learning_rate": 3.470997679814386e-05,
|
|
"loss": 0.3772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17183330655097961,
|
|
"step": 375,
|
|
"valid_targets_mean": 4664.4,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 0.6183889340927583,
|
|
"grad_norm": 0.5221402160582946,
|
|
"learning_rate": 3.517401392111369e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12995409965515137,
|
|
"step": 380,
|
|
"valid_targets_mean": 4589.5,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 0.6265256305939788,
|
|
"grad_norm": 0.5752601496871426,
|
|
"learning_rate": 3.563805104408353e-05,
|
|
"loss": 0.357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458801031112671,
|
|
"step": 385,
|
|
"valid_targets_mean": 4861.6,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 0.6346623270951993,
|
|
"grad_norm": 0.49073739472195316,
|
|
"learning_rate": 3.6102088167053364e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14129197597503662,
|
|
"step": 390,
|
|
"valid_targets_mean": 7941.0,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 0.6427990235964198,
|
|
"grad_norm": 0.6724135988713974,
|
|
"learning_rate": 3.6566125290023204e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20702053606510162,
|
|
"step": 395,
|
|
"valid_targets_mean": 5088.1,
|
|
"valid_targets_min": 3051
|
|
},
|
|
{
|
|
"epoch": 0.6509357200976403,
|
|
"grad_norm": 0.5361691867146933,
|
|
"learning_rate": 3.7030162412993044e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11901885271072388,
|
|
"step": 400,
|
|
"valid_targets_mean": 4371.2,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 0.6590724165988608,
|
|
"grad_norm": 0.5133700351880802,
|
|
"learning_rate": 3.7494199535962884e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19993503391742706,
|
|
"step": 405,
|
|
"valid_targets_mean": 7043.2,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 0.6672091131000814,
|
|
"grad_norm": 0.6482819055800569,
|
|
"learning_rate": 3.795823665893272e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17060551047325134,
|
|
"step": 410,
|
|
"valid_targets_mean": 4805.9,
|
|
"valid_targets_min": 2812
|
|
},
|
|
{
|
|
"epoch": 0.6753458096013019,
|
|
"grad_norm": 0.6390031583274932,
|
|
"learning_rate": 3.842227378190256e-05,
|
|
"loss": 0.3535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21496957540512085,
|
|
"step": 415,
|
|
"valid_targets_mean": 5022.5,
|
|
"valid_targets_min": 3145
|
|
},
|
|
{
|
|
"epoch": 0.6834825061025224,
|
|
"grad_norm": 0.5976519963434623,
|
|
"learning_rate": 3.888631090487239e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2512780427932739,
|
|
"step": 420,
|
|
"valid_targets_mean": 7279.9,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 0.6916192026037429,
|
|
"grad_norm": 0.47064787232454297,
|
|
"learning_rate": 3.935034802784223e-05,
|
|
"loss": 0.3492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13265804946422577,
|
|
"step": 425,
|
|
"valid_targets_mean": 7578.4,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 0.6997558991049634,
|
|
"grad_norm": 0.6527567618207681,
|
|
"learning_rate": 3.981438515081207e-05,
|
|
"loss": 0.3574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1878128945827484,
|
|
"step": 430,
|
|
"valid_targets_mean": 6056.6,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 0.7078925956061839,
|
|
"grad_norm": 0.5735509384032068,
|
|
"learning_rate": 3.9999940813479674e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1770503968000412,
|
|
"step": 435,
|
|
"valid_targets_mean": 6436.2,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 0.7160292921074044,
|
|
"grad_norm": 0.6261328521415896,
|
|
"learning_rate": 3.999957911934624e-05,
|
|
"loss": 0.3515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18041738867759705,
|
|
"step": 440,
|
|
"valid_targets_mean": 4812.4,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 0.7241659886086249,
|
|
"grad_norm": 0.6204622587345417,
|
|
"learning_rate": 3.9998888618418865e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1899314522743225,
|
|
"step": 445,
|
|
"valid_targets_mean": 5510.2,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 0.7323026851098454,
|
|
"grad_norm": 3.299320631347469,
|
|
"learning_rate": 3.999786932204985e-05,
|
|
"loss": 0.3785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1819208562374115,
|
|
"step": 450,
|
|
"valid_targets_mean": 4358.6,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 0.7404393816110659,
|
|
"grad_norm": 0.5252880370409836,
|
|
"learning_rate": 3.999652124699712e-05,
|
|
"loss": 0.3475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16896438598632812,
|
|
"step": 455,
|
|
"valid_targets_mean": 5077.8,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 0.7485760781122864,
|
|
"grad_norm": 0.6698283241722841,
|
|
"learning_rate": 3.999484441542395e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1943579763174057,
|
|
"step": 460,
|
|
"valid_targets_mean": 6660.6,
|
|
"valid_targets_min": 2219
|
|
},
|
|
{
|
|
"epoch": 0.7567127746135069,
|
|
"grad_norm": 0.5507163167952325,
|
|
"learning_rate": 3.999283885489861e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17113232612609863,
|
|
"step": 465,
|
|
"valid_targets_mean": 5965.8,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 0.7648494711147275,
|
|
"grad_norm": 0.5967720564945255,
|
|
"learning_rate": 3.999050459839389e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18689490854740143,
|
|
"step": 470,
|
|
"valid_targets_mean": 5054.9,
|
|
"valid_targets_min": 2713
|
|
},
|
|
{
|
|
"epoch": 0.772986167615948,
|
|
"grad_norm": 0.47232445183793803,
|
|
"learning_rate": 3.998784168428657e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15000829100608826,
|
|
"step": 475,
|
|
"valid_targets_mean": 8201.6,
|
|
"valid_targets_min": 4044
|
|
},
|
|
{
|
|
"epoch": 0.7811228641171685,
|
|
"grad_norm": 0.621422144424275,
|
|
"learning_rate": 3.998485015635677e-05,
|
|
"loss": 0.3432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1805095225572586,
|
|
"step": 480,
|
|
"valid_targets_mean": 5017.2,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 0.789259560618389,
|
|
"grad_norm": 0.5384507298681502,
|
|
"learning_rate": 3.998153006378727e-05,
|
|
"loss": 0.3606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14276368916034698,
|
|
"step": 485,
|
|
"valid_targets_mean": 5560.0,
|
|
"valid_targets_min": 3450
|
|
},
|
|
{
|
|
"epoch": 0.7973962571196095,
|
|
"grad_norm": 0.5979879136613555,
|
|
"learning_rate": 3.997788146116267e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2316283881664276,
|
|
"step": 490,
|
|
"valid_targets_mean": 6350.8,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 0.80553295362083,
|
|
"grad_norm": 0.5101547736138818,
|
|
"learning_rate": 3.99739044084685e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14997598528862,
|
|
"step": 495,
|
|
"valid_targets_mean": 5629.4,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 0.8136696501220505,
|
|
"grad_norm": 0.5665977624248587,
|
|
"learning_rate": 3.9969598971090225e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16634723544120789,
|
|
"step": 500,
|
|
"valid_targets_mean": 5539.4,
|
|
"valid_targets_min": 2190
|
|
},
|
|
{
|
|
"epoch": 0.8218063466232709,
|
|
"grad_norm": 0.5440024359106904,
|
|
"learning_rate": 3.99649652198122e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12725237011909485,
|
|
"step": 505,
|
|
"valid_targets_mean": 5113.1,
|
|
"valid_targets_min": 2708
|
|
},
|
|
{
|
|
"epoch": 0.8299430431244914,
|
|
"grad_norm": 0.5290865318322414,
|
|
"learning_rate": 3.9960003230816456e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14404511451721191,
|
|
"step": 510,
|
|
"valid_targets_mean": 5173.8,
|
|
"valid_targets_min": 3136
|
|
},
|
|
{
|
|
"epoch": 0.8380797396257119,
|
|
"grad_norm": 0.5770982920950211,
|
|
"learning_rate": 3.9954713085681504e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18526235222816467,
|
|
"step": 515,
|
|
"valid_targets_mean": 6702.4,
|
|
"valid_targets_min": 4946
|
|
},
|
|
{
|
|
"epoch": 0.8462164361269324,
|
|
"grad_norm": 0.47568964474126807,
|
|
"learning_rate": 3.994909487138096e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14243432879447937,
|
|
"step": 520,
|
|
"valid_targets_mean": 7471.1,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 0.8543531326281529,
|
|
"grad_norm": 0.5983260323228816,
|
|
"learning_rate": 3.994314868028212e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2362203747034073,
|
|
"step": 525,
|
|
"valid_targets_mean": 6965.2,
|
|
"valid_targets_min": 5206
|
|
},
|
|
{
|
|
"epoch": 0.8624898291293734,
|
|
"grad_norm": 0.5211844138476033,
|
|
"learning_rate": 3.9936874610144445e-05,
|
|
"loss": 0.3396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15538473427295685,
|
|
"step": 530,
|
|
"valid_targets_mean": 5649.0,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 0.870626525630594,
|
|
"grad_norm": 0.5747838948995959,
|
|
"learning_rate": 3.993027276411793e-05,
|
|
"loss": 0.3405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14920654892921448,
|
|
"step": 535,
|
|
"valid_targets_mean": 5064.1,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 0.8787632221318145,
|
|
"grad_norm": 0.7641456693399412,
|
|
"learning_rate": 3.992334325074148e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22456279397010803,
|
|
"step": 540,
|
|
"valid_targets_mean": 4989.8,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 0.886899918633035,
|
|
"grad_norm": 0.6130711548025931,
|
|
"learning_rate": 3.991608618394102e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13783709704875946,
|
|
"step": 545,
|
|
"valid_targets_mean": 4486.8,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 0.8950366151342555,
|
|
"grad_norm": 0.6595580159116637,
|
|
"learning_rate": 3.9908501683027726e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1897541582584381,
|
|
"step": 550,
|
|
"valid_targets_mean": 4604.0,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 0.903173311635476,
|
|
"grad_norm": 0.5968370142818512,
|
|
"learning_rate": 3.990058987269597e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15421295166015625,
|
|
"step": 555,
|
|
"valid_targets_mean": 5379.5,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 0.9113100081366965,
|
|
"grad_norm": 0.5675265201448391,
|
|
"learning_rate": 3.9892350883021366e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15991877019405365,
|
|
"step": 560,
|
|
"valid_targets_mean": 5252.5,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 0.919446704637917,
|
|
"grad_norm": 0.4953887074657764,
|
|
"learning_rate": 3.988378484945853e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13573208451271057,
|
|
"step": 565,
|
|
"valid_targets_mean": 4776.0,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 0.9275834011391375,
|
|
"grad_norm": 0.514455820774763,
|
|
"learning_rate": 3.987489191283894e-05,
|
|
"loss": 0.3416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1836441606283188,
|
|
"step": 570,
|
|
"valid_targets_mean": 7845.2,
|
|
"valid_targets_min": 4051
|
|
},
|
|
{
|
|
"epoch": 0.935720097640358,
|
|
"grad_norm": 0.6132417120301569,
|
|
"learning_rate": 3.9865672219368574e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20987677574157715,
|
|
"step": 575,
|
|
"valid_targets_mean": 7432.6,
|
|
"valid_targets_min": 3210
|
|
},
|
|
{
|
|
"epoch": 0.9438567941415785,
|
|
"grad_norm": 0.5225003165533024,
|
|
"learning_rate": 3.98561259206255e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13004842400550842,
|
|
"step": 580,
|
|
"valid_targets_mean": 4851.6,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 0.951993490642799,
|
|
"grad_norm": 0.4373690128216657,
|
|
"learning_rate": 3.984625317355743e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.152201846241951,
|
|
"step": 585,
|
|
"valid_targets_mean": 7664.9,
|
|
"valid_targets_min": 3641
|
|
},
|
|
{
|
|
"epoch": 0.9601301871440195,
|
|
"grad_norm": 0.47431856596663735,
|
|
"learning_rate": 3.983605414047908e-05,
|
|
"loss": 0.3609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21499404311180115,
|
|
"step": 590,
|
|
"valid_targets_mean": 7886.2,
|
|
"valid_targets_min": 2797
|
|
},
|
|
{
|
|
"epoch": 0.96826688364524,
|
|
"grad_norm": 0.5715947847050505,
|
|
"learning_rate": 3.982552898906956e-05,
|
|
"loss": 0.3562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20815590023994446,
|
|
"step": 595,
|
|
"valid_targets_mean": 5514.9,
|
|
"valid_targets_min": 2081
|
|
},
|
|
{
|
|
"epoch": 0.9764035801464606,
|
|
"grad_norm": 0.6400810920406435,
|
|
"learning_rate": 3.981467789236958e-05,
|
|
"loss": 0.3419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1813117265701294,
|
|
"step": 600,
|
|
"valid_targets_mean": 5263.9,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 0.9845402766476811,
|
|
"grad_norm": 0.507069029775432,
|
|
"learning_rate": 3.98035010287786e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15920934081077576,
|
|
"step": 605,
|
|
"valid_targets_mean": 6134.8,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 0.9926769731489016,
|
|
"grad_norm": 0.5784988986931031,
|
|
"learning_rate": 3.979199858205192e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16542240977287292,
|
|
"step": 610,
|
|
"valid_targets_mean": 5547.0,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.907765371978957,
|
|
"learning_rate": 3.9780170741297655e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36365807056427,
|
|
"step": 615,
|
|
"valid_targets_mean": 4512.4,
|
|
"valid_targets_min": 2120
|
|
},
|
|
{
|
|
"epoch": 1.0081366965012204,
|
|
"grad_norm": 0.5028868621668272,
|
|
"learning_rate": 3.976801770097361e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1552557647228241,
|
|
"step": 620,
|
|
"valid_targets_mean": 6936.5,
|
|
"valid_targets_min": 4152
|
|
},
|
|
{
|
|
"epoch": 1.016273393002441,
|
|
"grad_norm": 0.5068697229571411,
|
|
"learning_rate": 3.975553966088412e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16703470051288605,
|
|
"step": 625,
|
|
"valid_targets_mean": 6322.5,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 1.0244100895036614,
|
|
"grad_norm": 0.6214780240503095,
|
|
"learning_rate": 3.9742736826176706e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19309839606285095,
|
|
"step": 630,
|
|
"valid_targets_mean": 5129.1,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 1.032546786004882,
|
|
"grad_norm": 0.5206937707201459,
|
|
"learning_rate": 3.9729609407338745e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1733720600605011,
|
|
"step": 635,
|
|
"valid_targets_mean": 5940.8,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 1.0406834825061024,
|
|
"grad_norm": 0.558975152851932,
|
|
"learning_rate": 3.971615762019401e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17723140120506287,
|
|
"step": 640,
|
|
"valid_targets_mean": 5387.9,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 1.048820179007323,
|
|
"grad_norm": 0.52042070753551,
|
|
"learning_rate": 3.970238168589911e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16863638162612915,
|
|
"step": 645,
|
|
"valid_targets_mean": 5446.0,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 1.0569568755085434,
|
|
"grad_norm": 0.52743590283002,
|
|
"learning_rate": 3.968828183093984e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1578497290611267,
|
|
"step": 650,
|
|
"valid_targets_mean": 6333.2,
|
|
"valid_targets_min": 3859
|
|
},
|
|
{
|
|
"epoch": 1.065093572009764,
|
|
"grad_norm": 0.5709713263916416,
|
|
"learning_rate": 3.9673858287127484e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13569536805152893,
|
|
"step": 655,
|
|
"valid_targets_mean": 6296.2,
|
|
"valid_targets_min": 4446
|
|
},
|
|
{
|
|
"epoch": 1.0732302685109845,
|
|
"grad_norm": 0.6034080046446395,
|
|
"learning_rate": 3.965911129159501e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1853494793176651,
|
|
"step": 660,
|
|
"valid_targets_mean": 5594.5,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 1.081366965012205,
|
|
"grad_norm": 0.6783173009548439,
|
|
"learning_rate": 3.9644041086793115e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2208280861377716,
|
|
"step": 665,
|
|
"valid_targets_mean": 5207.0,
|
|
"valid_targets_min": 2639
|
|
},
|
|
{
|
|
"epoch": 1.0895036615134255,
|
|
"grad_norm": 0.7073358550878341,
|
|
"learning_rate": 3.9628647920486313e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11236638575792313,
|
|
"step": 670,
|
|
"valid_targets_mean": 4536.8,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 1.097640358014646,
|
|
"grad_norm": 0.5573022103538503,
|
|
"learning_rate": 3.961293204574881e-05,
|
|
"loss": 0.3426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20048069953918457,
|
|
"step": 675,
|
|
"valid_targets_mean": 6317.4,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 1.1057770545158665,
|
|
"grad_norm": 0.6274818878999071,
|
|
"learning_rate": 3.959689372096034e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19005917012691498,
|
|
"step": 680,
|
|
"valid_targets_mean": 6330.2,
|
|
"valid_targets_min": 3816
|
|
},
|
|
{
|
|
"epoch": 1.1139137510170871,
|
|
"grad_norm": 0.6054646269903999,
|
|
"learning_rate": 3.9580533209802e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16152934730052948,
|
|
"step": 685,
|
|
"valid_targets_mean": 5820.4,
|
|
"valid_targets_min": 4284
|
|
},
|
|
{
|
|
"epoch": 1.1220504475183075,
|
|
"grad_norm": 0.5033565392236059,
|
|
"learning_rate": 3.9563850781251785e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17019686102867126,
|
|
"step": 690,
|
|
"valid_targets_mean": 5422.2,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 1.1301871440195281,
|
|
"grad_norm": 0.6054316108985657,
|
|
"learning_rate": 3.954684670958027e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1751125305891037,
|
|
"step": 695,
|
|
"valid_targets_mean": 5926.9,
|
|
"valid_targets_min": 2434
|
|
},
|
|
{
|
|
"epoch": 1.1383238405207485,
|
|
"grad_norm": 0.5215439424842706,
|
|
"learning_rate": 3.9529521274346036e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1284661889076233,
|
|
"step": 700,
|
|
"valid_targets_mean": 5178.6,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 1.1464605370219692,
|
|
"grad_norm": 0.5678065554082121,
|
|
"learning_rate": 3.951187476039114e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.209128275513649,
|
|
"step": 705,
|
|
"valid_targets_mean": 6623.4,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 1.1545972335231895,
|
|
"grad_norm": 0.517064905402786,
|
|
"learning_rate": 3.9493907457836355e-05,
|
|
"loss": 0.3157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1468493938446045,
|
|
"step": 710,
|
|
"valid_targets_mean": 5925.8,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 1.1627339300244102,
|
|
"grad_norm": 0.5878396139579618,
|
|
"learning_rate": 3.947561966207646e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23921839892864227,
|
|
"step": 715,
|
|
"valid_targets_mean": 7141.8,
|
|
"valid_targets_min": 4723
|
|
},
|
|
{
|
|
"epoch": 1.1708706265256306,
|
|
"grad_norm": 0.6620036041434723,
|
|
"learning_rate": 3.945701167377537e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856687366962433,
|
|
"step": 720,
|
|
"valid_targets_mean": 5410.5,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 1.1790073230268512,
|
|
"grad_norm": 0.6212164485855235,
|
|
"learning_rate": 3.9438083798861145e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14771541953086853,
|
|
"step": 725,
|
|
"valid_targets_mean": 3694.8,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 1.1871440195280716,
|
|
"grad_norm": 0.5515816569814562,
|
|
"learning_rate": 3.9418836348521045e-05,
|
|
"loss": 0.3499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17307670414447784,
|
|
"step": 730,
|
|
"valid_targets_mean": 5327.5,
|
|
"valid_targets_min": 3287
|
|
},
|
|
{
|
|
"epoch": 1.1952807160292922,
|
|
"grad_norm": 0.5104440783845825,
|
|
"learning_rate": 3.939926963919635e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1218840628862381,
|
|
"step": 735,
|
|
"valid_targets_mean": 6123.0,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 1.2034174125305126,
|
|
"grad_norm": 0.4651258970343293,
|
|
"learning_rate": 3.9379383992577166e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14117637276649475,
|
|
"step": 740,
|
|
"valid_targets_mean": 5320.9,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 1.211554109031733,
|
|
"grad_norm": 0.5744345931767872,
|
|
"learning_rate": 3.9359179735597174e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1502009928226471,
|
|
"step": 745,
|
|
"valid_targets_mean": 5779.2,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 1.2196908055329536,
|
|
"grad_norm": 0.5866014778192581,
|
|
"learning_rate": 3.9338657200428215e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15397050976753235,
|
|
"step": 750,
|
|
"valid_targets_mean": 5608.9,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 1.2278275020341742,
|
|
"grad_norm": 0.49106387973213833,
|
|
"learning_rate": 3.931781672447482e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1318609118461609,
|
|
"step": 755,
|
|
"valid_targets_mean": 5518.1,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 1.2359641985353946,
|
|
"grad_norm": 0.535746593811322,
|
|
"learning_rate": 3.9296658650368707e-05,
|
|
"loss": 0.3423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20755848288536072,
|
|
"step": 760,
|
|
"valid_targets_mean": 6305.9,
|
|
"valid_targets_min": 2592
|
|
},
|
|
{
|
|
"epoch": 1.244100895036615,
|
|
"grad_norm": 0.7756294072634289,
|
|
"learning_rate": 3.927518332596313e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17277516424655914,
|
|
"step": 765,
|
|
"valid_targets_mean": 5248.5,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 1.2522375915378356,
|
|
"grad_norm": 0.4743651921197624,
|
|
"learning_rate": 3.925339110432716e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13423749804496765,
|
|
"step": 770,
|
|
"valid_targets_mean": 6439.6,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 1.2603742880390563,
|
|
"grad_norm": 0.5214220787724717,
|
|
"learning_rate": 3.923128234373984e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18879708647727966,
|
|
"step": 775,
|
|
"valid_targets_mean": 7090.8,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 1.2685109845402767,
|
|
"grad_norm": 0.5794444612757781,
|
|
"learning_rate": 3.9208857407684356e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12169642746448517,
|
|
"step": 780,
|
|
"valid_targets_mean": 3770.6,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 1.276647681041497,
|
|
"grad_norm": 0.9435889725374784,
|
|
"learning_rate": 3.918611666484205e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18906664848327637,
|
|
"step": 785,
|
|
"valid_targets_mean": 8074.6,
|
|
"valid_targets_min": 4774
|
|
},
|
|
{
|
|
"epoch": 1.2847843775427177,
|
|
"grad_norm": 0.47698101778309643,
|
|
"learning_rate": 3.9163060489086305e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338808387517929,
|
|
"step": 790,
|
|
"valid_targets_mean": 5215.4,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 1.292921074043938,
|
|
"grad_norm": 0.5413947969675581,
|
|
"learning_rate": 3.913968925947647e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09044672548770905,
|
|
"step": 795,
|
|
"valid_targets_mean": 5740.1,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 1.3010577705451587,
|
|
"grad_norm": 0.4708777951982262,
|
|
"learning_rate": 3.91160033602516e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14017099142074585,
|
|
"step": 800,
|
|
"valid_targets_mean": 5497.0,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 1.309194467046379,
|
|
"grad_norm": 0.49062883161248416,
|
|
"learning_rate": 3.909200318082409e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13735032081604004,
|
|
"step": 805,
|
|
"valid_targets_mean": 7376.5,
|
|
"valid_targets_min": 2311
|
|
},
|
|
{
|
|
"epoch": 1.3173311635475997,
|
|
"grad_norm": 0.5599551574609924,
|
|
"learning_rate": 3.906768911577337e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14982812106609344,
|
|
"step": 810,
|
|
"valid_targets_mean": 5673.0,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 1.3254678600488201,
|
|
"grad_norm": 0.5614503755525145,
|
|
"learning_rate": 3.9043061564839325e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14781512320041656,
|
|
"step": 815,
|
|
"valid_targets_mean": 4763.9,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 1.3336045565500407,
|
|
"grad_norm": 0.4786045979460182,
|
|
"learning_rate": 3.901812093291579e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15768522024154663,
|
|
"step": 820,
|
|
"valid_targets_mean": 6786.4,
|
|
"valid_targets_min": 2828
|
|
},
|
|
{
|
|
"epoch": 1.3417412530512611,
|
|
"grad_norm": 0.6560977260032284,
|
|
"learning_rate": 3.8992867630043855e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19212278723716736,
|
|
"step": 825,
|
|
"valid_targets_mean": 6104.2,
|
|
"valid_targets_min": 2648
|
|
},
|
|
{
|
|
"epoch": 1.3498779495524817,
|
|
"grad_norm": 0.6879274724026825,
|
|
"learning_rate": 3.896730207140512e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1794552505016327,
|
|
"step": 830,
|
|
"valid_targets_mean": 4482.6,
|
|
"valid_targets_min": 2414
|
|
},
|
|
{
|
|
"epoch": 1.3580146460537021,
|
|
"grad_norm": 0.5203653762780058,
|
|
"learning_rate": 3.894142467731492e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13266748189926147,
|
|
"step": 835,
|
|
"valid_targets_mean": 5549.0,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 1.3661513425549228,
|
|
"grad_norm": 0.5135176764267754,
|
|
"learning_rate": 3.891523587321534e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13733965158462524,
|
|
"step": 840,
|
|
"valid_targets_mean": 4990.9,
|
|
"valid_targets_min": 975
|
|
},
|
|
{
|
|
"epoch": 1.3742880390561432,
|
|
"grad_norm": 0.550811284010829,
|
|
"learning_rate": 3.888873608966828e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17198342084884644,
|
|
"step": 845,
|
|
"valid_targets_mean": 6360.2,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 1.3824247355573638,
|
|
"grad_norm": 0.46756550131963104,
|
|
"learning_rate": 3.886192576234836e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16204015910625458,
|
|
"step": 850,
|
|
"valid_targets_mean": 6772.0,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 1.3905614320585842,
|
|
"grad_norm": 0.7407603671535479,
|
|
"learning_rate": 3.883480533203574e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13542041182518005,
|
|
"step": 855,
|
|
"valid_targets_mean": 5719.1,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 1.3986981285598048,
|
|
"grad_norm": 0.49156469399841785,
|
|
"learning_rate": 3.880737524460888e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15303273499011993,
|
|
"step": 860,
|
|
"valid_targets_mean": 5483.2,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 1.4068348250610252,
|
|
"grad_norm": 0.6176037825800013,
|
|
"learning_rate": 3.877963595103725e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16262325644493103,
|
|
"step": 865,
|
|
"valid_targets_mean": 5055.8,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 1.4149715215622458,
|
|
"grad_norm": 0.48552964622802863,
|
|
"learning_rate": 3.875158790737383e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07722240686416626,
|
|
"step": 870,
|
|
"valid_targets_mean": 5601.2,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 1.4231082180634662,
|
|
"grad_norm": 0.5920608654664343,
|
|
"learning_rate": 3.87232315747477e-05,
|
|
"loss": 0.3416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2405494749546051,
|
|
"step": 875,
|
|
"valid_targets_mean": 5470.0,
|
|
"valid_targets_min": 3268
|
|
},
|
|
{
|
|
"epoch": 1.4312449145646866,
|
|
"grad_norm": 0.5377494420110379,
|
|
"learning_rate": 3.8694567419356414e-05,
|
|
"loss": 0.3561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14981283247470856,
|
|
"step": 880,
|
|
"valid_targets_mean": 6194.1,
|
|
"valid_targets_min": 4085
|
|
},
|
|
{
|
|
"epoch": 1.4393816110659072,
|
|
"grad_norm": 0.5103348467193803,
|
|
"learning_rate": 3.8665595912458346e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11243285983800888,
|
|
"step": 885,
|
|
"valid_targets_mean": 4186.1,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 1.4475183075671278,
|
|
"grad_norm": 0.49573235370231433,
|
|
"learning_rate": 3.863631753036492e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15874309837818146,
|
|
"step": 890,
|
|
"valid_targets_mean": 5946.5,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 1.4556550040683482,
|
|
"grad_norm": 0.6018434178435743,
|
|
"learning_rate": 3.860673275443283e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1639241725206375,
|
|
"step": 895,
|
|
"valid_targets_mean": 4885.0,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 1.4637917005695686,
|
|
"grad_norm": 0.6566529428934831,
|
|
"learning_rate": 3.857684207105606e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16956907510757446,
|
|
"step": 900,
|
|
"valid_targets_mean": 5790.5,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 1.4719283970707893,
|
|
"grad_norm": 0.6479521917332159,
|
|
"learning_rate": 3.854664597165795e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18413260579109192,
|
|
"step": 905,
|
|
"valid_targets_mean": 4738.2,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 1.4800650935720099,
|
|
"grad_norm": 0.6614607029109295,
|
|
"learning_rate": 3.851614495268308e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12402161210775375,
|
|
"step": 910,
|
|
"valid_targets_mean": 6095.1,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 1.4882017900732303,
|
|
"grad_norm": 0.5271086668666808,
|
|
"learning_rate": 3.848533951558912e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1627025306224823,
|
|
"step": 915,
|
|
"valid_targets_mean": 5300.0,
|
|
"valid_targets_min": 3448
|
|
},
|
|
{
|
|
"epoch": 1.4963384865744507,
|
|
"grad_norm": 0.4963158372629004,
|
|
"learning_rate": 3.845423016683856e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14617714285850525,
|
|
"step": 920,
|
|
"valid_targets_mean": 5295.8,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 1.5044751830756713,
|
|
"grad_norm": 0.5240147926067843,
|
|
"learning_rate": 3.842281741789044e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17961186170578003,
|
|
"step": 925,
|
|
"valid_targets_mean": 6973.5,
|
|
"valid_targets_min": 3698
|
|
},
|
|
{
|
|
"epoch": 1.512611879576892,
|
|
"grad_norm": 0.5416943235639524,
|
|
"learning_rate": 3.839110178519189e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16294705867767334,
|
|
"step": 930,
|
|
"valid_targets_mean": 5591.8,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 1.5207485760781123,
|
|
"grad_norm": 0.5293043927321138,
|
|
"learning_rate": 3.835908379016966e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18354995548725128,
|
|
"step": 935,
|
|
"valid_targets_mean": 6090.0,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 1.5288852725793327,
|
|
"grad_norm": 0.6655706480297668,
|
|
"learning_rate": 3.832676395922153e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1539248526096344,
|
|
"step": 940,
|
|
"valid_targets_mean": 5697.2,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 1.5370219690805533,
|
|
"grad_norm": 0.4426293049857693,
|
|
"learning_rate": 3.82941428237077e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14680266380310059,
|
|
"step": 945,
|
|
"valid_targets_mean": 7124.8,
|
|
"valid_targets_min": 2558
|
|
},
|
|
{
|
|
"epoch": 1.545158665581774,
|
|
"grad_norm": 0.5298814060383792,
|
|
"learning_rate": 3.826122091994198e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19289115071296692,
|
|
"step": 950,
|
|
"valid_targets_mean": 6667.8,
|
|
"valid_targets_min": 4416
|
|
},
|
|
{
|
|
"epoch": 1.5532953620829943,
|
|
"grad_norm": 0.5978964272190899,
|
|
"learning_rate": 3.822799878918307e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1660599559545517,
|
|
"step": 955,
|
|
"valid_targets_mean": 5468.9,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 1.5614320585842147,
|
|
"grad_norm": 0.561888873495537,
|
|
"learning_rate": 3.8194476977625556e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16075164079666138,
|
|
"step": 960,
|
|
"valid_targets_mean": 5366.9,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 1.5695687550854354,
|
|
"grad_norm": 0.4590821633958237,
|
|
"learning_rate": 3.8160656036391024e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1680944561958313,
|
|
"step": 965,
|
|
"valid_targets_mean": 5881.0,
|
|
"valid_targets_min": 1981
|
|
},
|
|
{
|
|
"epoch": 1.577705451586656,
|
|
"grad_norm": 0.5016615802728864,
|
|
"learning_rate": 3.812653652151893e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13341429829597473,
|
|
"step": 970,
|
|
"valid_targets_mean": 5605.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.5858421480878762,
|
|
"grad_norm": 0.5543813724974704,
|
|
"learning_rate": 3.809211899395749e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14888155460357666,
|
|
"step": 975,
|
|
"valid_targets_mean": 4731.4,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 1.5939788445890968,
|
|
"grad_norm": 0.6038340910652956,
|
|
"learning_rate": 3.8057404019554464e-05,
|
|
"loss": 0.3164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20221802592277527,
|
|
"step": 980,
|
|
"valid_targets_mean": 5403.4,
|
|
"valid_targets_min": 2495
|
|
},
|
|
{
|
|
"epoch": 1.6021155410903174,
|
|
"grad_norm": 0.5913190155057948,
|
|
"learning_rate": 3.802239216904782e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12410770356655121,
|
|
"step": 985,
|
|
"valid_targets_mean": 3962.5,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.6102522375915378,
|
|
"grad_norm": 0.6202617073499859,
|
|
"learning_rate": 3.79870840180564e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21062825620174408,
|
|
"step": 990,
|
|
"valid_targets_mean": 5342.4,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 1.6183889340927582,
|
|
"grad_norm": 0.42689752582314566,
|
|
"learning_rate": 3.795148014707042e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13294441998004913,
|
|
"step": 995,
|
|
"valid_targets_mean": 6876.8,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 1.6265256305939788,
|
|
"grad_norm": 0.5097378796851677,
|
|
"learning_rate": 3.791558114144192e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1804441213607788,
|
|
"step": 1000,
|
|
"valid_targets_mean": 5285.4,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.6346623270951994,
|
|
"grad_norm": 0.6005808241710359,
|
|
"learning_rate": 3.7879387591375174e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1667994111776352,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4509.6,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 1.6427990235964198,
|
|
"grad_norm": 0.5231818030054007,
|
|
"learning_rate": 3.7842900091916956e-05,
|
|
"loss": 0.3145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13126087188720703,
|
|
"step": 1010,
|
|
"valid_targets_mean": 4980.8,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 1.6509357200976402,
|
|
"grad_norm": 0.5627337448855949,
|
|
"learning_rate": 3.7806119242946785e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15183790028095245,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4827.0,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 1.6590724165988608,
|
|
"grad_norm": 0.5414919926293886,
|
|
"learning_rate": 3.7769045649167034e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13620023429393768,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5039.8,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 1.6672091131000815,
|
|
"grad_norm": 0.7328913562496312,
|
|
"learning_rate": 3.7731679920093e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1815638542175293,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5956.9,
|
|
"valid_targets_min": 3557
|
|
},
|
|
{
|
|
"epoch": 1.6753458096013019,
|
|
"grad_norm": 0.4817503231027874,
|
|
"learning_rate": 3.7694022670042894e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1990923136472702,
|
|
"step": 1030,
|
|
"valid_targets_mean": 8180.1,
|
|
"valid_targets_min": 2581
|
|
},
|
|
{
|
|
"epoch": 1.6834825061025223,
|
|
"grad_norm": 0.5371824559449501,
|
|
"learning_rate": 3.765607451812773e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14338502287864685,
|
|
"step": 1035,
|
|
"valid_targets_mean": 6586.2,
|
|
"valid_targets_min": 2170
|
|
},
|
|
{
|
|
"epoch": 1.6916192026037429,
|
|
"grad_norm": 0.5238554557666614,
|
|
"learning_rate": 3.7617836088241144e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17069703340530396,
|
|
"step": 1040,
|
|
"valid_targets_mean": 7230.1,
|
|
"valid_targets_min": 4015
|
|
},
|
|
{
|
|
"epoch": 1.6997558991049635,
|
|
"grad_norm": 0.5512994537952781,
|
|
"learning_rate": 3.757930800904914e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18536214530467987,
|
|
"step": 1045,
|
|
"valid_targets_mean": 5432.5,
|
|
"valid_targets_min": 2706
|
|
},
|
|
{
|
|
"epoch": 1.707892595606184,
|
|
"grad_norm": 0.48107844360810964,
|
|
"learning_rate": 3.754049091397976e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14424890279769897,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5978.6,
|
|
"valid_targets_min": 2666
|
|
},
|
|
{
|
|
"epoch": 1.7160292921074043,
|
|
"grad_norm": 0.6075024591611888,
|
|
"learning_rate": 3.7501385441212664e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17810899019241333,
|
|
"step": 1055,
|
|
"valid_targets_mean": 4907.1,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 1.724165988608625,
|
|
"grad_norm": 0.5996016937895567,
|
|
"learning_rate": 3.746199223366863e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1289026141166687,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5785.2,
|
|
"valid_targets_min": 2095
|
|
},
|
|
{
|
|
"epoch": 1.7323026851098455,
|
|
"grad_norm": 0.5765570637174628,
|
|
"learning_rate": 3.7422311938999013e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16623811423778534,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4367.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 1.740439381611066,
|
|
"grad_norm": 0.5940920131339937,
|
|
"learning_rate": 3.738234520957506e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19219470024108887,
|
|
"step": 1070,
|
|
"valid_targets_mean": 6104.2,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 1.7485760781122863,
|
|
"grad_norm": 0.5700715611730844,
|
|
"learning_rate": 3.73420927024772e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19549089670181274,
|
|
"step": 1075,
|
|
"valid_targets_mean": 5461.0,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 1.756712774613507,
|
|
"grad_norm": 0.7143078330637325,
|
|
"learning_rate": 3.730155507948426e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13924923539161682,
|
|
"step": 1080,
|
|
"valid_targets_mean": 5168.4,
|
|
"valid_targets_min": 2990
|
|
},
|
|
{
|
|
"epoch": 1.7648494711147276,
|
|
"grad_norm": 0.5056362879775232,
|
|
"learning_rate": 3.726073300706256e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2037101835012436,
|
|
"step": 1085,
|
|
"valid_targets_mean": 7316.2,
|
|
"valid_targets_min": 4681
|
|
},
|
|
{
|
|
"epoch": 1.772986167615948,
|
|
"grad_norm": 0.6236146764427375,
|
|
"learning_rate": 3.721962715635495e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10541586577892303,
|
|
"step": 1090,
|
|
"valid_targets_mean": 6140.2,
|
|
"valid_targets_min": 2053
|
|
},
|
|
{
|
|
"epoch": 1.7811228641171684,
|
|
"grad_norm": 0.4808526495829943,
|
|
"learning_rate": 3.7178238203169804e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11800515651702881,
|
|
"step": 1095,
|
|
"valid_targets_mean": 5675.6,
|
|
"valid_targets_min": 3375
|
|
},
|
|
{
|
|
"epoch": 1.789259560618389,
|
|
"grad_norm": 0.5465172567640042,
|
|
"learning_rate": 3.7136566827969895e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11254914849996567,
|
|
"step": 1100,
|
|
"valid_targets_mean": 4710.8,
|
|
"valid_targets_min": 3037
|
|
},
|
|
{
|
|
"epoch": 1.7973962571196096,
|
|
"grad_norm": 0.6709120787275277,
|
|
"learning_rate": 3.70946137158612e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1213892251253128,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4375.8,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 1.80553295362083,
|
|
"grad_norm": 0.5484870209932681,
|
|
"learning_rate": 3.705237955658166e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12435522675514221,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4072.5,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 1.8136696501220504,
|
|
"grad_norm": 0.5750687925217363,
|
|
"learning_rate": 3.70098650444898e-05,
|
|
"loss": 0.3174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17003116011619568,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5343.2,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 1.821806346623271,
|
|
"grad_norm": 0.546589915088346,
|
|
"learning_rate": 3.6967070878553346e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13826923072338104,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5285.4,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 1.8299430431244914,
|
|
"grad_norm": 0.4773415462163218,
|
|
"learning_rate": 3.692399776233775e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16702383756637573,
|
|
"step": 1125,
|
|
"valid_targets_mean": 6414.5,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 1.8380797396257118,
|
|
"grad_norm": 0.5328441386714764,
|
|
"learning_rate": 3.688064640399456e-05,
|
|
"loss": 0.3206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19889968633651733,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5738.4,
|
|
"valid_targets_min": 2918
|
|
},
|
|
{
|
|
"epoch": 1.8462164361269324,
|
|
"grad_norm": 0.6976118740867715,
|
|
"learning_rate": 3.683701751624983e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18962863087654114,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4923.5,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 1.854353132628153,
|
|
"grad_norm": 0.5304211947920925,
|
|
"learning_rate": 3.67931118163924e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14399157464504242,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4750.4,
|
|
"valid_targets_min": 3074
|
|
},
|
|
{
|
|
"epoch": 1.8624898291293734,
|
|
"grad_norm": 0.5257505288021834,
|
|
"learning_rate": 3.674893002626208e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1668822169303894,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4587.6,
|
|
"valid_targets_min": 2853
|
|
},
|
|
{
|
|
"epoch": 1.8706265256305938,
|
|
"grad_norm": 0.5378879554200597,
|
|
"learning_rate": 3.6704472872237786e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13983070850372314,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4616.9,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 1.8787632221318145,
|
|
"grad_norm": 0.4433938061464499,
|
|
"learning_rate": 3.665974108522562e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13427463173866272,
|
|
"step": 1155,
|
|
"valid_targets_mean": 6249.2,
|
|
"valid_targets_min": 4339
|
|
},
|
|
{
|
|
"epoch": 1.886899918633035,
|
|
"grad_norm": 0.5206360977809323,
|
|
"learning_rate": 3.6614735400646824e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16344305872917175,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5424.0,
|
|
"valid_targets_min": 1984
|
|
},
|
|
{
|
|
"epoch": 1.8950366151342555,
|
|
"grad_norm": 0.7386914345471229,
|
|
"learning_rate": 3.6569456558425724e-05,
|
|
"loss": 0.3082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1590915322303772,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4894.9,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 1.9031733116354759,
|
|
"grad_norm": 0.5227919706663646,
|
|
"learning_rate": 3.6523905302977524e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14777085185050964,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5690.5,
|
|
"valid_targets_min": 2053
|
|
},
|
|
{
|
|
"epoch": 1.9113100081366965,
|
|
"grad_norm": 0.4725060339813615,
|
|
"learning_rate": 3.64780823831961e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11375989019870758,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5967.5,
|
|
"valid_targets_min": 2992
|
|
},
|
|
{
|
|
"epoch": 1.9194467046379171,
|
|
"grad_norm": 0.5531722742216661,
|
|
"learning_rate": 3.643198855244167e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16253279149532318,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4639.1,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 1.9275834011391375,
|
|
"grad_norm": 0.5643086611406565,
|
|
"learning_rate": 3.6385624568528424e-05,
|
|
"loss": 0.341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2203579545021057,
|
|
"step": 1185,
|
|
"valid_targets_mean": 5916.4,
|
|
"valid_targets_min": 2444
|
|
},
|
|
{
|
|
"epoch": 1.935720097640358,
|
|
"grad_norm": 0.5188039070960107,
|
|
"learning_rate": 3.6338991193712045e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15397492051124573,
|
|
"step": 1190,
|
|
"valid_targets_mean": 5445.5,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 1.9438567941415785,
|
|
"grad_norm": 0.5944670726923176,
|
|
"learning_rate": 3.629208919467718e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10873277485370636,
|
|
"step": 1195,
|
|
"valid_targets_mean": 5434.2,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 1.9519934906427991,
|
|
"grad_norm": 0.5820905612331527,
|
|
"learning_rate": 3.624491934252487e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17353525757789612,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5973.5,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 1.9601301871440195,
|
|
"grad_norm": 0.5328994536277564,
|
|
"learning_rate": 3.619748241275981e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15726155042648315,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5639.8,
|
|
"valid_targets_min": 2308
|
|
},
|
|
{
|
|
"epoch": 1.96826688364524,
|
|
"grad_norm": 0.5129359181321838,
|
|
"learning_rate": 3.614977918527767e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18390318751335144,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5109.2,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 1.9764035801464606,
|
|
"grad_norm": 0.5994511326540063,
|
|
"learning_rate": 3.610181044435221e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13535848259925842,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3102.2,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.9845402766476812,
|
|
"grad_norm": 0.4870695756000568,
|
|
"learning_rate": 3.605357697862242e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12981772422790527,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5653.2,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 1.9926769731489016,
|
|
"grad_norm": 0.43943406829275067,
|
|
"learning_rate": 3.6005079581079545e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13087867200374603,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5805.1,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.9516475150367983,
|
|
"learning_rate": 3.595631904905406e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887159287929535,
|
|
"step": 1230,
|
|
"valid_targets_mean": 6080.8,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 2.0081366965012206,
|
|
"grad_norm": 0.6856718094729896,
|
|
"learning_rate": 3.590729618420255e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12657839059829712,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4212.5,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 2.016273393002441,
|
|
"grad_norm": 0.558226531525447,
|
|
"learning_rate": 3.585801179249452e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1799025982618332,
|
|
"step": 1240,
|
|
"valid_targets_mean": 7292.6,
|
|
"valid_targets_min": 3950
|
|
},
|
|
{
|
|
"epoch": 2.0244100895036614,
|
|
"grad_norm": 0.5075555500011202,
|
|
"learning_rate": 3.5808466684199166e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15327878296375275,
|
|
"step": 1245,
|
|
"valid_targets_mean": 6603.9,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 2.032546786004882,
|
|
"grad_norm": 0.6541052075091615,
|
|
"learning_rate": 3.575866167387204e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10481029748916626,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4328.6,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 2.0406834825061027,
|
|
"grad_norm": 0.5007902862675059,
|
|
"learning_rate": 3.570859758034165e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11358005553483963,
|
|
"step": 1255,
|
|
"valid_targets_mean": 6206.4,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 2.048820179007323,
|
|
"grad_norm": 0.4639873782398551,
|
|
"learning_rate": 3.565827522669605e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18322622776031494,
|
|
"step": 1260,
|
|
"valid_targets_mean": 7581.5,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 2.0569568755085434,
|
|
"grad_norm": 0.47229568303905795,
|
|
"learning_rate": 3.5607695440269214e-05,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11020516604185104,
|
|
"step": 1265,
|
|
"valid_targets_mean": 5878.0,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 2.065093572009764,
|
|
"grad_norm": 0.4606387724405939,
|
|
"learning_rate": 3.555685905262751e-05,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12334157526493073,
|
|
"step": 1270,
|
|
"valid_targets_mean": 7156.1,
|
|
"valid_targets_min": 3306
|
|
},
|
|
{
|
|
"epoch": 2.0732302685109847,
|
|
"grad_norm": 0.5568749724181045,
|
|
"learning_rate": 3.5505766899556026e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18671417236328125,
|
|
"step": 1275,
|
|
"valid_targets_mean": 5929.2,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 2.081366965012205,
|
|
"grad_norm": 0.5320198579082824,
|
|
"learning_rate": 3.5454419821044786e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1624656319618225,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5359.9,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 2.0895036615134255,
|
|
"grad_norm": 0.4946581187629097,
|
|
"learning_rate": 3.540281866127496e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.113202303647995,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4963.8,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 2.097640358014646,
|
|
"grad_norm": 0.5314804091550819,
|
|
"learning_rate": 3.5350964268605006e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1298123598098755,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5789.2,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 2.1057770545158667,
|
|
"grad_norm": 0.4608248324996564,
|
|
"learning_rate": 3.5298857495556684e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09899343550205231,
|
|
"step": 1295,
|
|
"valid_targets_mean": 6090.4,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 2.113913751017087,
|
|
"grad_norm": 0.6160654304320085,
|
|
"learning_rate": 3.524649919880108e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1561860293149948,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5319.5,
|
|
"valid_targets_min": 2221
|
|
},
|
|
{
|
|
"epoch": 2.1220504475183075,
|
|
"grad_norm": 0.6034183220777789,
|
|
"learning_rate": 3.519389023914449e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1617409586906433,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4925.8,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 2.130187144019528,
|
|
"grad_norm": 0.5445584399411685,
|
|
"learning_rate": 3.5141031481514276e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15059779584407806,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4658.4,
|
|
"valid_targets_min": 2734
|
|
},
|
|
{
|
|
"epoch": 2.1383238405207488,
|
|
"grad_norm": 0.5409881257424901,
|
|
"learning_rate": 3.508792379494468e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1463170051574707,
|
|
"step": 1315,
|
|
"valid_targets_mean": 5791.5,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 2.146460537021969,
|
|
"grad_norm": 1.106581439320117,
|
|
"learning_rate": 3.503456805256246e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1498536616563797,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5623.2,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 2.1545972335231895,
|
|
"grad_norm": 0.4584831942256986,
|
|
"learning_rate": 3.4980965131572616e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14147350192070007,
|
|
"step": 1325,
|
|
"valid_targets_mean": 5904.6,
|
|
"valid_targets_min": 3878
|
|
},
|
|
{
|
|
"epoch": 2.16273393002441,
|
|
"grad_norm": 0.4492273639183458,
|
|
"learning_rate": 3.492711591324392e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14748775959014893,
|
|
"step": 1330,
|
|
"valid_targets_mean": 5023.4,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 2.170870626525631,
|
|
"grad_norm": 0.5811998512861577,
|
|
"learning_rate": 3.487302128289445e-05,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11501553654670715,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2852.8,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 2.179007323026851,
|
|
"grad_norm": 0.4764245873738506,
|
|
"learning_rate": 3.481868212987702e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383422166109085,
|
|
"step": 1340,
|
|
"valid_targets_mean": 6408.1,
|
|
"valid_targets_min": 2128
|
|
},
|
|
{
|
|
"epoch": 2.1871440195280716,
|
|
"grad_norm": 0.48558055183022075,
|
|
"learning_rate": 3.476409934756456e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1000407487154007,
|
|
"step": 1345,
|
|
"valid_targets_mean": 5105.9,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 2.195280716029292,
|
|
"grad_norm": 0.5180718638517471,
|
|
"learning_rate": 3.470927383333544e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10666623711585999,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5258.9,
|
|
"valid_targets_min": 3522
|
|
},
|
|
{
|
|
"epoch": 2.203417412530513,
|
|
"grad_norm": 0.5400230528001939,
|
|
"learning_rate": 3.46542064885587e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12988999485969543,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4880.5,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 2.211554109031733,
|
|
"grad_norm": 0.45852170946065385,
|
|
"learning_rate": 3.459889821857926e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11199237406253815,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4982.0,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 2.2196908055329536,
|
|
"grad_norm": 0.5665127278933965,
|
|
"learning_rate": 3.4543349932702984e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18964138627052307,
|
|
"step": 1365,
|
|
"valid_targets_mean": 6463.2,
|
|
"valid_targets_min": 3622
|
|
},
|
|
{
|
|
"epoch": 2.2278275020341742,
|
|
"grad_norm": 0.5118619545403494,
|
|
"learning_rate": 3.448756254418179e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18084275722503662,
|
|
"step": 1370,
|
|
"valid_targets_mean": 5036.5,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 2.2359641985353944,
|
|
"grad_norm": 0.4567137661572972,
|
|
"learning_rate": 3.443153697019861e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13496790826320648,
|
|
"step": 1375,
|
|
"valid_targets_mean": 6337.4,
|
|
"valid_targets_min": 2816
|
|
},
|
|
{
|
|
"epoch": 2.244100895036615,
|
|
"grad_norm": 0.5129895131737416,
|
|
"learning_rate": 3.437527413185227e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15807993710041046,
|
|
"step": 1380,
|
|
"valid_targets_mean": 6726.0,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 2.2522375915378356,
|
|
"grad_norm": 0.658861425263113,
|
|
"learning_rate": 3.431877495414242e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.119639553129673,
|
|
"step": 1385,
|
|
"valid_targets_mean": 5690.6,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 2.2603742880390563,
|
|
"grad_norm": 0.5053124575310604,
|
|
"learning_rate": 3.42620403659543e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14893598854541779,
|
|
"step": 1390,
|
|
"valid_targets_mean": 5824.2,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 2.268510984540277,
|
|
"grad_norm": 0.5549205480957295,
|
|
"learning_rate": 3.420507130004341e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14574193954467773,
|
|
"step": 1395,
|
|
"valid_targets_mean": 5895.8,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 2.276647681041497,
|
|
"grad_norm": 0.5169854577549462,
|
|
"learning_rate": 3.414786869302029e-05,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1294897347688675,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4723.2,
|
|
"valid_targets_min": 3238
|
|
},
|
|
{
|
|
"epoch": 2.2847843775427177,
|
|
"grad_norm": 0.4968472508374019,
|
|
"learning_rate": 3.4090433485334996e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13790994882583618,
|
|
"step": 1405,
|
|
"valid_targets_mean": 6752.5,
|
|
"valid_targets_min": 2943
|
|
},
|
|
{
|
|
"epoch": 2.2929210740439383,
|
|
"grad_norm": 0.5185289577994608,
|
|
"learning_rate": 3.403276662126173e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1724536418914795,
|
|
"step": 1410,
|
|
"valid_targets_mean": 6443.1,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 2.3010577705451585,
|
|
"grad_norm": 0.5649680792199295,
|
|
"learning_rate": 3.397486904888328e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13218429684638977,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4330.9,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 2.309194467046379,
|
|
"grad_norm": 0.5035857907328071,
|
|
"learning_rate": 3.391674172007544e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1316101849079132,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4778.2,
|
|
"valid_targets_min": 2482
|
|
},
|
|
{
|
|
"epoch": 2.3173311635475997,
|
|
"grad_norm": 0.5303641033831891,
|
|
"learning_rate": 3.3858385590491347e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569986343383789,
|
|
"step": 1425,
|
|
"valid_targets_mean": 6596.2,
|
|
"valid_targets_min": 4475
|
|
},
|
|
{
|
|
"epoch": 2.3254678600488203,
|
|
"grad_norm": 0.48585758657147765,
|
|
"learning_rate": 3.379980161954578e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11201968044042587,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5466.8,
|
|
"valid_targets_min": 2472
|
|
},
|
|
{
|
|
"epoch": 2.3336045565500405,
|
|
"grad_norm": 0.4949945647060311,
|
|
"learning_rate": 3.3740990770399404e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15348434448242188,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5463.9,
|
|
"valid_targets_min": 2560
|
|
},
|
|
{
|
|
"epoch": 2.341741253051261,
|
|
"grad_norm": 0.51532488211287,
|
|
"learning_rate": 3.368195400994289e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14001132547855377,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5797.4,
|
|
"valid_targets_min": 3868
|
|
},
|
|
{
|
|
"epoch": 2.3498779495524817,
|
|
"grad_norm": 0.4954512879924778,
|
|
"learning_rate": 3.362269230878107e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684633195400238,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5250.2,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 2.3580146460537024,
|
|
"grad_norm": 0.5206552287941425,
|
|
"learning_rate": 3.356320664121694e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13183483481407166,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5600.0,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 2.3661513425549225,
|
|
"grad_norm": 0.5806830244648058,
|
|
"learning_rate": 3.350349798523566e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11899223923683167,
|
|
"step": 1455,
|
|
"valid_targets_mean": 4941.5,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 2.374288039056143,
|
|
"grad_norm": 0.4835797115209612,
|
|
"learning_rate": 3.344356732248849e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12682092189788818,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5932.6,
|
|
"valid_targets_min": 2233
|
|
},
|
|
{
|
|
"epoch": 2.382424735557364,
|
|
"grad_norm": 0.46420895136183626,
|
|
"learning_rate": 3.33834156382766e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1228257492184639,
|
|
"step": 1465,
|
|
"valid_targets_mean": 6454.4,
|
|
"valid_targets_min": 3734
|
|
},
|
|
{
|
|
"epoch": 2.3905614320585844,
|
|
"grad_norm": 0.4848046054819125,
|
|
"learning_rate": 3.332304392153494e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13247308135032654,
|
|
"step": 1470,
|
|
"valid_targets_mean": 5843.1,
|
|
"valid_targets_min": 3532
|
|
},
|
|
{
|
|
"epoch": 2.3986981285598046,
|
|
"grad_norm": 0.5322468446136851,
|
|
"learning_rate": 3.326245316481591e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14443229138851166,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5665.5,
|
|
"valid_targets_min": 2645
|
|
},
|
|
{
|
|
"epoch": 2.406834825061025,
|
|
"grad_norm": 0.4953219209876432,
|
|
"learning_rate": 3.320164436427311e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12060455232858658,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5264.1,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 2.414971521562246,
|
|
"grad_norm": 0.5103794103032024,
|
|
"learning_rate": 3.314061851964491e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12996214628219604,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4645.6,
|
|
"valid_targets_min": 3056
|
|
},
|
|
{
|
|
"epoch": 2.423108218063466,
|
|
"grad_norm": 0.48812963792791125,
|
|
"learning_rate": 3.307937663423804e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13980984687805176,
|
|
"step": 1490,
|
|
"valid_targets_mean": 6567.1,
|
|
"valid_targets_min": 3602
|
|
},
|
|
{
|
|
"epoch": 2.4312449145646866,
|
|
"grad_norm": 0.6781638230036592,
|
|
"learning_rate": 3.3017919714911094e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11503338813781738,
|
|
"step": 1495,
|
|
"valid_targets_mean": 5187.6,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 2.4393816110659072,
|
|
"grad_norm": 0.494679876515722,
|
|
"learning_rate": 3.295624877205796e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12201376259326935,
|
|
"step": 1500,
|
|
"valid_targets_mean": 5504.5,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 2.447518307567128,
|
|
"grad_norm": 0.6017430889988331,
|
|
"learning_rate": 3.2894364819591224e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.154220849275589,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4390.1,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 2.4556550040683485,
|
|
"grad_norm": 0.5265681649533256,
|
|
"learning_rate": 3.28322688749255e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14010374248027802,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4612.5,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 2.4637917005695686,
|
|
"grad_norm": 0.551484588945014,
|
|
"learning_rate": 3.2769961958960694e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14503279328346252,
|
|
"step": 1515,
|
|
"valid_targets_mean": 5862.9,
|
|
"valid_targets_min": 2141
|
|
},
|
|
{
|
|
"epoch": 2.4719283970707893,
|
|
"grad_norm": 0.5213608708077778,
|
|
"learning_rate": 3.270744509606523e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12514446675777435,
|
|
"step": 1520,
|
|
"valid_targets_mean": 5143.2,
|
|
"valid_targets_min": 2810
|
|
},
|
|
{
|
|
"epoch": 2.48006509357201,
|
|
"grad_norm": 0.5609869887661976,
|
|
"learning_rate": 3.26447193140592e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16480261087417603,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4906.8,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 2.48820179007323,
|
|
"grad_norm": 0.45622929795341044,
|
|
"learning_rate": 3.2581785644197456e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10129719972610474,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4956.2,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 2.4963384865744507,
|
|
"grad_norm": 0.5164564778909829,
|
|
"learning_rate": 3.251864512115271e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2165476679801941,
|
|
"step": 1535,
|
|
"valid_targets_mean": 6606.1,
|
|
"valid_targets_min": 2363
|
|
},
|
|
{
|
|
"epoch": 2.5044751830756713,
|
|
"grad_norm": 0.3983072775801996,
|
|
"learning_rate": 3.2455298782998424e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10032185167074203,
|
|
"step": 1540,
|
|
"valid_targets_mean": 8497.1,
|
|
"valid_targets_min": 2843
|
|
},
|
|
{
|
|
"epoch": 2.512611879576892,
|
|
"grad_norm": 0.5732682239726878,
|
|
"learning_rate": 3.2391747671191854e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12304198741912842,
|
|
"step": 1545,
|
|
"valid_targets_mean": 6310.2,
|
|
"valid_targets_min": 2146
|
|
},
|
|
{
|
|
"epoch": 2.5207485760781125,
|
|
"grad_norm": 0.48301651904457216,
|
|
"learning_rate": 3.2327992830556846e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1629238873720169,
|
|
"step": 1550,
|
|
"valid_targets_mean": 5748.8,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 2.5288852725793327,
|
|
"grad_norm": 0.4455251023255143,
|
|
"learning_rate": 3.22640353092667e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1761193573474884,
|
|
"step": 1555,
|
|
"valid_targets_mean": 5489.5,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 2.5370219690805533,
|
|
"grad_norm": 0.4809788733224243,
|
|
"learning_rate": 3.2199876158826915e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1458398401737213,
|
|
"step": 1560,
|
|
"valid_targets_mean": 7149.8,
|
|
"valid_targets_min": 3817
|
|
},
|
|
{
|
|
"epoch": 2.545158665581774,
|
|
"grad_norm": 0.45307852053685194,
|
|
"learning_rate": 3.2135516434057915e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11577823013067245,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5393.9,
|
|
"valid_targets_min": 2233
|
|
},
|
|
{
|
|
"epoch": 2.553295362082994,
|
|
"grad_norm": 0.49760950499062984,
|
|
"learning_rate": 3.2070957193077705e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12272712588310242,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4861.1,
|
|
"valid_targets_min": 3252
|
|
},
|
|
{
|
|
"epoch": 2.5614320585842147,
|
|
"grad_norm": 0.5355275643102434,
|
|
"learning_rate": 3.200619949728448e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1525430828332901,
|
|
"step": 1575,
|
|
"valid_targets_mean": 6466.9,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 2.5695687550854354,
|
|
"grad_norm": 0.5018072667233595,
|
|
"learning_rate": 3.194124441133916e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14310288429260254,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4550.6,
|
|
"valid_targets_min": 2784
|
|
},
|
|
{
|
|
"epoch": 2.577705451586656,
|
|
"grad_norm": 0.5173938528400699,
|
|
"learning_rate": 3.187609300314789e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13510233163833618,
|
|
"step": 1585,
|
|
"valid_targets_mean": 5440.2,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 2.585842148087876,
|
|
"grad_norm": 0.5159504691075016,
|
|
"learning_rate": 3.181074634384451e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14067131280899048,
|
|
"step": 1590,
|
|
"valid_targets_mean": 5820.6,
|
|
"valid_targets_min": 3059
|
|
},
|
|
{
|
|
"epoch": 2.5939788445890968,
|
|
"grad_norm": 0.5405735538422601,
|
|
"learning_rate": 3.1745205507772876e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12036160379648209,
|
|
"step": 1595,
|
|
"valid_targets_mean": 5669.1,
|
|
"valid_targets_min": 2723
|
|
},
|
|
{
|
|
"epoch": 2.6021155410903174,
|
|
"grad_norm": 0.7407500991451971,
|
|
"learning_rate": 3.16794715724693e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15827390551567078,
|
|
"step": 1600,
|
|
"valid_targets_mean": 5583.9,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 2.6102522375915376,
|
|
"grad_norm": 0.5099598998443594,
|
|
"learning_rate": 3.161354561864474e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440209299325943,
|
|
"step": 1605,
|
|
"valid_targets_mean": 5326.2,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 2.618388934092758,
|
|
"grad_norm": 0.5479785273711072,
|
|
"learning_rate": 3.154742873016707e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13522969186306,
|
|
"step": 1610,
|
|
"valid_targets_mean": 5222.2,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 2.626525630593979,
|
|
"grad_norm": 0.47481378459752427,
|
|
"learning_rate": 3.14811219940433e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13894972205162048,
|
|
"step": 1615,
|
|
"valid_targets_mean": 6755.9,
|
|
"valid_targets_min": 3830
|
|
},
|
|
{
|
|
"epoch": 2.6346623270951994,
|
|
"grad_norm": 0.4650084315327305,
|
|
"learning_rate": 3.141462650040161e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1224336326122284,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5373.9,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 2.64279902359642,
|
|
"grad_norm": 0.5401018992903237,
|
|
"learning_rate": 3.134794334247351e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1545572429895401,
|
|
"step": 1625,
|
|
"valid_targets_mean": 5654.0,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 2.6509357200976402,
|
|
"grad_norm": 0.48016953909282284,
|
|
"learning_rate": 3.1281073616575856e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13257913291454315,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5957.4,
|
|
"valid_targets_min": 3444
|
|
},
|
|
{
|
|
"epoch": 2.659072416598861,
|
|
"grad_norm": 0.5599972237868494,
|
|
"learning_rate": 3.121401842209279e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22545486688613892,
|
|
"step": 1635,
|
|
"valid_targets_mean": 6662.0,
|
|
"valid_targets_min": 3924
|
|
},
|
|
{
|
|
"epoch": 2.6672091131000815,
|
|
"grad_norm": 0.5099869798156484,
|
|
"learning_rate": 3.114677886145768e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15294325351715088,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5627.6,
|
|
"valid_targets_min": 2398
|
|
},
|
|
{
|
|
"epoch": 2.6753458096013016,
|
|
"grad_norm": 0.5577431582136917,
|
|
"learning_rate": 3.107935604013501e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1127748042345047,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4538.1,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 2.6834825061025223,
|
|
"grad_norm": 0.5463423029256798,
|
|
"learning_rate": 3.101175106660219e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1513296216726303,
|
|
"step": 1650,
|
|
"valid_targets_mean": 5564.5,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 2.691619202603743,
|
|
"grad_norm": 0.6027625180902552,
|
|
"learning_rate": 3.094396505233135e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1423185020685196,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4589.9,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 2.6997558991049635,
|
|
"grad_norm": 0.43885668468200845,
|
|
"learning_rate": 3.087599911177103e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12957511842250824,
|
|
"step": 1660,
|
|
"valid_targets_mean": 7121.9,
|
|
"valid_targets_min": 4396
|
|
},
|
|
{
|
|
"epoch": 2.707892595606184,
|
|
"grad_norm": 0.5377034711492127,
|
|
"learning_rate": 3.0807854362327906e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14502663910388947,
|
|
"step": 1665,
|
|
"valid_targets_mean": 5456.5,
|
|
"valid_targets_min": 3570
|
|
},
|
|
{
|
|
"epoch": 2.7160292921074043,
|
|
"grad_norm": 0.5001336173501958,
|
|
"learning_rate": 3.073953192434837e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12598839402198792,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4810.9,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 2.724165988608625,
|
|
"grad_norm": 0.5220789996194123,
|
|
"learning_rate": 3.067103292110017e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14378760755062103,
|
|
"step": 1675,
|
|
"valid_targets_mean": 4979.9,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 2.7323026851098455,
|
|
"grad_norm": 0.5162691594592334,
|
|
"learning_rate": 3.060235847875387e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1796841025352478,
|
|
"step": 1680,
|
|
"valid_targets_mean": 6276.2,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 2.7404393816110657,
|
|
"grad_norm": 0.48029341615912935,
|
|
"learning_rate": 3.05335097263644e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.125966876745224,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5720.2,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 2.7485760781122863,
|
|
"grad_norm": 0.4803595848509251,
|
|
"learning_rate": 3.0464487795852463e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09926225244998932,
|
|
"step": 1690,
|
|
"valid_targets_mean": 5128.4,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 2.756712774613507,
|
|
"grad_norm": 0.5106278700225423,
|
|
"learning_rate": 3.0395293821985906e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12691687047481537,
|
|
"step": 1695,
|
|
"valid_targets_mean": 4647.5,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 2.7648494711147276,
|
|
"grad_norm": 0.61727008379644,
|
|
"learning_rate": 3.032592894236112e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375855952501297,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4063.8,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 2.772986167615948,
|
|
"grad_norm": 0.5220802075309688,
|
|
"learning_rate": 3.0256394297384273e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1398482620716095,
|
|
"step": 1705,
|
|
"valid_targets_mean": 5660.1,
|
|
"valid_targets_min": 3984
|
|
},
|
|
{
|
|
"epoch": 2.7811228641171684,
|
|
"grad_norm": 0.5238354995617224,
|
|
"learning_rate": 3.0186691030252614e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132085919380188,
|
|
"step": 1710,
|
|
"valid_targets_mean": 5220.8,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 2.789259560618389,
|
|
"grad_norm": 0.499241003810301,
|
|
"learning_rate": 3.0116820286935654e-05,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.177951842546463,
|
|
"step": 1715,
|
|
"valid_targets_mean": 5724.1,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 2.7973962571196096,
|
|
"grad_norm": 0.575750437435253,
|
|
"learning_rate": 3.0046783216156315e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12942825257778168,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4648.0,
|
|
"valid_targets_min": 2633
|
|
},
|
|
{
|
|
"epoch": 2.8055329536208298,
|
|
"grad_norm": 0.5045556071219864,
|
|
"learning_rate": 2.997658096937207e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12483763694763184,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5357.9,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 2.8136696501220504,
|
|
"grad_norm": 0.684348188778152,
|
|
"learning_rate": 2.990621470075598e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1319955587387085,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5457.0,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 2.821806346623271,
|
|
"grad_norm": 0.5671325911785545,
|
|
"learning_rate": 2.9835685567177763e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1548902690410614,
|
|
"step": 1735,
|
|
"valid_targets_mean": 5260.6,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 2.8299430431244916,
|
|
"grad_norm": 0.4629636978551261,
|
|
"learning_rate": 2.9764994728184725e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15818393230438232,
|
|
"step": 1740,
|
|
"valid_targets_mean": 6452.5,
|
|
"valid_targets_min": 3759
|
|
},
|
|
{
|
|
"epoch": 2.838079739625712,
|
|
"grad_norm": 0.5953069122381895,
|
|
"learning_rate": 2.9694143345982732e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17098405957221985,
|
|
"step": 1745,
|
|
"valid_targets_mean": 5593.5,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 2.8462164361269324,
|
|
"grad_norm": 0.45172986591859304,
|
|
"learning_rate": 2.9623132585417096e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13527032732963562,
|
|
"step": 1750,
|
|
"valid_targets_mean": 6704.2,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 2.854353132628153,
|
|
"grad_norm": 0.4975648396200817,
|
|
"learning_rate": 2.9551963613953404e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12178505957126617,
|
|
"step": 1755,
|
|
"valid_targets_mean": 6038.4,
|
|
"valid_targets_min": 4521
|
|
},
|
|
{
|
|
"epoch": 2.862489829129373,
|
|
"grad_norm": 0.5414519194121625,
|
|
"learning_rate": 2.948063760165835e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12081147730350494,
|
|
"step": 1760,
|
|
"valid_targets_mean": 4120.1,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 2.870626525630594,
|
|
"grad_norm": 0.4366067141691184,
|
|
"learning_rate": 2.9409155721180477e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1350255012512207,
|
|
"step": 1765,
|
|
"valid_targets_mean": 6225.4,
|
|
"valid_targets_min": 3036
|
|
},
|
|
{
|
|
"epoch": 2.8787632221318145,
|
|
"grad_norm": 0.5500766155760595,
|
|
"learning_rate": 2.9337519147730918e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10548760741949081,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4567.8,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 2.886899918633035,
|
|
"grad_norm": 0.4435904826881841,
|
|
"learning_rate": 2.9265729059064054e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13118746876716614,
|
|
"step": 1775,
|
|
"valid_targets_mean": 6306.4,
|
|
"valid_targets_min": 3141
|
|
},
|
|
{
|
|
"epoch": 2.8950366151342557,
|
|
"grad_norm": 0.7587627287786612,
|
|
"learning_rate": 2.9193786635458178e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18454968929290771,
|
|
"step": 1780,
|
|
"valid_targets_mean": 5579.1,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 2.903173311635476,
|
|
"grad_norm": 0.4897525087855841,
|
|
"learning_rate": 2.912169305969605e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13022705912590027,
|
|
"step": 1785,
|
|
"valid_targets_mean": 6038.9,
|
|
"valid_targets_min": 3288
|
|
},
|
|
{
|
|
"epoch": 2.9113100081366965,
|
|
"grad_norm": 0.3896829782222166,
|
|
"learning_rate": 2.9049449517045497e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11002755165100098,
|
|
"step": 1790,
|
|
"valid_targets_mean": 6118.0,
|
|
"valid_targets_min": 2338
|
|
},
|
|
{
|
|
"epoch": 2.919446704637917,
|
|
"grad_norm": 0.3784633224945294,
|
|
"learning_rate": 2.89770571952399e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11330070346593857,
|
|
"step": 1795,
|
|
"valid_targets_mean": 8400.2,
|
|
"valid_targets_min": 4155
|
|
},
|
|
{
|
|
"epoch": 2.9275834011391373,
|
|
"grad_norm": 0.49491264882028485,
|
|
"learning_rate": 2.890451728445866e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18684348464012146,
|
|
"step": 1800,
|
|
"valid_targets_mean": 6696.6,
|
|
"valid_targets_min": 3861
|
|
},
|
|
{
|
|
"epoch": 2.935720097640358,
|
|
"grad_norm": 0.47181247576364993,
|
|
"learning_rate": 2.8831830977307644e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16414770483970642,
|
|
"step": 1805,
|
|
"valid_targets_mean": 6644.9,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 2.9438567941415785,
|
|
"grad_norm": 0.5155396891551823,
|
|
"learning_rate": 2.8758999468799594e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15796388685703278,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4846.5,
|
|
"valid_targets_min": 1632
|
|
},
|
|
{
|
|
"epoch": 2.951993490642799,
|
|
"grad_norm": 0.4514689977307621,
|
|
"learning_rate": 2.868602395633444e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19608153402805328,
|
|
"step": 1815,
|
|
"valid_targets_mean": 7106.8,
|
|
"valid_targets_min": 3883
|
|
},
|
|
{
|
|
"epoch": 2.9601301871440198,
|
|
"grad_norm": 0.5846021256028041,
|
|
"learning_rate": 2.861290563967965e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0958099514245987,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2989.9,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 2.96826688364524,
|
|
"grad_norm": 0.4539261325479636,
|
|
"learning_rate": 2.8539645720950474e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15887710452079773,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5430.4,
|
|
"valid_targets_min": 2053
|
|
},
|
|
{
|
|
"epoch": 2.9764035801464606,
|
|
"grad_norm": 0.5167204311608852,
|
|
"learning_rate": 2.8466245404590226e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17045152187347412,
|
|
"step": 1830,
|
|
"valid_targets_mean": 6415.8,
|
|
"valid_targets_min": 2274
|
|
},
|
|
{
|
|
"epoch": 2.984540276647681,
|
|
"grad_norm": 0.4910802890132026,
|
|
"learning_rate": 2.8392705897350425e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15125080943107605,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5811.2,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 2.9926769731489014,
|
|
"grad_norm": 0.46505504808407744,
|
|
"learning_rate": 2.8319028408270983e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15869152545928955,
|
|
"step": 1840,
|
|
"valid_targets_mean": 7832.1,
|
|
"valid_targets_min": 3792
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.7018093322068503,
|
|
"learning_rate": 2.8245214148660364e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32103702425956726,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5602.2,
|
|
"valid_targets_min": 2942
|
|
},
|
|
{
|
|
"epoch": 3.0081366965012206,
|
|
"grad_norm": 0.557545342235143,
|
|
"learning_rate": 2.8171264332075588e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12069088220596313,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4822.1,
|
|
"valid_targets_min": 3071
|
|
},
|
|
{
|
|
"epoch": 3.016273393002441,
|
|
"grad_norm": 0.5055444788767136,
|
|
"learning_rate": 2.809718017430236e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1308608502149582,
|
|
"step": 1855,
|
|
"valid_targets_mean": 5950.2,
|
|
"valid_targets_min": 2493
|
|
},
|
|
{
|
|
"epoch": 3.0244100895036614,
|
|
"grad_norm": 0.564974769389222,
|
|
"learning_rate": 2.8022962893335023e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13714183866977692,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4790.5,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 3.032546786004882,
|
|
"grad_norm": 0.5368918165884716,
|
|
"learning_rate": 2.7948613709356565e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11848818510770798,
|
|
"step": 1865,
|
|
"valid_targets_mean": 5709.6,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 3.0406834825061027,
|
|
"grad_norm": 0.6202460762235937,
|
|
"learning_rate": 2.7874133844718557e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15099003911018372,
|
|
"step": 1870,
|
|
"valid_targets_mean": 5946.1,
|
|
"valid_targets_min": 3788
|
|
},
|
|
{
|
|
"epoch": 3.048820179007323,
|
|
"grad_norm": 0.47945587720363,
|
|
"learning_rate": 2.7799524523921038e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14982934296131134,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4646.1,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 3.0569568755085434,
|
|
"grad_norm": 0.5981553209252303,
|
|
"learning_rate": 2.77247869735924e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13525862991809845,
|
|
"step": 1880,
|
|
"valid_targets_mean": 5607.5,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 3.065093572009764,
|
|
"grad_norm": 0.5197959262649877,
|
|
"learning_rate": 2.764992242246921e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12702880799770355,
|
|
"step": 1885,
|
|
"valid_targets_mean": 5396.0,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 3.0732302685109847,
|
|
"grad_norm": 0.4896279271559394,
|
|
"learning_rate": 2.7574932101376034e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10663357377052307,
|
|
"step": 1890,
|
|
"valid_targets_mean": 5472.4,
|
|
"valid_targets_min": 2781
|
|
},
|
|
{
|
|
"epoch": 3.081366965012205,
|
|
"grad_norm": 0.6197885493966871,
|
|
"learning_rate": 2.749981724320516e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14642459154129028,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4172.6,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 3.0895036615134255,
|
|
"grad_norm": 0.4909453144584929,
|
|
"learning_rate": 2.7424579082896357e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1485298126935959,
|
|
"step": 1900,
|
|
"valid_targets_mean": 5413.5,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 3.097640358014646,
|
|
"grad_norm": 0.4913841836660842,
|
|
"learning_rate": 2.7349218857416587e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16140612959861755,
|
|
"step": 1905,
|
|
"valid_targets_mean": 6672.6,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 3.1057770545158667,
|
|
"grad_norm": 0.5286751495746139,
|
|
"learning_rate": 2.7273737805739614e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09970603883266449,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4983.8,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 3.113913751017087,
|
|
"grad_norm": 0.4922716591309361,
|
|
"learning_rate": 2.719813716882569e-05,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1322004795074463,
|
|
"step": 1915,
|
|
"valid_targets_mean": 7345.9,
|
|
"valid_targets_min": 4037
|
|
},
|
|
{
|
|
"epoch": 3.1220504475183075,
|
|
"grad_norm": 0.4932161067767006,
|
|
"learning_rate": 2.7122418189601118e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12052470445632935,
|
|
"step": 1920,
|
|
"valid_targets_mean": 5224.1,
|
|
"valid_targets_min": 3445
|
|
},
|
|
{
|
|
"epoch": 3.130187144019528,
|
|
"grad_norm": 0.49943776929619677,
|
|
"learning_rate": 2.7046582112937837e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13334211707115173,
|
|
"step": 1925,
|
|
"valid_targets_mean": 7433.6,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 3.1383238405207488,
|
|
"grad_norm": 0.44357138068663077,
|
|
"learning_rate": 2.697063018563295e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11824844032526016,
|
|
"step": 1930,
|
|
"valid_targets_mean": 6402.1,
|
|
"valid_targets_min": 3582
|
|
},
|
|
{
|
|
"epoch": 3.146460537021969,
|
|
"grad_norm": 0.6216470540080042,
|
|
"learning_rate": 2.6894563656388217e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16654358804225922,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4445.5,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 3.1545972335231895,
|
|
"grad_norm": 0.5203431916047961,
|
|
"learning_rate": 2.681838377578954e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13338005542755127,
|
|
"step": 1940,
|
|
"valid_targets_mean": 6740.8,
|
|
"valid_targets_min": 3477
|
|
},
|
|
{
|
|
"epoch": 3.16273393002441,
|
|
"grad_norm": 0.548730057587269,
|
|
"learning_rate": 2.6742091796286388e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14711609482765198,
|
|
"step": 1945,
|
|
"valid_targets_mean": 5364.9,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 3.170870626525631,
|
|
"grad_norm": 0.5303267033016945,
|
|
"learning_rate": 2.6665688972171215e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1701548993587494,
|
|
"step": 1950,
|
|
"valid_targets_mean": 5727.0,
|
|
"valid_targets_min": 4151
|
|
},
|
|
{
|
|
"epoch": 3.179007323026851,
|
|
"grad_norm": 0.5676758975951339,
|
|
"learning_rate": 2.658917655955884e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15539327263832092,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4853.1,
|
|
"valid_targets_min": 2899
|
|
},
|
|
{
|
|
"epoch": 3.1871440195280716,
|
|
"grad_norm": 0.4484115372218612,
|
|
"learning_rate": 2.651255581636578e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16180385649204254,
|
|
"step": 1960,
|
|
"valid_targets_mean": 7725.1,
|
|
"valid_targets_min": 4420
|
|
},
|
|
{
|
|
"epoch": 3.195280716029292,
|
|
"grad_norm": 0.5105638327238713,
|
|
"learning_rate": 2.6435828002289596e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12511882185935974,
|
|
"step": 1965,
|
|
"valid_targets_mean": 5482.6,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 3.203417412530513,
|
|
"grad_norm": 0.45443106401153505,
|
|
"learning_rate": 2.6358994378788163e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377660185098648,
|
|
"step": 1970,
|
|
"valid_targets_mean": 6621.2,
|
|
"valid_targets_min": 3045
|
|
},
|
|
{
|
|
"epoch": 3.211554109031733,
|
|
"grad_norm": 0.5569701717755269,
|
|
"learning_rate": 2.6282056209058936e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12813882529735565,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4760.6,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 3.2196908055329536,
|
|
"grad_norm": 0.5403643934008627,
|
|
"learning_rate": 2.6205014758018176e-05,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.126073956489563,
|
|
"step": 1980,
|
|
"valid_targets_mean": 5044.4,
|
|
"valid_targets_min": 3035
|
|
},
|
|
{
|
|
"epoch": 3.2278275020341742,
|
|
"grad_norm": 0.4746581189512608,
|
|
"learning_rate": 2.6127871292280165e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11351381242275238,
|
|
"step": 1985,
|
|
"valid_targets_mean": 6290.9,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 3.2359641985353944,
|
|
"grad_norm": 0.5001109013797744,
|
|
"learning_rate": 2.6050627080136376e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430266797542572,
|
|
"step": 1990,
|
|
"valid_targets_mean": 4843.6,
|
|
"valid_targets_min": 2324
|
|
},
|
|
{
|
|
"epoch": 3.244100895036615,
|
|
"grad_norm": 0.4495739039013548,
|
|
"learning_rate": 2.5973283391534615e-05,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09228505194187164,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4507.0,
|
|
"valid_targets_min": 2149
|
|
},
|
|
{
|
|
"epoch": 3.2522375915378356,
|
|
"grad_norm": 0.48980120047593206,
|
|
"learning_rate": 2.589584149805817e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13319309055805206,
|
|
"step": 2000,
|
|
"valid_targets_mean": 7194.9,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 3.2603742880390563,
|
|
"grad_norm": 0.4812607915960625,
|
|
"learning_rate": 2.581830267290486e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1082778349518776,
|
|
"step": 2005,
|
|
"valid_targets_mean": 6961.8,
|
|
"valid_targets_min": 3516
|
|
},
|
|
{
|
|
"epoch": 3.268510984540277,
|
|
"grad_norm": 0.570112545383423,
|
|
"learning_rate": 2.574066819086613e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134844571352005,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4588.6,
|
|
"valid_targets_min": 1036
|
|
},
|
|
{
|
|
"epoch": 3.276647681041497,
|
|
"grad_norm": 0.4362815156888107,
|
|
"learning_rate": 2.5662939328306113e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16294899582862854,
|
|
"step": 2015,
|
|
"valid_targets_mean": 8057.6,
|
|
"valid_targets_min": 2510
|
|
},
|
|
{
|
|
"epoch": 3.2847843775427177,
|
|
"grad_norm": 0.4089077124565424,
|
|
"learning_rate": 2.5585117363140592e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12175396829843521,
|
|
"step": 2020,
|
|
"valid_targets_mean": 6774.1,
|
|
"valid_targets_min": 3456
|
|
},
|
|
{
|
|
"epoch": 3.2929210740439383,
|
|
"grad_norm": 0.502187014362583,
|
|
"learning_rate": 2.5507203574816043e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16614243388175964,
|
|
"step": 2025,
|
|
"valid_targets_mean": 6135.8,
|
|
"valid_targets_min": 2329
|
|
},
|
|
{
|
|
"epoch": 3.3010577705451585,
|
|
"grad_norm": 0.4730402208775795,
|
|
"learning_rate": 2.542919924428856e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16762784123420715,
|
|
"step": 2030,
|
|
"valid_targets_mean": 5869.8,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 3.309194467046379,
|
|
"grad_norm": 0.5339111572346003,
|
|
"learning_rate": 2.5351105654002838e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1370839774608612,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5234.9,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 3.3173311635475997,
|
|
"grad_norm": 0.6413302863152066,
|
|
"learning_rate": 2.527292408787104e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16920380294322968,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4609.8,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 3.3254678600488203,
|
|
"grad_norm": 0.470230236442957,
|
|
"learning_rate": 2.5194655831251712e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12207140028476715,
|
|
"step": 2045,
|
|
"valid_targets_mean": 5943.4,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 3.3336045565500405,
|
|
"grad_norm": 0.48456012812970206,
|
|
"learning_rate": 2.5116302170928678e-05,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11639226973056793,
|
|
"step": 2050,
|
|
"valid_targets_mean": 5307.0,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.341741253051261,
|
|
"grad_norm": 0.5523702500052676,
|
|
"learning_rate": 2.5037864395089822e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.106050044298172,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4106.2,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 3.3498779495524817,
|
|
"grad_norm": 0.6268239379014566,
|
|
"learning_rate": 2.495934379330597e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13571971654891968,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4496.1,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 3.3580146460537024,
|
|
"grad_norm": 0.5211118227275444,
|
|
"learning_rate": 2.4880741656509656e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18967539072036743,
|
|
"step": 2065,
|
|
"valid_targets_mean": 5973.8,
|
|
"valid_targets_min": 2874
|
|
},
|
|
{
|
|
"epoch": 3.3661513425549225,
|
|
"grad_norm": 0.4867184690553686,
|
|
"learning_rate": 2.4802059276973904e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08832257986068726,
|
|
"step": 2070,
|
|
"valid_targets_mean": 6020.0,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 3.374288039056143,
|
|
"grad_norm": 0.40231974560194717,
|
|
"learning_rate": 2.4723297948290982e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13233518600463867,
|
|
"step": 2075,
|
|
"valid_targets_mean": 9070.0,
|
|
"valid_targets_min": 3459
|
|
},
|
|
{
|
|
"epoch": 3.382424735557364,
|
|
"grad_norm": 0.5154449145165141,
|
|
"learning_rate": 2.464445896535113e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1818242222070694,
|
|
"step": 2080,
|
|
"valid_targets_mean": 6717.2,
|
|
"valid_targets_min": 4298
|
|
},
|
|
{
|
|
"epoch": 3.3905614320585844,
|
|
"grad_norm": 0.5379336133652549,
|
|
"learning_rate": 2.45655436243213e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12027017772197723,
|
|
"step": 2085,
|
|
"valid_targets_mean": 5752.2,
|
|
"valid_targets_min": 3219
|
|
},
|
|
{
|
|
"epoch": 3.3986981285598046,
|
|
"grad_norm": 0.4175221191336141,
|
|
"learning_rate": 2.44865532226238e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1881314218044281,
|
|
"step": 2090,
|
|
"valid_targets_mean": 8001.6,
|
|
"valid_targets_min": 2705
|
|
},
|
|
{
|
|
"epoch": 3.406834825061025,
|
|
"grad_norm": 0.5961468554541282,
|
|
"learning_rate": 2.4407489058915004e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391357183456421,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4966.1,
|
|
"valid_targets_min": 3157
|
|
},
|
|
{
|
|
"epoch": 3.414971521562246,
|
|
"grad_norm": 0.4943897075326212,
|
|
"learning_rate": 2.4328352433063966e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14456993341445923,
|
|
"step": 2100,
|
|
"valid_targets_mean": 7371.6,
|
|
"valid_targets_min": 4929
|
|
},
|
|
{
|
|
"epoch": 3.423108218063466,
|
|
"grad_norm": 0.4798546004908442,
|
|
"learning_rate": 2.4249144646131083e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12103509902954102,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5275.5,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 3.4312449145646866,
|
|
"grad_norm": 0.6024466670234377,
|
|
"learning_rate": 2.4169867000346684e-05,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1351066529750824,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4428.4,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 3.4393816110659072,
|
|
"grad_norm": 0.5447078874142987,
|
|
"learning_rate": 2.4090520799089612e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.157949760556221,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5712.9,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 3.447518307567128,
|
|
"grad_norm": 0.5293454552675316,
|
|
"learning_rate": 2.4011107346865844e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19082076847553253,
|
|
"step": 2120,
|
|
"valid_targets_mean": 6204.5,
|
|
"valid_targets_min": 3697
|
|
},
|
|
{
|
|
"epoch": 3.4556550040683485,
|
|
"grad_norm": 0.496357535543296,
|
|
"learning_rate": 2.393162794928697e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1595294177532196,
|
|
"step": 2125,
|
|
"valid_targets_mean": 7122.4,
|
|
"valid_targets_min": 2914
|
|
},
|
|
{
|
|
"epoch": 3.4637917005695686,
|
|
"grad_norm": 0.5390530304141364,
|
|
"learning_rate": 2.385208391304879e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13380271196365356,
|
|
"step": 2130,
|
|
"valid_targets_mean": 5964.4,
|
|
"valid_targets_min": 4317
|
|
},
|
|
{
|
|
"epoch": 3.4719283970707893,
|
|
"grad_norm": 0.48671301947421164,
|
|
"learning_rate": 2.3772476545909794e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0984102338552475,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5276.6,
|
|
"valid_targets_min": 2773
|
|
},
|
|
{
|
|
"epoch": 3.48006509357201,
|
|
"grad_norm": 0.5168171595907421,
|
|
"learning_rate": 2.3692807156669684e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10761836171150208,
|
|
"step": 2140,
|
|
"valid_targets_mean": 5127.4,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 3.48820179007323,
|
|
"grad_norm": 0.45917243865127705,
|
|
"learning_rate": 2.3613077055147855e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11678306758403778,
|
|
"step": 2145,
|
|
"valid_targets_mean": 6023.4,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 3.4963384865744507,
|
|
"grad_norm": 1.0802408916637594,
|
|
"learning_rate": 2.3533287552161833e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12424206733703613,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4101.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 3.5044751830756713,
|
|
"grad_norm": 0.9725147310489535,
|
|
"learning_rate": 2.345343995950577e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11299556493759155,
|
|
"step": 2155,
|
|
"valid_targets_mean": 5489.8,
|
|
"valid_targets_min": 2994
|
|
},
|
|
{
|
|
"epoch": 3.512611879576892,
|
|
"grad_norm": 0.5277612614291434,
|
|
"learning_rate": 2.3373535589928827e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14234156906604767,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5168.0,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 3.5207485760781125,
|
|
"grad_norm": 0.4577806901348409,
|
|
"learning_rate": 2.3293575757113635e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11869750916957855,
|
|
"step": 2165,
|
|
"valid_targets_mean": 5705.0,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 3.5288852725793327,
|
|
"grad_norm": 0.48813053590763633,
|
|
"learning_rate": 2.3213561775654678e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13192936778068542,
|
|
"step": 2170,
|
|
"valid_targets_mean": 5435.0,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 3.5370219690805533,
|
|
"grad_norm": 0.5417864760687305,
|
|
"learning_rate": 2.3133494961036655e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14656861126422882,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4186.5,
|
|
"valid_targets_min": 3036
|
|
},
|
|
{
|
|
"epoch": 3.545158665581774,
|
|
"grad_norm": 0.5079700581274689,
|
|
"learning_rate": 2.305337662961292e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15438416600227356,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5634.9,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 3.553295362082994,
|
|
"grad_norm": 0.4302507514212832,
|
|
"learning_rate": 2.2973208098583767e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1305883228778839,
|
|
"step": 2185,
|
|
"valid_targets_mean": 7573.4,
|
|
"valid_targets_min": 3714
|
|
},
|
|
{
|
|
"epoch": 3.5614320585842147,
|
|
"grad_norm": 0.4784814526634792,
|
|
"learning_rate": 2.2892990685974815e-05,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13934920728206635,
|
|
"step": 2190,
|
|
"valid_targets_mean": 6661.2,
|
|
"valid_targets_min": 2725
|
|
},
|
|
{
|
|
"epoch": 3.5695687550854354,
|
|
"grad_norm": 0.5218194964987156,
|
|
"learning_rate": 2.2812725710615328e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1036933958530426,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4396.5,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 3.577705451586656,
|
|
"grad_norm": 0.3915528153922062,
|
|
"learning_rate": 2.2732414492116538e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11413315683603287,
|
|
"step": 2200,
|
|
"valid_targets_mean": 7739.1,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 3.585842148087876,
|
|
"grad_norm": 0.5474376964212345,
|
|
"learning_rate": 2.2652058350849955e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12832045555114746,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4460.6,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 3.5939788445890968,
|
|
"grad_norm": 0.4513603679012204,
|
|
"learning_rate": 2.2571658607925624e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11507508158683777,
|
|
"step": 2210,
|
|
"valid_targets_mean": 6023.5,
|
|
"valid_targets_min": 2711
|
|
},
|
|
{
|
|
"epoch": 3.6021155410903174,
|
|
"grad_norm": 0.4358199903498031,
|
|
"learning_rate": 2.2491216585170458e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08925238251686096,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5539.4,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 3.6102522375915376,
|
|
"grad_norm": 0.5033142192460704,
|
|
"learning_rate": 2.2410733605106462e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07945795357227325,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4576.6,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 3.618388934092758,
|
|
"grad_norm": 0.5295724882881188,
|
|
"learning_rate": 2.233021099092902e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273106187582016,
|
|
"step": 2225,
|
|
"valid_targets_mean": 5181.6,
|
|
"valid_targets_min": 3580
|
|
},
|
|
{
|
|
"epoch": 3.626525630593979,
|
|
"grad_norm": 0.49909534425266827,
|
|
"learning_rate": 2.224965006648512e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11837124824523926,
|
|
"step": 2230,
|
|
"valid_targets_mean": 6526.1,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 3.6346623270951994,
|
|
"grad_norm": 0.48550502453666,
|
|
"learning_rate": 2.2169052156251585e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13036608695983887,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5403.1,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 3.64279902359642,
|
|
"grad_norm": 0.6877569373231779,
|
|
"learning_rate": 2.2088418585313346e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15400430560112,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4920.0,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 3.6509357200976402,
|
|
"grad_norm": 0.5449931803254913,
|
|
"learning_rate": 2.200775067934158e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11476185917854309,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4997.9,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 3.659072416598861,
|
|
"grad_norm": 0.4525544723604396,
|
|
"learning_rate": 2.192704976457198e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1380799114704132,
|
|
"step": 2250,
|
|
"valid_targets_mean": 6882.5,
|
|
"valid_targets_min": 4428
|
|
},
|
|
{
|
|
"epoch": 3.6672091131000815,
|
|
"grad_norm": 0.5174034196077824,
|
|
"learning_rate": 2.1846317167782923e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.152031809091568,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5447.2,
|
|
"valid_targets_min": 2463
|
|
},
|
|
{
|
|
"epoch": 3.6753458096013016,
|
|
"grad_norm": 0.44137473000448374,
|
|
"learning_rate": 2.1765554216273652e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1166839748620987,
|
|
"step": 2260,
|
|
"valid_targets_mean": 6610.4,
|
|
"valid_targets_min": 3051
|
|
},
|
|
{
|
|
"epoch": 3.6834825061025223,
|
|
"grad_norm": 0.5023783889425373,
|
|
"learning_rate": 2.1684762237842466e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20356138050556183,
|
|
"step": 2265,
|
|
"valid_targets_mean": 6716.1,
|
|
"valid_targets_min": 2851
|
|
},
|
|
{
|
|
"epoch": 3.691619202603743,
|
|
"grad_norm": 0.5649370975103999,
|
|
"learning_rate": 2.1603942560764884e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14867046475410461,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4637.0,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 3.6997558991049635,
|
|
"grad_norm": 0.43128962161492657,
|
|
"learning_rate": 2.1523096513771825e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11670452356338501,
|
|
"step": 2275,
|
|
"valid_targets_mean": 6769.8,
|
|
"valid_targets_min": 3135
|
|
},
|
|
{
|
|
"epoch": 3.707892595606184,
|
|
"grad_norm": 0.6107884039191125,
|
|
"learning_rate": 2.1442225426027724e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1622416079044342,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4730.1,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 3.7160292921074043,
|
|
"grad_norm": 0.42009628659466286,
|
|
"learning_rate": 2.1361330627108724e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14188794791698456,
|
|
"step": 2285,
|
|
"valid_targets_mean": 8693.0,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 3.724165988608625,
|
|
"grad_norm": 0.4593106062963763,
|
|
"learning_rate": 2.128041344698078e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10363125801086426,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4878.5,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 3.7323026851098455,
|
|
"grad_norm": 0.43814883641270597,
|
|
"learning_rate": 2.1199475215977817e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10538579523563385,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5957.1,
|
|
"valid_targets_min": 2916
|
|
},
|
|
{
|
|
"epoch": 3.7404393816110657,
|
|
"grad_norm": 0.5103737751157224,
|
|
"learning_rate": 2.1118517264779858e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12744760513305664,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4476.2,
|
|
"valid_targets_min": 1892
|
|
},
|
|
{
|
|
"epoch": 3.7485760781122863,
|
|
"grad_norm": 0.47819552478708083,
|
|
"learning_rate": 2.103754092439112e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11515910178422928,
|
|
"step": 2305,
|
|
"valid_targets_mean": 6143.5,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 3.756712774613507,
|
|
"grad_norm": 0.5649450823382121,
|
|
"learning_rate": 2.095654752611817e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15299847722053528,
|
|
"step": 2310,
|
|
"valid_targets_mean": 5482.8,
|
|
"valid_targets_min": 624
|
|
},
|
|
{
|
|
"epoch": 3.7648494711147276,
|
|
"grad_norm": 0.43149118197396086,
|
|
"learning_rate": 2.087553840154801e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12518690526485443,
|
|
"step": 2315,
|
|
"valid_targets_mean": 6021.2,
|
|
"valid_targets_min": 2664
|
|
},
|
|
{
|
|
"epoch": 3.772986167615948,
|
|
"grad_norm": 0.4891027718138667,
|
|
"learning_rate": 2.0794514882526196e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11703081429004669,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4964.6,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 3.7811228641171684,
|
|
"grad_norm": 0.48601703764578974,
|
|
"learning_rate": 2.0713478301134935e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13128621876239777,
|
|
"step": 2325,
|
|
"valid_targets_mean": 6682.8,
|
|
"valid_targets_min": 2997
|
|
},
|
|
{
|
|
"epoch": 3.789259560618389,
|
|
"grad_norm": 0.5447061578281931,
|
|
"learning_rate": 2.063242998967118e-05,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12155985832214355,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4736.2,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 3.7973962571196096,
|
|
"grad_norm": 0.45017085915069277,
|
|
"learning_rate": 2.0551371280624758e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.158024862408638,
|
|
"step": 2335,
|
|
"valid_targets_mean": 8097.1,
|
|
"valid_targets_min": 4594
|
|
},
|
|
{
|
|
"epoch": 3.8055329536208298,
|
|
"grad_norm": 0.48647898978382154,
|
|
"learning_rate": 2.0470303506656414e-05,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12568457424640656,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4161.4,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 3.8136696501220504,
|
|
"grad_norm": 0.5446656209319005,
|
|
"learning_rate": 2.0389228000575953e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16627874970436096,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5223.8,
|
|
"valid_targets_min": 3621
|
|
},
|
|
{
|
|
"epoch": 3.821806346623271,
|
|
"grad_norm": 0.5378101789966692,
|
|
"learning_rate": 2.0308146095320275e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11316706240177155,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5668.1,
|
|
"valid_targets_min": 2857
|
|
},
|
|
{
|
|
"epoch": 3.8299430431244916,
|
|
"grad_norm": 0.4704455377437423,
|
|
"learning_rate": 2.0227059123931504e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14419454336166382,
|
|
"step": 2355,
|
|
"valid_targets_mean": 6657.2,
|
|
"valid_targets_min": 2752
|
|
},
|
|
{
|
|
"epoch": 3.838079739625712,
|
|
"grad_norm": 0.4545925318504704,
|
|
"learning_rate": 2.0145968419535045e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0803401917219162,
|
|
"step": 2360,
|
|
"valid_targets_mean": 5352.0,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 3.8462164361269324,
|
|
"grad_norm": 0.5261750953052919,
|
|
"learning_rate": 2.0064875315317674e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1119522899389267,
|
|
"step": 2365,
|
|
"valid_targets_mean": 5081.0,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 3.854353132628153,
|
|
"grad_norm": 0.5977456077727262,
|
|
"learning_rate": 1.998378114450565e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10629862546920776,
|
|
"step": 2370,
|
|
"valid_targets_mean": 4967.9,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 3.862489829129373,
|
|
"grad_norm": 0.6008964888102649,
|
|
"learning_rate": 1.9902687240342722e-05,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11401569843292236,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4733.9,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 3.870626525630594,
|
|
"grad_norm": 0.5127247595415064,
|
|
"learning_rate": 1.982159493606829e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426711082458496,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5931.9,
|
|
"valid_targets_min": 4307
|
|
},
|
|
{
|
|
"epoch": 3.8787632221318145,
|
|
"grad_norm": 0.5231212605718677,
|
|
"learning_rate": 1.9740505564895436e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15910865366458893,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5790.6,
|
|
"valid_targets_min": 3166
|
|
},
|
|
{
|
|
"epoch": 3.886899918633035,
|
|
"grad_norm": 0.5112833558456881,
|
|
"learning_rate": 1.9659420459989026e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12127886712551117,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4869.2,
|
|
"valid_targets_min": 3034
|
|
},
|
|
{
|
|
"epoch": 3.8950366151342557,
|
|
"grad_norm": 0.4557114664235136,
|
|
"learning_rate": 1.9578340954443784e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132114976644516,
|
|
"step": 2395,
|
|
"valid_targets_mean": 6727.5,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 3.903173311635476,
|
|
"grad_norm": 0.5394796995535039,
|
|
"learning_rate": 1.949726838126237e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15240266919136047,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5102.9,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 3.9113100081366965,
|
|
"grad_norm": 0.5486897951018997,
|
|
"learning_rate": 1.941620407333347e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12607380747795105,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4510.5,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 3.919446704637917,
|
|
"grad_norm": 0.42322274272575794,
|
|
"learning_rate": 1.933514936340991e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06517419964075089,
|
|
"step": 2410,
|
|
"valid_targets_mean": 5701.9,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 3.9275834011391373,
|
|
"grad_norm": 0.49141277897204666,
|
|
"learning_rate": 1.9254105584086683e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12552395462989807,
|
|
"step": 2415,
|
|
"valid_targets_mean": 5657.6,
|
|
"valid_targets_min": 3092
|
|
},
|
|
{
|
|
"epoch": 3.935720097640358,
|
|
"grad_norm": 0.5126538411225738,
|
|
"learning_rate": 1.9173074067779102e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13129568099975586,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5424.1,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 3.9438567941415785,
|
|
"grad_norm": 0.4865930267161227,
|
|
"learning_rate": 1.9092056146700844e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12925077974796295,
|
|
"step": 2425,
|
|
"valid_targets_mean": 5372.9,
|
|
"valid_targets_min": 3511
|
|
},
|
|
{
|
|
"epoch": 3.951993490642799,
|
|
"grad_norm": 0.5288677950325463,
|
|
"learning_rate": 1.9011053152842087e-05,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1401764452457428,
|
|
"step": 2430,
|
|
"valid_targets_mean": 5477.1,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 3.9601301871440198,
|
|
"grad_norm": 0.5471093288120334,
|
|
"learning_rate": 1.89300664179476e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1709902137517929,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5307.8,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 3.96826688364524,
|
|
"grad_norm": 0.6064125532966921,
|
|
"learning_rate": 1.8849097273494827e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1471061408519745,
|
|
"step": 2440,
|
|
"valid_targets_mean": 5405.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 3.9764035801464606,
|
|
"grad_norm": 0.41587136888702764,
|
|
"learning_rate": 1.8768147050672028e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09928697347640991,
|
|
"step": 2445,
|
|
"valid_targets_mean": 6103.5,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 3.984540276647681,
|
|
"grad_norm": 0.5513050609197007,
|
|
"learning_rate": 1.8687217080356365e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12455247342586517,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3710.2,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 3.9926769731489014,
|
|
"grad_norm": 0.5016492346972834,
|
|
"learning_rate": 1.8606308693092035e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10865034908056259,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4436.8,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.6629905256300082,
|
|
"learning_rate": 1.8525423219068423e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965284585952759,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5677.5,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 4.008136696501221,
|
|
"grad_norm": 0.5129517123763623,
|
|
"learning_rate": 1.844456198809817e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10877451300621033,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5123.9,
|
|
"valid_targets_min": 2568
|
|
},
|
|
{
|
|
"epoch": 4.016273393002441,
|
|
"grad_norm": 0.473163436849981,
|
|
"learning_rate": 1.8363726329595356e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14932730793952942,
|
|
"step": 2470,
|
|
"valid_targets_mean": 7151.4,
|
|
"valid_targets_min": 3539
|
|
},
|
|
{
|
|
"epoch": 4.024410089503662,
|
|
"grad_norm": 0.4124240574592047,
|
|
"learning_rate": 1.828291757255364e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11770130693912506,
|
|
"step": 2475,
|
|
"valid_targets_mean": 8133.2,
|
|
"valid_targets_min": 3713
|
|
},
|
|
{
|
|
"epoch": 4.032546786004882,
|
|
"grad_norm": 0.6586399627241981,
|
|
"learning_rate": 1.8202137045524383e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13058783113956451,
|
|
"step": 2480,
|
|
"valid_targets_mean": 6915.6,
|
|
"valid_targets_min": 4175
|
|
},
|
|
{
|
|
"epoch": 4.040683482506102,
|
|
"grad_norm": 0.5513424168769842,
|
|
"learning_rate": 1.812138607659486e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12268248945474625,
|
|
"step": 2485,
|
|
"valid_targets_mean": 5776.4,
|
|
"valid_targets_min": 2434
|
|
},
|
|
{
|
|
"epoch": 4.048820179007323,
|
|
"grad_norm": 0.5252916649047769,
|
|
"learning_rate": 1.8040665993366355e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13886849582195282,
|
|
"step": 2490,
|
|
"valid_targets_mean": 5557.5,
|
|
"valid_targets_min": 2120
|
|
},
|
|
{
|
|
"epoch": 4.0569568755085434,
|
|
"grad_norm": 0.5060252786002666,
|
|
"learning_rate": 1.795997812293239e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20440292358398438,
|
|
"step": 2495,
|
|
"valid_targets_mean": 8090.6,
|
|
"valid_targets_min": 3649
|
|
},
|
|
{
|
|
"epoch": 4.065093572009764,
|
|
"grad_norm": 0.5304099045808806,
|
|
"learning_rate": 1.7879323791856875e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11923650652170181,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5128.9,
|
|
"valid_targets_min": 3118
|
|
},
|
|
{
|
|
"epoch": 4.073230268510985,
|
|
"grad_norm": 0.5848838368779085,
|
|
"learning_rate": 1.7798704326152317e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19044384360313416,
|
|
"step": 2505,
|
|
"valid_targets_mean": 6830.1,
|
|
"valid_targets_min": 4289
|
|
},
|
|
{
|
|
"epoch": 4.081366965012205,
|
|
"grad_norm": 0.5705122461346422,
|
|
"learning_rate": 1.7718121051258016e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14008468389511108,
|
|
"step": 2510,
|
|
"valid_targets_mean": 5542.6,
|
|
"valid_targets_min": 2108
|
|
},
|
|
{
|
|
"epoch": 4.089503661513426,
|
|
"grad_norm": 0.441927710137963,
|
|
"learning_rate": 1.763757529201826e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12376945465803146,
|
|
"step": 2515,
|
|
"valid_targets_mean": 6966.4,
|
|
"valid_targets_min": 1952
|
|
},
|
|
{
|
|
"epoch": 4.097640358014646,
|
|
"grad_norm": 0.7043747925369948,
|
|
"learning_rate": 1.7557068372660562e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12836799025535583,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4607.2,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 4.105777054515866,
|
|
"grad_norm": 0.5410005564912586,
|
|
"learning_rate": 1.747660161677387e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15527786314487457,
|
|
"step": 2525,
|
|
"valid_targets_mean": 6187.5,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 4.113913751017087,
|
|
"grad_norm": 0.4912342111176402,
|
|
"learning_rate": 1.7396176347286838e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1411733329296112,
|
|
"step": 2530,
|
|
"valid_targets_mean": 6837.1,
|
|
"valid_targets_min": 2828
|
|
},
|
|
{
|
|
"epoch": 4.1220504475183075,
|
|
"grad_norm": 0.5865774973750087,
|
|
"learning_rate": 1.7315793886446036e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1618509590625763,
|
|
"step": 2535,
|
|
"valid_targets_mean": 5910.0,
|
|
"valid_targets_min": 3286
|
|
},
|
|
{
|
|
"epoch": 4.130187144019528,
|
|
"grad_norm": 0.626903542457007,
|
|
"learning_rate": 1.7235455555794236e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13324236869812012,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5587.8,
|
|
"valid_targets_min": 2551
|
|
},
|
|
{
|
|
"epoch": 4.138323840520749,
|
|
"grad_norm": 0.5795141806915454,
|
|
"learning_rate": 1.7155162676148682e-05,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12815946340560913,
|
|
"step": 2545,
|
|
"valid_targets_mean": 5687.1,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 4.146460537021969,
|
|
"grad_norm": 0.5206487840907027,
|
|
"learning_rate": 1.707491656757936e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14685989916324615,
|
|
"step": 2550,
|
|
"valid_targets_mean": 6729.9,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 4.15459723352319,
|
|
"grad_norm": 0.5418721063841692,
|
|
"learning_rate": 1.6994718549387332e-05,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11401748657226562,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4553.0,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 4.16273393002441,
|
|
"grad_norm": 0.5304635472704533,
|
|
"learning_rate": 1.6914569940083004e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15515920519828796,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5698.4,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 4.17087062652563,
|
|
"grad_norm": 0.5577644314206343,
|
|
"learning_rate": 1.6834472057364462e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1251552402973175,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5167.2,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 4.179007323026851,
|
|
"grad_norm": 0.516551191316929,
|
|
"learning_rate": 1.6754426218095827e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13127167522907257,
|
|
"step": 2570,
|
|
"valid_targets_mean": 5867.4,
|
|
"valid_targets_min": 3286
|
|
},
|
|
{
|
|
"epoch": 4.187144019528072,
|
|
"grad_norm": 0.4789758761612466,
|
|
"learning_rate": 1.6674433738285573e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1123281717300415,
|
|
"step": 2575,
|
|
"valid_targets_mean": 6434.2,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 4.195280716029292,
|
|
"grad_norm": 0.5647313559781841,
|
|
"learning_rate": 1.6594495933064926e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16273194551467896,
|
|
"step": 2580,
|
|
"valid_targets_mean": 5906.6,
|
|
"valid_targets_min": 3636
|
|
},
|
|
{
|
|
"epoch": 4.203417412530513,
|
|
"grad_norm": 0.4660706331602136,
|
|
"learning_rate": 1.6514614116666213e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11347354203462601,
|
|
"step": 2585,
|
|
"valid_targets_mean": 7309.0,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 4.211554109031733,
|
|
"grad_norm": 0.5253962224050552,
|
|
"learning_rate": 1.6434789602401264e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1410287618637085,
|
|
"step": 2590,
|
|
"valid_targets_mean": 6679.8,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 4.219690805532954,
|
|
"grad_norm": 0.5081317554456144,
|
|
"learning_rate": 1.6355023702639835e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10498687624931335,
|
|
"step": 2595,
|
|
"valid_targets_mean": 5955.5,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 4.227827502034174,
|
|
"grad_norm": 0.5133995179689695,
|
|
"learning_rate": 1.6275317728787995e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15336143970489502,
|
|
"step": 2600,
|
|
"valid_targets_mean": 6639.5,
|
|
"valid_targets_min": 3253
|
|
},
|
|
{
|
|
"epoch": 4.235964198535394,
|
|
"grad_norm": 0.6025044099778876,
|
|
"learning_rate": 1.6195672991266627e-05,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1540364921092987,
|
|
"step": 2605,
|
|
"valid_targets_mean": 5138.1,
|
|
"valid_targets_min": 1887
|
|
},
|
|
{
|
|
"epoch": 4.244100895036615,
|
|
"grad_norm": 0.5322557851182275,
|
|
"learning_rate": 1.6116090799489817e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13185200095176697,
|
|
"step": 2610,
|
|
"valid_targets_mean": 6066.1,
|
|
"valid_targets_min": 3914
|
|
},
|
|
{
|
|
"epoch": 4.252237591537836,
|
|
"grad_norm": 0.5441188482256656,
|
|
"learning_rate": 1.603657246184337e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12901723384857178,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4693.8,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 4.260374288039056,
|
|
"grad_norm": 0.5849782251122515,
|
|
"learning_rate": 1.5957119285663276e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11500156670808792,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5282.9,
|
|
"valid_targets_min": 2751
|
|
},
|
|
{
|
|
"epoch": 4.268510984540277,
|
|
"grad_norm": 0.48316160524922036,
|
|
"learning_rate": 1.5877732577214227e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12493854761123657,
|
|
"step": 2625,
|
|
"valid_targets_mean": 6715.5,
|
|
"valid_targets_min": 2447
|
|
},
|
|
{
|
|
"epoch": 4.2766476810414975,
|
|
"grad_norm": 0.5053057310492522,
|
|
"learning_rate": 1.5798413641668152e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10976533591747284,
|
|
"step": 2630,
|
|
"valid_targets_mean": 5234.4,
|
|
"valid_targets_min": 3316
|
|
},
|
|
{
|
|
"epoch": 4.284784377542717,
|
|
"grad_norm": 0.5519671887282555,
|
|
"learning_rate": 1.5719163783082735e-05,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11516216397285461,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4224.4,
|
|
"valid_targets_min": 2866
|
|
},
|
|
{
|
|
"epoch": 4.292921074043938,
|
|
"grad_norm": 0.5225037254497872,
|
|
"learning_rate": 1.563998430437999e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17195287346839905,
|
|
"step": 2640,
|
|
"valid_targets_mean": 5706.8,
|
|
"valid_targets_min": 3850
|
|
},
|
|
{
|
|
"epoch": 4.3010577705451585,
|
|
"grad_norm": 0.598135661385332,
|
|
"learning_rate": 1.556087650732483e-05,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13336588442325592,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4468.4,
|
|
"valid_targets_min": 3203
|
|
},
|
|
{
|
|
"epoch": 4.309194467046379,
|
|
"grad_norm": 0.5569556750382165,
|
|
"learning_rate": 1.5481841692503696e-05,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1275118738412857,
|
|
"step": 2650,
|
|
"valid_targets_mean": 6468.5,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 4.3173311635476,
|
|
"grad_norm": 0.5748124791778696,
|
|
"learning_rate": 1.5402881159303132e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13305258750915527,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5509.2,
|
|
"valid_targets_min": 4168
|
|
},
|
|
{
|
|
"epoch": 4.32546786004882,
|
|
"grad_norm": 0.5867859868511631,
|
|
"learning_rate": 1.5323996205888444e-05,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10594667494297028,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4375.5,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 4.333604556550041,
|
|
"grad_norm": 0.5536999755590897,
|
|
"learning_rate": 1.5245188129182352e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15136541426181793,
|
|
"step": 2665,
|
|
"valid_targets_mean": 6034.4,
|
|
"valid_targets_min": 2955
|
|
},
|
|
{
|
|
"epoch": 4.341741253051262,
|
|
"grad_norm": 0.45096926773078744,
|
|
"learning_rate": 1.5166458224843666e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15225692093372345,
|
|
"step": 2670,
|
|
"valid_targets_mean": 7230.6,
|
|
"valid_targets_min": 4046
|
|
},
|
|
{
|
|
"epoch": 4.349877949552481,
|
|
"grad_norm": 0.4876695137087219,
|
|
"learning_rate": 1.5087807787246018e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1192513108253479,
|
|
"step": 2675,
|
|
"valid_targets_mean": 6170.1,
|
|
"valid_targets_min": 4160
|
|
},
|
|
{
|
|
"epoch": 4.358014646053702,
|
|
"grad_norm": 0.518906460408022,
|
|
"learning_rate": 1.5009238109456519e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11651073396205902,
|
|
"step": 2680,
|
|
"valid_targets_mean": 5396.5,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 4.3661513425549225,
|
|
"grad_norm": 0.49929632927019785,
|
|
"learning_rate": 1.4930750483214545e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12936654686927795,
|
|
"step": 2685,
|
|
"valid_targets_mean": 6563.0,
|
|
"valid_targets_min": 3114
|
|
},
|
|
{
|
|
"epoch": 4.374288039056143,
|
|
"grad_norm": 0.6156782440298237,
|
|
"learning_rate": 1.485234619891049e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11831729114055634,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3777.8,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 4.382424735557364,
|
|
"grad_norm": 0.49361613525157977,
|
|
"learning_rate": 1.4774026545564542e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13980375230312347,
|
|
"step": 2695,
|
|
"valid_targets_mean": 6734.5,
|
|
"valid_targets_min": 3579
|
|
},
|
|
{
|
|
"epoch": 4.390561432058584,
|
|
"grad_norm": 0.4945912218477993,
|
|
"learning_rate": 1.4695792810805513e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11305113881826401,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4944.9,
|
|
"valid_targets_min": 2712
|
|
},
|
|
{
|
|
"epoch": 4.398698128559805,
|
|
"grad_norm": 0.5756325553240135,
|
|
"learning_rate": 1.4617646280849642e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12662610411643982,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4155.1,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 4.406834825061026,
|
|
"grad_norm": 0.4985205743712053,
|
|
"learning_rate": 1.4539588240479465e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12237317860126495,
|
|
"step": 2710,
|
|
"valid_targets_mean": 6090.8,
|
|
"valid_targets_min": 4559
|
|
},
|
|
{
|
|
"epoch": 4.414971521562245,
|
|
"grad_norm": 0.5203081116851938,
|
|
"learning_rate": 1.4461619973022687e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1582365483045578,
|
|
"step": 2715,
|
|
"valid_targets_mean": 5534.2,
|
|
"valid_targets_min": 2713
|
|
},
|
|
{
|
|
"epoch": 4.423108218063466,
|
|
"grad_norm": 0.5071779116892461,
|
|
"learning_rate": 1.4383742760331076e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10871212184429169,
|
|
"step": 2720,
|
|
"valid_targets_mean": 5417.2,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 4.431244914564687,
|
|
"grad_norm": 0.5122608576322747,
|
|
"learning_rate": 1.4305957882759427e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09927636384963989,
|
|
"step": 2725,
|
|
"valid_targets_mean": 4650.1,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 4.439381611065907,
|
|
"grad_norm": 0.6181215239682092,
|
|
"learning_rate": 1.4228266619144453e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1790265142917633,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5120.4,
|
|
"valid_targets_min": 2997
|
|
},
|
|
{
|
|
"epoch": 4.447518307567128,
|
|
"grad_norm": 0.568948631389522,
|
|
"learning_rate": 1.4150670246783799e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12507446110248566,
|
|
"step": 2735,
|
|
"valid_targets_mean": 4760.6,
|
|
"valid_targets_min": 2546
|
|
},
|
|
{
|
|
"epoch": 4.4556550040683485,
|
|
"grad_norm": 0.49435152766723117,
|
|
"learning_rate": 1.4073170041415028e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1444876343011856,
|
|
"step": 2740,
|
|
"valid_targets_mean": 6215.4,
|
|
"valid_targets_min": 3236
|
|
},
|
|
{
|
|
"epoch": 4.463791700569569,
|
|
"grad_norm": 0.6181766282098673,
|
|
"learning_rate": 1.3995767277194665e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10405126214027405,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4468.4,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 4.471928397070789,
|
|
"grad_norm": 0.41154847544209594,
|
|
"learning_rate": 1.391846322667722e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0906953513622284,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5218.9,
|
|
"valid_targets_min": 2520
|
|
},
|
|
{
|
|
"epoch": 4.480065093572009,
|
|
"grad_norm": 0.6572132993720595,
|
|
"learning_rate": 1.3841259160794298e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12475261837244034,
|
|
"step": 2755,
|
|
"valid_targets_mean": 6810.9,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 4.48820179007323,
|
|
"grad_norm": 0.5336969527360339,
|
|
"learning_rate": 1.3764156348833666e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12281182408332825,
|
|
"step": 2760,
|
|
"valid_targets_mean": 6573.4,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 4.496338486574451,
|
|
"grad_norm": 0.568863776583933,
|
|
"learning_rate": 1.3687156058418422e-05,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1258392333984375,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4216.4,
|
|
"valid_targets_min": 2142
|
|
},
|
|
{
|
|
"epoch": 4.504475183075671,
|
|
"grad_norm": 0.5202434927147002,
|
|
"learning_rate": 1.3610259555486152e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14846715331077576,
|
|
"step": 2770,
|
|
"valid_targets_mean": 5878.0,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 4.512611879576892,
|
|
"grad_norm": 0.620131546040906,
|
|
"learning_rate": 1.3533468104268078e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1723940074443817,
|
|
"step": 2775,
|
|
"valid_targets_mean": 5155.9,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 4.5207485760781125,
|
|
"grad_norm": 0.49196921841926167,
|
|
"learning_rate": 1.3456782967268316e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12150443345308304,
|
|
"step": 2780,
|
|
"valid_targets_mean": 5932.6,
|
|
"valid_targets_min": 3864
|
|
},
|
|
{
|
|
"epoch": 4.528885272579333,
|
|
"grad_norm": 0.4638624677398862,
|
|
"learning_rate": 1.3380205405243096e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09334471076726913,
|
|
"step": 2785,
|
|
"valid_targets_mean": 5831.0,
|
|
"valid_targets_min": 2304
|
|
},
|
|
{
|
|
"epoch": 4.537021969080554,
|
|
"grad_norm": 0.582037329244977,
|
|
"learning_rate": 1.3303736677180044e-05,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17545725405216217,
|
|
"step": 2790,
|
|
"valid_targets_mean": 5931.8,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 4.5451586655817735,
|
|
"grad_norm": 0.40849974141197115,
|
|
"learning_rate": 1.322737804027749e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11623605340719223,
|
|
"step": 2795,
|
|
"valid_targets_mean": 8913.4,
|
|
"valid_targets_min": 3525
|
|
},
|
|
{
|
|
"epoch": 4.553295362082994,
|
|
"grad_norm": 0.5932308533815444,
|
|
"learning_rate": 1.315113074992378e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11075286567211151,
|
|
"step": 2800,
|
|
"valid_targets_mean": 6780.4,
|
|
"valid_targets_min": 4594
|
|
},
|
|
{
|
|
"epoch": 4.561432058584215,
|
|
"grad_norm": 0.5613236786122735,
|
|
"learning_rate": 1.3074996059676644e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12261034548282623,
|
|
"step": 2805,
|
|
"valid_targets_mean": 6014.5,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 4.569568755085435,
|
|
"grad_norm": 0.6145107752409859,
|
|
"learning_rate": 1.2998975221242596e-05,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11969676613807678,
|
|
"step": 2810,
|
|
"valid_targets_mean": 6176.0,
|
|
"valid_targets_min": 2517
|
|
},
|
|
{
|
|
"epoch": 4.577705451586656,
|
|
"grad_norm": 0.5116817887496089,
|
|
"learning_rate": 1.292306948445634e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09205292165279388,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4062.6,
|
|
"valid_targets_min": 1865
|
|
},
|
|
{
|
|
"epoch": 4.585842148087877,
|
|
"grad_norm": 0.5032628834115105,
|
|
"learning_rate": 1.2847280097260245e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10667150467634201,
|
|
"step": 2820,
|
|
"valid_targets_mean": 5223.2,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 4.593978844589097,
|
|
"grad_norm": 0.519138895357275,
|
|
"learning_rate": 1.2771608305683798e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1109437346458435,
|
|
"step": 2825,
|
|
"valid_targets_mean": 5241.1,
|
|
"valid_targets_min": 2067
|
|
},
|
|
{
|
|
"epoch": 4.602115541090317,
|
|
"grad_norm": 0.525394243588212,
|
|
"learning_rate": 1.269605535382314e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16636374592781067,
|
|
"step": 2830,
|
|
"valid_targets_mean": 6438.2,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 4.610252237591538,
|
|
"grad_norm": 0.5191821565520898,
|
|
"learning_rate": 1.2620622483820604e-05,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11650275439023972,
|
|
"step": 2835,
|
|
"valid_targets_mean": 4864.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 4.618388934092758,
|
|
"grad_norm": 0.5627143798214106,
|
|
"learning_rate": 1.2545310935844288e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09677013009786606,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3457.5,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 4.626525630593979,
|
|
"grad_norm": 0.5874185551809977,
|
|
"learning_rate": 1.2470121948067693e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1323344111442566,
|
|
"step": 2845,
|
|
"valid_targets_mean": 5520.2,
|
|
"valid_targets_min": 2752
|
|
},
|
|
{
|
|
"epoch": 4.634662327095199,
|
|
"grad_norm": 0.5568858672026162,
|
|
"learning_rate": 1.2395056756649328e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13221968710422516,
|
|
"step": 2850,
|
|
"valid_targets_mean": 5042.0,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 4.64279902359642,
|
|
"grad_norm": 0.4669743898337823,
|
|
"learning_rate": 1.2320116595712413e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10005086660385132,
|
|
"step": 2855,
|
|
"valid_targets_mean": 6354.0,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 4.650935720097641,
|
|
"grad_norm": 0.890197194306011,
|
|
"learning_rate": 1.224530269732457e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11468639224767685,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4317.4,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 4.65907241659886,
|
|
"grad_norm": 0.523673019189796,
|
|
"learning_rate": 1.2170616291477595e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10656925290822983,
|
|
"step": 2865,
|
|
"valid_targets_mean": 5904.4,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 4.667209113100081,
|
|
"grad_norm": 0.5080582692191867,
|
|
"learning_rate": 1.2096058606067205e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10707581788301468,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4963.8,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 4.675345809601302,
|
|
"grad_norm": 0.5988092562512479,
|
|
"learning_rate": 1.2021630866872877e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1406228244304657,
|
|
"step": 2875,
|
|
"valid_targets_mean": 5334.5,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 4.683482506102522,
|
|
"grad_norm": 0.41508039808193575,
|
|
"learning_rate": 1.1947334297537675e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07547567784786224,
|
|
"step": 2880,
|
|
"valid_targets_mean": 6389.6,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 4.691619202603743,
|
|
"grad_norm": 0.4920978401058176,
|
|
"learning_rate": 1.1873170119548134e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12869910895824432,
|
|
"step": 2885,
|
|
"valid_targets_mean": 6174.5,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 4.6997558991049635,
|
|
"grad_norm": 0.5167942635637724,
|
|
"learning_rate": 1.1799139552214202e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16343358159065247,
|
|
"step": 2890,
|
|
"valid_targets_mean": 5750.2,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 4.707892595606184,
|
|
"grad_norm": 0.6239170640519179,
|
|
"learning_rate": 1.1725243812649168e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14894859492778778,
|
|
"step": 2895,
|
|
"valid_targets_mean": 5554.4,
|
|
"valid_targets_min": 4602
|
|
},
|
|
{
|
|
"epoch": 4.716029292107405,
|
|
"grad_norm": 0.5652543437239146,
|
|
"learning_rate": 1.1651484115749647e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13217860460281372,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4814.0,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 4.724165988608625,
|
|
"grad_norm": 0.43613616950750617,
|
|
"learning_rate": 1.1577861674175645e-05,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11000243574380875,
|
|
"step": 2905,
|
|
"valid_targets_mean": 6706.0,
|
|
"valid_targets_min": 2946
|
|
},
|
|
{
|
|
"epoch": 4.732302685109845,
|
|
"grad_norm": 0.5312582849880375,
|
|
"learning_rate": 1.1504377698330575e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10568075627088547,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4921.6,
|
|
"valid_targets_min": 2460
|
|
},
|
|
{
|
|
"epoch": 4.740439381611066,
|
|
"grad_norm": 0.47078207010849765,
|
|
"learning_rate": 1.1431033396341391e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1127002015709877,
|
|
"step": 2915,
|
|
"valid_targets_mean": 6926.8,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 4.748576078112286,
|
|
"grad_norm": 0.5215888340298842,
|
|
"learning_rate": 1.1357829974038703e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1564989537000656,
|
|
"step": 2920,
|
|
"valid_targets_mean": 6785.0,
|
|
"valid_targets_min": 3423
|
|
},
|
|
{
|
|
"epoch": 4.756712774613507,
|
|
"grad_norm": 0.5470344569640518,
|
|
"learning_rate": 1.1284768634936971e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.187900573015213,
|
|
"step": 2925,
|
|
"valid_targets_mean": 6071.8,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 4.764849471114728,
|
|
"grad_norm": 0.5184922299861338,
|
|
"learning_rate": 1.1211850580214703e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10915215313434601,
|
|
"step": 2930,
|
|
"valid_targets_mean": 5272.6,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 4.772986167615948,
|
|
"grad_norm": 0.5260960542301046,
|
|
"learning_rate": 1.1139077008694712e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16236786544322968,
|
|
"step": 2935,
|
|
"valid_targets_mean": 6286.5,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 4.781122864117169,
|
|
"grad_norm": 0.5220847375110784,
|
|
"learning_rate": 1.1066449116824428e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12581384181976318,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5605.1,
|
|
"valid_targets_min": 4296
|
|
},
|
|
{
|
|
"epoch": 4.7892595606183885,
|
|
"grad_norm": 0.48555117830549954,
|
|
"learning_rate": 1.099396809865618e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10319238156080246,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5697.5,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 4.797396257119609,
|
|
"grad_norm": 0.5192267689139981,
|
|
"learning_rate": 1.0921635145827611e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1321655809879303,
|
|
"step": 2950,
|
|
"valid_targets_mean": 6476.0,
|
|
"valid_targets_min": 3020
|
|
},
|
|
{
|
|
"epoch": 4.80553295362083,
|
|
"grad_norm": 0.5076930593639264,
|
|
"learning_rate": 1.0849451447542054e-05,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11374587565660477,
|
|
"step": 2955,
|
|
"valid_targets_mean": 6396.4,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 4.81366965012205,
|
|
"grad_norm": 0.6351691391627748,
|
|
"learning_rate": 1.0777418190549018e-05,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2092050015926361,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4927.0,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 4.821806346623271,
|
|
"grad_norm": 0.55668226084915,
|
|
"learning_rate": 1.070553655912463e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11320999264717102,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4125.8,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 4.829943043124492,
|
|
"grad_norm": 0.4950231660068158,
|
|
"learning_rate": 1.0633807735052202e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11982996016740799,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4853.8,
|
|
"valid_targets_min": 2324
|
|
},
|
|
{
|
|
"epoch": 4.838079739625712,
|
|
"grad_norm": 0.7573897869396419,
|
|
"learning_rate": 1.056223289760278e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10986106842756271,
|
|
"step": 2975,
|
|
"valid_targets_mean": 6308.2,
|
|
"valid_targets_min": 2719
|
|
},
|
|
{
|
|
"epoch": 4.846216436126932,
|
|
"grad_norm": 0.5238872739771269,
|
|
"learning_rate": 1.0490813223515764e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12110009789466858,
|
|
"step": 2980,
|
|
"valid_targets_mean": 6574.9,
|
|
"valid_targets_min": 3849
|
|
},
|
|
{
|
|
"epoch": 4.854353132628153,
|
|
"grad_norm": 0.6054997927877389,
|
|
"learning_rate": 1.0419549886979582e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14355552196502686,
|
|
"step": 2985,
|
|
"valid_targets_mean": 4339.4,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 4.862489829129373,
|
|
"grad_norm": 0.5891493401006247,
|
|
"learning_rate": 1.0348444059612338e-05,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12982431054115295,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4962.1,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 4.870626525630594,
|
|
"grad_norm": 0.49481010220886834,
|
|
"learning_rate": 1.0277496910442596e-05,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11110454797744751,
|
|
"step": 2995,
|
|
"valid_targets_mean": 5967.2,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 4.8787632221318145,
|
|
"grad_norm": 0.45375461974264514,
|
|
"learning_rate": 1.0206709605890133e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1309911608695984,
|
|
"step": 3000,
|
|
"valid_targets_mean": 5787.9,
|
|
"valid_targets_min": 3290
|
|
},
|
|
{
|
|
"epoch": 4.886899918633035,
|
|
"grad_norm": 0.4717289745698522,
|
|
"learning_rate": 1.0136083309746765e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18679049611091614,
|
|
"step": 3005,
|
|
"valid_targets_mean": 8518.8,
|
|
"valid_targets_min": 5241
|
|
},
|
|
{
|
|
"epoch": 4.895036615134256,
|
|
"grad_norm": 0.4826417818565528,
|
|
"learning_rate": 1.006561918315724e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14399048686027527,
|
|
"step": 3010,
|
|
"valid_targets_mean": 6302.8,
|
|
"valid_targets_min": 4959
|
|
},
|
|
{
|
|
"epoch": 4.903173311635476,
|
|
"grad_norm": 0.626033134101394,
|
|
"learning_rate": 9.995318384600112e-06,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13014422357082367,
|
|
"step": 3015,
|
|
"valid_targets_mean": 6003.1,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 4.911310008136697,
|
|
"grad_norm": 0.530085231028136,
|
|
"learning_rate": 9.92518206986871e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09500178694725037,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4791.5,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 4.919446704637917,
|
|
"grad_norm": 0.645773429216433,
|
|
"learning_rate": 9.855211392052139e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1401991993188858,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4193.1,
|
|
"valid_targets_min": 2546
|
|
},
|
|
{
|
|
"epoch": 4.927583401139137,
|
|
"grad_norm": 0.5337804157823813,
|
|
"learning_rate": 9.78540750151632e-06,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13427186012268066,
|
|
"step": 3030,
|
|
"valid_targets_mean": 5285.6,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 4.935720097640358,
|
|
"grad_norm": 1.9302688171029854,
|
|
"learning_rate": 9.715771545885076e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14041340351104736,
|
|
"step": 3035,
|
|
"valid_targets_mean": 7417.6,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 4.9438567941415785,
|
|
"grad_norm": 0.5343935268176057,
|
|
"learning_rate": 9.646304670021263e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1480150669813156,
|
|
"step": 3040,
|
|
"valid_targets_mean": 6581.8,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 4.951993490642799,
|
|
"grad_norm": 0.569875399697325,
|
|
"learning_rate": 9.577008016007956e-06,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17220965027809143,
|
|
"step": 3045,
|
|
"valid_targets_mean": 5431.8,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 4.96013018714402,
|
|
"grad_norm": 0.47006455437543826,
|
|
"learning_rate": 9.50788272312966e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1282365322113037,
|
|
"step": 3050,
|
|
"valid_targets_mean": 6441.4,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 4.96826688364524,
|
|
"grad_norm": 0.5349848814276844,
|
|
"learning_rate": 9.43892992785358e-06,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12551423907279968,
|
|
"step": 3055,
|
|
"valid_targets_mean": 5614.6,
|
|
"valid_targets_min": 2311
|
|
},
|
|
{
|
|
"epoch": 4.97640358014646,
|
|
"grad_norm": 0.5428356503095221,
|
|
"learning_rate": 9.370150763810966e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10237240791320801,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4330.6,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 4.984540276647681,
|
|
"grad_norm": 0.48029744452868756,
|
|
"learning_rate": 9.301546361778424e-06,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13645213842391968,
|
|
"step": 3065,
|
|
"valid_targets_mean": 8541.2,
|
|
"valid_targets_min": 2287
|
|
},
|
|
{
|
|
"epoch": 4.992676973148901,
|
|
"grad_norm": 0.6660157157795131,
|
|
"learning_rate": 9.233117849659367e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12218661606311798,
|
|
"step": 3070,
|
|
"valid_targets_mean": 5242.0,
|
|
"valid_targets_min": 3400
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6496835877212901,
|
|
"learning_rate": 9.164866352465447e-06,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23714704811573029,
|
|
"step": 3075,
|
|
"valid_targets_mean": 6213.5,
|
|
"valid_targets_min": 4269
|
|
},
|
|
{
|
|
"epoch": 5.008136696501221,
|
|
"grad_norm": 0.5523377269177591,
|
|
"learning_rate": 9.096792992298089e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14559800922870636,
|
|
"step": 3080,
|
|
"valid_targets_mean": 5338.9,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 5.016273393002441,
|
|
"grad_norm": 0.48621978067676924,
|
|
"learning_rate": 9.028898888330005e-06,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11603275686502457,
|
|
"step": 3085,
|
|
"valid_targets_mean": 7716.8,
|
|
"valid_targets_min": 4160
|
|
},
|
|
{
|
|
"epoch": 5.024410089503662,
|
|
"grad_norm": 0.4888228209250331,
|
|
"learning_rate": 8.961185156786815e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15556365251541138,
|
|
"step": 3090,
|
|
"valid_targets_mean": 7306.2,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 5.032546786004882,
|
|
"grad_norm": 0.5604592596720499,
|
|
"learning_rate": 8.893652910928698e-06,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09606099128723145,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4868.5,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 5.040683482506102,
|
|
"grad_norm": 0.5312703952374581,
|
|
"learning_rate": 8.826303261032072e-06,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14954786002635956,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5175.0,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 5.048820179007323,
|
|
"grad_norm": 0.49989048406424097,
|
|
"learning_rate": 8.759137314371378e-06,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10760124027729034,
|
|
"step": 3105,
|
|
"valid_targets_mean": 6618.8,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 5.0569568755085434,
|
|
"grad_norm": 0.7886032990488385,
|
|
"learning_rate": 8.692156175200823e-06,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1214192658662796,
|
|
"step": 3110,
|
|
"valid_targets_mean": 6027.0,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 5.065093572009764,
|
|
"grad_norm": 0.48898341530084205,
|
|
"learning_rate": 8.625360944736262e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11569772660732269,
|
|
"step": 3115,
|
|
"valid_targets_mean": 6272.9,
|
|
"valid_targets_min": 372
|
|
},
|
|
{
|
|
"epoch": 5.073230268510985,
|
|
"grad_norm": 0.5956493423805724,
|
|
"learning_rate": 8.558752721137089e-06,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1290283203125,
|
|
"step": 3120,
|
|
"valid_targets_mean": 5507.6,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 5.081366965012205,
|
|
"grad_norm": 0.5660632743523386,
|
|
"learning_rate": 8.492332599488157e-06,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10170029103755951,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4242.0,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 5.089503661513426,
|
|
"grad_norm": 0.5887922517647676,
|
|
"learning_rate": 8.42610167178183e-06,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15197445452213287,
|
|
"step": 3130,
|
|
"valid_targets_mean": 5382.2,
|
|
"valid_targets_min": 3204
|
|
},
|
|
{
|
|
"epoch": 5.097640358014646,
|
|
"grad_norm": 0.487011491561986,
|
|
"learning_rate": 8.360061026899962e-06,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12325992435216904,
|
|
"step": 3135,
|
|
"valid_targets_mean": 8065.1,
|
|
"valid_targets_min": 3117
|
|
},
|
|
{
|
|
"epoch": 5.105777054515866,
|
|
"grad_norm": 0.5182417975398911,
|
|
"learning_rate": 8.294211750596035e-06,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09405682235956192,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5815.0,
|
|
"valid_targets_min": 466
|
|
},
|
|
{
|
|
"epoch": 5.113913751017087,
|
|
"grad_norm": 0.5322887135274532,
|
|
"learning_rate": 8.228554925477306e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11751969158649445,
|
|
"step": 3145,
|
|
"valid_targets_mean": 6338.4,
|
|
"valid_targets_min": 2067
|
|
},
|
|
{
|
|
"epoch": 5.1220504475183075,
|
|
"grad_norm": 0.5012197400585588,
|
|
"learning_rate": 8.163091630987e-06,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16712604463100433,
|
|
"step": 3150,
|
|
"valid_targets_mean": 6505.2,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 5.130187144019528,
|
|
"grad_norm": 0.5774573904412683,
|
|
"learning_rate": 8.097822943386563e-06,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14454790949821472,
|
|
"step": 3155,
|
|
"valid_targets_mean": 5446.6,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 5.138323840520749,
|
|
"grad_norm": 0.5165060277624411,
|
|
"learning_rate": 8.03274993573797e-06,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14901450276374817,
|
|
"step": 3160,
|
|
"valid_targets_mean": 6276.4,
|
|
"valid_targets_min": 3350
|
|
},
|
|
{
|
|
"epoch": 5.146460537021969,
|
|
"grad_norm": 0.4804753447195192,
|
|
"learning_rate": 7.96787367788609e-06,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11782047897577286,
|
|
"step": 3165,
|
|
"valid_targets_mean": 6550.2,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 5.15459723352319,
|
|
"grad_norm": 0.6535091078691466,
|
|
"learning_rate": 7.903195236441086e-06,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11725549399852753,
|
|
"step": 3170,
|
|
"valid_targets_mean": 6698.9,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 5.16273393002441,
|
|
"grad_norm": 0.4259935959590437,
|
|
"learning_rate": 7.838715674760874e-06,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0984138548374176,
|
|
"step": 3175,
|
|
"valid_targets_mean": 7291.5,
|
|
"valid_targets_min": 2757
|
|
},
|
|
{
|
|
"epoch": 5.17087062652563,
|
|
"grad_norm": 0.5362331748478609,
|
|
"learning_rate": 7.774436052933675e-06,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1301015317440033,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5733.1,
|
|
"valid_targets_min": 2704
|
|
},
|
|
{
|
|
"epoch": 5.179007323026851,
|
|
"grad_norm": 0.4742304640079011,
|
|
"learning_rate": 7.710357427760541e-06,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11534355580806732,
|
|
"step": 3185,
|
|
"valid_targets_mean": 7236.4,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 5.187144019528072,
|
|
"grad_norm": 0.5344847333949452,
|
|
"learning_rate": 7.646480852738008e-06,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11348524689674377,
|
|
"step": 3190,
|
|
"valid_targets_mean": 6580.0,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 5.195280716029292,
|
|
"grad_norm": 0.5127549344284401,
|
|
"learning_rate": 7.5828073780407575e-06,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12163542211055756,
|
|
"step": 3195,
|
|
"valid_targets_mean": 7134.1,
|
|
"valid_targets_min": 4067
|
|
},
|
|
{
|
|
"epoch": 5.203417412530513,
|
|
"grad_norm": 0.43286097887389563,
|
|
"learning_rate": 7.51933805050439e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11376143991947174,
|
|
"step": 3200,
|
|
"valid_targets_mean": 9556.1,
|
|
"valid_targets_min": 3383
|
|
},
|
|
{
|
|
"epoch": 5.211554109031733,
|
|
"grad_norm": 0.574180003978773,
|
|
"learning_rate": 7.45607391360816e-06,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0861620083451271,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3932.6,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 5.219690805532954,
|
|
"grad_norm": 0.6356603119387289,
|
|
"learning_rate": 7.393016007457858e-06,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19337713718414307,
|
|
"step": 3210,
|
|
"valid_targets_mean": 6120.9,
|
|
"valid_targets_min": 3426
|
|
},
|
|
{
|
|
"epoch": 5.227827502034174,
|
|
"grad_norm": 0.7725829783966397,
|
|
"learning_rate": 7.3301653687687005e-06,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1038031131029129,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5630.2,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 5.235964198535394,
|
|
"grad_norm": 0.6539324695436758,
|
|
"learning_rate": 7.2675230308482715e-06,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12459734082221985,
|
|
"step": 3220,
|
|
"valid_targets_mean": 7373.1,
|
|
"valid_targets_min": 4269
|
|
},
|
|
{
|
|
"epoch": 5.244100895036615,
|
|
"grad_norm": 0.5030860212842273,
|
|
"learning_rate": 7.205090023579575e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12467215210199356,
|
|
"step": 3225,
|
|
"valid_targets_mean": 6925.0,
|
|
"valid_targets_min": 2743
|
|
},
|
|
{
|
|
"epoch": 5.252237591537836,
|
|
"grad_norm": 0.5831950972097935,
|
|
"learning_rate": 7.142867373404054e-06,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.120590440928936,
|
|
"step": 3230,
|
|
"valid_targets_mean": 5793.6,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 5.260374288039056,
|
|
"grad_norm": 0.550231410324282,
|
|
"learning_rate": 7.080856103304739e-06,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12391173839569092,
|
|
"step": 3235,
|
|
"valid_targets_mean": 5339.4,
|
|
"valid_targets_min": 3466
|
|
},
|
|
{
|
|
"epoch": 5.268510984540277,
|
|
"grad_norm": 0.5759308774766279,
|
|
"learning_rate": 7.019057232789432e-06,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11659073829650879,
|
|
"step": 3240,
|
|
"valid_targets_mean": 4783.8,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 5.2766476810414975,
|
|
"grad_norm": 0.6119869770255949,
|
|
"learning_rate": 6.95747177787393e-06,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12066857516765594,
|
|
"step": 3245,
|
|
"valid_targets_mean": 5504.8,
|
|
"valid_targets_min": 2771
|
|
},
|
|
{
|
|
"epoch": 5.284784377542717,
|
|
"grad_norm": 0.4599465517768359,
|
|
"learning_rate": 6.896100751065355e-06,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10617942363023758,
|
|
"step": 3250,
|
|
"valid_targets_mean": 6689.1,
|
|
"valid_targets_min": 4081
|
|
},
|
|
{
|
|
"epoch": 5.292921074043938,
|
|
"grad_norm": 0.6272025768366799,
|
|
"learning_rate": 6.834945161345458e-06,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12755145132541656,
|
|
"step": 3255,
|
|
"valid_targets_mean": 5025.0,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 5.3010577705451585,
|
|
"grad_norm": 0.6127725285042429,
|
|
"learning_rate": 6.7740060141540735e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13596072793006897,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3870.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 5.309194467046379,
|
|
"grad_norm": 0.6393405669196937,
|
|
"learning_rate": 6.713284311372559e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13608285784721375,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3831.1,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 5.3173311635476,
|
|
"grad_norm": 0.519005266223286,
|
|
"learning_rate": 6.652781051307347e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09571771323680878,
|
|
"step": 3270,
|
|
"valid_targets_mean": 5871.8,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 5.32546786004882,
|
|
"grad_norm": 0.4801756872745154,
|
|
"learning_rate": 6.592497228673515e-06,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09945647418498993,
|
|
"step": 3275,
|
|
"valid_targets_mean": 5828.0,
|
|
"valid_targets_min": 3637
|
|
},
|
|
{
|
|
"epoch": 5.333604556550041,
|
|
"grad_norm": 0.6255645015728781,
|
|
"learning_rate": 6.532433834578449e-06,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1372753381729126,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5747.9,
|
|
"valid_targets_min": 2363
|
|
},
|
|
{
|
|
"epoch": 5.341741253051262,
|
|
"grad_norm": 0.5792091744981441,
|
|
"learning_rate": 6.472591856505526e-06,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12719324231147766,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5969.4,
|
|
"valid_targets_min": 3258
|
|
},
|
|
{
|
|
"epoch": 5.349877949552481,
|
|
"grad_norm": 0.46470017871854946,
|
|
"learning_rate": 6.412972278297893e-06,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0882813110947609,
|
|
"step": 3290,
|
|
"valid_targets_mean": 6402.1,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 5.358014646053702,
|
|
"grad_norm": 0.521223858319776,
|
|
"learning_rate": 6.353576080142309e-06,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11632373183965683,
|
|
"step": 3295,
|
|
"valid_targets_mean": 6959.1,
|
|
"valid_targets_min": 5246
|
|
},
|
|
{
|
|
"epoch": 5.3661513425549225,
|
|
"grad_norm": 0.5413214801922716,
|
|
"learning_rate": 6.294404238552994e-06,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13462717831134796,
|
|
"step": 3300,
|
|
"valid_targets_mean": 6453.6,
|
|
"valid_targets_min": 3165
|
|
},
|
|
{
|
|
"epoch": 5.374288039056143,
|
|
"grad_norm": 0.5573148190013232,
|
|
"learning_rate": 6.235457726355591e-06,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08801619708538055,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4524.6,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 5.382424735557364,
|
|
"grad_norm": 0.606307196821786,
|
|
"learning_rate": 6.176737512671182e-06,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13813389837741852,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4672.2,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 5.390561432058584,
|
|
"grad_norm": 0.6132589856336323,
|
|
"learning_rate": 6.11824456290034e-06,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1388852894306183,
|
|
"step": 3315,
|
|
"valid_targets_mean": 7884.9,
|
|
"valid_targets_min": 3424
|
|
},
|
|
{
|
|
"epoch": 5.398698128559805,
|
|
"grad_norm": 0.5924080709083809,
|
|
"learning_rate": 6.05997983870727e-06,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17288537323474884,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4974.5,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 5.406834825061026,
|
|
"grad_norm": 0.5457228907298186,
|
|
"learning_rate": 6.00194429800399e-06,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13619005680084229,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5901.8,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 5.414971521562245,
|
|
"grad_norm": 0.5079754475219553,
|
|
"learning_rate": 5.944138894934582e-06,
|
|
"loss": 0.2362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13231110572814941,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4886.6,
|
|
"valid_targets_min": 1461
|
|
},
|
|
{
|
|
"epoch": 5.423108218063466,
|
|
"grad_norm": 0.5492790566828493,
|
|
"learning_rate": 5.886564579859504e-06,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10987076908349991,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4042.6,
|
|
"valid_targets_min": 397
|
|
},
|
|
{
|
|
"epoch": 5.431244914564687,
|
|
"grad_norm": 0.5765503149421513,
|
|
"learning_rate": 5.829222299339969e-06,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09734175354242325,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4017.5,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 5.439381611065907,
|
|
"grad_norm": 0.4945201441893148,
|
|
"learning_rate": 5.772112996122403e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12293386459350586,
|
|
"step": 3345,
|
|
"valid_targets_mean": 6276.8,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 5.447518307567128,
|
|
"grad_norm": 0.5241675442790266,
|
|
"learning_rate": 5.715237609122896e-06,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13944748044013977,
|
|
"step": 3350,
|
|
"valid_targets_mean": 6314.0,
|
|
"valid_targets_min": 4246
|
|
},
|
|
{
|
|
"epoch": 5.4556550040683485,
|
|
"grad_norm": 0.5086698627055527,
|
|
"learning_rate": 5.658597073411816e-06,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10946042835712433,
|
|
"step": 3355,
|
|
"valid_targets_mean": 4947.4,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 5.463791700569569,
|
|
"grad_norm": 0.5087935856854946,
|
|
"learning_rate": 5.602192320198401e-06,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11571265012025833,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4712.5,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 5.471928397070789,
|
|
"grad_norm": 0.6209480221847946,
|
|
"learning_rate": 5.546024276815467e-06,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12471382319927216,
|
|
"step": 3365,
|
|
"valid_targets_mean": 6229.0,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 5.480065093572009,
|
|
"grad_norm": 0.5740469197183198,
|
|
"learning_rate": 5.490093866704171e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15511798858642578,
|
|
"step": 3370,
|
|
"valid_targets_mean": 5492.1,
|
|
"valid_targets_min": 2163
|
|
},
|
|
{
|
|
"epoch": 5.48820179007323,
|
|
"grad_norm": 0.5005163631534619,
|
|
"learning_rate": 5.434402009398798e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10742124915122986,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5065.9,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 5.496338486574451,
|
|
"grad_norm": 0.5671784567412589,
|
|
"learning_rate": 5.378949620511671e-06,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11315690726041794,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3717.4,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 5.504475183075671,
|
|
"grad_norm": 0.4360799056624353,
|
|
"learning_rate": 5.3237376117180854e-06,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0977911725640297,
|
|
"step": 3385,
|
|
"valid_targets_mean": 6910.8,
|
|
"valid_targets_min": 3082
|
|
},
|
|
{
|
|
"epoch": 5.512611879576892,
|
|
"grad_norm": 0.5142900343224198,
|
|
"learning_rate": 5.268766890741315e-06,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13378457725048065,
|
|
"step": 3390,
|
|
"valid_targets_mean": 6389.2,
|
|
"valid_targets_min": 3445
|
|
},
|
|
{
|
|
"epoch": 5.5207485760781125,
|
|
"grad_norm": 0.5602434630005119,
|
|
"learning_rate": 5.214038361337719e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10973970592021942,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4815.2,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 5.528885272579333,
|
|
"grad_norm": 0.5342812643462996,
|
|
"learning_rate": 5.159552923281841e-06,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0758785754442215,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4529.4,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 5.537021969080554,
|
|
"grad_norm": 0.5716216749186573,
|
|
"learning_rate": 5.105311472351639e-06,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.121833935379982,
|
|
"step": 3405,
|
|
"valid_targets_mean": 5706.9,
|
|
"valid_targets_min": 4080
|
|
},
|
|
{
|
|
"epoch": 5.5451586655817735,
|
|
"grad_norm": 0.43346386249748464,
|
|
"learning_rate": 5.051314900313764e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07879453897476196,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5555.4,
|
|
"valid_targets_min": 3770
|
|
},
|
|
{
|
|
"epoch": 5.553295362082994,
|
|
"grad_norm": 0.49340156082331843,
|
|
"learning_rate": 4.997564094908878e-06,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12145587801933289,
|
|
"step": 3415,
|
|
"valid_targets_mean": 6155.6,
|
|
"valid_targets_min": 2387
|
|
},
|
|
{
|
|
"epoch": 5.561432058584215,
|
|
"grad_norm": 0.47890012005963234,
|
|
"learning_rate": 4.944059939837082e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12839338183403015,
|
|
"step": 3420,
|
|
"valid_targets_mean": 6536.0,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 5.569568755085435,
|
|
"grad_norm": 0.4903125361830746,
|
|
"learning_rate": 4.890803314743371e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11269734799861908,
|
|
"step": 3425,
|
|
"valid_targets_mean": 5621.2,
|
|
"valid_targets_min": 3511
|
|
},
|
|
{
|
|
"epoch": 5.577705451586656,
|
|
"grad_norm": 0.5283918720724791,
|
|
"learning_rate": 4.837795095203175e-06,
|
|
"loss": 0.2306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10433046519756317,
|
|
"step": 3430,
|
|
"valid_targets_mean": 5621.9,
|
|
"valid_targets_min": 2251
|
|
},
|
|
{
|
|
"epoch": 5.585842148087877,
|
|
"grad_norm": 0.6026011579728354,
|
|
"learning_rate": 4.785036152707969e-06,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12681570649147034,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4668.2,
|
|
"valid_targets_min": 2544
|
|
},
|
|
{
|
|
"epoch": 5.593978844589097,
|
|
"grad_norm": 0.6061374193583238,
|
|
"learning_rate": 4.732527354650951e-06,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13143488764762878,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4703.0,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 5.602115541090317,
|
|
"grad_norm": 0.5221796158770693,
|
|
"learning_rate": 4.68026956431276e-06,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.149415522813797,
|
|
"step": 3445,
|
|
"valid_targets_mean": 6412.0,
|
|
"valid_targets_min": 4008
|
|
},
|
|
{
|
|
"epoch": 5.610252237591538,
|
|
"grad_norm": 0.576901307893142,
|
|
"learning_rate": 4.628263640847304e-06,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12277019023895264,
|
|
"step": 3450,
|
|
"valid_targets_mean": 5465.6,
|
|
"valid_targets_min": 3759
|
|
},
|
|
{
|
|
"epoch": 5.618388934092758,
|
|
"grad_norm": 0.5641514509002485,
|
|
"learning_rate": 4.5765104392676205e-06,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13894778490066528,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5405.1,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 5.626525630593979,
|
|
"grad_norm": 0.7340535896041918,
|
|
"learning_rate": 4.525010810431825e-06,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13835415244102478,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3785.0,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 5.634662327095199,
|
|
"grad_norm": 0.5034917359869234,
|
|
"learning_rate": 4.4737656010291366e-06,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1569593846797943,
|
|
"step": 3465,
|
|
"valid_targets_mean": 8556.2,
|
|
"valid_targets_min": 4927
|
|
},
|
|
{
|
|
"epoch": 5.64279902359642,
|
|
"grad_norm": 0.6255736017643779,
|
|
"learning_rate": 4.422775653565934e-06,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10940764844417572,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4768.1,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 5.650935720097641,
|
|
"grad_norm": 0.46222130787565185,
|
|
"learning_rate": 4.372041806351914e-06,
|
|
"loss": 0.2516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12370267510414124,
|
|
"step": 3475,
|
|
"valid_targets_mean": 6039.4,
|
|
"valid_targets_min": 2221
|
|
},
|
|
{
|
|
"epoch": 5.65907241659886,
|
|
"grad_norm": 0.7151061983633159,
|
|
"learning_rate": 4.321564893486312e-06,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10197927057743073,
|
|
"step": 3480,
|
|
"valid_targets_mean": 5656.9,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 5.667209113100081,
|
|
"grad_norm": 0.4612178221303647,
|
|
"learning_rate": 4.271345744844182e-06,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07878731191158295,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5026.6,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 5.675345809601302,
|
|
"grad_norm": 0.5034775896468006,
|
|
"learning_rate": 4.2213851860627696e-06,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11451518535614014,
|
|
"step": 3490,
|
|
"valid_targets_mean": 5065.0,
|
|
"valid_targets_min": 3912
|
|
},
|
|
{
|
|
"epoch": 5.683482506102522,
|
|
"grad_norm": 0.48370829828670214,
|
|
"learning_rate": 4.171684038527914e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1177678257226944,
|
|
"step": 3495,
|
|
"valid_targets_mean": 5450.1,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 5.691619202603743,
|
|
"grad_norm": 0.4915476071311832,
|
|
"learning_rate": 4.12224311936056e-06,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11996840685606003,
|
|
"step": 3500,
|
|
"valid_targets_mean": 7179.9,
|
|
"valid_targets_min": 4365
|
|
},
|
|
{
|
|
"epoch": 5.6997558991049635,
|
|
"grad_norm": 0.5337016634168994,
|
|
"learning_rate": 4.073063241403316e-06,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12122653424739838,
|
|
"step": 3505,
|
|
"valid_targets_mean": 5070.0,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 5.707892595606184,
|
|
"grad_norm": 0.5853690423707902,
|
|
"learning_rate": 4.024145213207103e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10927228629589081,
|
|
"step": 3510,
|
|
"valid_targets_mean": 4184.2,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 5.716029292107405,
|
|
"grad_norm": 0.5353891362994679,
|
|
"learning_rate": 3.975489839017846e-06,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12216495722532272,
|
|
"step": 3515,
|
|
"valid_targets_mean": 5225.1,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 5.724165988608625,
|
|
"grad_norm": 0.9481430336242733,
|
|
"learning_rate": 3.9270979187632516e-06,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12332406640052795,
|
|
"step": 3520,
|
|
"valid_targets_mean": 5509.9,
|
|
"valid_targets_min": 2682
|
|
},
|
|
{
|
|
"epoch": 5.732302685109845,
|
|
"grad_norm": 0.47893060639696544,
|
|
"learning_rate": 3.878970248039678e-06,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1404319554567337,
|
|
"step": 3525,
|
|
"valid_targets_mean": 7358.9,
|
|
"valid_targets_min": 3809
|
|
},
|
|
{
|
|
"epoch": 5.740439381611066,
|
|
"grad_norm": 0.509875821596104,
|
|
"learning_rate": 3.831107618099026e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19203639030456543,
|
|
"step": 3530,
|
|
"valid_targets_mean": 6786.8,
|
|
"valid_targets_min": 2958
|
|
},
|
|
{
|
|
"epoch": 5.748576078112286,
|
|
"grad_norm": 0.47911534628300395,
|
|
"learning_rate": 3.7835108158357537e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12184299528598785,
|
|
"step": 3535,
|
|
"valid_targets_mean": 7290.6,
|
|
"valid_targets_min": 3489
|
|
},
|
|
{
|
|
"epoch": 5.756712774613507,
|
|
"grad_norm": 0.45486474848191916,
|
|
"learning_rate": 3.7361806237739264e-06,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08215008676052094,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5637.9,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 5.764849471114728,
|
|
"grad_norm": 0.5210595475392571,
|
|
"learning_rate": 3.689117820054351e-06,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08964404463768005,
|
|
"step": 3545,
|
|
"valid_targets_mean": 5758.0,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 5.772986167615948,
|
|
"grad_norm": 0.5267608109326574,
|
|
"learning_rate": 3.6423231784217918e-06,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14031247794628143,
|
|
"step": 3550,
|
|
"valid_targets_mean": 6369.0,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 5.781122864117169,
|
|
"grad_norm": 0.5326050285350579,
|
|
"learning_rate": 3.595797468212241e-06,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10683193802833557,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4920.4,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 5.7892595606183885,
|
|
"grad_norm": 0.47240098718567736,
|
|
"learning_rate": 3.549541454340284e-06,
|
|
"loss": 0.2249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07187937200069427,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4559.9,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 5.797396257119609,
|
|
"grad_norm": 0.5523118981586366,
|
|
"learning_rate": 3.503555897286499e-06,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383015513420105,
|
|
"step": 3565,
|
|
"valid_targets_mean": 6232.8,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 5.80553295362083,
|
|
"grad_norm": 0.47382486470602847,
|
|
"learning_rate": 3.4578415530849794e-06,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10226757824420929,
|
|
"step": 3570,
|
|
"valid_targets_mean": 6907.6,
|
|
"valid_targets_min": 2369
|
|
},
|
|
{
|
|
"epoch": 5.81366965012205,
|
|
"grad_norm": 0.559498744815589,
|
|
"learning_rate": 3.4123991733108852e-06,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11900554597377777,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4755.1,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 5.821806346623271,
|
|
"grad_norm": 0.5114120203304382,
|
|
"learning_rate": 3.3672295050680946e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14657077193260193,
|
|
"step": 3580,
|
|
"valid_targets_mean": 6173.2,
|
|
"valid_targets_min": 3078
|
|
},
|
|
{
|
|
"epoch": 5.829943043124492,
|
|
"grad_norm": 0.9817994961771758,
|
|
"learning_rate": 3.322333290976936e-06,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13037019968032837,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4806.9,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 5.838079739625712,
|
|
"grad_norm": 0.49960877842083473,
|
|
"learning_rate": 3.2777112691619473e-06,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11504438519477844,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5366.2,
|
|
"valid_targets_min": 3196
|
|
},
|
|
{
|
|
"epoch": 5.846216436126932,
|
|
"grad_norm": 0.5060382703438006,
|
|
"learning_rate": 3.233364173239766e-06,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11748774349689484,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5511.2,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 5.854353132628153,
|
|
"grad_norm": 0.5646071765030719,
|
|
"learning_rate": 3.189292732307052e-06,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11946381628513336,
|
|
"step": 3600,
|
|
"valid_targets_mean": 5497.6,
|
|
"valid_targets_min": 3401
|
|
},
|
|
{
|
|
"epoch": 5.862489829129373,
|
|
"grad_norm": 1.9085216141047994,
|
|
"learning_rate": 3.1454976709285124e-06,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12498481571674347,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5483.4,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 5.870626525630594,
|
|
"grad_norm": 0.6119847782021685,
|
|
"learning_rate": 3.1019797091249938e-06,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1357029676437378,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4750.2,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 5.8787632221318145,
|
|
"grad_norm": 0.5952452103733551,
|
|
"learning_rate": 3.058739562361621e-06,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12497454881668091,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4917.1,
|
|
"valid_targets_min": 3716
|
|
},
|
|
{
|
|
"epoch": 5.886899918633035,
|
|
"grad_norm": 0.5378260436755556,
|
|
"learning_rate": 3.015777941536058e-06,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10903813689947128,
|
|
"step": 3620,
|
|
"valid_targets_mean": 4843.9,
|
|
"valid_targets_min": 2199
|
|
},
|
|
{
|
|
"epoch": 5.895036615134256,
|
|
"grad_norm": 0.5888457289118504,
|
|
"learning_rate": 2.973095552966805e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13942526280879974,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5965.9,
|
|
"valid_targets_min": 3408
|
|
},
|
|
{
|
|
"epoch": 5.903173311635476,
|
|
"grad_norm": 0.5111041219072542,
|
|
"learning_rate": 2.9306930983816005e-06,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15496578812599182,
|
|
"step": 3630,
|
|
"valid_targets_mean": 6038.0,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 5.911310008136697,
|
|
"grad_norm": 0.5794364971608068,
|
|
"learning_rate": 2.8885712749058737e-06,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1413809061050415,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5439.6,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 5.919446704637917,
|
|
"grad_norm": 0.498196696749723,
|
|
"learning_rate": 2.8467307750512808e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14829573035240173,
|
|
"step": 3640,
|
|
"valid_targets_mean": 7339.1,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 5.927583401139137,
|
|
"grad_norm": 0.5485087375601867,
|
|
"learning_rate": 2.80517228670433e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12239962071180344,
|
|
"step": 3645,
|
|
"valid_targets_mean": 5950.1,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 5.935720097640358,
|
|
"grad_norm": 0.9852713752535961,
|
|
"learning_rate": 2.7638964931150637e-06,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13210991024971008,
|
|
"step": 3650,
|
|
"valid_targets_mean": 5893.9,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 5.9438567941415785,
|
|
"grad_norm": 0.5176163129021438,
|
|
"learning_rate": 2.7229040728858323e-06,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10251633077859879,
|
|
"step": 3655,
|
|
"valid_targets_mean": 5866.8,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 5.951993490642799,
|
|
"grad_norm": 0.553870107905142,
|
|
"learning_rate": 2.6821956999601306e-06,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11761283129453659,
|
|
"step": 3660,
|
|
"valid_targets_mean": 4549.0,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 5.96013018714402,
|
|
"grad_norm": 0.5622284508277662,
|
|
"learning_rate": 2.641772043611521e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11832987517118454,
|
|
"step": 3665,
|
|
"valid_targets_mean": 5334.1,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 5.96826688364524,
|
|
"grad_norm": 0.6002412417162024,
|
|
"learning_rate": 2.6016337684326342e-06,
|
|
"loss": 0.2257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1252046376466751,
|
|
"step": 3670,
|
|
"valid_targets_mean": 5856.5,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 5.97640358014646,
|
|
"grad_norm": 0.5820259812808503,
|
|
"learning_rate": 2.5617815343242327e-06,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15230754017829895,
|
|
"step": 3675,
|
|
"valid_targets_mean": 5588.6,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 5.984540276647681,
|
|
"grad_norm": 0.5006926676913205,
|
|
"learning_rate": 2.522215996484374e-06,
|
|
"loss": 0.2351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10319880396127701,
|
|
"step": 3680,
|
|
"valid_targets_mean": 5475.5,
|
|
"valid_targets_min": 2095
|
|
},
|
|
{
|
|
"epoch": 5.992676973148901,
|
|
"grad_norm": 0.4818010996292355,
|
|
"learning_rate": 2.4829378053976318e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12096649408340454,
|
|
"step": 3685,
|
|
"valid_targets_mean": 6174.9,
|
|
"valid_targets_min": 3864
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.7286610952593501,
|
|
"learning_rate": 2.4439476068243927e-06,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2138330638408661,
|
|
"step": 3690,
|
|
"valid_targets_mean": 5464.5,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 6.008136696501221,
|
|
"grad_norm": 0.5858367728890088,
|
|
"learning_rate": 2.4052460417902613e-06,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14344994723796844,
|
|
"step": 3695,
|
|
"valid_targets_mean": 5086.1,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 6.016273393002441,
|
|
"grad_norm": 0.5202481454299519,
|
|
"learning_rate": 2.3668337465754985e-06,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10779225826263428,
|
|
"step": 3700,
|
|
"valid_targets_mean": 6387.1,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 6.024410089503662,
|
|
"grad_norm": 0.45889546479866966,
|
|
"learning_rate": 2.3287113527045823e-06,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1384791135787964,
|
|
"step": 3705,
|
|
"valid_targets_mean": 8278.0,
|
|
"valid_targets_min": 3366
|
|
},
|
|
{
|
|
"epoch": 6.032546786004882,
|
|
"grad_norm": 0.5089628731085755,
|
|
"learning_rate": 2.2908794869358044e-06,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15829752385616302,
|
|
"step": 3710,
|
|
"valid_targets_mean": 6261.4,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 6.040683482506102,
|
|
"grad_norm": 0.5462143117486035,
|
|
"learning_rate": 2.253338771250977e-06,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08127269148826599,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3890.1,
|
|
"valid_targets_min": 1056
|
|
},
|
|
{
|
|
"epoch": 6.048820179007323,
|
|
"grad_norm": 0.5707430580005115,
|
|
"learning_rate": 2.216089822845211e-06,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09576968103647232,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4583.1,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 6.0569568755085434,
|
|
"grad_norm": 0.4608492749702116,
|
|
"learning_rate": 2.1791332541167497e-06,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0854940190911293,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5545.5,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 6.065093572009764,
|
|
"grad_norm": 0.5002751592382064,
|
|
"learning_rate": 2.142469672656935e-06,
|
|
"loss": 0.2319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10318467766046524,
|
|
"step": 3730,
|
|
"valid_targets_mean": 6957.6,
|
|
"valid_targets_min": 3822
|
|
},
|
|
{
|
|
"epoch": 6.073230268510985,
|
|
"grad_norm": 0.570462395210286,
|
|
"learning_rate": 2.106099681240179e-06,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09837909042835236,
|
|
"step": 3735,
|
|
"valid_targets_mean": 4815.5,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 6.081366965012205,
|
|
"grad_norm": 0.5408066637162211,
|
|
"learning_rate": 2.07002387781408e-06,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15398940443992615,
|
|
"step": 3740,
|
|
"valid_targets_mean": 5805.0,
|
|
"valid_targets_min": 3641
|
|
},
|
|
{
|
|
"epoch": 6.089503661513426,
|
|
"grad_norm": 0.6160653619825817,
|
|
"learning_rate": 2.0342428554895788e-06,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10584094375371933,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4334.1,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 6.097640358014646,
|
|
"grad_norm": 0.48826990351762456,
|
|
"learning_rate": 1.998757202531223e-06,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09239885210990906,
|
|
"step": 3750,
|
|
"valid_targets_mean": 5868.6,
|
|
"valid_targets_min": 391
|
|
},
|
|
{
|
|
"epoch": 6.105777054515866,
|
|
"grad_norm": 0.6379627065382744,
|
|
"learning_rate": 1.9635675023474764e-06,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11344724893569946,
|
|
"step": 3755,
|
|
"valid_targets_mean": 5121.8,
|
|
"valid_targets_min": 2171
|
|
},
|
|
{
|
|
"epoch": 6.113913751017087,
|
|
"grad_norm": 0.5446797282952438,
|
|
"learning_rate": 1.92867433348114e-06,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09264282137155533,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4778.0,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 6.1220504475183075,
|
|
"grad_norm": 0.626815073545696,
|
|
"learning_rate": 1.8940782695998305e-06,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10249383747577667,
|
|
"step": 3765,
|
|
"valid_targets_mean": 5221.0,
|
|
"valid_targets_min": 3398
|
|
},
|
|
{
|
|
"epoch": 6.130187144019528,
|
|
"grad_norm": 0.5214124493423383,
|
|
"learning_rate": 1.859779879486565e-06,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13012221455574036,
|
|
"step": 3770,
|
|
"valid_targets_mean": 6414.4,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 6.138323840520749,
|
|
"grad_norm": 0.4894257977488121,
|
|
"learning_rate": 1.8257797270303924e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08547305315732956,
|
|
"step": 3775,
|
|
"valid_targets_mean": 5550.0,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 6.146460537021969,
|
|
"grad_norm": 0.4914282914782947,
|
|
"learning_rate": 1.792078371217132e-06,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1106870174407959,
|
|
"step": 3780,
|
|
"valid_targets_mean": 6550.1,
|
|
"valid_targets_min": 2920
|
|
},
|
|
{
|
|
"epoch": 6.15459723352319,
|
|
"grad_norm": 0.5426971061042444,
|
|
"learning_rate": 1.7586763661201821e-06,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0976264476776123,
|
|
"step": 3785,
|
|
"valid_targets_mean": 5254.1,
|
|
"valid_targets_min": 1979
|
|
},
|
|
{
|
|
"epoch": 6.16273393002441,
|
|
"grad_norm": 0.5986618193157306,
|
|
"learning_rate": 1.7255742608914095e-06,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14822298288345337,
|
|
"step": 3790,
|
|
"valid_targets_mean": 5250.0,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 6.17087062652563,
|
|
"grad_norm": 0.5924847870251507,
|
|
"learning_rate": 1.6927725997521171e-06,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11596965044736862,
|
|
"step": 3795,
|
|
"valid_targets_mean": 5393.9,
|
|
"valid_targets_min": 3392
|
|
},
|
|
{
|
|
"epoch": 6.179007323026851,
|
|
"grad_norm": 0.5014703652224826,
|
|
"learning_rate": 1.6602719219841135e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09786193817853928,
|
|
"step": 3800,
|
|
"valid_targets_mean": 7391.9,
|
|
"valid_targets_min": 3280
|
|
},
|
|
{
|
|
"epoch": 6.187144019528072,
|
|
"grad_norm": 0.5273515456482646,
|
|
"learning_rate": 1.6280727619208202e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12879054248332977,
|
|
"step": 3805,
|
|
"valid_targets_mean": 5833.8,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 6.195280716029292,
|
|
"grad_norm": 0.5379631945457531,
|
|
"learning_rate": 1.5961756489385117e-06,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13405945897102356,
|
|
"step": 3810,
|
|
"valid_targets_mean": 6324.8,
|
|
"valid_targets_min": 3480
|
|
},
|
|
{
|
|
"epoch": 6.203417412530513,
|
|
"grad_norm": 0.5506482251419262,
|
|
"learning_rate": 1.5645811074475915e-06,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1008005291223526,
|
|
"step": 3815,
|
|
"valid_targets_mean": 5211.1,
|
|
"valid_targets_min": 2095
|
|
},
|
|
{
|
|
"epoch": 6.211554109031733,
|
|
"grad_norm": 0.5525468218833477,
|
|
"learning_rate": 1.533289656883985e-06,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12161102890968323,
|
|
"step": 3820,
|
|
"valid_targets_mean": 5366.6,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 6.219690805532954,
|
|
"grad_norm": 1.3205892043820247,
|
|
"learning_rate": 1.5023018117005995e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13536399602890015,
|
|
"step": 3825,
|
|
"valid_targets_mean": 5411.5,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 6.227827502034174,
|
|
"grad_norm": 0.6399965745417111,
|
|
"learning_rate": 1.4716180813588566e-06,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14973969757556915,
|
|
"step": 3830,
|
|
"valid_targets_mean": 5368.0,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 6.235964198535394,
|
|
"grad_norm": 0.5609556474306848,
|
|
"learning_rate": 1.44123897032032e-06,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13443996012210846,
|
|
"step": 3835,
|
|
"valid_targets_mean": 5981.2,
|
|
"valid_targets_min": 3215
|
|
},
|
|
{
|
|
"epoch": 6.244100895036615,
|
|
"grad_norm": 0.6385228076191484,
|
|
"learning_rate": 1.411164978038404e-06,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1237209141254425,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4783.6,
|
|
"valid_targets_min": 1112
|
|
},
|
|
{
|
|
"epoch": 6.252237591537836,
|
|
"grad_norm": 0.6555481944926224,
|
|
"learning_rate": 1.3813965989501687e-06,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10206277668476105,
|
|
"step": 3845,
|
|
"valid_targets_mean": 6029.4,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 6.260374288039056,
|
|
"grad_norm": 0.5440883938396228,
|
|
"learning_rate": 1.3519343224681758e-06,
|
|
"loss": 0.2357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13689102232456207,
|
|
"step": 3850,
|
|
"valid_targets_mean": 5527.1,
|
|
"valid_targets_min": 2357
|
|
},
|
|
{
|
|
"epoch": 6.268510984540277,
|
|
"grad_norm": 0.5738449854156686,
|
|
"learning_rate": 1.3227786329724479e-06,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13260340690612793,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4806.5,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 6.2766476810414975,
|
|
"grad_norm": 0.5551324266480604,
|
|
"learning_rate": 1.2939300098025177e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14535200595855713,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5923.5,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.284784377542717,
|
|
"grad_norm": 0.6073163759364055,
|
|
"learning_rate": 1.2653889272495223e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10163906216621399,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4454.5,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 6.292921074043938,
|
|
"grad_norm": 0.6023482154559401,
|
|
"learning_rate": 1.2371558545484375e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13912144303321838,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4576.5,
|
|
"valid_targets_min": 2053
|
|
},
|
|
{
|
|
"epoch": 6.3010577705451585,
|
|
"grad_norm": 0.4614699781457518,
|
|
"learning_rate": 1.2092312558703333e-06,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09365392476320267,
|
|
"step": 3875,
|
|
"valid_targets_mean": 5213.4,
|
|
"valid_targets_min": 2221
|
|
},
|
|
{
|
|
"epoch": 6.309194467046379,
|
|
"grad_norm": 0.5158867835539471,
|
|
"learning_rate": 1.181615590314762e-06,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12160620093345642,
|
|
"step": 3880,
|
|
"valid_targets_mean": 6346.5,
|
|
"valid_targets_min": 3619
|
|
},
|
|
{
|
|
"epoch": 6.3173311635476,
|
|
"grad_norm": 2.2609058090585687,
|
|
"learning_rate": 1.1543093119021976e-06,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13036061823368073,
|
|
"step": 3885,
|
|
"valid_targets_mean": 4674.4,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 6.32546786004882,
|
|
"grad_norm": 0.6835770950079834,
|
|
"learning_rate": 1.1273128695665814e-06,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16928444802761078,
|
|
"step": 3890,
|
|
"valid_targets_mean": 5820.9,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 6.333604556550041,
|
|
"grad_norm": 0.5664875984145479,
|
|
"learning_rate": 1.1006267071479359e-06,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10270114988088608,
|
|
"step": 3895,
|
|
"valid_targets_mean": 5303.8,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 6.341741253051262,
|
|
"grad_norm": 0.4578194482636973,
|
|
"learning_rate": 1.074251263385071e-06,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08202557265758514,
|
|
"step": 3900,
|
|
"valid_targets_mean": 6897.5,
|
|
"valid_targets_min": 2742
|
|
},
|
|
{
|
|
"epoch": 6.349877949552481,
|
|
"grad_norm": 0.5720561944484039,
|
|
"learning_rate": 1.0481869719083647e-06,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12599898874759674,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4984.6,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 6.358014646053702,
|
|
"grad_norm": 0.5346739530338187,
|
|
"learning_rate": 1.022434261232641e-06,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18678709864616394,
|
|
"step": 3910,
|
|
"valid_targets_mean": 7899.0,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 6.3661513425549225,
|
|
"grad_norm": 0.5245267855591524,
|
|
"learning_rate": 9.969935547501208e-07,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1010551005601883,
|
|
"step": 3915,
|
|
"valid_targets_mean": 5726.0,
|
|
"valid_targets_min": 2426
|
|
},
|
|
{
|
|
"epoch": 6.374288039056143,
|
|
"grad_norm": 0.577988469761867,
|
|
"learning_rate": 9.718652707234667e-07,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15215085446834564,
|
|
"step": 3920,
|
|
"valid_targets_mean": 6691.8,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 6.382424735557364,
|
|
"grad_norm": 0.5313322523199454,
|
|
"learning_rate": 9.47049822278896e-07,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1224609836935997,
|
|
"step": 3925,
|
|
"valid_targets_mean": 7647.4,
|
|
"valid_targets_min": 3864
|
|
},
|
|
{
|
|
"epoch": 6.390561432058584,
|
|
"grad_norm": 0.5506328179742512,
|
|
"learning_rate": 9.225476173993941e-07,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11494912952184677,
|
|
"step": 3930,
|
|
"valid_targets_mean": 5041.4,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 6.398698128559805,
|
|
"grad_norm": 0.6221746878029165,
|
|
"learning_rate": 8.983590589180125e-07,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13699518144130707,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 6.406834825061026,
|
|
"grad_norm": 0.5322907918953855,
|
|
"learning_rate": 8.744845445112337e-07,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14416161179542542,
|
|
"step": 3940,
|
|
"valid_targets_mean": 6786.8,
|
|
"valid_targets_min": 3528
|
|
},
|
|
{
|
|
"epoch": 6.414971521562245,
|
|
"grad_norm": 0.6032735858410141,
|
|
"learning_rate": 8.50924466692451e-07,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12138237059116364,
|
|
"step": 3945,
|
|
"valid_targets_mean": 5486.4,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.423108218063466,
|
|
"grad_norm": 0.6020791332482403,
|
|
"learning_rate": 8.276792128054967e-07,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11796577274799347,
|
|
"step": 3950,
|
|
"valid_targets_mean": 4668.9,
|
|
"valid_targets_min": 2761
|
|
},
|
|
{
|
|
"epoch": 6.431244914564687,
|
|
"grad_norm": 0.550955973668879,
|
|
"learning_rate": 8.047491650182815e-07,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14780160784721375,
|
|
"step": 3955,
|
|
"valid_targets_mean": 5403.9,
|
|
"valid_targets_min": 1514
|
|
},
|
|
{
|
|
"epoch": 6.439381611065907,
|
|
"grad_norm": 0.5899929467979808,
|
|
"learning_rate": 7.821347003165125e-07,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13076281547546387,
|
|
"step": 3960,
|
|
"valid_targets_mean": 5458.2,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 6.447518307567128,
|
|
"grad_norm": 0.46432249628629235,
|
|
"learning_rate": 7.598361904974982e-07,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.103657066822052,
|
|
"step": 3965,
|
|
"valid_targets_mean": 5980.2,
|
|
"valid_targets_min": 4230
|
|
},
|
|
{
|
|
"epoch": 6.4556550040683485,
|
|
"grad_norm": 0.7371030234438611,
|
|
"learning_rate": 7.378540021640313e-07,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07392923533916473,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4101.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 6.463791700569569,
|
|
"grad_norm": 0.5682466131911814,
|
|
"learning_rate": 7.161884967183552e-07,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1610821634531021,
|
|
"step": 3975,
|
|
"valid_targets_mean": 6404.6,
|
|
"valid_targets_min": 2554
|
|
},
|
|
{
|
|
"epoch": 6.471928397070789,
|
|
"grad_norm": 0.5442753436375526,
|
|
"learning_rate": 6.948400303562386e-07,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13828691840171814,
|
|
"step": 3980,
|
|
"valid_targets_mean": 6081.0,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 6.480065093572009,
|
|
"grad_norm": 0.5290599896063467,
|
|
"learning_rate": 6.738089540611059e-07,
|
|
"loss": 0.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12780803442001343,
|
|
"step": 3985,
|
|
"valid_targets_mean": 5326.5,
|
|
"valid_targets_min": 3083
|
|
},
|
|
{
|
|
"epoch": 6.48820179007323,
|
|
"grad_norm": 0.4829545021115107,
|
|
"learning_rate": 6.530956135982713e-07,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07991525530815125,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5208.4,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 6.496338486574451,
|
|
"grad_norm": 0.4941367346868645,
|
|
"learning_rate": 6.327003495092565e-07,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09314847737550735,
|
|
"step": 3995,
|
|
"valid_targets_mean": 6371.6,
|
|
"valid_targets_min": 3403
|
|
},
|
|
{
|
|
"epoch": 6.504475183075671,
|
|
"grad_norm": 0.603169711820912,
|
|
"learning_rate": 6.126234971061861e-07,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14228929579257965,
|
|
"step": 4000,
|
|
"valid_targets_mean": 5264.0,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 6.512611879576892,
|
|
"grad_norm": 0.5710471143580141,
|
|
"learning_rate": 5.928653864662815e-07,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15178368985652924,
|
|
"step": 4005,
|
|
"valid_targets_mean": 5708.6,
|
|
"valid_targets_min": 3584
|
|
},
|
|
{
|
|
"epoch": 6.5207485760781125,
|
|
"grad_norm": 0.4842323415856468,
|
|
"learning_rate": 5.734263424264242e-07,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0898006483912468,
|
|
"step": 4010,
|
|
"valid_targets_mean": 5642.4,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 6.528885272579333,
|
|
"grad_norm": 0.5617774998939432,
|
|
"learning_rate": 5.543066845778345e-07,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14239703118801117,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4555.8,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 6.537021969080554,
|
|
"grad_norm": 0.5072944150418117,
|
|
"learning_rate": 5.355067272607928e-07,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11048190295696259,
|
|
"step": 4020,
|
|
"valid_targets_mean": 7035.8,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 6.5451586655817735,
|
|
"grad_norm": 0.4954909171386829,
|
|
"learning_rate": 5.170267795594886e-07,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11649087071418762,
|
|
"step": 4025,
|
|
"valid_targets_mean": 6345.4,
|
|
"valid_targets_min": 2927
|
|
},
|
|
{
|
|
"epoch": 6.553295362082994,
|
|
"grad_norm": 0.6103860123458417,
|
|
"learning_rate": 4.988671452969329e-07,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10858224332332611,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4031.6,
|
|
"valid_targets_min": 2584
|
|
},
|
|
{
|
|
"epoch": 6.561432058584215,
|
|
"grad_norm": 0.539076716156704,
|
|
"learning_rate": 4.810281230299674e-07,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10688619315624237,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4906.5,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 6.569568755085435,
|
|
"grad_norm": 0.5451844058939661,
|
|
"learning_rate": 4.6351000604434537e-07,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13301129639148712,
|
|
"step": 4040,
|
|
"valid_targets_mean": 6030.0,
|
|
"valid_targets_min": 3338
|
|
},
|
|
{
|
|
"epoch": 6.577705451586656,
|
|
"grad_norm": 0.6364692773603006,
|
|
"learning_rate": 4.463130823499273e-07,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1468641757965088,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5677.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 6.585842148087877,
|
|
"grad_norm": 0.5318256719237091,
|
|
"learning_rate": 4.2943763467592436e-07,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10516038537025452,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4711.4,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 6.593978844589097,
|
|
"grad_norm": 0.5509512683634988,
|
|
"learning_rate": 4.12883940466271e-07,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10421756654977798,
|
|
"step": 4055,
|
|
"valid_targets_mean": 4614.4,
|
|
"valid_targets_min": 2460
|
|
},
|
|
{
|
|
"epoch": 6.602115541090317,
|
|
"grad_norm": 0.5200751439970589,
|
|
"learning_rate": 3.9665227187505097e-07,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10616647452116013,
|
|
"step": 4060,
|
|
"valid_targets_mean": 4184.4,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 6.610252237591538,
|
|
"grad_norm": 0.5747208042923473,
|
|
"learning_rate": 3.8074289576202295e-07,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11532352864742279,
|
|
"step": 4065,
|
|
"valid_targets_mean": 4391.8,
|
|
"valid_targets_min": 2875
|
|
},
|
|
{
|
|
"epoch": 6.618388934092758,
|
|
"grad_norm": 0.5442269160669613,
|
|
"learning_rate": 3.6515607368824203e-07,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14139285683631897,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5408.9,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 6.626525630593979,
|
|
"grad_norm": 0.6566336791778115,
|
|
"learning_rate": 3.498920619117474e-07,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10617846250534058,
|
|
"step": 4075,
|
|
"valid_targets_mean": 4429.5,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 6.634662327095199,
|
|
"grad_norm": 0.5122404519465269,
|
|
"learning_rate": 3.3495111138336147e-07,
|
|
"loss": 0.2161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10197494924068451,
|
|
"step": 4080,
|
|
"valid_targets_mean": 4975.5,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 6.64279902359642,
|
|
"grad_norm": 0.6028082911064025,
|
|
"learning_rate": 3.203334677425529e-07,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09473268687725067,
|
|
"step": 4085,
|
|
"valid_targets_mean": 3829.1,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 6.650935720097641,
|
|
"grad_norm": 0.5205649448698142,
|
|
"learning_rate": 3.060393713134091e-07,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1434570699930191,
|
|
"step": 4090,
|
|
"valid_targets_mean": 6809.5,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 6.65907241659886,
|
|
"grad_norm": 0.5601018905652814,
|
|
"learning_rate": 2.920690571006768e-07,
|
|
"loss": 0.2317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12937568128108978,
|
|
"step": 4095,
|
|
"valid_targets_mean": 7661.0,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 6.667209113100081,
|
|
"grad_norm": 0.6361282605163243,
|
|
"learning_rate": 2.784227547858964e-07,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0899709165096283,
|
|
"step": 4100,
|
|
"valid_targets_mean": 4337.1,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 6.675345809601302,
|
|
"grad_norm": 0.5508211250858578,
|
|
"learning_rate": 2.651006887236385e-07,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09255590289831161,
|
|
"step": 4105,
|
|
"valid_targets_mean": 5757.0,
|
|
"valid_targets_min": 3592
|
|
},
|
|
{
|
|
"epoch": 6.683482506102522,
|
|
"grad_norm": 0.5929185382963317,
|
|
"learning_rate": 2.52103077937802e-07,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11462150514125824,
|
|
"step": 4110,
|
|
"valid_targets_mean": 8654.9,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 6.691619202603743,
|
|
"grad_norm": 0.5406995619531852,
|
|
"learning_rate": 2.394301361180218e-07,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09833990037441254,
|
|
"step": 4115,
|
|
"valid_targets_mean": 5543.6,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 6.6997558991049635,
|
|
"grad_norm": 0.5202765802597007,
|
|
"learning_rate": 2.2708207161615147e-07,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12440069019794464,
|
|
"step": 4120,
|
|
"valid_targets_mean": 6339.9,
|
|
"valid_targets_min": 2935
|
|
},
|
|
{
|
|
"epoch": 6.707892595606184,
|
|
"grad_norm": 0.5477566339180919,
|
|
"learning_rate": 2.150590874428371e-07,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16028626263141632,
|
|
"step": 4125,
|
|
"valid_targets_mean": 6068.5,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 6.716029292107405,
|
|
"grad_norm": 0.5263469114319956,
|
|
"learning_rate": 2.0336138126417994e-07,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12802526354789734,
|
|
"step": 4130,
|
|
"valid_targets_mean": 6818.8,
|
|
"valid_targets_min": 3843
|
|
},
|
|
{
|
|
"epoch": 6.724165988608625,
|
|
"grad_norm": 0.6239937148070356,
|
|
"learning_rate": 1.9198914539849455e-07,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10577556490898132,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4284.1,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 6.732302685109845,
|
|
"grad_norm": 0.5786354008011593,
|
|
"learning_rate": 1.8094256681313593e-07,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17910592257976532,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5484.6,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 6.740439381611066,
|
|
"grad_norm": 0.6573268588625598,
|
|
"learning_rate": 1.702218271214262e-07,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12595227360725403,
|
|
"step": 4145,
|
|
"valid_targets_mean": 4625.1,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 6.748576078112286,
|
|
"grad_norm": 0.4184562575542007,
|
|
"learning_rate": 1.598271025796816e-07,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09041139483451843,
|
|
"step": 4150,
|
|
"valid_targets_mean": 5593.0,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 6.756712774613507,
|
|
"grad_norm": 0.49775516096636063,
|
|
"learning_rate": 1.4975856408429912e-07,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12942853569984436,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4617.5,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 6.764849471114728,
|
|
"grad_norm": 0.5158152468576911,
|
|
"learning_rate": 1.4001637716895445e-07,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12505996227264404,
|
|
"step": 4160,
|
|
"valid_targets_mean": 5568.4,
|
|
"valid_targets_min": 2868
|
|
},
|
|
{
|
|
"epoch": 6.772986167615948,
|
|
"grad_norm": 0.45862764208578194,
|
|
"learning_rate": 1.3060070200188179e-07,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09306715428829193,
|
|
"step": 4165,
|
|
"valid_targets_mean": 7481.6,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 6.781122864117169,
|
|
"grad_norm": 0.5484801191580874,
|
|
"learning_rate": 1.215116933832361e-07,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0996013730764389,
|
|
"step": 4170,
|
|
"valid_targets_mean": 5064.8,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 6.7892595606183885,
|
|
"grad_norm": 0.5432376350068135,
|
|
"learning_rate": 1.1274950074255053e-07,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14020952582359314,
|
|
"step": 4175,
|
|
"valid_targets_mean": 5912.4,
|
|
"valid_targets_min": 3887
|
|
},
|
|
{
|
|
"epoch": 6.797396257119609,
|
|
"grad_norm": 0.63592955162879,
|
|
"learning_rate": 1.0431426813628298e-07,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14750230312347412,
|
|
"step": 4180,
|
|
"valid_targets_mean": 6155.6,
|
|
"valid_targets_min": 2960
|
|
},
|
|
{
|
|
"epoch": 6.80553295362083,
|
|
"grad_norm": 0.5502827308580879,
|
|
"learning_rate": 9.62061342454379e-08,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08104512095451355,
|
|
"step": 4185,
|
|
"valid_targets_mean": 4188.5,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 6.81366965012205,
|
|
"grad_norm": 0.5325444503557673,
|
|
"learning_rate": 8.842523237329925e-08,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12346133589744568,
|
|
"step": 4190,
|
|
"valid_targets_mean": 6763.8,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 6.821806346623271,
|
|
"grad_norm": 0.546512190706496,
|
|
"learning_rate": 8.097169044322561e-08,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1239863932132721,
|
|
"step": 4195,
|
|
"valid_targets_mean": 5937.0,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 6.829943043124492,
|
|
"grad_norm": 0.5906532640459007,
|
|
"learning_rate": 7.38456309965585e-08,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12631988525390625,
|
|
"step": 4200,
|
|
"valid_targets_mean": 5960.2,
|
|
"valid_targets_min": 2942
|
|
},
|
|
{
|
|
"epoch": 6.838079739625712,
|
|
"grad_norm": 0.6953061985356663,
|
|
"learning_rate": 6.7047171190604e-08,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13684195280075073,
|
|
"step": 4205,
|
|
"valid_targets_mean": 4888.6,
|
|
"valid_targets_min": 2983
|
|
},
|
|
{
|
|
"epoch": 6.846216436126932,
|
|
"grad_norm": 0.5425415272735113,
|
|
"learning_rate": 6.057642279669874e-08,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10478918999433517,
|
|
"step": 4210,
|
|
"valid_targets_mean": 4543.4,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 6.854353132628153,
|
|
"grad_norm": 0.48291552518428543,
|
|
"learning_rate": 5.4433492198386895e-08,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16383056342601776,
|
|
"step": 4215,
|
|
"valid_targets_mean": 7361.4,
|
|
"valid_targets_min": 4819
|
|
},
|
|
{
|
|
"epoch": 6.862489829129373,
|
|
"grad_norm": 0.5655144988352875,
|
|
"learning_rate": 4.861848038965722e-08,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1411811113357544,
|
|
"step": 4220,
|
|
"valid_targets_mean": 5598.1,
|
|
"valid_targets_min": 2990
|
|
},
|
|
{
|
|
"epoch": 6.870626525630594,
|
|
"grad_norm": 0.477678998506917,
|
|
"learning_rate": 4.313148297328873e-08,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14228369295597076,
|
|
"step": 4225,
|
|
"valid_targets_mean": 6497.6,
|
|
"valid_targets_min": 3097
|
|
},
|
|
{
|
|
"epoch": 6.8787632221318145,
|
|
"grad_norm": 0.581502670933742,
|
|
"learning_rate": 3.797259015928534e-08,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15068471431732178,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4712.4,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 6.886899918633035,
|
|
"grad_norm": 0.4756280191630647,
|
|
"learning_rate": 3.314188676338148e-08,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1089138463139534,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5755.9,
|
|
"valid_targets_min": 2892
|
|
},
|
|
{
|
|
"epoch": 6.895036615134256,
|
|
"grad_norm": 0.5398206094881605,
|
|
"learning_rate": 2.863945220565434e-08,
|
|
"loss": 0.2367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11508828401565552,
|
|
"step": 4240,
|
|
"valid_targets_mean": 5286.5,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 6.903173311635476,
|
|
"grad_norm": 0.5717657037216582,
|
|
"learning_rate": 2.4465360509211555e-08,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13908590376377106,
|
|
"step": 4245,
|
|
"valid_targets_mean": 5737.0,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 6.911310008136697,
|
|
"grad_norm": 0.5554985915270165,
|
|
"learning_rate": 2.0619680298983313e-08,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11254111677408218,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4905.9,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 6.919446704637917,
|
|
"grad_norm": 0.5218369286912145,
|
|
"learning_rate": 1.7102474800592128e-08,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13761496543884277,
|
|
"step": 4255,
|
|
"valid_targets_mean": 5413.0,
|
|
"valid_targets_min": 2422
|
|
},
|
|
{
|
|
"epoch": 6.927583401139137,
|
|
"grad_norm": 0.5381548129002338,
|
|
"learning_rate": 1.3913801839307017e-08,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14477401971817017,
|
|
"step": 4260,
|
|
"valid_targets_mean": 6205.5,
|
|
"valid_targets_min": 558
|
|
},
|
|
{
|
|
"epoch": 6.935720097640358,
|
|
"grad_norm": 0.44713106150780874,
|
|
"learning_rate": 1.105371383909759e-08,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10858777165412903,
|
|
"step": 4265,
|
|
"valid_targets_mean": 8534.0,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 6.9438567941415785,
|
|
"grad_norm": 0.5274514347290213,
|
|
"learning_rate": 8.522257821770296e-09,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1236865222454071,
|
|
"step": 4270,
|
|
"valid_targets_mean": 6043.4,
|
|
"valid_targets_min": 3334
|
|
},
|
|
{
|
|
"epoch": 6.951993490642799,
|
|
"grad_norm": 0.5801773088235224,
|
|
"learning_rate": 6.319475406200148e-09,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11447873711585999,
|
|
"step": 4275,
|
|
"valid_targets_mean": 4326.0,
|
|
"valid_targets_min": 2843
|
|
},
|
|
{
|
|
"epoch": 6.96013018714402,
|
|
"grad_norm": 0.5965479996719061,
|
|
"learning_rate": 4.445402807637944e-09,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13095706701278687,
|
|
"step": 4280,
|
|
"valid_targets_mean": 5125.6,
|
|
"valid_targets_min": 3083
|
|
},
|
|
{
|
|
"epoch": 6.96826688364524,
|
|
"grad_norm": 0.596073208594123,
|
|
"learning_rate": 2.9000708371240695e-09,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12374962121248245,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4898.1,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 6.97640358014646,
|
|
"grad_norm": 0.547455008958649,
|
|
"learning_rate": 1.6835049009755745e-09,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13343475759029388,
|
|
"step": 4290,
|
|
"valid_targets_mean": 6155.8,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 6.984540276647681,
|
|
"grad_norm": 0.5017858705960277,
|
|
"learning_rate": 7.9572500036873e-10,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11443660408258438,
|
|
"step": 4295,
|
|
"valid_targets_mean": 7366.5,
|
|
"valid_targets_min": 3418
|
|
},
|
|
{
|
|
"epoch": 6.992676973148901,
|
|
"grad_norm": 0.5649590473030836,
|
|
"learning_rate": 2.367457310170629e-10,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.168892502784729,
|
|
"step": 4300,
|
|
"valid_targets_mean": 6009.1,
|
|
"valid_targets_min": 3140
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.6042592466198581,
|
|
"learning_rate": 6.5762829204452095e-12,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17104065418243408,
|
|
"step": 4305,
|
|
"valid_targets_mean": 7242.6,
|
|
"valid_targets_min": 3287
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17104065418243408,
|
|
"step": 4305,
|
|
"total_flos": 2.6783135993776046e+18,
|
|
"train_loss": 0.2845736916749181,
|
|
"train_runtime": 62000.2676,
|
|
"train_samples_per_second": 1.109,
|
|
"train_steps_per_second": 0.069,
|
|
"valid_targets_mean": 7242.6,
|
|
"valid_targets_min": 3287
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4305,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.6783135993776046e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|