9651 lines
268 KiB
JSON
9651 lines
268 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4368,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00801924619085806,
|
|
"grad_norm": 8.313740982008923,
|
|
"learning_rate": 3.661327231121282e-07,
|
|
"loss": 0.869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48422402143478394,
|
|
"step": 5,
|
|
"valid_targets_mean": 3345.4,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 0.01603849238171612,
|
|
"grad_norm": 8.69878286285814,
|
|
"learning_rate": 8.237986270022884e-07,
|
|
"loss": 0.8969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5387725234031677,
|
|
"step": 10,
|
|
"valid_targets_mean": 3215.9,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 0.024057738572574178,
|
|
"grad_norm": 7.088732892506888,
|
|
"learning_rate": 1.2814645308924487e-06,
|
|
"loss": 0.8442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3239784836769104,
|
|
"step": 15,
|
|
"valid_targets_mean": 4054.0,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 0.03207698476343224,
|
|
"grad_norm": 5.514543935307852,
|
|
"learning_rate": 1.7391304347826088e-06,
|
|
"loss": 0.8457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30969667434692383,
|
|
"step": 20,
|
|
"valid_targets_mean": 3318.2,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 0.040096230954290296,
|
|
"grad_norm": 4.91461430890284,
|
|
"learning_rate": 2.196796338672769e-06,
|
|
"loss": 0.8157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30109691619873047,
|
|
"step": 25,
|
|
"valid_targets_mean": 4198.5,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 0.048115477145148355,
|
|
"grad_norm": 4.047882001978824,
|
|
"learning_rate": 2.654462242562929e-06,
|
|
"loss": 0.7751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5099153518676758,
|
|
"step": 30,
|
|
"valid_targets_mean": 4115.9,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 0.056134723336006415,
|
|
"grad_norm": 3.0199670668025567,
|
|
"learning_rate": 3.1121281464530894e-06,
|
|
"loss": 0.7276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37805676460266113,
|
|
"step": 35,
|
|
"valid_targets_mean": 3285.9,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 0.06415396952686447,
|
|
"grad_norm": 1.9962784811319771,
|
|
"learning_rate": 3.56979405034325e-06,
|
|
"loss": 0.7159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36083686351776123,
|
|
"step": 40,
|
|
"valid_targets_mean": 4623.9,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 0.07217321571772253,
|
|
"grad_norm": 1.3260517674540337,
|
|
"learning_rate": 4.0274599542334094e-06,
|
|
"loss": 0.6569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4008219540119171,
|
|
"step": 45,
|
|
"valid_targets_mean": 4624.8,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 0.08019246190858059,
|
|
"grad_norm": 1.243564937333077,
|
|
"learning_rate": 4.48512585812357e-06,
|
|
"loss": 0.6293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26258644461631775,
|
|
"step": 50,
|
|
"valid_targets_mean": 2541.2,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 0.08821170809943865,
|
|
"grad_norm": 1.0342084536151661,
|
|
"learning_rate": 4.94279176201373e-06,
|
|
"loss": 0.6711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3230687975883484,
|
|
"step": 55,
|
|
"valid_targets_mean": 3970.5,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 0.09623095429029671,
|
|
"grad_norm": 1.000715187181818,
|
|
"learning_rate": 5.400457665903891e-06,
|
|
"loss": 0.6423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4008551836013794,
|
|
"step": 60,
|
|
"valid_targets_mean": 6482.1,
|
|
"valid_targets_min": 2441
|
|
},
|
|
{
|
|
"epoch": 0.10425020048115477,
|
|
"grad_norm": 0.9742888960296938,
|
|
"learning_rate": 5.858123569794051e-06,
|
|
"loss": 0.6364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3344465494155884,
|
|
"step": 65,
|
|
"valid_targets_mean": 3673.5,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 0.11226944667201283,
|
|
"grad_norm": 0.8790762890640784,
|
|
"learning_rate": 6.31578947368421e-06,
|
|
"loss": 0.6252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3023986220359802,
|
|
"step": 70,
|
|
"valid_targets_mean": 3469.6,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 0.12028869286287089,
|
|
"grad_norm": 0.8929131904667906,
|
|
"learning_rate": 6.773455377574372e-06,
|
|
"loss": 0.606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21897467970848083,
|
|
"step": 75,
|
|
"valid_targets_mean": 2452.0,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 0.12830793905372895,
|
|
"grad_norm": 0.7663072367257558,
|
|
"learning_rate": 7.231121281464531e-06,
|
|
"loss": 0.5629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.241590678691864,
|
|
"step": 80,
|
|
"valid_targets_mean": 4069.1,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 0.136327185244587,
|
|
"grad_norm": 0.7343387826633802,
|
|
"learning_rate": 7.688787185354691e-06,
|
|
"loss": 0.6027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21094068884849548,
|
|
"step": 85,
|
|
"valid_targets_mean": 3845.5,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 0.14434643143544507,
|
|
"grad_norm": 0.8153893864380317,
|
|
"learning_rate": 8.146453089244852e-06,
|
|
"loss": 0.5877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3595584034919739,
|
|
"step": 90,
|
|
"valid_targets_mean": 4052.0,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 0.15236567762630313,
|
|
"grad_norm": 0.6932668353955103,
|
|
"learning_rate": 8.604118993135013e-06,
|
|
"loss": 0.5497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33834296464920044,
|
|
"step": 95,
|
|
"valid_targets_mean": 5480.1,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 0.16038492381716118,
|
|
"grad_norm": 0.7777890816483226,
|
|
"learning_rate": 9.061784897025172e-06,
|
|
"loss": 0.5655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33180496096611023,
|
|
"step": 100,
|
|
"valid_targets_mean": 3460.2,
|
|
"valid_targets_min": 1868
|
|
},
|
|
{
|
|
"epoch": 0.16840417000801924,
|
|
"grad_norm": 0.5283169926803735,
|
|
"learning_rate": 9.519450800915333e-06,
|
|
"loss": 0.5399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2036198526620865,
|
|
"step": 105,
|
|
"valid_targets_mean": 5922.9,
|
|
"valid_targets_min": 2107
|
|
},
|
|
{
|
|
"epoch": 0.1764234161988773,
|
|
"grad_norm": 0.697221665984805,
|
|
"learning_rate": 9.977116704805492e-06,
|
|
"loss": 0.5073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2451438009738922,
|
|
"step": 110,
|
|
"valid_targets_mean": 3265.9,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 0.18444266238973536,
|
|
"grad_norm": 0.7008730936667373,
|
|
"learning_rate": 1.0434782608695653e-05,
|
|
"loss": 0.5299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3022688925266266,
|
|
"step": 115,
|
|
"valid_targets_mean": 4371.0,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 0.19246190858059342,
|
|
"grad_norm": 0.6439155109775441,
|
|
"learning_rate": 1.0892448512585814e-05,
|
|
"loss": 0.4918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25761961936950684,
|
|
"step": 120,
|
|
"valid_targets_mean": 4046.0,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 0.20048115477145148,
|
|
"grad_norm": 0.5833963512550437,
|
|
"learning_rate": 1.1350114416475973e-05,
|
|
"loss": 0.5327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21748362481594086,
|
|
"step": 125,
|
|
"valid_targets_mean": 4139.4,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 0.20850040096230954,
|
|
"grad_norm": 0.7103416991437168,
|
|
"learning_rate": 1.1807780320366134e-05,
|
|
"loss": 0.5084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946273684501648,
|
|
"step": 130,
|
|
"valid_targets_mean": 4061.2,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 0.2165196471531676,
|
|
"grad_norm": 0.6807314277664426,
|
|
"learning_rate": 1.2265446224256295e-05,
|
|
"loss": 0.5241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2987809181213379,
|
|
"step": 135,
|
|
"valid_targets_mean": 4009.8,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 0.22453889334402566,
|
|
"grad_norm": 0.7480876393642586,
|
|
"learning_rate": 1.2723112128146454e-05,
|
|
"loss": 0.5581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30567678809165955,
|
|
"step": 140,
|
|
"valid_targets_mean": 3422.6,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 0.23255813953488372,
|
|
"grad_norm": 0.6184882136658373,
|
|
"learning_rate": 1.3180778032036615e-05,
|
|
"loss": 0.5081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955462336540222,
|
|
"step": 145,
|
|
"valid_targets_mean": 5266.5,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 0.24057738572574178,
|
|
"grad_norm": 0.629492550820491,
|
|
"learning_rate": 1.3638443935926776e-05,
|
|
"loss": 0.5211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20750996470451355,
|
|
"step": 150,
|
|
"valid_targets_mean": 3665.2,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 0.24859663191659984,
|
|
"grad_norm": 0.7885648489531245,
|
|
"learning_rate": 1.4096109839816933e-05,
|
|
"loss": 0.4779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34341418743133545,
|
|
"step": 155,
|
|
"valid_targets_mean": 4196.0,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 0.2566158781074579,
|
|
"grad_norm": 0.6300044643868511,
|
|
"learning_rate": 1.4553775743707096e-05,
|
|
"loss": 0.4933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758129835128784,
|
|
"step": 160,
|
|
"valid_targets_mean": 5468.2,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 0.264635124298316,
|
|
"grad_norm": 0.6391644825807138,
|
|
"learning_rate": 1.5011441647597256e-05,
|
|
"loss": 0.4866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3081105649471283,
|
|
"step": 165,
|
|
"valid_targets_mean": 5716.5,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 0.272654370489174,
|
|
"grad_norm": 0.7935923586616551,
|
|
"learning_rate": 1.5469107551487414e-05,
|
|
"loss": 0.5083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2117885947227478,
|
|
"step": 170,
|
|
"valid_targets_mean": 2941.4,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 0.2806736166800321,
|
|
"grad_norm": 0.6530606093704732,
|
|
"learning_rate": 1.5926773455377575e-05,
|
|
"loss": 0.4928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.195469468832016,
|
|
"step": 175,
|
|
"valid_targets_mean": 3944.8,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 0.28869286287089013,
|
|
"grad_norm": 0.6446059581638844,
|
|
"learning_rate": 1.6384439359267736e-05,
|
|
"loss": 0.4739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29873958230018616,
|
|
"step": 180,
|
|
"valid_targets_mean": 5453.0,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 0.2967121090617482,
|
|
"grad_norm": 0.6138929387268349,
|
|
"learning_rate": 1.6842105263157896e-05,
|
|
"loss": 0.4805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15602803230285645,
|
|
"step": 185,
|
|
"valid_targets_mean": 3395.1,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 0.30473135525260625,
|
|
"grad_norm": 0.7895610581771481,
|
|
"learning_rate": 1.7299771167048057e-05,
|
|
"loss": 0.4829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2401597499847412,
|
|
"step": 190,
|
|
"valid_targets_mean": 2895.6,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 0.31275060144346434,
|
|
"grad_norm": 0.729603285280062,
|
|
"learning_rate": 1.7757437070938218e-05,
|
|
"loss": 0.527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671535015106201,
|
|
"step": 195,
|
|
"valid_targets_mean": 3562.6,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 0.32076984763432237,
|
|
"grad_norm": 0.5253955703069345,
|
|
"learning_rate": 1.8215102974828376e-05,
|
|
"loss": 0.5199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2831480801105499,
|
|
"step": 200,
|
|
"valid_targets_mean": 7620.6,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 0.32878909382518046,
|
|
"grad_norm": 0.6865277172463633,
|
|
"learning_rate": 1.8672768878718537e-05,
|
|
"loss": 0.4971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16837254166603088,
|
|
"step": 205,
|
|
"valid_targets_mean": 3245.9,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 0.3368083400160385,
|
|
"grad_norm": 0.6470638235142138,
|
|
"learning_rate": 1.9130434782608697e-05,
|
|
"loss": 0.4501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1938144564628601,
|
|
"step": 210,
|
|
"valid_targets_mean": 4453.6,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 0.3448275862068966,
|
|
"grad_norm": 0.702129479232604,
|
|
"learning_rate": 1.9588100686498858e-05,
|
|
"loss": 0.4925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21430332958698273,
|
|
"step": 215,
|
|
"valid_targets_mean": 3609.4,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 0.3528468323977546,
|
|
"grad_norm": 0.6475901989881273,
|
|
"learning_rate": 2.004576659038902e-05,
|
|
"loss": 0.4987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964100241661072,
|
|
"step": 220,
|
|
"valid_targets_mean": 5638.2,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 0.3608660785886127,
|
|
"grad_norm": 0.6638281189029952,
|
|
"learning_rate": 2.050343249427918e-05,
|
|
"loss": 0.4839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20554473996162415,
|
|
"step": 225,
|
|
"valid_targets_mean": 3964.6,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 0.3688853247794707,
|
|
"grad_norm": 0.6011178975893025,
|
|
"learning_rate": 2.0961098398169337e-05,
|
|
"loss": 0.4699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21771050989627838,
|
|
"step": 230,
|
|
"valid_targets_mean": 5652.9,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 0.3769045709703288,
|
|
"grad_norm": 0.6918051581990787,
|
|
"learning_rate": 2.14187643020595e-05,
|
|
"loss": 0.4965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23044303059577942,
|
|
"step": 235,
|
|
"valid_targets_mean": 4066.4,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 0.38492381716118684,
|
|
"grad_norm": 0.6397795878243587,
|
|
"learning_rate": 2.187643020594966e-05,
|
|
"loss": 0.4772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2183871567249298,
|
|
"step": 240,
|
|
"valid_targets_mean": 4418.2,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 0.39294306335204493,
|
|
"grad_norm": 0.6314059989661045,
|
|
"learning_rate": 2.2334096109839817e-05,
|
|
"loss": 0.4586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23583510518074036,
|
|
"step": 245,
|
|
"valid_targets_mean": 3972.2,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 0.40096230954290296,
|
|
"grad_norm": 0.7521340694225619,
|
|
"learning_rate": 2.279176201372998e-05,
|
|
"loss": 0.4214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18104761838912964,
|
|
"step": 250,
|
|
"valid_targets_mean": 2805.6,
|
|
"valid_targets_min": 1609
|
|
},
|
|
{
|
|
"epoch": 0.40898155573376105,
|
|
"grad_norm": 0.6589043727793573,
|
|
"learning_rate": 2.3249427917620138e-05,
|
|
"loss": 0.4829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1823362410068512,
|
|
"step": 255,
|
|
"valid_targets_mean": 2975.9,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 0.4170008019246191,
|
|
"grad_norm": 0.6273900082236032,
|
|
"learning_rate": 2.37070938215103e-05,
|
|
"loss": 0.4885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22542431950569153,
|
|
"step": 260,
|
|
"valid_targets_mean": 3486.6,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 0.42502004811547717,
|
|
"grad_norm": 0.6081957372926003,
|
|
"learning_rate": 2.4164759725400463e-05,
|
|
"loss": 0.4647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24096626043319702,
|
|
"step": 265,
|
|
"valid_targets_mean": 5235.9,
|
|
"valid_targets_min": 1595
|
|
},
|
|
{
|
|
"epoch": 0.4330392943063352,
|
|
"grad_norm": 0.7845378461800494,
|
|
"learning_rate": 2.462242562929062e-05,
|
|
"loss": 0.5028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23097199201583862,
|
|
"step": 270,
|
|
"valid_targets_mean": 3027.6,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 0.4410585404971933,
|
|
"grad_norm": 0.7755771169814443,
|
|
"learning_rate": 2.508009153318078e-05,
|
|
"loss": 0.4452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752797305583954,
|
|
"step": 275,
|
|
"valid_targets_mean": 3456.0,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 0.4490777866880513,
|
|
"grad_norm": 0.6442704506397136,
|
|
"learning_rate": 2.5537757437070943e-05,
|
|
"loss": 0.4668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29733771085739136,
|
|
"step": 280,
|
|
"valid_targets_mean": 7013.9,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 0.4570970328789094,
|
|
"grad_norm": 0.7352489212493196,
|
|
"learning_rate": 2.59954233409611e-05,
|
|
"loss": 0.4845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28917449712753296,
|
|
"step": 285,
|
|
"valid_targets_mean": 4661.9,
|
|
"valid_targets_min": 3003
|
|
},
|
|
{
|
|
"epoch": 0.46511627906976744,
|
|
"grad_norm": 0.7720755855494124,
|
|
"learning_rate": 2.645308924485126e-05,
|
|
"loss": 0.5035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2599334716796875,
|
|
"step": 290,
|
|
"valid_targets_mean": 2824.4,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 0.4731355252606255,
|
|
"grad_norm": 0.7363743109818579,
|
|
"learning_rate": 2.6910755148741422e-05,
|
|
"loss": 0.479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31573259830474854,
|
|
"step": 295,
|
|
"valid_targets_mean": 4273.9,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 0.48115477145148355,
|
|
"grad_norm": 0.744633521519781,
|
|
"learning_rate": 2.7368421052631583e-05,
|
|
"loss": 0.4797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3156285881996155,
|
|
"step": 300,
|
|
"valid_targets_mean": 5315.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 0.48917401764234164,
|
|
"grad_norm": 0.7129949807940422,
|
|
"learning_rate": 2.782608695652174e-05,
|
|
"loss": 0.4419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27106714248657227,
|
|
"step": 305,
|
|
"valid_targets_mean": 3763.2,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 0.4971932638331997,
|
|
"grad_norm": 0.6175971826717892,
|
|
"learning_rate": 2.8283752860411904e-05,
|
|
"loss": 0.4453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16650423407554626,
|
|
"step": 310,
|
|
"valid_targets_mean": 5072.0,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 0.5052125100240578,
|
|
"grad_norm": 0.7040236348581613,
|
|
"learning_rate": 2.8741418764302062e-05,
|
|
"loss": 0.4844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040885329246521,
|
|
"step": 315,
|
|
"valid_targets_mean": 4063.0,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 0.5132317562149158,
|
|
"grad_norm": 0.8830857148509627,
|
|
"learning_rate": 2.9199084668192223e-05,
|
|
"loss": 0.4629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24983727931976318,
|
|
"step": 320,
|
|
"valid_targets_mean": 3427.4,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 0.5212510024057738,
|
|
"grad_norm": 0.7984683477663427,
|
|
"learning_rate": 2.9656750572082384e-05,
|
|
"loss": 0.458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2035318911075592,
|
|
"step": 325,
|
|
"valid_targets_mean": 2357.1,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 0.529270248596632,
|
|
"grad_norm": 0.7839299118682744,
|
|
"learning_rate": 3.0114416475972544e-05,
|
|
"loss": 0.4703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2685667872428894,
|
|
"step": 330,
|
|
"valid_targets_mean": 4827.5,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 0.53728949478749,
|
|
"grad_norm": 0.6794195326992597,
|
|
"learning_rate": 3.05720823798627e-05,
|
|
"loss": 0.4873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19377443194389343,
|
|
"step": 335,
|
|
"valid_targets_mean": 3683.9,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 0.545308740978348,
|
|
"grad_norm": 0.5213966576318203,
|
|
"learning_rate": 3.102974828375286e-05,
|
|
"loss": 0.4295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12621726095676422,
|
|
"step": 340,
|
|
"valid_targets_mean": 4432.0,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 0.5533279871692061,
|
|
"grad_norm": 0.706356642791585,
|
|
"learning_rate": 3.1487414187643024e-05,
|
|
"loss": 0.4458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21022140979766846,
|
|
"step": 345,
|
|
"valid_targets_mean": 5536.6,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 0.5613472333600642,
|
|
"grad_norm": 0.6835233787156912,
|
|
"learning_rate": 3.1945080091533184e-05,
|
|
"loss": 0.4444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2203514128923416,
|
|
"step": 350,
|
|
"valid_targets_mean": 4027.5,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 0.5693664795509222,
|
|
"grad_norm": 0.6619068795239736,
|
|
"learning_rate": 3.240274599542334e-05,
|
|
"loss": 0.4171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19794681668281555,
|
|
"step": 355,
|
|
"valid_targets_mean": 4365.2,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 0.5773857257417803,
|
|
"grad_norm": 0.7758989332936811,
|
|
"learning_rate": 3.2860411899313506e-05,
|
|
"loss": 0.4951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3366064429283142,
|
|
"step": 360,
|
|
"valid_targets_mean": 3579.4,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 0.5854049719326383,
|
|
"grad_norm": 0.572316916164085,
|
|
"learning_rate": 3.331807780320366e-05,
|
|
"loss": 0.4308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2127903699874878,
|
|
"step": 365,
|
|
"valid_targets_mean": 4391.5,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 0.5934242181234964,
|
|
"grad_norm": 0.8060101627155969,
|
|
"learning_rate": 3.377574370709382e-05,
|
|
"loss": 0.4272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559089660644531,
|
|
"step": 370,
|
|
"valid_targets_mean": 4548.6,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 0.6014434643143545,
|
|
"grad_norm": 0.7342884918292761,
|
|
"learning_rate": 3.423340961098399e-05,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2449020892381668,
|
|
"step": 375,
|
|
"valid_targets_mean": 4173.0,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 0.6094627105052125,
|
|
"grad_norm": 0.6932121954679972,
|
|
"learning_rate": 3.469107551487414e-05,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2163270115852356,
|
|
"step": 380,
|
|
"valid_targets_mean": 3886.9,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 0.6174819566960705,
|
|
"grad_norm": 0.7715154208797361,
|
|
"learning_rate": 3.5148741418764304e-05,
|
|
"loss": 0.4325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2262679934501648,
|
|
"step": 385,
|
|
"valid_targets_mean": 4779.1,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 0.6255012028869287,
|
|
"grad_norm": 0.6983503647349161,
|
|
"learning_rate": 3.5606407322654464e-05,
|
|
"loss": 0.4289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25364089012145996,
|
|
"step": 390,
|
|
"valid_targets_mean": 4373.1,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 0.6335204490777867,
|
|
"grad_norm": 0.650166494248333,
|
|
"learning_rate": 3.6064073226544625e-05,
|
|
"loss": 0.4424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1815357506275177,
|
|
"step": 395,
|
|
"valid_targets_mean": 3892.1,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 0.6415396952686447,
|
|
"grad_norm": 0.6281878304316749,
|
|
"learning_rate": 3.6521739130434786e-05,
|
|
"loss": 0.4248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615407705307007,
|
|
"step": 400,
|
|
"valid_targets_mean": 5092.6,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 0.6495589414595028,
|
|
"grad_norm": 0.6316023344367048,
|
|
"learning_rate": 3.697940503432495e-05,
|
|
"loss": 0.4216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2884695529937744,
|
|
"step": 405,
|
|
"valid_targets_mean": 5941.5,
|
|
"valid_targets_min": 1671
|
|
},
|
|
{
|
|
"epoch": 0.6575781876503609,
|
|
"grad_norm": 0.6656637581337326,
|
|
"learning_rate": 3.743707093821511e-05,
|
|
"loss": 0.4533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2328605353832245,
|
|
"step": 410,
|
|
"valid_targets_mean": 4097.0,
|
|
"valid_targets_min": 1930
|
|
},
|
|
{
|
|
"epoch": 0.6655974338412189,
|
|
"grad_norm": 0.6571360392327468,
|
|
"learning_rate": 3.789473684210526e-05,
|
|
"loss": 0.4429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1997435986995697,
|
|
"step": 415,
|
|
"valid_targets_mean": 5019.8,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 0.673616680032077,
|
|
"grad_norm": 0.6170027112876605,
|
|
"learning_rate": 3.835240274599543e-05,
|
|
"loss": 0.4043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.237014502286911,
|
|
"step": 420,
|
|
"valid_targets_mean": 5630.4,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 0.681635926222935,
|
|
"grad_norm": 0.6551577656712124,
|
|
"learning_rate": 3.8810068649885584e-05,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19945791363716125,
|
|
"step": 425,
|
|
"valid_targets_mean": 5277.8,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 0.6896551724137931,
|
|
"grad_norm": 0.5852441140362239,
|
|
"learning_rate": 3.9267734553775745e-05,
|
|
"loss": 0.4136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13955387473106384,
|
|
"step": 430,
|
|
"valid_targets_mean": 3020.5,
|
|
"valid_targets_min": 2087
|
|
},
|
|
{
|
|
"epoch": 0.6976744186046512,
|
|
"grad_norm": 0.6021829173481317,
|
|
"learning_rate": 3.9725400457665905e-05,
|
|
"loss": 0.4141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1376398205757141,
|
|
"step": 435,
|
|
"valid_targets_mean": 4271.9,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 0.7056936647955092,
|
|
"grad_norm": 0.6479698160689082,
|
|
"learning_rate": 3.999997445219712e-05,
|
|
"loss": 0.4438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22524264454841614,
|
|
"step": 440,
|
|
"valid_targets_mean": 5391.4,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 0.7137129109863672,
|
|
"grad_norm": 0.7208389560786789,
|
|
"learning_rate": 3.999968704016428e-05,
|
|
"loss": 0.429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15243616700172424,
|
|
"step": 445,
|
|
"valid_targets_mean": 2479.2,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 0.7217321571772254,
|
|
"grad_norm": 0.7748329379807736,
|
|
"learning_rate": 3.9999080285949514e-05,
|
|
"loss": 0.4357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25249481201171875,
|
|
"step": 450,
|
|
"valid_targets_mean": 5528.8,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 0.7297514033680834,
|
|
"grad_norm": 0.5195534551910792,
|
|
"learning_rate": 3.999815419924108e-05,
|
|
"loss": 0.4554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26327452063560486,
|
|
"step": 455,
|
|
"valid_targets_mean": 6266.9,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 0.7377706495589414,
|
|
"grad_norm": 0.5635412505100377,
|
|
"learning_rate": 3.999690879482614e-05,
|
|
"loss": 0.4385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18464183807373047,
|
|
"step": 460,
|
|
"valid_targets_mean": 5252.4,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 0.7457898957497995,
|
|
"grad_norm": 0.9951524748597607,
|
|
"learning_rate": 3.9995344092590506e-05,
|
|
"loss": 0.4318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21847239136695862,
|
|
"step": 465,
|
|
"valid_targets_mean": 2516.6,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 0.7538091419406576,
|
|
"grad_norm": 1.1079483899236926,
|
|
"learning_rate": 3.999346011751835e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2754901945590973,
|
|
"step": 470,
|
|
"valid_targets_mean": 4657.5,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 0.7618283881315157,
|
|
"grad_norm": 0.6457266593716856,
|
|
"learning_rate": 3.999125689969176e-05,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15281400084495544,
|
|
"step": 475,
|
|
"valid_targets_mean": 2690.4,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.7698476343223737,
|
|
"grad_norm": 0.7161175072619643,
|
|
"learning_rate": 3.9988734474290324e-05,
|
|
"loss": 0.4136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16189947724342346,
|
|
"step": 480,
|
|
"valid_targets_mean": 3288.2,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 0.7778668805132317,
|
|
"grad_norm": 0.8425818311311937,
|
|
"learning_rate": 3.9985892881590513e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25789886713027954,
|
|
"step": 485,
|
|
"valid_targets_mean": 5527.6,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 0.7858861267040899,
|
|
"grad_norm": 0.6816766014086393,
|
|
"learning_rate": 3.9982732166965054e-05,
|
|
"loss": 0.4129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2202441394329071,
|
|
"step": 490,
|
|
"valid_targets_mean": 4515.8,
|
|
"valid_targets_min": 1461
|
|
},
|
|
{
|
|
"epoch": 0.7939053728949479,
|
|
"grad_norm": 0.6711499035511631,
|
|
"learning_rate": 3.997925238088221e-05,
|
|
"loss": 0.4046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616243362426758,
|
|
"step": 495,
|
|
"valid_targets_mean": 4674.5,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 0.8019246190858059,
|
|
"grad_norm": 0.5792692946536627,
|
|
"learning_rate": 3.9975453578904975e-05,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17172032594680786,
|
|
"step": 500,
|
|
"valid_targets_mean": 3829.4,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 0.809943865276664,
|
|
"grad_norm": 0.6479080676210576,
|
|
"learning_rate": 3.997133582169018e-05,
|
|
"loss": 0.4088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771586775779724,
|
|
"step": 505,
|
|
"valid_targets_mean": 4916.1,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 0.8179631114675221,
|
|
"grad_norm": 0.7284911471862828,
|
|
"learning_rate": 3.996689917498754e-05,
|
|
"loss": 0.423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22784632444381714,
|
|
"step": 510,
|
|
"valid_targets_mean": 3270.2,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 0.8259823576583801,
|
|
"grad_norm": 0.7537417338532415,
|
|
"learning_rate": 3.9962143709638585e-05,
|
|
"loss": 0.4254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20665107667446136,
|
|
"step": 515,
|
|
"valid_targets_mean": 2749.4,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 0.8340016038492382,
|
|
"grad_norm": 0.5735317711929651,
|
|
"learning_rate": 3.995706950157554e-05,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2083359658718109,
|
|
"step": 520,
|
|
"valid_targets_mean": 4386.8,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 0.8420208500400962,
|
|
"grad_norm": 0.5775968083813575,
|
|
"learning_rate": 3.995167663182008e-05,
|
|
"loss": 0.4133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3303985595703125,
|
|
"step": 525,
|
|
"valid_targets_mean": 4967.6,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 0.8500400962309543,
|
|
"grad_norm": 0.6953060852286664,
|
|
"learning_rate": 3.994596518648214e-05,
|
|
"loss": 0.4705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2025911808013916,
|
|
"step": 530,
|
|
"valid_targets_mean": 4558.0,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 0.8580593424218124,
|
|
"grad_norm": 0.6934565862515342,
|
|
"learning_rate": 3.993993525675838e-05,
|
|
"loss": 0.4309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3160518705844879,
|
|
"step": 535,
|
|
"valid_targets_mean": 5058.6,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 0.8660785886126704,
|
|
"grad_norm": 0.6799717965973326,
|
|
"learning_rate": 3.993358693893086e-05,
|
|
"loss": 0.4284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24555282294750214,
|
|
"step": 540,
|
|
"valid_targets_mean": 4132.4,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 0.8740978348035284,
|
|
"grad_norm": 0.6091069047749981,
|
|
"learning_rate": 3.9926920334365457e-05,
|
|
"loss": 0.4297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09333278238773346,
|
|
"step": 545,
|
|
"valid_targets_mean": 2423.5,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 0.8821170809943866,
|
|
"grad_norm": 0.9935398003007877,
|
|
"learning_rate": 3.991993554951023e-05,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20457065105438232,
|
|
"step": 550,
|
|
"valid_targets_mean": 2924.2,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 0.8901363271852446,
|
|
"grad_norm": 0.6506939503312682,
|
|
"learning_rate": 3.991263269589376e-05,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24844929575920105,
|
|
"step": 555,
|
|
"valid_targets_mean": 4460.2,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 0.8981555733761026,
|
|
"grad_norm": 0.7904296869066102,
|
|
"learning_rate": 3.990501189012332e-05,
|
|
"loss": 0.409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25031614303588867,
|
|
"step": 560,
|
|
"valid_targets_mean": 3274.9,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 0.9061748195669607,
|
|
"grad_norm": 0.6243782360832607,
|
|
"learning_rate": 3.989707325388305e-05,
|
|
"loss": 0.4202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20207425951957703,
|
|
"step": 565,
|
|
"valid_targets_mean": 4446.5,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 0.9141940657578188,
|
|
"grad_norm": 0.6243052859782692,
|
|
"learning_rate": 3.9888816913932016e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20946535468101501,
|
|
"step": 570,
|
|
"valid_targets_mean": 4064.6,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 0.9222133119486768,
|
|
"grad_norm": 0.8133657221603788,
|
|
"learning_rate": 3.988024300210215e-05,
|
|
"loss": 0.4147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17629793286323547,
|
|
"step": 575,
|
|
"valid_targets_mean": 2829.0,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 0.9302325581395349,
|
|
"grad_norm": 0.5933036701633775,
|
|
"learning_rate": 3.987135165529618e-05,
|
|
"loss": 0.3945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14002780616283417,
|
|
"step": 580,
|
|
"valid_targets_mean": 3140.2,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 0.9382518043303929,
|
|
"grad_norm": 0.6572313159662928,
|
|
"learning_rate": 3.9862143015485446e-05,
|
|
"loss": 0.41,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19144278764724731,
|
|
"step": 585,
|
|
"valid_targets_mean": 4209.8,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 0.946271050521251,
|
|
"grad_norm": 0.7182512157499061,
|
|
"learning_rate": 3.985261722970759e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2679348587989807,
|
|
"step": 590,
|
|
"valid_targets_mean": 3733.0,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 0.9542902967121091,
|
|
"grad_norm": 0.6599170152735541,
|
|
"learning_rate": 3.984277445006426e-05,
|
|
"loss": 0.4245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21976399421691895,
|
|
"step": 595,
|
|
"valid_targets_mean": 3530.1,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 0.9623095429029671,
|
|
"grad_norm": 0.5769175180828044,
|
|
"learning_rate": 3.9832614833718654e-05,
|
|
"loss": 0.4254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2880416512489319,
|
|
"step": 600,
|
|
"valid_targets_mean": 6344.4,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 0.9703287890938251,
|
|
"grad_norm": 0.6263189640875445,
|
|
"learning_rate": 3.9822138542893005e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1385025531053543,
|
|
"step": 605,
|
|
"valid_targets_mean": 3284.1,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 0.9783480352846833,
|
|
"grad_norm": 0.7609485085875123,
|
|
"learning_rate": 3.9811345744866014e-05,
|
|
"loss": 0.4249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.180627703666687,
|
|
"step": 610,
|
|
"valid_targets_mean": 1876.9,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 0.9863672814755413,
|
|
"grad_norm": 0.6052202116295862,
|
|
"learning_rate": 3.980023661197016e-05,
|
|
"loss": 0.431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28665053844451904,
|
|
"step": 615,
|
|
"valid_targets_mean": 5199.5,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 0.9943865276663993,
|
|
"grad_norm": 0.7582336087430221,
|
|
"learning_rate": 3.978881132158896e-05,
|
|
"loss": 0.4093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23537395894527435,
|
|
"step": 620,
|
|
"valid_targets_mean": 3701.1,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 1.0016038492381716,
|
|
"grad_norm": 0.695598402136173,
|
|
"learning_rate": 3.9777070056154124e-05,
|
|
"loss": 0.3859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17286968231201172,
|
|
"step": 625,
|
|
"valid_targets_mean": 2414.4,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 1.0096230954290297,
|
|
"grad_norm": 0.615630823687904,
|
|
"learning_rate": 3.976501300314264e-05,
|
|
"loss": 0.3889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1431884765625,
|
|
"step": 630,
|
|
"valid_targets_mean": 3064.6,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 1.0176423416198876,
|
|
"grad_norm": 0.6850838344428346,
|
|
"learning_rate": 3.9752640355073825e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16621248424053192,
|
|
"step": 635,
|
|
"valid_targets_mean": 3180.9,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 1.0256615878107458,
|
|
"grad_norm": 0.5619195455110378,
|
|
"learning_rate": 3.9739952309506175e-05,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15314628183841705,
|
|
"step": 640,
|
|
"valid_targets_mean": 4104.9,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 1.033680834001604,
|
|
"grad_norm": 0.6675969722042899,
|
|
"learning_rate": 3.972694906903427e-05,
|
|
"loss": 0.3833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21451722085475922,
|
|
"step": 645,
|
|
"valid_targets_mean": 3510.8,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 1.0417000801924619,
|
|
"grad_norm": 0.6796820596998153,
|
|
"learning_rate": 3.971363084128552e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1613035798072815,
|
|
"step": 650,
|
|
"valid_targets_mean": 2254.0,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 1.04971932638332,
|
|
"grad_norm": 0.5138699133063548,
|
|
"learning_rate": 3.969999783891685e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14441242814064026,
|
|
"step": 655,
|
|
"valid_targets_mean": 5016.5,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 1.057738572574178,
|
|
"grad_norm": 0.549376268830875,
|
|
"learning_rate": 3.96860502796113e-05,
|
|
"loss": 0.4123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2061927616596222,
|
|
"step": 660,
|
|
"valid_targets_mean": 6110.9,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 1.065757818765036,
|
|
"grad_norm": 0.7311459967280249,
|
|
"learning_rate": 3.967178838607456e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2232741117477417,
|
|
"step": 665,
|
|
"valid_targets_mean": 3224.8,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 1.0737770649558942,
|
|
"grad_norm": 0.5499017486701547,
|
|
"learning_rate": 3.965721238603139e-05,
|
|
"loss": 0.3894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16423560678958893,
|
|
"step": 670,
|
|
"valid_targets_mean": 4179.4,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 1.0817963111467521,
|
|
"grad_norm": 0.6875258421977899,
|
|
"learning_rate": 3.964232251222203e-05,
|
|
"loss": 0.4207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15144357085227966,
|
|
"step": 675,
|
|
"valid_targets_mean": 2502.8,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 1.0898155573376103,
|
|
"grad_norm": 0.6340623830629193,
|
|
"learning_rate": 3.962711900239844e-05,
|
|
"loss": 0.3697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16017653048038483,
|
|
"step": 680,
|
|
"valid_targets_mean": 2854.1,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 1.0978348035284684,
|
|
"grad_norm": 0.6347481474391807,
|
|
"learning_rate": 3.961160209932051e-05,
|
|
"loss": 0.3914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21811774373054504,
|
|
"step": 685,
|
|
"valid_targets_mean": 4877.0,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 1.1058540497193263,
|
|
"grad_norm": 0.7578764836248371,
|
|
"learning_rate": 3.95957720507522e-05,
|
|
"loss": 0.3648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19442862272262573,
|
|
"step": 690,
|
|
"valid_targets_mean": 4939.2,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 1.1138732959101845,
|
|
"grad_norm": 0.7135318700261571,
|
|
"learning_rate": 3.957962910945759e-05,
|
|
"loss": 0.3791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14678996801376343,
|
|
"step": 695,
|
|
"valid_targets_mean": 3328.9,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 1.1218925421010426,
|
|
"grad_norm": 0.7595496965888743,
|
|
"learning_rate": 3.9563173533196805e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17554423213005066,
|
|
"step": 700,
|
|
"valid_targets_mean": 2093.6,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 1.1299117882919005,
|
|
"grad_norm": 0.6255982334737784,
|
|
"learning_rate": 3.954640558472195e-05,
|
|
"loss": 0.3806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.230742409825325,
|
|
"step": 705,
|
|
"valid_targets_mean": 5426.8,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 1.1379310344827587,
|
|
"grad_norm": 0.7293239045809642,
|
|
"learning_rate": 3.952932553177287e-05,
|
|
"loss": 0.3585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20457494258880615,
|
|
"step": 710,
|
|
"valid_targets_mean": 4382.8,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 1.1459502806736166,
|
|
"grad_norm": 0.6938425440543589,
|
|
"learning_rate": 3.95119336470729e-05,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23677586019039154,
|
|
"step": 715,
|
|
"valid_targets_mean": 5619.5,
|
|
"valid_targets_min": 2168
|
|
},
|
|
{
|
|
"epoch": 1.1539695268644747,
|
|
"grad_norm": 0.6576001630605337,
|
|
"learning_rate": 3.949423020832451e-05,
|
|
"loss": 0.3806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24037396907806396,
|
|
"step": 720,
|
|
"valid_targets_mean": 4540.8,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 1.1619887730553329,
|
|
"grad_norm": 0.4483035830808508,
|
|
"learning_rate": 3.947621549820485e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12731903791427612,
|
|
"step": 725,
|
|
"valid_targets_mean": 4211.2,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 1.1700080192461908,
|
|
"grad_norm": 0.804348533347129,
|
|
"learning_rate": 3.945788980436129e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18650484085083008,
|
|
"step": 730,
|
|
"valid_targets_mean": 2705.8,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 1.178027265437049,
|
|
"grad_norm": 0.6041634251187872,
|
|
"learning_rate": 3.943925341940673e-05,
|
|
"loss": 0.4054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19750933349132538,
|
|
"step": 735,
|
|
"valid_targets_mean": 3846.0,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 1.1860465116279069,
|
|
"grad_norm": 0.58747886152978,
|
|
"learning_rate": 3.942030664091503e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17710307240486145,
|
|
"step": 740,
|
|
"valid_targets_mean": 2780.9,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 1.194065757818765,
|
|
"grad_norm": 0.6404869784227899,
|
|
"learning_rate": 3.9401049771416214e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16430872678756714,
|
|
"step": 745,
|
|
"valid_targets_mean": 2718.2,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 1.2020850040096231,
|
|
"grad_norm": 0.5710025270152095,
|
|
"learning_rate": 3.938148311839162e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18448391556739807,
|
|
"step": 750,
|
|
"valid_targets_mean": 5132.5,
|
|
"valid_targets_min": 2265
|
|
},
|
|
{
|
|
"epoch": 1.210104250200481,
|
|
"grad_norm": 0.6497135461400895,
|
|
"learning_rate": 3.9361606994269014e-05,
|
|
"loss": 0.3676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26077187061309814,
|
|
"step": 755,
|
|
"valid_targets_mean": 4660.1,
|
|
"valid_targets_min": 1580
|
|
},
|
|
{
|
|
"epoch": 1.2181234963913392,
|
|
"grad_norm": 0.607110503560847,
|
|
"learning_rate": 3.934142171641763e-05,
|
|
"loss": 0.3793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15704308450222015,
|
|
"step": 760,
|
|
"valid_targets_mean": 4377.2,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 1.2261427425821974,
|
|
"grad_norm": 0.6094554796598396,
|
|
"learning_rate": 3.9320927607143003e-05,
|
|
"loss": 0.4103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12785972654819489,
|
|
"step": 765,
|
|
"valid_targets_mean": 2213.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 1.2341619887730553,
|
|
"grad_norm": 0.6363340342249799,
|
|
"learning_rate": 3.9300124993681976e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1621864140033722,
|
|
"step": 770,
|
|
"valid_targets_mean": 3536.1,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 1.2421812349639134,
|
|
"grad_norm": 0.6622582879084168,
|
|
"learning_rate": 3.9279014208197317e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1941109001636505,
|
|
"step": 775,
|
|
"valid_targets_mean": 3288.6,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 1.2502004811547716,
|
|
"grad_norm": 0.638376853403367,
|
|
"learning_rate": 3.925759558777252e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23520147800445557,
|
|
"step": 780,
|
|
"valid_targets_mean": 4333.1,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 1.2582197273456295,
|
|
"grad_norm": 0.7287126082613413,
|
|
"learning_rate": 3.923586947440639e-05,
|
|
"loss": 0.3976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1913953721523285,
|
|
"step": 785,
|
|
"valid_targets_mean": 2964.0,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 1.2662389735364876,
|
|
"grad_norm": 0.5821074784665079,
|
|
"learning_rate": 3.921383621500758e-05,
|
|
"loss": 0.3936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2320612668991089,
|
|
"step": 790,
|
|
"valid_targets_mean": 5262.9,
|
|
"valid_targets_min": 2208
|
|
},
|
|
{
|
|
"epoch": 1.2742582197273458,
|
|
"grad_norm": 0.5959266410039498,
|
|
"learning_rate": 3.919149616138906e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18204858899116516,
|
|
"step": 795,
|
|
"valid_targets_mean": 4003.8,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 1.2822774659182037,
|
|
"grad_norm": 0.538833079574803,
|
|
"learning_rate": 3.916884967026246e-05,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15234261751174927,
|
|
"step": 800,
|
|
"valid_targets_mean": 3804.1,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 1.2902967121090618,
|
|
"grad_norm": 0.8247769067262545,
|
|
"learning_rate": 3.914589710323245e-05,
|
|
"loss": 0.3624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13381686806678772,
|
|
"step": 805,
|
|
"valid_targets_mean": 3968.5,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 1.2983159582999197,
|
|
"grad_norm": 0.773159132160585,
|
|
"learning_rate": 3.912263882679091e-05,
|
|
"loss": 0.3884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16676564514636993,
|
|
"step": 810,
|
|
"valid_targets_mean": 2239.8,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 1.306335204490778,
|
|
"grad_norm": 0.6435973604228074,
|
|
"learning_rate": 3.9099075212311076e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22716866433620453,
|
|
"step": 815,
|
|
"valid_targets_mean": 3918.2,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 1.3143544506816358,
|
|
"grad_norm": 0.5259675841357822,
|
|
"learning_rate": 3.9075206636041646e-05,
|
|
"loss": 0.3927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24232850968837738,
|
|
"step": 820,
|
|
"valid_targets_mean": 7605.0,
|
|
"valid_targets_min": 1967
|
|
},
|
|
{
|
|
"epoch": 1.322373696872494,
|
|
"grad_norm": 0.7325220804411511,
|
|
"learning_rate": 3.905103347910075e-05,
|
|
"loss": 0.396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22928906977176666,
|
|
"step": 825,
|
|
"valid_targets_mean": 2980.9,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 1.330392943063352,
|
|
"grad_norm": 0.5647785923965424,
|
|
"learning_rate": 3.902655612746985e-05,
|
|
"loss": 0.3951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14142805337905884,
|
|
"step": 830,
|
|
"valid_targets_mean": 4535.4,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 1.33841218925421,
|
|
"grad_norm": 0.6198274246253738,
|
|
"learning_rate": 3.900177497198761e-05,
|
|
"loss": 0.4102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15712687373161316,
|
|
"step": 835,
|
|
"valid_targets_mean": 3244.4,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 1.3464314354450682,
|
|
"grad_norm": 0.7479328322426334,
|
|
"learning_rate": 3.8976690408343635e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1526011973619461,
|
|
"step": 840,
|
|
"valid_targets_mean": 1959.6,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 1.3544506816359263,
|
|
"grad_norm": 0.5705913022888958,
|
|
"learning_rate": 3.8951302837072165e-05,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21281887590885162,
|
|
"step": 845,
|
|
"valid_targets_mean": 5731.0,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 1.3624699278267842,
|
|
"grad_norm": 0.6305200044274495,
|
|
"learning_rate": 3.892561266354566e-05,
|
|
"loss": 0.3924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1274857521057129,
|
|
"step": 850,
|
|
"valid_targets_mean": 2638.5,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 1.3704891740176424,
|
|
"grad_norm": 0.6540817528238299,
|
|
"learning_rate": 3.889962029796833e-05,
|
|
"loss": 0.3994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22636473178863525,
|
|
"step": 855,
|
|
"valid_targets_mean": 4248.0,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 1.3785084202085005,
|
|
"grad_norm": 0.6384044783744945,
|
|
"learning_rate": 3.887332615536962e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19277402758598328,
|
|
"step": 860,
|
|
"valid_targets_mean": 3839.9,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 1.3865276663993584,
|
|
"grad_norm": 0.5403507036239689,
|
|
"learning_rate": 3.8846730655597535e-05,
|
|
"loss": 0.3861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2178334891796112,
|
|
"step": 865,
|
|
"valid_targets_mean": 6470.2,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 1.3945469125902166,
|
|
"grad_norm": 0.5879412330667896,
|
|
"learning_rate": 3.881983422331198e-05,
|
|
"loss": 0.3655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1219518780708313,
|
|
"step": 870,
|
|
"valid_targets_mean": 2703.0,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 1.4025661587810747,
|
|
"grad_norm": 0.5119560422076794,
|
|
"learning_rate": 3.879263728797792e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2390919178724289,
|
|
"step": 875,
|
|
"valid_targets_mean": 7733.4,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 1.4105854049719326,
|
|
"grad_norm": 0.5841873090375073,
|
|
"learning_rate": 3.876514028385861e-05,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25305986404418945,
|
|
"step": 880,
|
|
"valid_targets_mean": 6032.5,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 1.4186046511627908,
|
|
"grad_norm": 0.6537532789732419,
|
|
"learning_rate": 3.873734365000857e-05,
|
|
"loss": 0.3585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17425492405891418,
|
|
"step": 885,
|
|
"valid_targets_mean": 3659.5,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 1.4266238973536487,
|
|
"grad_norm": 0.6485041173010123,
|
|
"learning_rate": 3.870924783026663e-05,
|
|
"loss": 0.3935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2005024552345276,
|
|
"step": 890,
|
|
"valid_targets_mean": 3811.0,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 1.4346431435445068,
|
|
"grad_norm": 0.7275313045075661,
|
|
"learning_rate": 3.8680853273248826e-05,
|
|
"loss": 0.3998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19834119081497192,
|
|
"step": 895,
|
|
"valid_targets_mean": 2894.2,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 1.4426623897353648,
|
|
"grad_norm": 0.6458382459028278,
|
|
"learning_rate": 3.865216043234126e-05,
|
|
"loss": 0.3622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23739655315876007,
|
|
"step": 900,
|
|
"valid_targets_mean": 3600.1,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 1.450681635926223,
|
|
"grad_norm": 0.667902826457069,
|
|
"learning_rate": 3.862316976569281e-05,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18984192609786987,
|
|
"step": 905,
|
|
"valid_targets_mean": 3311.0,
|
|
"valid_targets_min": 1529
|
|
},
|
|
{
|
|
"epoch": 1.458700882117081,
|
|
"grad_norm": 0.5301348528975791,
|
|
"learning_rate": 3.859388173620785e-05,
|
|
"loss": 0.3768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20433899760246277,
|
|
"step": 910,
|
|
"valid_targets_mean": 5428.2,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 1.466720128307939,
|
|
"grad_norm": 0.5548703258066876,
|
|
"learning_rate": 3.8564296811538874e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21961280703544617,
|
|
"step": 915,
|
|
"valid_targets_mean": 5185.6,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 1.474739374498797,
|
|
"grad_norm": 0.597151997949445,
|
|
"learning_rate": 3.853441546407898e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22741135954856873,
|
|
"step": 920,
|
|
"valid_targets_mean": 4965.1,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 1.4827586206896552,
|
|
"grad_norm": 0.5010688320540237,
|
|
"learning_rate": 3.850423817095438e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13275910913944244,
|
|
"step": 925,
|
|
"valid_targets_mean": 4258.0,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 1.4907778668805132,
|
|
"grad_norm": 0.8174306620777976,
|
|
"learning_rate": 3.847376541401674e-05,
|
|
"loss": 0.3943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20243340730667114,
|
|
"step": 930,
|
|
"valid_targets_mean": 3649.5,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 1.4987971130713713,
|
|
"grad_norm": 0.7021462499744725,
|
|
"learning_rate": 3.844299767983551e-05,
|
|
"loss": 0.3937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1653379201889038,
|
|
"step": 935,
|
|
"valid_targets_mean": 3139.4,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 1.5068163592622295,
|
|
"grad_norm": 0.551702554538306,
|
|
"learning_rate": 3.841193545969015e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16654658317565918,
|
|
"step": 940,
|
|
"valid_targets_mean": 4287.2,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 1.5148356054530874,
|
|
"grad_norm": 0.6123396594792097,
|
|
"learning_rate": 3.8380579249562265e-05,
|
|
"loss": 0.3755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11518816649913788,
|
|
"step": 945,
|
|
"valid_targets_mean": 2894.8,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 1.5228548516439455,
|
|
"grad_norm": 0.6187761971179958,
|
|
"learning_rate": 3.8348929550127734e-05,
|
|
"loss": 0.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15089167654514313,
|
|
"step": 950,
|
|
"valid_targets_mean": 3219.1,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 1.5308740978348037,
|
|
"grad_norm": 0.6758921374968958,
|
|
"learning_rate": 3.831698686674866e-05,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19325017929077148,
|
|
"step": 955,
|
|
"valid_targets_mean": 3923.2,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 1.5388933440256616,
|
|
"grad_norm": 0.553373368309608,
|
|
"learning_rate": 3.828475170946534e-05,
|
|
"loss": 0.3656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17428003251552582,
|
|
"step": 960,
|
|
"valid_targets_mean": 4634.9,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 1.5469125902165195,
|
|
"grad_norm": 0.6395790172827835,
|
|
"learning_rate": 3.8252224592988087e-05,
|
|
"loss": 0.3894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18858684599399567,
|
|
"step": 965,
|
|
"valid_targets_mean": 4211.4,
|
|
"valid_targets_min": 2032
|
|
},
|
|
{
|
|
"epoch": 1.5549318364073779,
|
|
"grad_norm": 0.6854114367908858,
|
|
"learning_rate": 3.821940603668906e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15687119960784912,
|
|
"step": 970,
|
|
"valid_targets_mean": 3116.8,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 1.5629510825982358,
|
|
"grad_norm": 0.7202300491845391,
|
|
"learning_rate": 3.8186296564593924e-05,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20200546085834503,
|
|
"step": 975,
|
|
"valid_targets_mean": 3873.4,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 1.5709703287890937,
|
|
"grad_norm": 0.6492604424261679,
|
|
"learning_rate": 3.815289670537351e-05,
|
|
"loss": 0.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09067234396934509,
|
|
"step": 980,
|
|
"valid_targets_mean": 2164.9,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 1.5789895749799518,
|
|
"grad_norm": 0.5755956725155771,
|
|
"learning_rate": 3.811920699233535e-05,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24794843792915344,
|
|
"step": 985,
|
|
"valid_targets_mean": 5809.8,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 1.58700882117081,
|
|
"grad_norm": 0.6035656260523917,
|
|
"learning_rate": 3.8085227963415186e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23764899373054504,
|
|
"step": 990,
|
|
"valid_targets_mean": 4744.6,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 1.595028067361668,
|
|
"grad_norm": 0.6279806954292144,
|
|
"learning_rate": 3.805096016116838e-05,
|
|
"loss": 0.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1629919707775116,
|
|
"step": 995,
|
|
"valid_targets_mean": 2693.1,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 1.603047313552526,
|
|
"grad_norm": 0.7382597775124584,
|
|
"learning_rate": 3.801640413276121e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260105013847351,
|
|
"step": 1000,
|
|
"valid_targets_mean": 2633.6,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 1.6110665597433842,
|
|
"grad_norm": 0.573979477466008,
|
|
"learning_rate": 3.7981560429962204e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18031983077526093,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4727.4,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 1.6190858059342421,
|
|
"grad_norm": 0.5255671835014767,
|
|
"learning_rate": 3.7946429609133274e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1784265786409378,
|
|
"step": 1010,
|
|
"valid_targets_mean": 4758.0,
|
|
"valid_targets_min": 1600
|
|
},
|
|
{
|
|
"epoch": 1.6271050521251003,
|
|
"grad_norm": 0.6913493291750783,
|
|
"learning_rate": 3.791101223122084e-05,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18905776739120483,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3459.2,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 1.6351242983159584,
|
|
"grad_norm": 0.5676146782725685,
|
|
"learning_rate": 3.787530886174688e-05,
|
|
"loss": 0.3933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14749515056610107,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3031.5,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 1.6431435445068163,
|
|
"grad_norm": 0.5975286122354355,
|
|
"learning_rate": 3.783932007079992e-05,
|
|
"loss": 0.3832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1427052915096283,
|
|
"step": 1025,
|
|
"valid_targets_mean": 2939.0,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 1.6511627906976745,
|
|
"grad_norm": 0.6486448049857164,
|
|
"learning_rate": 3.7803046433025905e-05,
|
|
"loss": 0.3681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1567174643278122,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2957.6,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 1.6591820368885326,
|
|
"grad_norm": 0.649120297289315,
|
|
"learning_rate": 3.7766488527619024e-05,
|
|
"loss": 0.384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19277894496917725,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4205.9,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 1.6672012830793905,
|
|
"grad_norm": 0.504041788849741,
|
|
"learning_rate": 3.772964693831247e-05,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20917224884033203,
|
|
"step": 1040,
|
|
"valid_targets_mean": 6204.8,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 1.6752205292702484,
|
|
"grad_norm": 0.5006556041596575,
|
|
"learning_rate": 3.7692522253369136e-05,
|
|
"loss": 0.4094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1799381971359253,
|
|
"step": 1045,
|
|
"valid_targets_mean": 5656.5,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 1.6832397754611068,
|
|
"grad_norm": 0.5355916567478674,
|
|
"learning_rate": 3.7655115065572194e-05,
|
|
"loss": 0.3731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1617688238620758,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5315.2,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 1.6912590216519647,
|
|
"grad_norm": 0.6605006405457838,
|
|
"learning_rate": 3.7617425972215626e-05,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15565674006938934,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3047.6,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 1.6992782678428227,
|
|
"grad_norm": 0.5867059394810048,
|
|
"learning_rate": 3.757945557509472e-05,
|
|
"loss": 0.3661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2331041693687439,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5444.6,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 1.7072975140336808,
|
|
"grad_norm": 0.5720752116389921,
|
|
"learning_rate": 3.7541204480496444e-05,
|
|
"loss": 0.3727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1441497951745987,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3124.1,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 1.715316760224539,
|
|
"grad_norm": 0.7595156059225239,
|
|
"learning_rate": 3.7502673299189745e-05,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16775290668010712,
|
|
"step": 1070,
|
|
"valid_targets_mean": 2317.6,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 1.7233360064153969,
|
|
"grad_norm": 0.5755424388708024,
|
|
"learning_rate": 3.746386264641583e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16708099842071533,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3082.5,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 1.731355252606255,
|
|
"grad_norm": 0.631158884641858,
|
|
"learning_rate": 3.7424773141878324e-05,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15561020374298096,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3028.0,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 1.7393744987971131,
|
|
"grad_norm": 0.5965028551998798,
|
|
"learning_rate": 3.738540540973338e-05,
|
|
"loss": 0.3703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25275903940200806,
|
|
"step": 1085,
|
|
"valid_targets_mean": 5063.2,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 1.747393744987971,
|
|
"grad_norm": 0.5659192371242109,
|
|
"learning_rate": 3.7345760078579695e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16954252123832703,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3610.2,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 1.7554129911788292,
|
|
"grad_norm": 0.5069380849543149,
|
|
"learning_rate": 3.730583778144852e-05,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1400984525680542,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3182.9,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.7634322373696873,
|
|
"grad_norm": 0.605006474711256,
|
|
"learning_rate": 3.7265639155793494e-05,
|
|
"loss": 0.4027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18291226029396057,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3794.0,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 1.7714514835605453,
|
|
"grad_norm": 0.574090621545591,
|
|
"learning_rate": 3.7225164843480503e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15381957590579987,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3089.6,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 1.7794707297514034,
|
|
"grad_norm": 0.6430771990270066,
|
|
"learning_rate": 3.7184415490777426e-05,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21809989213943481,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4406.8,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 1.7874899759422616,
|
|
"grad_norm": 0.6411894477736148,
|
|
"learning_rate": 3.714339174834379e-05,
|
|
"loss": 0.3764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19796337187290192,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4073.0,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 1.7955092221331195,
|
|
"grad_norm": 0.6562575782675708,
|
|
"learning_rate": 3.710209427122044e-05,
|
|
"loss": 0.3726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16205215454101562,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2719.2,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 1.8035284683239774,
|
|
"grad_norm": 0.6609794155101337,
|
|
"learning_rate": 3.7060523718819e-05,
|
|
"loss": 0.3969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31307533383369446,
|
|
"step": 1125,
|
|
"valid_targets_mean": 5189.6,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 1.8115477145148358,
|
|
"grad_norm": 0.8729671860828194,
|
|
"learning_rate": 3.701868075491139e-05,
|
|
"loss": 0.3853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20690149068832397,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3036.1,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 1.8195669607056937,
|
|
"grad_norm": 0.5347078058683018,
|
|
"learning_rate": 3.697656604761926e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12335579097270966,
|
|
"step": 1135,
|
|
"valid_targets_mean": 2996.4,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 1.8275862068965516,
|
|
"grad_norm": 2.15564282147594,
|
|
"learning_rate": 3.693418026940325e-05,
|
|
"loss": 0.377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19620174169540405,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3806.0,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 1.8356054530874097,
|
|
"grad_norm": 0.8361541825039279,
|
|
"learning_rate": 3.689152409705229e-05,
|
|
"loss": 0.3795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20147132873535156,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2509.1,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 1.8436246992782679,
|
|
"grad_norm": 0.5485386517037776,
|
|
"learning_rate": 3.6848598211672794e-05,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14361560344696045,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3387.9,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 1.8516439454691258,
|
|
"grad_norm": 0.523212489254307,
|
|
"learning_rate": 3.6805403298677797e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1377015858888626,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4157.6,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 1.859663191659984,
|
|
"grad_norm": 0.5805720706667392,
|
|
"learning_rate": 3.6761940047775966e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18016770482063293,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5156.2,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 1.867682437850842,
|
|
"grad_norm": 0.7073699077015825,
|
|
"learning_rate": 3.671820915296063e-05,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12544605135917664,
|
|
"step": 1165,
|
|
"valid_targets_mean": 2071.5,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 1.8757016840417,
|
|
"grad_norm": 0.5785725133606808,
|
|
"learning_rate": 3.667421131249869e-05,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.143942728638649,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4040.6,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 1.8837209302325582,
|
|
"grad_norm": 0.7097749219675242,
|
|
"learning_rate": 3.662994722891946e-05,
|
|
"loss": 0.384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1935431808233261,
|
|
"step": 1175,
|
|
"valid_targets_mean": 2891.0,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 1.8917401764234163,
|
|
"grad_norm": 0.5737892028472221,
|
|
"learning_rate": 3.658541760900344e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16055023670196533,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3619.0,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 1.8997594226142742,
|
|
"grad_norm": 0.7232340601273627,
|
|
"learning_rate": 3.654062316377106e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10650050640106201,
|
|
"step": 1185,
|
|
"valid_targets_mean": 1760.5,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 1.9077786688051324,
|
|
"grad_norm": 0.6443220988369217,
|
|
"learning_rate": 3.649556460847131e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14743563532829285,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3203.2,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 1.9157979149959905,
|
|
"grad_norm": 0.5582690367284744,
|
|
"learning_rate": 3.6450242662570314e-05,
|
|
"loss": 0.3863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19748684763908386,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4101.1,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 1.9238171611868484,
|
|
"grad_norm": 0.6001754112517981,
|
|
"learning_rate": 3.6404658049739854e-05,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13483411073684692,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3079.5,
|
|
"valid_targets_min": 2280
|
|
},
|
|
{
|
|
"epoch": 1.9318364073777063,
|
|
"grad_norm": 0.5966029488469835,
|
|
"learning_rate": 3.63588114978458e-05,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1857229471206665,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3873.2,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 1.9398556535685647,
|
|
"grad_norm": 0.6168496251026493,
|
|
"learning_rate": 3.6312703738936504e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13357146084308624,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2732.0,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 1.9478748997594226,
|
|
"grad_norm": 0.6311909475544676,
|
|
"learning_rate": 3.626633550923111e-05,
|
|
"loss": 0.3864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22280068695545197,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4697.6,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 1.9558941459502805,
|
|
"grad_norm": 0.6330802569242555,
|
|
"learning_rate": 3.621970754910778e-05,
|
|
"loss": 0.3879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17384251952171326,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3419.2,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 1.9639133921411387,
|
|
"grad_norm": 0.6832101537550106,
|
|
"learning_rate": 3.6172820603091885e-05,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23188313841819763,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5003.0,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 1.9719326383319968,
|
|
"grad_norm": 0.5453371948123984,
|
|
"learning_rate": 3.612567541984413e-05,
|
|
"loss": 0.3592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1720346063375473,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4566.8,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 1.9799518845228548,
|
|
"grad_norm": 0.6263399586173676,
|
|
"learning_rate": 3.6078272752148574e-05,
|
|
"loss": 0.3594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18737949430942535,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4321.5,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 1.987971130713713,
|
|
"grad_norm": 0.627134594554725,
|
|
"learning_rate": 3.6030613356900635e-05,
|
|
"loss": 0.3852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22431224584579468,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4424.0,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 1.995990376904571,
|
|
"grad_norm": 0.5993458413134857,
|
|
"learning_rate": 3.598269799509498e-05,
|
|
"loss": 0.3682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20022183656692505,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4605.9,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 2.003207698476343,
|
|
"grad_norm": 0.5663208864221956,
|
|
"learning_rate": 3.5934527431813385e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12005630880594254,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2945.0,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 2.011226944667201,
|
|
"grad_norm": 0.8872549889160983,
|
|
"learning_rate": 3.5886102436212536e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22787944972515106,
|
|
"step": 1255,
|
|
"valid_targets_mean": 2892.1,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 2.0192461908580595,
|
|
"grad_norm": 0.70325046246873,
|
|
"learning_rate": 3.583742378151171e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09715551137924194,
|
|
"step": 1260,
|
|
"valid_targets_mean": 1930.8,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 2.0272654370489174,
|
|
"grad_norm": 0.45173903036506174,
|
|
"learning_rate": 3.5788492244980464e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15271684527397156,
|
|
"step": 1265,
|
|
"valid_targets_mean": 5269.2,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 2.0352846832397753,
|
|
"grad_norm": 0.6267349391058774,
|
|
"learning_rate": 3.573930860792621e-05,
|
|
"loss": 0.3587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12874770164489746,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3163.2,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 2.0433039294306337,
|
|
"grad_norm": 0.6511298081327664,
|
|
"learning_rate": 3.568987365568173e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15595881640911102,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4213.8,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 2.0513231756214916,
|
|
"grad_norm": 0.6887412815570042,
|
|
"learning_rate": 3.564018817759266e-05,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19574494659900665,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3699.8,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 2.0593424218123495,
|
|
"grad_norm": 0.5876985128109127,
|
|
"learning_rate": 3.559025296700484e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2059636414051056,
|
|
"step": 1285,
|
|
"valid_targets_mean": 5620.4,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 2.067361668003208,
|
|
"grad_norm": 0.6489766733954635,
|
|
"learning_rate": 3.554006882125173e-05,
|
|
"loss": 0.3468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14607182145118713,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3392.2,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 2.075380914194066,
|
|
"grad_norm": 0.6998589814788587,
|
|
"learning_rate": 3.5489636541641586e-05,
|
|
"loss": 0.3531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25323814153671265,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4242.2,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 2.0834001603849237,
|
|
"grad_norm": 0.6809616848578639,
|
|
"learning_rate": 3.543895693344472e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26932215690612793,
|
|
"step": 1300,
|
|
"valid_targets_mean": 6057.2,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 2.091419406575782,
|
|
"grad_norm": 0.7197676582601361,
|
|
"learning_rate": 3.538803080588063e-05,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13603916764259338,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2857.4,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 2.09943865276664,
|
|
"grad_norm": 0.5586827477126696,
|
|
"learning_rate": 3.5336858972105076e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16107013821601868,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5763.4,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 2.107457898957498,
|
|
"grad_norm": 0.6466104512256226,
|
|
"learning_rate": 3.528544224919708e-05,
|
|
"loss": 0.3284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1799907684326172,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 2.115477145148356,
|
|
"grad_norm": 0.7350409668585496,
|
|
"learning_rate": 3.5233781458145934e-05,
|
|
"loss": 0.368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22638684511184692,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4702.5,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 2.123496391339214,
|
|
"grad_norm": 0.7064124825894162,
|
|
"learning_rate": 3.5181877423838034e-05,
|
|
"loss": 0.3549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14431232213974,
|
|
"step": 1325,
|
|
"valid_targets_mean": 2504.0,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 2.131515637530072,
|
|
"grad_norm": 0.6170135621678351,
|
|
"learning_rate": 3.512973097504371e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1600828617811203,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4526.1,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 2.13953488372093,
|
|
"grad_norm": 0.6334913259623194,
|
|
"learning_rate": 3.507734294440403e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22223691642284393,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3722.9,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 2.1475541299117884,
|
|
"grad_norm": 0.5896282676984603,
|
|
"learning_rate": 3.50247141684175e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1323956400156021,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3185.5,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 2.1555733761026463,
|
|
"grad_norm": 0.7760657508108424,
|
|
"learning_rate": 3.497184548742667e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19285187125205994,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2632.2,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 2.1635926222935042,
|
|
"grad_norm": 0.6467658716847074,
|
|
"learning_rate": 3.491873774560473e-05,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1351018100976944,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3104.2,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 2.1716118684843626,
|
|
"grad_norm": 0.6572971280106079,
|
|
"learning_rate": 3.486539179094208e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19534534215927124,
|
|
"step": 1355,
|
|
"valid_targets_mean": 4289.0,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 2.1796311146752205,
|
|
"grad_norm": 0.576102818448099,
|
|
"learning_rate": 3.481180847523272e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10375870019197464,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3130.5,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 2.1876503608660784,
|
|
"grad_norm": 0.5976408294352582,
|
|
"learning_rate": 3.4757988654060684e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22457639873027802,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4272.2,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 2.195669607056937,
|
|
"grad_norm": 0.6876863275492887,
|
|
"learning_rate": 3.470393318678637e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13346576690673828,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3217.4,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 2.2036888532477947,
|
|
"grad_norm": 0.5667005128855648,
|
|
"learning_rate": 3.4649642936532836e-05,
|
|
"loss": 0.3661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1915643811225891,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4325.4,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 2.2117080994386527,
|
|
"grad_norm": 0.6805553682250962,
|
|
"learning_rate": 3.4595118770171984e-05,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16712094843387604,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3757.0,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 2.219727345629511,
|
|
"grad_norm": 0.5851592379232585,
|
|
"learning_rate": 3.454036155831077e-05,
|
|
"loss": 0.3684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18492160737514496,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4730.1,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 2.227746591820369,
|
|
"grad_norm": 0.5941379064666082,
|
|
"learning_rate": 3.4485372175277236e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17360571026802063,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4303.4,
|
|
"valid_targets_min": 1718
|
|
},
|
|
{
|
|
"epoch": 2.235765838011227,
|
|
"grad_norm": 0.7120870875424696,
|
|
"learning_rate": 3.44301514991066e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1876790076494217,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3447.5,
|
|
"valid_targets_min": 1887
|
|
},
|
|
{
|
|
"epoch": 2.2437850842020852,
|
|
"grad_norm": 0.8987849319173,
|
|
"learning_rate": 3.4374700411527225e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342727690935135,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2107.1,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 2.251804330392943,
|
|
"grad_norm": 0.7207343049659005,
|
|
"learning_rate": 3.431901979794653e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14748650789260864,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4076.9,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 2.259823576583801,
|
|
"grad_norm": 0.646268203387511,
|
|
"learning_rate": 3.426311054743685e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17233049869537354,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3841.1,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 2.267842822774659,
|
|
"grad_norm": 0.6555687819002504,
|
|
"learning_rate": 3.420697355272127e-05,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21643511950969696,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4472.8,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 2.2758620689655173,
|
|
"grad_norm": 0.6365450212470497,
|
|
"learning_rate": 3.415060971015933e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15613329410552979,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3359.0,
|
|
"valid_targets_min": 1872
|
|
},
|
|
{
|
|
"epoch": 2.2838813151563753,
|
|
"grad_norm": 0.5472941463259734,
|
|
"learning_rate": 3.4094019919732736e-05,
|
|
"loss": 0.3582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18104006350040436,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3852.8,
|
|
"valid_targets_min": 1580
|
|
},
|
|
{
|
|
"epoch": 2.291900561347233,
|
|
"grad_norm": 0.6778575739718365,
|
|
"learning_rate": 3.403720508503098e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11832959949970245,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2429.9,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 2.2999198075380916,
|
|
"grad_norm": 0.7046598918064869,
|
|
"learning_rate": 3.398016611323693e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1872364580631256,
|
|
"step": 1435,
|
|
"valid_targets_mean": 2780.0,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 2.3079390537289495,
|
|
"grad_norm": 0.5903200079092314,
|
|
"learning_rate": 3.392290391511232e-05,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24641390144824982,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5554.1,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 2.3159582999198074,
|
|
"grad_norm": 0.5877596462721397,
|
|
"learning_rate": 3.386541940498322e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20738443732261658,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5463.1,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 2.3239775461106658,
|
|
"grad_norm": 0.518772079301355,
|
|
"learning_rate": 3.380771350072543e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14182378351688385,
|
|
"step": 1450,
|
|
"valid_targets_mean": 4598.1,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 2.3319967923015237,
|
|
"grad_norm": 0.6214771748168114,
|
|
"learning_rate": 3.374978712374986e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1580396294593811,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3403.0,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 2.3400160384923816,
|
|
"grad_norm": 0.6601543484544395,
|
|
"learning_rate": 3.369164119898774e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1316310465335846,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2555.2,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 2.34803528468324,
|
|
"grad_norm": 0.5865229505252662,
|
|
"learning_rate": 3.363327665487593e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1241571456193924,
|
|
"step": 1465,
|
|
"valid_targets_mean": 2880.8,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 2.356054530874098,
|
|
"grad_norm": 0.5980685298620095,
|
|
"learning_rate": 3.357469442334206e-05,
|
|
"loss": 0.3669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2264668047428131,
|
|
"step": 1470,
|
|
"valid_targets_mean": 6163.9,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 2.364073777064956,
|
|
"grad_norm": 0.6627796109058691,
|
|
"learning_rate": 3.351589543978965e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2296164184808731,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3652.5,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 2.3720930232558137,
|
|
"grad_norm": 0.5294058440362729,
|
|
"learning_rate": 3.345688064308317e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09211620688438416,
|
|
"step": 1480,
|
|
"valid_targets_mean": 1971.9,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 2.380112269446672,
|
|
"grad_norm": 0.5811757127018823,
|
|
"learning_rate": 3.339765097553307e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1387772560119629,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4018.6,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 2.38813151563753,
|
|
"grad_norm": 0.6531008698660362,
|
|
"learning_rate": 3.33382073828807e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2030208259820938,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3304.8,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 2.3961507618283884,
|
|
"grad_norm": 0.5940837552451501,
|
|
"learning_rate": 3.327855081428326e-05,
|
|
"loss": 0.3315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11011539399623871,
|
|
"step": 1495,
|
|
"valid_targets_mean": 2390.4,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 2.4041700080192463,
|
|
"grad_norm": 0.6349081771533944,
|
|
"learning_rate": 3.3218682222298584e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17606303095817566,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4131.0,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 2.412189254210104,
|
|
"grad_norm": 0.6551570262937156,
|
|
"learning_rate": 3.315860256286996e-05,
|
|
"loss": 0.3444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1725284457206726,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3694.4,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 2.420208500400962,
|
|
"grad_norm": 0.576066249468684,
|
|
"learning_rate": 3.3098312795310894e-05,
|
|
"loss": 0.3592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11704175174236298,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3280.1,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 2.4282277465918205,
|
|
"grad_norm": 0.5960437561557996,
|
|
"learning_rate": 3.303781388228974e-05,
|
|
"loss": 0.3464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1958293616771698,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4049.8,
|
|
"valid_targets_min": 1618
|
|
},
|
|
{
|
|
"epoch": 2.4362469927826784,
|
|
"grad_norm": 0.8307745047978246,
|
|
"learning_rate": 3.297710678981435e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1351197361946106,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2351.6,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 2.4442662389735363,
|
|
"grad_norm": 0.6476048564369427,
|
|
"learning_rate": 3.291619248721667e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21313653886318207,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3757.8,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 2.4522854851643947,
|
|
"grad_norm": 0.6101060365763258,
|
|
"learning_rate": 3.285507194713724e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19842976331710815,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4398.0,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 2.4603047313552526,
|
|
"grad_norm": 0.5916014914557419,
|
|
"learning_rate": 3.279374614550966e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13822132349014282,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3856.4,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 2.4683239775461105,
|
|
"grad_norm": 0.6072793114889506,
|
|
"learning_rate": 3.2732216061545e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20727184414863586,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4482.2,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 2.476343223736969,
|
|
"grad_norm": 0.5182531096963034,
|
|
"learning_rate": 3.2670482677716214e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1823084056377411,
|
|
"step": 1545,
|
|
"valid_targets_mean": 6600.6,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 2.484362469927827,
|
|
"grad_norm": 0.6588849437987316,
|
|
"learning_rate": 3.2608546979742394e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547794282436371,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4279.4,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 2.4923817161186848,
|
|
"grad_norm": 0.5210067181618686,
|
|
"learning_rate": 3.254640995657307e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12388885021209717,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3967.8,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 2.500400962309543,
|
|
"grad_norm": 0.6675420827290998,
|
|
"learning_rate": 3.248407260037239e-05,
|
|
"loss": 0.3419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17942309379577637,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4615.5,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 2.508420208500401,
|
|
"grad_norm": 0.7303300679097665,
|
|
"learning_rate": 3.24215359065033e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2134750932455063,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3633.2,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 2.516439454691259,
|
|
"grad_norm": 0.6444335496667586,
|
|
"learning_rate": 3.235880087351164e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23998838663101196,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5089.2,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 2.524458700882117,
|
|
"grad_norm": 0.5748610625680467,
|
|
"learning_rate": 3.2295868503110184e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552739977836609,
|
|
"step": 1575,
|
|
"valid_targets_mean": 6235.0,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 2.5324779470729752,
|
|
"grad_norm": 0.6462216645868455,
|
|
"learning_rate": 3.22327398001627e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16141477227210999,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2878.8,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 2.540497193263833,
|
|
"grad_norm": 0.5609462119008043,
|
|
"learning_rate": 3.216941577266783e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2184831202030182,
|
|
"step": 1585,
|
|
"valid_targets_mean": 6295.1,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.5485164394546915,
|
|
"grad_norm": 0.4957075570354505,
|
|
"learning_rate": 3.210589743174308e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2078409641981125,
|
|
"step": 1590,
|
|
"valid_targets_mean": 7591.4,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 2.5565356856455494,
|
|
"grad_norm": 0.6463964641560773,
|
|
"learning_rate": 3.204218579160857e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24058961868286133,
|
|
"step": 1595,
|
|
"valid_targets_mean": 5036.2,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 2.5645549318364074,
|
|
"grad_norm": 0.5516304479142902,
|
|
"learning_rate": 3.197828186957094e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1926528811454773,
|
|
"step": 1600,
|
|
"valid_targets_mean": 5438.1,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 2.5725741780272653,
|
|
"grad_norm": 0.5248474906462943,
|
|
"learning_rate": 3.191418668600705e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15956036746501923,
|
|
"step": 1605,
|
|
"valid_targets_mean": 5140.5,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 2.5805934242181237,
|
|
"grad_norm": 0.6278892330122797,
|
|
"learning_rate": 3.184990126434771e-05,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21146860718727112,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3795.6,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 2.5886126704089816,
|
|
"grad_norm": 0.6365412145603873,
|
|
"learning_rate": 3.178542663106131e-05,
|
|
"loss": 0.3514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.165127694606781,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3648.6,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 2.5966319165998395,
|
|
"grad_norm": 0.6105168700748761,
|
|
"learning_rate": 3.172076381563748e-05,
|
|
"loss": 0.3697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2247442901134491,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4797.0,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 2.604651162790698,
|
|
"grad_norm": 0.5552391542865701,
|
|
"learning_rate": 3.165591385057058e-05,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16839686036109924,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4180.8,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 2.612670408981556,
|
|
"grad_norm": 0.6258173020249116,
|
|
"learning_rate": 3.1590877771343316e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20165500044822693,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4232.8,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 2.6206896551724137,
|
|
"grad_norm": 0.6291679923624242,
|
|
"learning_rate": 3.152565661641008e-05,
|
|
"loss": 0.3309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2278696447610855,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5187.6,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 2.6287089013632716,
|
|
"grad_norm": 0.629899100817526,
|
|
"learning_rate": 3.1460251427180474e-05,
|
|
"loss": 0.3488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13377898931503296,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2682.0,
|
|
"valid_targets_min": 1718
|
|
},
|
|
{
|
|
"epoch": 2.63672814755413,
|
|
"grad_norm": 0.6868725842453621,
|
|
"learning_rate": 3.139466324800263e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12783987820148468,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3092.1,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 2.644747393744988,
|
|
"grad_norm": 0.655325240952164,
|
|
"learning_rate": 3.132889312614655e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1781664788722992,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3401.0,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 2.6527666399358463,
|
|
"grad_norm": 0.6228530990624284,
|
|
"learning_rate": 3.126294211178737e-05,
|
|
"loss": 0.3413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17397254705429077,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3983.8,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 2.660785886126704,
|
|
"grad_norm": 0.5714671399749158,
|
|
"learning_rate": 3.1196811257988634e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16387617588043213,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4429.9,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 2.668805132317562,
|
|
"grad_norm": 0.7483517252874993,
|
|
"learning_rate": 3.1130501620685394e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23365803062915802,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2976.1,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 2.67682437850842,
|
|
"grad_norm": 0.6639005033164377,
|
|
"learning_rate": 3.106401425866745e-05,
|
|
"loss": 0.341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19206903874874115,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3576.8,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 2.6848436246992784,
|
|
"grad_norm": 0.5766789644867589,
|
|
"learning_rate": 3.099735023356236e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21450704336166382,
|
|
"step": 1675,
|
|
"valid_targets_mean": 5612.0,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 2.6928628708901363,
|
|
"grad_norm": 0.5871168709870269,
|
|
"learning_rate": 3.0930510609818564e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17829036712646484,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4906.4,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 2.7008821170809942,
|
|
"grad_norm": 0.5872313358818464,
|
|
"learning_rate": 3.086349645468831e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11560177803039551,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4144.8,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 2.7089013632718526,
|
|
"grad_norm": 0.5826035312015911,
|
|
"learning_rate": 3.079630883821067e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21896260976791382,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4781.9,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 2.7169206094627105,
|
|
"grad_norm": 0.6011395273868881,
|
|
"learning_rate": 3.0728948833194436e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21181732416152954,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5069.0,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 2.7249398556535684,
|
|
"grad_norm": 0.6651359418444572,
|
|
"learning_rate": 3.066141751520099e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1747858226299286,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3284.0,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 2.7329591018444264,
|
|
"grad_norm": 0.6115176677200098,
|
|
"learning_rate": 3.059371596252712e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.162808358669281,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3847.5,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 2.7409783480352847,
|
|
"grad_norm": 0.6656515539528712,
|
|
"learning_rate": 3.0525845256187834e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14415833353996277,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3631.0,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 2.7489975942261426,
|
|
"grad_norm": 0.6083830291868935,
|
|
"learning_rate": 3.0457806479899044e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18218392133712769,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4358.8,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 2.757016840417001,
|
|
"grad_norm": 0.6011013868022232,
|
|
"learning_rate": 3.0389600720060318e-05,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2295471429824829,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4038.2,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 2.765036086607859,
|
|
"grad_norm": 0.6611587519621636,
|
|
"learning_rate": 3.0321229065737522e-05,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16769525408744812,
|
|
"step": 1725,
|
|
"valid_targets_mean": 2427.5,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 2.773055332798717,
|
|
"grad_norm": 0.8131060808181708,
|
|
"learning_rate": 3.0252692608645384e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18336832523345947,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3267.2,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 2.7810745789895748,
|
|
"grad_norm": 0.5610531465944066,
|
|
"learning_rate": 3.0183992443130127e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16682939231395721,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4752.2,
|
|
"valid_targets_min": 2202
|
|
},
|
|
{
|
|
"epoch": 2.789093825180433,
|
|
"grad_norm": 0.5997382925590476,
|
|
"learning_rate": 3.011512966615195e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12142496556043625,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2443.5,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 2.797113071371291,
|
|
"grad_norm": 0.5762907495346832,
|
|
"learning_rate": 3.0046105377267523e-05,
|
|
"loss": 0.3301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17085179686546326,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4630.1,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 2.8051323175621494,
|
|
"grad_norm": 0.5829196843633385,
|
|
"learning_rate": 2.9976920678612456e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24269111454486847,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5201.4,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 2.8131515637530073,
|
|
"grad_norm": 0.6277816875568113,
|
|
"learning_rate": 2.9907576674883664e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2342943698167801,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5771.5,
|
|
"valid_targets_min": 2030
|
|
},
|
|
{
|
|
"epoch": 2.8211708099438653,
|
|
"grad_norm": 0.5566183050309045,
|
|
"learning_rate": 2.983807447332174e-05,
|
|
"loss": 0.3455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12989120185375214,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3220.8,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 2.829190056134723,
|
|
"grad_norm": 0.5843107774579075,
|
|
"learning_rate": 2.9768415183693293e-05,
|
|
"loss": 0.3412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15450912714004517,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3541.1,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 2.8372093023255816,
|
|
"grad_norm": 0.6007068163414647,
|
|
"learning_rate": 2.9698599918273197e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17683474719524384,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4956.5,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 2.8452285485164395,
|
|
"grad_norm": 0.6439652957844222,
|
|
"learning_rate": 2.962862979182686e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13516972959041595,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2771.8,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 2.8532477947072974,
|
|
"grad_norm": 0.5806838903580533,
|
|
"learning_rate": 2.95585059215924e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14902666211128235,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3495.1,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 2.8612670408981558,
|
|
"grad_norm": 0.8405453402220504,
|
|
"learning_rate": 2.948822942726284e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19173868000507355,
|
|
"step": 1785,
|
|
"valid_targets_mean": 2640.8,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 2.8692862870890137,
|
|
"grad_norm": 0.5958007208888075,
|
|
"learning_rate": 2.941780143096817e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19616219401359558,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4756.8,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 2.8773055332798716,
|
|
"grad_norm": 0.5542905432219679,
|
|
"learning_rate": 2.9347223057257505e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1591700315475464,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5558.9,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 2.8853247794707295,
|
|
"grad_norm": 0.4833364708765064,
|
|
"learning_rate": 2.927649543308106e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18111169338226318,
|
|
"step": 1800,
|
|
"valid_targets_mean": 6311.6,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 2.893344025661588,
|
|
"grad_norm": 0.5957529324717237,
|
|
"learning_rate": 2.9205619687772212e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18495184183120728,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4080.9,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 2.901363271852446,
|
|
"grad_norm": 0.6158328047841045,
|
|
"learning_rate": 2.9134596953029413e-05,
|
|
"loss": 0.3501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17425167560577393,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4262.2,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 2.909382518043304,
|
|
"grad_norm": 0.4842742755340218,
|
|
"learning_rate": 2.9063428362898168e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13343685865402222,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5032.9,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 2.917401764234162,
|
|
"grad_norm": 0.5503873137781846,
|
|
"learning_rate": 2.8992115053752905e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1701168566942215,
|
|
"step": 1820,
|
|
"valid_targets_mean": 4278.1,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 2.92542101042502,
|
|
"grad_norm": 0.5745206673048178,
|
|
"learning_rate": 2.8920658164278816e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14051708579063416,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3690.9,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 2.933440256615878,
|
|
"grad_norm": 0.6857547647409145,
|
|
"learning_rate": 2.884905883545373e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15106357634067535,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3079.5,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 2.9414595028067363,
|
|
"grad_norm": 0.5480224874709139,
|
|
"learning_rate": 2.877731821052981e-05,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23945242166519165,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5315.8,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 2.949478748997594,
|
|
"grad_norm": 0.5679922622553645,
|
|
"learning_rate": 2.8705437435015375e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21760649979114532,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5026.5,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 2.957497995188452,
|
|
"grad_norm": 0.5313840600274049,
|
|
"learning_rate": 2.8633417656656566e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11117774248123169,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3457.6,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 2.9655172413793105,
|
|
"grad_norm": 0.6297881056735455,
|
|
"learning_rate": 2.8561260025419036e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2085118293762207,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4719.8,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 2.9735364875701684,
|
|
"grad_norm": 0.8963545587577034,
|
|
"learning_rate": 2.8488965693469583e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17817959189414978,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3197.5,
|
|
"valid_targets_min": 1654
|
|
},
|
|
{
|
|
"epoch": 2.9815557337610263,
|
|
"grad_norm": 0.5354808191697631,
|
|
"learning_rate": 2.8416535815157763e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702459305524826,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4165.6,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 2.9895749799518843,
|
|
"grad_norm": 0.6498642625370372,
|
|
"learning_rate": 2.8343971546997434e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.157914936542511,
|
|
"step": 1865,
|
|
"valid_targets_mean": 2609.2,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 2.9975942261427426,
|
|
"grad_norm": 0.6297407453522518,
|
|
"learning_rate": 2.827127404764831e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11299164593219757,
|
|
"step": 1870,
|
|
"valid_targets_mean": 2654.1,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 3.0048115477145148,
|
|
"grad_norm": 0.7032762627174555,
|
|
"learning_rate": 2.8198444477897467e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15206965804100037,
|
|
"step": 1875,
|
|
"valid_targets_mean": 2631.5,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 3.0128307939053727,
|
|
"grad_norm": 0.7155942882812133,
|
|
"learning_rate": 2.8125484000640787e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10091409832239151,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2218.4,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 3.020850040096231,
|
|
"grad_norm": 0.4981371502776664,
|
|
"learning_rate": 2.8052393780864394e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1289309561252594,
|
|
"step": 1885,
|
|
"valid_targets_mean": 4298.6,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 3.028869286287089,
|
|
"grad_norm": 0.575400474472258,
|
|
"learning_rate": 2.797917498562607e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1485946774482727,
|
|
"step": 1890,
|
|
"valid_targets_mean": 4350.4,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 3.036888532477947,
|
|
"grad_norm": 0.5120704781400871,
|
|
"learning_rate": 2.7905828784036596e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21469643712043762,
|
|
"step": 1895,
|
|
"valid_targets_mean": 7091.8,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 3.0449077786688052,
|
|
"grad_norm": 0.5767259511228147,
|
|
"learning_rate": 2.78323563472411e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18228240311145782,
|
|
"step": 1900,
|
|
"valid_targets_mean": 5326.8,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 3.052927024859663,
|
|
"grad_norm": 0.5491197579142447,
|
|
"learning_rate": 2.7758758848400354e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1779412031173706,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5061.5,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 3.060946271050521,
|
|
"grad_norm": 0.6842155517498201,
|
|
"learning_rate": 2.7685037462672043e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19132070243358612,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3660.1,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 3.0689655172413794,
|
|
"grad_norm": 0.6760878064304382,
|
|
"learning_rate": 2.7611193367191993e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22465574741363525,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4018.1,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 3.0769847634322374,
|
|
"grad_norm": 0.6414087031460661,
|
|
"learning_rate": 2.7537227741055378e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10675209015607834,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2162.2,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 3.0850040096230953,
|
|
"grad_norm": 0.6858636117457415,
|
|
"learning_rate": 2.746314176529791e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16336016356945038,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3358.5,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 3.0930232558139537,
|
|
"grad_norm": 0.4896718696555601,
|
|
"learning_rate": 2.7388936622876957e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17518985271453857,
|
|
"step": 1930,
|
|
"valid_targets_mean": 7132.2,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 3.1010425020048116,
|
|
"grad_norm": 0.5104354367503314,
|
|
"learning_rate": 2.7314613498652663e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17402449250221252,
|
|
"step": 1935,
|
|
"valid_targets_mean": 5995.6,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 3.1090617481956695,
|
|
"grad_norm": 0.6254156399773187,
|
|
"learning_rate": 2.7240173579369025e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12640006840229034,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3783.9,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 3.117080994386528,
|
|
"grad_norm": 0.5304276087744046,
|
|
"learning_rate": 2.7165618053634962e-05,
|
|
"loss": 0.3402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13664016127586365,
|
|
"step": 1945,
|
|
"valid_targets_mean": 6537.9,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 3.125100240577386,
|
|
"grad_norm": 0.6393804177692343,
|
|
"learning_rate": 2.7090948111905304e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15379135310649872,
|
|
"step": 1950,
|
|
"valid_targets_mean": 3703.0,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 3.1331194867682437,
|
|
"grad_norm": 0.7483845792341272,
|
|
"learning_rate": 2.701616494646183e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16798612475395203,
|
|
"step": 1955,
|
|
"valid_targets_mean": 2964.4,
|
|
"valid_targets_min": 2087
|
|
},
|
|
{
|
|
"epoch": 3.141138732959102,
|
|
"grad_norm": 0.6571338517574732,
|
|
"learning_rate": 2.6941269751394174e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19471541047096252,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4089.4,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 3.14915797914996,
|
|
"grad_norm": 0.7937141144340575,
|
|
"learning_rate": 2.686626372258081e-05,
|
|
"loss": 0.3353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2159966677427292,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3406.1,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 3.157177225340818,
|
|
"grad_norm": 0.6798355217613847,
|
|
"learning_rate": 2.6791148057669913e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19850729405879974,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4502.8,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 3.165196471531676,
|
|
"grad_norm": 0.5314856867903979,
|
|
"learning_rate": 2.671592395606027e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0934186726808548,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3235.5,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 3.173215717722534,
|
|
"grad_norm": 0.7120381313874422,
|
|
"learning_rate": 2.6640592618882114e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19404734671115875,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3064.2,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 3.181234963913392,
|
|
"grad_norm": 0.4626762807628728,
|
|
"learning_rate": 2.656515524897795e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19291116297245026,
|
|
"step": 1985,
|
|
"valid_targets_mean": 7916.4,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 3.18925421010425,
|
|
"grad_norm": 0.5223576381813416,
|
|
"learning_rate": 2.6489613050883343e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132581427693367,
|
|
"step": 1990,
|
|
"valid_targets_mean": 5168.0,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 3.1972734562951084,
|
|
"grad_norm": 0.5813867725843586,
|
|
"learning_rate": 2.6413967230807677e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17431476712226868,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4458.4,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 3.2052927024859663,
|
|
"grad_norm": 0.6202033622798051,
|
|
"learning_rate": 2.6338218996614924e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445320099592209,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3833.1,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 3.2133119486768242,
|
|
"grad_norm": 0.6448556178568718,
|
|
"learning_rate": 2.6262369557804325e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1171756237745285,
|
|
"step": 2005,
|
|
"valid_targets_mean": 2876.5,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 3.2213311948676826,
|
|
"grad_norm": 0.6092126401517288,
|
|
"learning_rate": 2.6186420125491094e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14689497649669647,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3826.5,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 3.2293504410585405,
|
|
"grad_norm": 0.6043324360660347,
|
|
"learning_rate": 2.6110371912387083e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1918356716632843,
|
|
"step": 2015,
|
|
"valid_targets_mean": 5438.6,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 3.2373696872493984,
|
|
"grad_norm": 0.47063226408624154,
|
|
"learning_rate": 2.6034226132781407e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22369518876075745,
|
|
"step": 2020,
|
|
"valid_targets_mean": 8461.0,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 3.245388933440257,
|
|
"grad_norm": 0.9275068584587873,
|
|
"learning_rate": 2.5957984002521066e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.197782963514328,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3524.5,
|
|
"valid_targets_min": 1703
|
|
},
|
|
{
|
|
"epoch": 3.2534081796311147,
|
|
"grad_norm": 0.5533054991481485,
|
|
"learning_rate": 2.588164673899151e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13876253366470337,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4418.5,
|
|
"valid_targets_min": 1813
|
|
},
|
|
{
|
|
"epoch": 3.2614274258219726,
|
|
"grad_norm": 0.7212838634122106,
|
|
"learning_rate": 2.580521556109724e-05,
|
|
"loss": 0.3402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20015841722488403,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3805.8,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 3.2694466720128306,
|
|
"grad_norm": 0.7597766574519237,
|
|
"learning_rate": 2.57286916892423e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15914931893348694,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2774.2,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 3.277465918203689,
|
|
"grad_norm": 0.5821850834880777,
|
|
"learning_rate": 2.5652076345310822e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1513526290655136,
|
|
"step": 2045,
|
|
"valid_targets_mean": 5144.5,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 3.285485164394547,
|
|
"grad_norm": 0.6255573781506097,
|
|
"learning_rate": 2.5575370752647507e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2162339985370636,
|
|
"step": 2050,
|
|
"valid_targets_mean": 5110.9,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 3.293504410585405,
|
|
"grad_norm": 0.696164430308172,
|
|
"learning_rate": 2.5498576136038077e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1917979121208191,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3515.4,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 3.301523656776263,
|
|
"grad_norm": 0.6295572557942658,
|
|
"learning_rate": 2.542169372168976e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12846094369888306,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3133.1,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 3.309542902967121,
|
|
"grad_norm": 0.7413126077265113,
|
|
"learning_rate": 2.5344724737211646e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11233331263065338,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2601.6,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 3.317562149157979,
|
|
"grad_norm": 0.7018220334438993,
|
|
"learning_rate": 2.5267670411595152e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18812254071235657,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4047.4,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 3.3255813953488373,
|
|
"grad_norm": 0.7023439898521772,
|
|
"learning_rate": 2.5190531975194345e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12871921062469482,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3000.5,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 3.3336006415396953,
|
|
"grad_norm": 0.6859887980576995,
|
|
"learning_rate": 2.5113310659706322e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14714935421943665,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3496.4,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 3.341619887730553,
|
|
"grad_norm": 0.6210652673938497,
|
|
"learning_rate": 2.5036007698151553e-05,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11785542219877243,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3337.6,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 3.3496391339214115,
|
|
"grad_norm": 0.723666478833868,
|
|
"learning_rate": 2.4958624324854185e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654890716075897,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4701.8,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 3.3576583801122695,
|
|
"grad_norm": 0.6623344321527482,
|
|
"learning_rate": 2.4881161775422303e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14719511568546295,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3824.8,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 3.3656776263031274,
|
|
"grad_norm": 1.4563673300691875,
|
|
"learning_rate": 2.480362128672824e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1964644342660904,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3709.1,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 3.3736968724939858,
|
|
"grad_norm": 0.6842287679822243,
|
|
"learning_rate": 2.4726004096888817e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10231268405914307,
|
|
"step": 2105,
|
|
"valid_targets_mean": 2384.8,
|
|
"valid_targets_min": 948
|
|
},
|
|
{
|
|
"epoch": 3.3817161186848437,
|
|
"grad_norm": 0.6061497280938558,
|
|
"learning_rate": 2.4648311445245558e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11788766831159592,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3423.9,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 3.3897353648757016,
|
|
"grad_norm": 0.6208122569688548,
|
|
"learning_rate": 2.457054457234493e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18449848890304565,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5041.8,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 3.39775461106656,
|
|
"grad_norm": 0.6169483248382215,
|
|
"learning_rate": 2.4492704719918497e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1982535421848297,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4748.6,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 3.405773857257418,
|
|
"grad_norm": 0.6248745182773543,
|
|
"learning_rate": 2.4414793130863134e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20577558875083923,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5071.9,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 3.413793103448276,
|
|
"grad_norm": 0.6631257350229737,
|
|
"learning_rate": 2.433681104922114e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303030252456665,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3573.0,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 3.4218123496391337,
|
|
"grad_norm": 0.5854169688561389,
|
|
"learning_rate": 2.4258759720160412e-05,
|
|
"loss": 0.3236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09749852865934372,
|
|
"step": 2135,
|
|
"valid_targets_mean": 2798.1,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 3.429831595829992,
|
|
"grad_norm": 0.7655917364113674,
|
|
"learning_rate": 2.4180640389954534e-05,
|
|
"loss": 0.3512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15098845958709717,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2984.9,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 3.43785084202085,
|
|
"grad_norm": 0.7793522714461361,
|
|
"learning_rate": 2.4102454305962892e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1879688799381256,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2992.2,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 3.445870088211708,
|
|
"grad_norm": 0.5389302191512596,
|
|
"learning_rate": 2.402420271661076e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2137525975704193,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5566.5,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 3.4538893344025663,
|
|
"grad_norm": 0.5578382143957197,
|
|
"learning_rate": 2.3945886871369338e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20459142327308655,
|
|
"step": 2155,
|
|
"valid_targets_mean": 5751.5,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 3.461908580593424,
|
|
"grad_norm": 0.6820518141435146,
|
|
"learning_rate": 2.3867508020735865e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15902569890022278,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3122.2,
|
|
"valid_targets_min": 1646
|
|
},
|
|
{
|
|
"epoch": 3.469927826784282,
|
|
"grad_norm": 0.7101366612068016,
|
|
"learning_rate": 2.3789067416213568e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23392510414123535,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3977.8,
|
|
"valid_targets_min": 1901
|
|
},
|
|
{
|
|
"epoch": 3.4779470729751405,
|
|
"grad_norm": 0.5959411507559745,
|
|
"learning_rate": 2.3710566310291733e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12113741040229797,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3409.2,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 3.4859663191659984,
|
|
"grad_norm": 0.825513561005537,
|
|
"learning_rate": 2.36320059564257e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18326623737812042,
|
|
"step": 2175,
|
|
"valid_targets_mean": 2507.5,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 3.4939855653568563,
|
|
"grad_norm": 0.5881807614207331,
|
|
"learning_rate": 2.3553387609016833e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17981299757957458,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4888.0,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 3.5020048115477147,
|
|
"grad_norm": 0.6376036084209341,
|
|
"learning_rate": 2.347471252339252e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13395290076732635,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4075.9,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 3.5100240577385726,
|
|
"grad_norm": 0.7048620960136164,
|
|
"learning_rate": 2.339598195578608e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15857946872711182,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3868.2,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 3.5180433039294305,
|
|
"grad_norm": 0.6021136042957389,
|
|
"learning_rate": 2.3317197163316757e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15142685174942017,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4533.5,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 3.5260625501202885,
|
|
"grad_norm": 0.6263357498807368,
|
|
"learning_rate": 2.3238359403969608e-05,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1935206949710846,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4888.2,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 3.534081796311147,
|
|
"grad_norm": 0.5754085355874449,
|
|
"learning_rate": 2.315946993657543e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1102745458483696,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3415.2,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 3.5421010425020047,
|
|
"grad_norm": 0.6423875097563647,
|
|
"learning_rate": 2.3080530020790673e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14007174968719482,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3485.9,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 3.550120288692863,
|
|
"grad_norm": 0.6750320761835421,
|
|
"learning_rate": 2.300154091707731e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.117277592420578,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2678.0,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 3.558139534883721,
|
|
"grad_norm": 0.6925001340893145,
|
|
"learning_rate": 2.2922503886682706e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12452782690525055,
|
|
"step": 2220,
|
|
"valid_targets_mean": 2616.5,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 3.566158781074579,
|
|
"grad_norm": 0.6234691562412563,
|
|
"learning_rate": 2.28434201916195e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13704489171504974,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3737.6,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 3.574178027265437,
|
|
"grad_norm": 0.5773390397694997,
|
|
"learning_rate": 2.2764291094645446e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.163415789604187,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4716.2,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 3.5821972734562952,
|
|
"grad_norm": 0.4634986618987104,
|
|
"learning_rate": 2.2685117859243223e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0939498022198677,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4529.6,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 3.590216519647153,
|
|
"grad_norm": 0.5818654879010436,
|
|
"learning_rate": 2.2605901749600312e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17287366092205048,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4227.2,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 3.598235765838011,
|
|
"grad_norm": 0.7271214583355132,
|
|
"learning_rate": 2.2526644030588764e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.159470796585083,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3116.0,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 3.6062550120288694,
|
|
"grad_norm": 0.6303736540938414,
|
|
"learning_rate": 2.2447345967745036e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2228410840034485,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4970.2,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 3.6142742582197274,
|
|
"grad_norm": 0.6640983802312498,
|
|
"learning_rate": 2.2368008827249756e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14799731969833374,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3004.9,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 3.6222935044105853,
|
|
"grad_norm": 0.6702618106749398,
|
|
"learning_rate": 2.228863387590752e-05,
|
|
"loss": 0.3103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17356805503368378,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3511.9,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 3.630312750601443,
|
|
"grad_norm": 0.6574381856226909,
|
|
"learning_rate": 2.2209222381126687e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13788670301437378,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3237.2,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 3.6383319967923016,
|
|
"grad_norm": 0.6413893082198431,
|
|
"learning_rate": 2.212977561089908e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12281788140535355,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3069.0,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 3.6463512429831595,
|
|
"grad_norm": 0.5679373919087424,
|
|
"learning_rate": 2.20502948337798e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14298275113105774,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4023.9,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 3.654370489174018,
|
|
"grad_norm": 0.6109697268123285,
|
|
"learning_rate": 2.1970781318866953e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20169465243816376,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4718.6,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.6623897353648758,
|
|
"grad_norm": 0.6979138557796742,
|
|
"learning_rate": 2.1891236335781363e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.215728297829628,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4399.8,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 3.6704089815557337,
|
|
"grad_norm": 0.5586803216815522,
|
|
"learning_rate": 2.1811661154646332e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15370899438858032,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4967.1,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 3.6784282277465916,
|
|
"grad_norm": 0.6642607846469272,
|
|
"learning_rate": 2.173205704606735e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1577022671699524,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3313.6,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 3.68644747393745,
|
|
"grad_norm": 0.47285164686405123,
|
|
"learning_rate": 2.1652425281111785e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12954489886760712,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5698.8,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 3.694466720128308,
|
|
"grad_norm": 0.616931928903888,
|
|
"learning_rate": 2.1572767131288607e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20086967945098877,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4502.0,
|
|
"valid_targets_min": 1995
|
|
},
|
|
{
|
|
"epoch": 3.7024859663191663,
|
|
"grad_norm": 0.6776466099415515,
|
|
"learning_rate": 2.1493083868528095e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14949862658977509,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2669.6,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 3.710505212510024,
|
|
"grad_norm": 0.5879844987206615,
|
|
"learning_rate": 2.141337676516151e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14676722884178162,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4159.5,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 3.718524458700882,
|
|
"grad_norm": 0.5316094998452582,
|
|
"learning_rate": 2.1333647093900772e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13722524046897888,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4758.6,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 3.72654370489174,
|
|
"grad_norm": 0.5384516550528783,
|
|
"learning_rate": 2.1253896127818175e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12726837396621704,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3886.1,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 3.7345629510825984,
|
|
"grad_norm": 0.5685626865932278,
|
|
"learning_rate": 2.1174125140326013e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14816422760486603,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5391.9,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 3.7425821972734563,
|
|
"grad_norm": 0.6959300369817977,
|
|
"learning_rate": 2.1094335405156277e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1291712373495102,
|
|
"step": 2335,
|
|
"valid_targets_mean": 2644.0,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 3.7506014434643142,
|
|
"grad_norm": 0.5721691342290822,
|
|
"learning_rate": 2.1014528196340316e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13201016187667847,
|
|
"step": 2340,
|
|
"valid_targets_mean": 4654.4,
|
|
"valid_targets_min": 1343
|
|
},
|
|
{
|
|
"epoch": 3.7586206896551726,
|
|
"grad_norm": 0.5308921374713382,
|
|
"learning_rate": 2.093470478818847e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13984020054340363,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4449.5,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 3.7666399358460305,
|
|
"grad_norm": 0.5537790122050558,
|
|
"learning_rate": 2.0854866455269756e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428103893995285,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4530.8,
|
|
"valid_targets_min": 1901
|
|
},
|
|
{
|
|
"epoch": 3.7746591820368884,
|
|
"grad_norm": 0.6191705377884588,
|
|
"learning_rate": 2.0775014472391496e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12096063047647476,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2235.4,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 3.7826784282277464,
|
|
"grad_norm": 0.7862911510988639,
|
|
"learning_rate": 2.0695150114578958e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2139546275138855,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4749.4,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 3.7906976744186047,
|
|
"grad_norm": 0.5933421737639701,
|
|
"learning_rate": 2.061527465705502e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19528578221797943,
|
|
"step": 2365,
|
|
"valid_targets_mean": 5284.5,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 3.7987169206094626,
|
|
"grad_norm": 0.7884471934753257,
|
|
"learning_rate": 2.0535389375219773e-05,
|
|
"loss": 0.3388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15380559861660004,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2415.1,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 3.806736166800321,
|
|
"grad_norm": 0.5628187445041986,
|
|
"learning_rate": 2.045549554463019e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16172266006469727,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5658.4,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 3.814755412991179,
|
|
"grad_norm": 0.7023321168544707,
|
|
"learning_rate": 2.0375594440979744e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1851782500743866,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3709.6,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 3.822774659182037,
|
|
"grad_norm": 0.49816800365299313,
|
|
"learning_rate": 2.0295687340078037e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13774600625038147,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5824.8,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 3.8307939053728948,
|
|
"grad_norm": 0.5434759259403616,
|
|
"learning_rate": 2.0215775517830437e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10222262889146805,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2964.1,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 3.838813151563753,
|
|
"grad_norm": 0.5582954462858974,
|
|
"learning_rate": 2.013586025021769e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1320442408323288,
|
|
"step": 2395,
|
|
"valid_targets_mean": 4672.4,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 3.846832397754611,
|
|
"grad_norm": 0.6219244880549153,
|
|
"learning_rate": 2.0055942813275564e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1533651351928711,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3792.5,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 3.854851643945469,
|
|
"grad_norm": 0.583133909428859,
|
|
"learning_rate": 1.9976024483074456e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13537335395812988,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3669.5,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 3.8628708901363273,
|
|
"grad_norm": 0.5710475515218163,
|
|
"learning_rate": 1.9896106535699025e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17371848225593567,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4763.5,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 3.8708901363271853,
|
|
"grad_norm": 0.7422087750459289,
|
|
"learning_rate": 1.9816190247227834e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20205745100975037,
|
|
"step": 2415,
|
|
"valid_targets_mean": 3689.9,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 3.878909382518043,
|
|
"grad_norm": 0.5735536579762645,
|
|
"learning_rate": 1.9736276893712954e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10921778529882431,
|
|
"step": 2420,
|
|
"valid_targets_mean": 2331.6,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 3.886928628708901,
|
|
"grad_norm": 0.8290520620887073,
|
|
"learning_rate": 1.9656367751159565e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16875413060188293,
|
|
"step": 2425,
|
|
"valid_targets_mean": 5420.2,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 3.8949478748997595,
|
|
"grad_norm": 0.6634118467936565,
|
|
"learning_rate": 1.957646409550565e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.162868469953537,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3375.5,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 3.9029671210906174,
|
|
"grad_norm": 0.5514424320141615,
|
|
"learning_rate": 1.9496567202601545e-05,
|
|
"loss": 0.3416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11645983159542084,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4063.9,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 3.9109863672814758,
|
|
"grad_norm": 0.7559637270619394,
|
|
"learning_rate": 1.9416678348189627e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13118937611579895,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2165.9,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 3.9190056134723337,
|
|
"grad_norm": 0.5721686798208526,
|
|
"learning_rate": 1.9336798807883907e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15008927881717682,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4456.1,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 3.9270248596631916,
|
|
"grad_norm": 0.6746770874611306,
|
|
"learning_rate": 1.9256929857149686e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1619241088628769,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4323.2,
|
|
"valid_targets_min": 360
|
|
},
|
|
{
|
|
"epoch": 3.9350441058540495,
|
|
"grad_norm": 0.4588461813998274,
|
|
"learning_rate": 1.9177072771283167e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12452046573162079,
|
|
"step": 2455,
|
|
"valid_targets_mean": 6464.2,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 3.943063352044908,
|
|
"grad_norm": 0.7611903980866798,
|
|
"learning_rate": 1.9097228825391087e-05,
|
|
"loss": 0.3234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12964674830436707,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2518.0,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 3.951082598235766,
|
|
"grad_norm": 0.5770073146825253,
|
|
"learning_rate": 1.9017399294370413e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18876230716705322,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5568.9,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 3.959101844426624,
|
|
"grad_norm": 0.5817238464553584,
|
|
"learning_rate": 1.893758545288791e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1423710584640503,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4295.6,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 3.967121090617482,
|
|
"grad_norm": 0.6317461077406834,
|
|
"learning_rate": 1.8857788575359847e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16364887356758118,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3607.5,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 3.97514033680834,
|
|
"grad_norm": 0.8827850222282876,
|
|
"learning_rate": 1.87780099359316e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.185146763920784,
|
|
"step": 2480,
|
|
"valid_targets_mean": 2134.6,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 3.983159582999198,
|
|
"grad_norm": 0.6645940380342159,
|
|
"learning_rate": 1.869825080845734e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10959568619728088,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3266.5,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 3.9911788291900563,
|
|
"grad_norm": 0.6186510386974092,
|
|
"learning_rate": 1.8618512466479686e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15738460421562195,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3458.0,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 3.999198075380914,
|
|
"grad_norm": 0.6807919040257233,
|
|
"learning_rate": 1.8538796183209373e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1197328045964241,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2409.2,
|
|
"valid_targets_min": 725
|
|
},
|
|
{
|
|
"epoch": 4.006415396952686,
|
|
"grad_norm": 0.7664612910352315,
|
|
"learning_rate": 1.845910323150491e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12675949931144714,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2815.8,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 4.014434643143544,
|
|
"grad_norm": 0.6249341429920836,
|
|
"learning_rate": 1.8379434883852255e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1351551115512848,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3788.1,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 4.022453889334402,
|
|
"grad_norm": 0.6169426406721958,
|
|
"learning_rate": 1.8299792412344524e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19960349798202515,
|
|
"step": 2510,
|
|
"valid_targets_mean": 5476.1,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 4.030473135525261,
|
|
"grad_norm": 0.6980740210909196,
|
|
"learning_rate": 1.8220177088661635e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375872790813446,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3389.1,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 4.038492381716119,
|
|
"grad_norm": 0.6326648876922993,
|
|
"learning_rate": 1.814059018405004e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2054217904806137,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4802.5,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 4.046511627906977,
|
|
"grad_norm": 0.5533039222556833,
|
|
"learning_rate": 1.806103296930243e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12456436455249786,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4612.8,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 4.054530874097835,
|
|
"grad_norm": 0.7546658667065802,
|
|
"learning_rate": 1.7981506714737392e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1207520067691803,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2114.4,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 4.062550120288693,
|
|
"grad_norm": 0.7903494861888394,
|
|
"learning_rate": 1.7902012690179188e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17907792329788208,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2663.4,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 4.070569366479551,
|
|
"grad_norm": 0.5392010413967019,
|
|
"learning_rate": 1.7822552164937437e-05,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1764984130859375,
|
|
"step": 2540,
|
|
"valid_targets_mean": 6216.5,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 4.078588612670409,
|
|
"grad_norm": 0.6218958225310146,
|
|
"learning_rate": 1.7743126407786873e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12865877151489258,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3260.6,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 4.086607858861267,
|
|
"grad_norm": 0.6988079855685614,
|
|
"learning_rate": 1.766373668694707e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18029667437076569,
|
|
"step": 2550,
|
|
"valid_targets_mean": 4027.5,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 4.094627105052125,
|
|
"grad_norm": 0.6850210709213693,
|
|
"learning_rate": 1.7584384270062195e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1642959713935852,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3666.5,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 4.102646351242983,
|
|
"grad_norm": 0.7292850754819737,
|
|
"learning_rate": 1.7505070424180772e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1267768293619156,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3086.2,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 4.110665597433841,
|
|
"grad_norm": 0.5967251682445334,
|
|
"learning_rate": 1.7425796415735454e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1499030888080597,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5102.1,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 4.118684843624699,
|
|
"grad_norm": 0.6571378352373214,
|
|
"learning_rate": 1.7346563510522783e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16292981803417206,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4743.0,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 4.126704089815557,
|
|
"grad_norm": 0.675497128387472,
|
|
"learning_rate": 1.7267372973682998e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1479819118976593,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3963.6,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 4.134723336006416,
|
|
"grad_norm": 0.7326592923300912,
|
|
"learning_rate": 1.7188226069679834e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19977575540542603,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3865.2,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 4.142742582197274,
|
|
"grad_norm": 0.6873671648413628,
|
|
"learning_rate": 1.7109124062280307e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14063113927841187,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4487.2,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 4.150761828388132,
|
|
"grad_norm": 0.6095576855334193,
|
|
"learning_rate": 1.7030068214534567e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23792943358421326,
|
|
"step": 2590,
|
|
"valid_targets_mean": 5420.4,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 4.1587810745789895,
|
|
"grad_norm": 0.5830732903880987,
|
|
"learning_rate": 1.695105978875572e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23750761151313782,
|
|
"step": 2595,
|
|
"valid_targets_mean": 6366.0,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 4.166800320769847,
|
|
"grad_norm": 1.3716635476926997,
|
|
"learning_rate": 1.687210004649965e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16633760929107666,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3487.9,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 4.174819566960705,
|
|
"grad_norm": 0.5938899390350082,
|
|
"learning_rate": 1.679319024854491e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11015024781227112,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3193.2,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 4.182838813151564,
|
|
"grad_norm": 0.6326797027989053,
|
|
"learning_rate": 1.6714331654872564e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11679890751838684,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3417.5,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 4.190858059342422,
|
|
"grad_norm": 0.7312342864342426,
|
|
"learning_rate": 1.663552552464609e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18835344910621643,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4148.8,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 4.19887730553328,
|
|
"grad_norm": 0.7173472704001893,
|
|
"learning_rate": 1.6556773116191257e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20450419187545776,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3396.1,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 4.206896551724138,
|
|
"grad_norm": 0.77329570396891,
|
|
"learning_rate": 1.647807568697603e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11599031090736389,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2351.6,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 4.214915797914996,
|
|
"grad_norm": 0.5845000243505154,
|
|
"learning_rate": 1.6399434493590524e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09674696624279022,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4645.5,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 4.222935044105854,
|
|
"grad_norm": 0.6011721501271884,
|
|
"learning_rate": 1.6320850791726884e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12517546117305756,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4551.5,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 4.230954290296712,
|
|
"grad_norm": 0.6399303295964973,
|
|
"learning_rate": 1.6242325836159304e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08618373423814774,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3065.9,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 4.2389735364875705,
|
|
"grad_norm": 0.6249080756872688,
|
|
"learning_rate": 1.6163860880723923e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1666812002658844,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5007.8,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 4.246992782678428,
|
|
"grad_norm": 0.7629237085664993,
|
|
"learning_rate": 1.6085457178298866e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20086871087551117,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3285.4,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 4.255012028869286,
|
|
"grad_norm": 0.6291560094829501,
|
|
"learning_rate": 1.6007115980784182e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14886760711669922,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3889.0,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 4.263031275060144,
|
|
"grad_norm": 0.592396380167956,
|
|
"learning_rate": 1.592883853908188e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16692480444908142,
|
|
"step": 2660,
|
|
"valid_targets_mean": 5258.0,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 4.271050521251002,
|
|
"grad_norm": 0.614901407527612,
|
|
"learning_rate": 1.585062610307599e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12212534993886948,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3828.4,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 4.27906976744186,
|
|
"grad_norm": 0.539175507420039,
|
|
"learning_rate": 1.5772479921612543e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1522282361984253,
|
|
"step": 2670,
|
|
"valid_targets_mean": 5116.0,
|
|
"valid_targets_min": 2192
|
|
},
|
|
{
|
|
"epoch": 4.287089013632719,
|
|
"grad_norm": 0.7880906493055246,
|
|
"learning_rate": 1.5694401242479677e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14232610166072845,
|
|
"step": 2675,
|
|
"valid_targets_mean": 2413.8,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 4.295108259823577,
|
|
"grad_norm": 0.7072732280589041,
|
|
"learning_rate": 1.5616391312387683e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12438453733921051,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2989.5,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 4.303127506014435,
|
|
"grad_norm": 0.6071341451929586,
|
|
"learning_rate": 1.5538451376949106e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16311699151992798,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3806.5,
|
|
"valid_targets_min": 2208
|
|
},
|
|
{
|
|
"epoch": 4.311146752205293,
|
|
"grad_norm": 0.6800851010260914,
|
|
"learning_rate": 1.5460582680658888e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16989164054393768,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3681.4,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 4.319165998396151,
|
|
"grad_norm": 0.48556758234567987,
|
|
"learning_rate": 1.5382786466874446e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14437130093574524,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4970.8,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 4.3271852445870085,
|
|
"grad_norm": 0.6577443016532359,
|
|
"learning_rate": 1.5305063977795856e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11139737069606781,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2757.2,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 4.335204490777867,
|
|
"grad_norm": 0.6182840648525525,
|
|
"learning_rate": 1.5227416454445995e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12627378106117249,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3772.1,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 4.343223736968725,
|
|
"grad_norm": 0.6243168028194502,
|
|
"learning_rate": 1.5149845136650748e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12724119424819946,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3995.0,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 4.351242983159583,
|
|
"grad_norm": 0.6417026983652291,
|
|
"learning_rate": 1.5072351263019177e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1036786288022995,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3606.6,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 4.359262229350441,
|
|
"grad_norm": 0.844392440282072,
|
|
"learning_rate": 1.4994936070923784e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32359081506729126,
|
|
"step": 2720,
|
|
"valid_targets_mean": 4454.1,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 4.367281475541299,
|
|
"grad_norm": 0.7084510197391164,
|
|
"learning_rate": 1.4917600796480745e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1323051005601883,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2887.8,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 4.375300721732157,
|
|
"grad_norm": 0.6594726779847885,
|
|
"learning_rate": 1.4840346674530122e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1922025829553604,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4383.1,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 4.383319967923015,
|
|
"grad_norm": 0.6820193540630956,
|
|
"learning_rate": 1.4763174938616232e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14726585149765015,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3319.2,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 4.391339214113874,
|
|
"grad_norm": 0.6015328925160687,
|
|
"learning_rate": 1.4686086820967865e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.162798672914505,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4476.4,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 4.3993584603047315,
|
|
"grad_norm": 0.8278448591901193,
|
|
"learning_rate": 1.460908355247868e-05,
|
|
"loss": 0.3411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14701534807682037,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2546.6,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 4.4073777064955895,
|
|
"grad_norm": 0.7021457185806277,
|
|
"learning_rate": 1.4532166362687507e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25253719091415405,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4316.8,
|
|
"valid_targets_min": 2207
|
|
},
|
|
{
|
|
"epoch": 4.415396952686447,
|
|
"grad_norm": 0.7067776926843179,
|
|
"learning_rate": 1.445533647975871e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16636572778224945,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3728.9,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 4.423416198877305,
|
|
"grad_norm": 0.553065216859592,
|
|
"learning_rate": 1.437859513046263e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22052179276943207,
|
|
"step": 2760,
|
|
"valid_targets_mean": 7221.8,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 4.431435445068163,
|
|
"grad_norm": 0.7333787563498618,
|
|
"learning_rate": 1.4301943540155914e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14278584718704224,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3379.8,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 4.439454691259022,
|
|
"grad_norm": 0.7014878612639377,
|
|
"learning_rate": 1.4225382932762033e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12255248427391052,
|
|
"step": 2770,
|
|
"valid_targets_mean": 2985.6,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 4.44747393744988,
|
|
"grad_norm": 0.5983108500408503,
|
|
"learning_rate": 1.4148914530751681e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15522173047065735,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4806.6,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 4.455493183640738,
|
|
"grad_norm": 0.6016213588313895,
|
|
"learning_rate": 1.4072539555123292e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11137634515762329,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3612.9,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 4.463512429831596,
|
|
"grad_norm": 0.708603157173453,
|
|
"learning_rate": 1.3996259225383514e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19193142652511597,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3475.4,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 4.471531676022454,
|
|
"grad_norm": 0.6595079377732089,
|
|
"learning_rate": 1.3920074759527737e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14381149411201477,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4244.5,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 4.479550922213312,
|
|
"grad_norm": 0.682347199348743,
|
|
"learning_rate": 1.3843987374020689e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11310821771621704,
|
|
"step": 2795,
|
|
"valid_targets_mean": 2818.0,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 4.4875701684041704,
|
|
"grad_norm": 0.673708092784138,
|
|
"learning_rate": 1.376799828377696e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609186977148056,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4392.5,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 4.495589414595028,
|
|
"grad_norm": 0.7701654224555916,
|
|
"learning_rate": 1.3692108702141642e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16439709067344666,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3979.4,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 4.503608660785886,
|
|
"grad_norm": 0.747752295214148,
|
|
"learning_rate": 1.361631984087091e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19700704514980316,
|
|
"step": 2810,
|
|
"valid_targets_mean": 4090.0,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 4.511627906976744,
|
|
"grad_norm": 0.6477208313095743,
|
|
"learning_rate": 1.354063291011273e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16184011101722717,
|
|
"step": 2815,
|
|
"valid_targets_mean": 4613.4,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 4.519647153167602,
|
|
"grad_norm": 0.6733680816426071,
|
|
"learning_rate": 1.3465049118387486e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10472169518470764,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2796.8,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 4.52766639935846,
|
|
"grad_norm": 0.6424194689152843,
|
|
"learning_rate": 1.3389569672568707e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13144317269325256,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3813.0,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 4.535685645549318,
|
|
"grad_norm": 0.711628727877956,
|
|
"learning_rate": 1.331419577786381e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445176601409912,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3091.0,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 4.543704891740177,
|
|
"grad_norm": 0.5858426317676418,
|
|
"learning_rate": 1.3238928637794816e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1151544451713562,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3128.2,
|
|
"valid_targets_min": 1702
|
|
},
|
|
{
|
|
"epoch": 4.551724137931035,
|
|
"grad_norm": 0.5285642967965074,
|
|
"learning_rate": 1.3163769454179183e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11126041412353516,
|
|
"step": 2840,
|
|
"valid_targets_mean": 4345.5,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 4.559743384121893,
|
|
"grad_norm": 0.6880573162367785,
|
|
"learning_rate": 1.3088719427110552e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17842459678649902,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3686.1,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 4.5677626303127505,
|
|
"grad_norm": 0.5738068890763433,
|
|
"learning_rate": 1.3013779754939666e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08108074963092804,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3041.1,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 4.5757818765036085,
|
|
"grad_norm": 0.6727608318013361,
|
|
"learning_rate": 1.2938951634255164e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14837145805358887,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3484.1,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 4.583801122694466,
|
|
"grad_norm": 0.7491945483704048,
|
|
"learning_rate": 1.2864236259864495e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17192235589027405,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3492.8,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 4.591820368885324,
|
|
"grad_norm": 0.7333536011748594,
|
|
"learning_rate": 1.2789634824774887e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12326601147651672,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2463.1,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 4.599839615076183,
|
|
"grad_norm": 0.6618282090733283,
|
|
"learning_rate": 1.2715148520174206e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13372093439102173,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3236.8,
|
|
"valid_targets_min": 1867
|
|
},
|
|
{
|
|
"epoch": 4.607858861267041,
|
|
"grad_norm": 0.5773713859304231,
|
|
"learning_rate": 1.2640778535412036e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293078362941742,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4062.0,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 4.615878107457899,
|
|
"grad_norm": 0.5752357451602147,
|
|
"learning_rate": 1.2566526057980608e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14979536831378937,
|
|
"step": 2880,
|
|
"valid_targets_mean": 6125.9,
|
|
"valid_targets_min": 1865
|
|
},
|
|
{
|
|
"epoch": 4.623897353648757,
|
|
"grad_norm": 0.718233009091973,
|
|
"learning_rate": 1.2492392273495879e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15855081379413605,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3574.5,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 4.631916599839615,
|
|
"grad_norm": 0.6385022169911636,
|
|
"learning_rate": 1.2418378365678612e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11923573911190033,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3496.6,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 4.639935846030474,
|
|
"grad_norm": 0.6741105564550025,
|
|
"learning_rate": 1.234448551633542e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13079974055290222,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3383.1,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 4.6479550922213315,
|
|
"grad_norm": 0.6731032246078844,
|
|
"learning_rate": 1.2270714905339969e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13784664869308472,
|
|
"step": 2900,
|
|
"valid_targets_mean": 3501.4,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 4.655974338412189,
|
|
"grad_norm": 0.5459817806212016,
|
|
"learning_rate": 1.2197067710614075e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1550457924604416,
|
|
"step": 2905,
|
|
"valid_targets_mean": 6346.2,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 4.663993584603047,
|
|
"grad_norm": 0.6580847788724664,
|
|
"learning_rate": 1.2123545108108943e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14394521713256836,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3643.9,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 4.672012830793905,
|
|
"grad_norm": 0.5599219227017097,
|
|
"learning_rate": 1.2050148271786348e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20623619854450226,
|
|
"step": 2915,
|
|
"valid_targets_mean": 5556.8,
|
|
"valid_targets_min": 2400
|
|
},
|
|
{
|
|
"epoch": 4.680032076984763,
|
|
"grad_norm": 0.664894094376875,
|
|
"learning_rate": 1.1976878373599928e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09952618926763535,
|
|
"step": 2920,
|
|
"valid_targets_mean": 2577.0,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 4.688051323175621,
|
|
"grad_norm": 0.7111094446633587,
|
|
"learning_rate": 1.1903736583476441e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12671411037445068,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3261.9,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 4.69607056936648,
|
|
"grad_norm": 0.5847651766990055,
|
|
"learning_rate": 1.1830724069297106e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1780378818511963,
|
|
"step": 2930,
|
|
"valid_targets_mean": 5556.0,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 4.704089815557338,
|
|
"grad_norm": 0.5177468999912241,
|
|
"learning_rate": 1.1757841996878957e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16140317916870117,
|
|
"step": 2935,
|
|
"valid_targets_mean": 7164.5,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 4.712109061748196,
|
|
"grad_norm": 0.6591016413495835,
|
|
"learning_rate": 1.1685091529956187e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17742176353931427,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4117.8,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 4.720128307939054,
|
|
"grad_norm": 0.5580423423386793,
|
|
"learning_rate": 1.161247383016163e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1325514018535614,
|
|
"step": 2945,
|
|
"valid_targets_mean": 4791.0,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 4.728147554129912,
|
|
"grad_norm": 0.698865733709361,
|
|
"learning_rate": 1.1539990057008166e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18542318046092987,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4443.1,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 4.7361668003207695,
|
|
"grad_norm": 0.7173027421251956,
|
|
"learning_rate": 1.1467641367870198e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12904459238052368,
|
|
"step": 2955,
|
|
"valid_targets_mean": 2821.5,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 4.7441860465116275,
|
|
"grad_norm": 0.6104913487741482,
|
|
"learning_rate": 1.1395428917965239e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12366154789924622,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3687.4,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 4.752205292702486,
|
|
"grad_norm": 0.6573756048140178,
|
|
"learning_rate": 1.1323353860335385e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14900854229927063,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3873.4,
|
|
"valid_targets_min": 1095
|
|
},
|
|
{
|
|
"epoch": 4.760224538893344,
|
|
"grad_norm": 0.5454178642416636,
|
|
"learning_rate": 1.1251417345828962e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1441696435213089,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4109.6,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 4.768243785084202,
|
|
"grad_norm": 1.1921633161653373,
|
|
"learning_rate": 1.1179620523082107e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18217259645462036,
|
|
"step": 2975,
|
|
"valid_targets_mean": 5515.0,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 4.77626303127506,
|
|
"grad_norm": 0.7839193007975936,
|
|
"learning_rate": 1.110796453850047e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15419209003448486,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3332.9,
|
|
"valid_targets_min": 897
|
|
},
|
|
{
|
|
"epoch": 4.784282277465918,
|
|
"grad_norm": 0.5924502741680564,
|
|
"learning_rate": 1.1036450536240877e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17344138026237488,
|
|
"step": 2985,
|
|
"valid_targets_mean": 4609.0,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 4.792301523656777,
|
|
"grad_norm": 0.6375960120782795,
|
|
"learning_rate": 1.0965079658193068e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20021280646324158,
|
|
"step": 2990,
|
|
"valid_targets_mean": 5160.4,
|
|
"valid_targets_min": 2007
|
|
},
|
|
{
|
|
"epoch": 4.800320769847635,
|
|
"grad_norm": 0.7333265185533147,
|
|
"learning_rate": 1.0893853043961475e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13082224130630493,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2873.8,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 4.808340016038493,
|
|
"grad_norm": 0.8117410325104224,
|
|
"learning_rate": 1.0822771830847011e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903451383113861,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2380.4,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 4.8163592622293505,
|
|
"grad_norm": 0.6321140084556661,
|
|
"learning_rate": 1.0751837153828926e-05,
|
|
"loss": 0.3312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13263818621635437,
|
|
"step": 3005,
|
|
"valid_targets_mean": 3883.4,
|
|
"valid_targets_min": 1728
|
|
},
|
|
{
|
|
"epoch": 4.824378508420208,
|
|
"grad_norm": 0.5936225667367856,
|
|
"learning_rate": 1.0681050145546666e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10457971692085266,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2851.5,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 4.832397754611066,
|
|
"grad_norm": 0.747286134721582,
|
|
"learning_rate": 1.0610411936281801e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15670940279960632,
|
|
"step": 3015,
|
|
"valid_targets_mean": 2873.4,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 4.840417000801924,
|
|
"grad_norm": 0.5763464609476979,
|
|
"learning_rate": 1.0539923653939978e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13804122805595398,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4552.0,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 4.848436246992783,
|
|
"grad_norm": 0.6601870709466612,
|
|
"learning_rate": 1.0469586424032903e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13651859760284424,
|
|
"step": 3025,
|
|
"valid_targets_mean": 3073.1,
|
|
"valid_targets_min": 1515
|
|
},
|
|
{
|
|
"epoch": 4.856455493183641,
|
|
"grad_norm": 0.6137610848624081,
|
|
"learning_rate": 1.0399401369660369e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1242741197347641,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3265.4,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 4.864474739374499,
|
|
"grad_norm": 0.5798299445809169,
|
|
"learning_rate": 1.0329369611492334e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1235361248254776,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4151.0,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 4.872493985565357,
|
|
"grad_norm": 0.7065656337789246,
|
|
"learning_rate": 1.0259492267751022e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14881673455238342,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3253.1,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 4.880513231756215,
|
|
"grad_norm": 0.7084555430039885,
|
|
"learning_rate": 1.0189770454193052e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13089048862457275,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3153.9,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 4.888532477947073,
|
|
"grad_norm": 0.6319452934618477,
|
|
"learning_rate": 1.0120205284091673e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694340944290161,
|
|
"step": 3050,
|
|
"valid_targets_mean": 5772.9,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 4.896551724137931,
|
|
"grad_norm": 0.7347361203826404,
|
|
"learning_rate": 1.0050797868218907e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13853907585144043,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2785.8,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 4.904570970328789,
|
|
"grad_norm": 0.6858744703319833,
|
|
"learning_rate": 9.981549314827876e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10932730883359909,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3593.2,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 4.912590216519647,
|
|
"grad_norm": 0.7463965117098917,
|
|
"learning_rate": 9.912460729635097e-06,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12026022374629974,
|
|
"step": 3065,
|
|
"valid_targets_mean": 2690.9,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 4.920609462710505,
|
|
"grad_norm": 0.6255584551399795,
|
|
"learning_rate": 9.843533215802796e-06,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12835003435611725,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4192.9,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 4.928628708901363,
|
|
"grad_norm": 0.6195598668042773,
|
|
"learning_rate": 9.774767873921357e-06,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15083155035972595,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4717.8,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 4.936647955092221,
|
|
"grad_norm": 0.6716462013810224,
|
|
"learning_rate": 9.706165801991651e-06,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14034923911094666,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3910.9,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 4.944667201283079,
|
|
"grad_norm": 0.6569835195641757,
|
|
"learning_rate": 9.637728095407593e-06,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12612038850784302,
|
|
"step": 3085,
|
|
"valid_targets_mean": 3746.1,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 4.952686447473938,
|
|
"grad_norm": 0.63238747810288,
|
|
"learning_rate": 9.56945584693861e-06,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1501215100288391,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4018.1,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 4.960705693664796,
|
|
"grad_norm": 0.7230137708810255,
|
|
"learning_rate": 9.501350146712193e-06,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24168968200683594,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4028.8,
|
|
"valid_targets_min": 1868
|
|
},
|
|
{
|
|
"epoch": 4.968724939855654,
|
|
"grad_norm": 0.6319990610308012,
|
|
"learning_rate": 9.433412082196527e-06,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12605804204940796,
|
|
"step": 3100,
|
|
"valid_targets_mean": 3486.5,
|
|
"valid_targets_min": 2160
|
|
},
|
|
{
|
|
"epoch": 4.976744186046512,
|
|
"grad_norm": 0.5708536969900101,
|
|
"learning_rate": 9.365642738183044e-06,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15920937061309814,
|
|
"step": 3105,
|
|
"valid_targets_mean": 4990.1,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 4.9847634322373695,
|
|
"grad_norm": 0.5821919214576043,
|
|
"learning_rate": 9.298043196769217e-06,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15933553874492645,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3565.8,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 4.992782678428227,
|
|
"grad_norm": 0.6786847357729681,
|
|
"learning_rate": 9.230614537341167e-06,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11117031425237656,
|
|
"step": 3115,
|
|
"valid_targets_mean": 2432.1,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 1.193829679462879,
|
|
"learning_rate": 9.163357836556498e-06,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2876368761062622,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2031.5,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 5.008019246190858,
|
|
"grad_norm": 0.6109747091355935,
|
|
"learning_rate": 9.096274168327122e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12701627612113953,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4074.0,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 5.016038492381716,
|
|
"grad_norm": 0.6942276035599934,
|
|
"learning_rate": 9.029364603802017e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15716278553009033,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4931.4,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 5.024057738572574,
|
|
"grad_norm": 0.759887487112266,
|
|
"learning_rate": 8.962630211350248e-06,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14439627528190613,
|
|
"step": 3135,
|
|
"valid_targets_mean": 2814.2,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 5.032076984763433,
|
|
"grad_norm": 0.599795720615974,
|
|
"learning_rate": 8.89607205654378e-06,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09469808638095856,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3236.1,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 5.0400962309542905,
|
|
"grad_norm": 0.6440035501543131,
|
|
"learning_rate": 8.829691202140591e-06,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12207989394664764,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3591.8,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 5.048115477145148,
|
|
"grad_norm": 0.5238916332457338,
|
|
"learning_rate": 8.763488708067604e-06,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19291779398918152,
|
|
"step": 3150,
|
|
"valid_targets_mean": 6842.5,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 5.056134723336006,
|
|
"grad_norm": 0.627305644963957,
|
|
"learning_rate": 8.69746563140379e-06,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15898439288139343,
|
|
"step": 3155,
|
|
"valid_targets_mean": 5154.2,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 5.064153969526864,
|
|
"grad_norm": 0.6325013348334049,
|
|
"learning_rate": 8.631623026363331e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17437998950481415,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4743.2,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 5.072173215717722,
|
|
"grad_norm": 0.5083461337913077,
|
|
"learning_rate": 8.56596194427873e-06,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10465358197689056,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4556.4,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 5.080192461908581,
|
|
"grad_norm": 0.6420578870001568,
|
|
"learning_rate": 8.500483433584054e-06,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14743009209632874,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4409.9,
|
|
"valid_targets_min": 2802
|
|
},
|
|
{
|
|
"epoch": 5.088211708099439,
|
|
"grad_norm": 0.5800945332781065,
|
|
"learning_rate": 8.435188539798187e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1412481814622879,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5302.6,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 5.096230954290297,
|
|
"grad_norm": 0.5487995493169565,
|
|
"learning_rate": 8.370078305508136e-06,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12816378474235535,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3939.6,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 5.104250200481155,
|
|
"grad_norm": 0.7209217869168146,
|
|
"learning_rate": 8.305153770352384e-06,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19583243131637573,
|
|
"step": 3185,
|
|
"valid_targets_mean": 4046.2,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 5.112269446672013,
|
|
"grad_norm": 0.9153844934361973,
|
|
"learning_rate": 8.240415971004285e-06,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16246981918811798,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3443.2,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 5.120288692862871,
|
|
"grad_norm": 0.7035173116926543,
|
|
"learning_rate": 8.175865941155525e-06,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16265368461608887,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3864.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 5.1283079390537285,
|
|
"grad_norm": 0.5779187110976927,
|
|
"learning_rate": 8.111504711499598e-06,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18262943625450134,
|
|
"step": 3200,
|
|
"valid_targets_mean": 6109.2,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 5.136327185244587,
|
|
"grad_norm": 0.7399562834821453,
|
|
"learning_rate": 8.04733330971536e-06,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16191628575325012,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3651.8,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 5.144346431435445,
|
|
"grad_norm": 0.5525253134787564,
|
|
"learning_rate": 7.983352760450618e-06,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0911126509308815,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3176.1,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 5.152365677626303,
|
|
"grad_norm": 0.5882943311329101,
|
|
"learning_rate": 7.919564085305768e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1868501901626587,
|
|
"step": 3215,
|
|
"valid_targets_mean": 6299.4,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 5.160384923817161,
|
|
"grad_norm": 0.6106884984641533,
|
|
"learning_rate": 7.855968302817487e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12039339542388916,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3778.2,
|
|
"valid_targets_min": 2175
|
|
},
|
|
{
|
|
"epoch": 5.168404170008019,
|
|
"grad_norm": 0.7089376796426653,
|
|
"learning_rate": 7.792566428442456e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11271081864833832,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2812.2,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 5.176423416198877,
|
|
"grad_norm": 0.5888497319322781,
|
|
"learning_rate": 7.729359474541168e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09802419692277908,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3390.8,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 5.184442662389736,
|
|
"grad_norm": 0.5942736764463901,
|
|
"learning_rate": 7.666348450361737e-06,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09806996583938599,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4160.8,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 5.192461908580594,
|
|
"grad_norm": 0.6959182547618397,
|
|
"learning_rate": 7.60353436202381e-06,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13760900497436523,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3679.8,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 5.200481154771452,
|
|
"grad_norm": 0.7867712939025543,
|
|
"learning_rate": 7.540918212502479e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1103554517030716,
|
|
"step": 3245,
|
|
"valid_targets_mean": 2345.2,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 5.2085004009623095,
|
|
"grad_norm": 0.6692820136265186,
|
|
"learning_rate": 7.478501001612281e-06,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14655274152755737,
|
|
"step": 3250,
|
|
"valid_targets_mean": 4338.4,
|
|
"valid_targets_min": 2237
|
|
},
|
|
{
|
|
"epoch": 5.216519647153167,
|
|
"grad_norm": 0.7032072972079363,
|
|
"learning_rate": 7.416283725991229e-06,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12942096590995789,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3271.1,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 5.224538893344025,
|
|
"grad_norm": 0.7202212720662927,
|
|
"learning_rate": 7.354267379084896e-06,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10814294219017029,
|
|
"step": 3260,
|
|
"valid_targets_mean": 2909.4,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 5.232558139534884,
|
|
"grad_norm": 0.6622834667881968,
|
|
"learning_rate": 7.292452951130548e-06,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12850156426429749,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3660.1,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 5.240577385725742,
|
|
"grad_norm": 0.6598360930022297,
|
|
"learning_rate": 7.230841429141347e-06,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15915730595588684,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4591.1,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 5.2485966319166,
|
|
"grad_norm": 0.5650035570886325,
|
|
"learning_rate": 7.169433796890595e-06,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1909146010875702,
|
|
"step": 3275,
|
|
"valid_targets_mean": 5763.5,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 5.256615878107458,
|
|
"grad_norm": 0.7786393273005862,
|
|
"learning_rate": 7.108231034895976e-06,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12871766090393066,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3066.1,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 5.264635124298316,
|
|
"grad_norm": 0.5847129918611397,
|
|
"learning_rate": 7.047234120403972e-06,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15970800817012787,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4016.0,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 5.272654370489174,
|
|
"grad_norm": 0.7449037107821327,
|
|
"learning_rate": 6.986444027374211e-06,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12065094709396362,
|
|
"step": 3290,
|
|
"valid_targets_mean": 2694.6,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 5.280673616680032,
|
|
"grad_norm": 0.7299928241856165,
|
|
"learning_rate": 6.925861726463919e-06,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10395687818527222,
|
|
"step": 3295,
|
|
"valid_targets_mean": 2786.4,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 5.2886928628708905,
|
|
"grad_norm": 0.5820419035629211,
|
|
"learning_rate": 6.865488185012464e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16662782430648804,
|
|
"step": 3300,
|
|
"valid_targets_mean": 5864.4,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 5.296712109061748,
|
|
"grad_norm": 0.5820333959976953,
|
|
"learning_rate": 6.805324367025825e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09692417085170746,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4125.8,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 5.304731355252606,
|
|
"grad_norm": 0.5958545841346957,
|
|
"learning_rate": 6.745371233161309e-06,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19019240140914917,
|
|
"step": 3310,
|
|
"valid_targets_mean": 6345.2,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 5.312750601443464,
|
|
"grad_norm": 0.7556127215227147,
|
|
"learning_rate": 6.685629740712103e-06,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09946641325950623,
|
|
"step": 3315,
|
|
"valid_targets_mean": 2636.1,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 5.320769847634322,
|
|
"grad_norm": 0.5877343531269578,
|
|
"learning_rate": 6.6261008435920605e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19391608238220215,
|
|
"step": 3320,
|
|
"valid_targets_mean": 6642.2,
|
|
"valid_targets_min": 3260
|
|
},
|
|
{
|
|
"epoch": 5.32878909382518,
|
|
"grad_norm": 0.8406015809943042,
|
|
"learning_rate": 6.566785492320471e-06,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1828058362007141,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3091.1,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 5.336808340016039,
|
|
"grad_norm": 0.6630809875941652,
|
|
"learning_rate": 6.507684634006815e-06,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11142952740192413,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3407.4,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 5.344827586206897,
|
|
"grad_norm": 0.6972165813383657,
|
|
"learning_rate": 6.448799212335734e-06,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1595240831375122,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4169.5,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 5.352846832397755,
|
|
"grad_norm": 0.6323461664571374,
|
|
"learning_rate": 6.390130167551869e-06,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10348061472177505,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3504.0,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 5.360866078588613,
|
|
"grad_norm": 0.5607270131280743,
|
|
"learning_rate": 6.331678436444939e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1412566602230072,
|
|
"step": 3345,
|
|
"valid_targets_mean": 5841.9,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 5.368885324779471,
|
|
"grad_norm": 0.8274449782443676,
|
|
"learning_rate": 6.273444952334713e-06,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1447286307811737,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4034.0,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 5.3769045709703285,
|
|
"grad_norm": 0.6736548127167691,
|
|
"learning_rate": 6.2154306450561175e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24714772403240204,
|
|
"step": 3355,
|
|
"valid_targets_mean": 6792.6,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 5.384923817161187,
|
|
"grad_norm": 0.76791519754358,
|
|
"learning_rate": 6.157636440944445e-06,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18893024325370789,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3262.1,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 5.392943063352045,
|
|
"grad_norm": 0.6506598342927086,
|
|
"learning_rate": 6.100063262820474e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17875610291957855,
|
|
"step": 3365,
|
|
"valid_targets_mean": 5643.9,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 5.400962309542903,
|
|
"grad_norm": 0.535214932942372,
|
|
"learning_rate": 6.0427120299758236e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12076449394226074,
|
|
"step": 3370,
|
|
"valid_targets_mean": 6959.5,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 5.408981555733761,
|
|
"grad_norm": 0.6584811079234478,
|
|
"learning_rate": 5.985583658158212e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12384140491485596,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3983.8,
|
|
"valid_targets_min": 2305
|
|
},
|
|
{
|
|
"epoch": 5.417000801924619,
|
|
"grad_norm": 0.677555957298812,
|
|
"learning_rate": 5.928679059556852e-06,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12068624049425125,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3473.6,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 5.425020048115477,
|
|
"grad_norm": 1.0135790155597735,
|
|
"learning_rate": 5.871999142787908e-06,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13749045133590698,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2260.4,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 5.433039294306335,
|
|
"grad_norm": 0.8039034351308703,
|
|
"learning_rate": 5.815544812879936e-06,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13829991221427917,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2838.2,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 5.441058540497194,
|
|
"grad_norm": 0.641324943268092,
|
|
"learning_rate": 5.759316971259503e-06,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09990415722131729,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3651.2,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 5.4490777866880515,
|
|
"grad_norm": 0.7607138645745484,
|
|
"learning_rate": 5.703316515736734e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13912031054496765,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2623.8,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 5.4570970328789095,
|
|
"grad_norm": 0.6774223564603659,
|
|
"learning_rate": 5.647544340491007e-06,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12906160950660706,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3493.5,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 5.465116279069767,
|
|
"grad_norm": 3.1286340870646976,
|
|
"learning_rate": 5.592001336056659e-06,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11892290413379669,
|
|
"step": 3410,
|
|
"valid_targets_mean": 2599.0,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 5.473135525260625,
|
|
"grad_norm": 0.8035364009185652,
|
|
"learning_rate": 5.536688389308782e-06,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1144896149635315,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2427.1,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 5.481154771451483,
|
|
"grad_norm": 0.7442012893993346,
|
|
"learning_rate": 5.4816063834490496e-06,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12948277592658997,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2744.4,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 5.489174017642342,
|
|
"grad_norm": 0.7023135477533802,
|
|
"learning_rate": 5.426756197991625e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.145607128739357,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4299.5,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 5.4971932638332,
|
|
"grad_norm": 0.6787099010377489,
|
|
"learning_rate": 5.372138708749104e-06,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14306356012821198,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4279.4,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 5.505212510024058,
|
|
"grad_norm": 0.7198686312973578,
|
|
"learning_rate": 5.3177547878185436e-06,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11792582273483276,
|
|
"step": 3435,
|
|
"valid_targets_mean": 2626.5,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 5.513231756214916,
|
|
"grad_norm": 0.6788117637567427,
|
|
"learning_rate": 5.263605303567532e-06,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11016759276390076,
|
|
"step": 3440,
|
|
"valid_targets_mean": 3527.2,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 5.521251002405774,
|
|
"grad_norm": 0.5905284944796608,
|
|
"learning_rate": 5.20969112062032e-06,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11519446969032288,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4631.0,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 5.529270248596632,
|
|
"grad_norm": 0.6080932302389219,
|
|
"learning_rate": 5.156013099844017e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11113663017749786,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3602.1,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 5.53728949478749,
|
|
"grad_norm": 0.7288808414312432,
|
|
"learning_rate": 5.1025720983348544e-06,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15324431657791138,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3440.1,
|
|
"valid_targets_min": 1892
|
|
},
|
|
{
|
|
"epoch": 5.545308740978348,
|
|
"grad_norm": 0.7316756200535461,
|
|
"learning_rate": 5.049368969404484e-06,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18235455453395844,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3642.9,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 5.553327987169206,
|
|
"grad_norm": 0.6966803567024507,
|
|
"learning_rate": 4.99640456256636e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08969349414110184,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3088.2,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 5.561347233360064,
|
|
"grad_norm": 0.7596935347056445,
|
|
"learning_rate": 4.9436797235221814e-06,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15548765659332275,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4137.1,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 5.569366479550922,
|
|
"grad_norm": 0.6598274436780546,
|
|
"learning_rate": 4.891195294148376e-06,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1944895088672638,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4596.1,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 5.57738572574178,
|
|
"grad_norm": 0.7378986972846266,
|
|
"learning_rate": 4.838952112482671e-06,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16218118369579315,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3069.6,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 5.585404971932638,
|
|
"grad_norm": 0.5945094620185116,
|
|
"learning_rate": 4.786951012710699e-06,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11682992428541183,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4199.4,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 5.593424218123497,
|
|
"grad_norm": 0.6940384063458671,
|
|
"learning_rate": 4.735192825152686e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15601713955402374,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3884.2,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 5.601443464314355,
|
|
"grad_norm": 0.6599876260544516,
|
|
"learning_rate": 4.683678376250189e-06,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18536148965358734,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4424.0,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 5.609462710505213,
|
|
"grad_norm": 0.7201106735919834,
|
|
"learning_rate": 4.6324084885529086e-06,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21292556822299957,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4139.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 5.6174819566960705,
|
|
"grad_norm": 0.5875396290488495,
|
|
"learning_rate": 4.581383980705538e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15536782145500183,
|
|
"step": 3505,
|
|
"valid_targets_mean": 6571.2,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 5.6255012028869285,
|
|
"grad_norm": 0.6143731813541532,
|
|
"learning_rate": 4.530605667434727e-06,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11163066327571869,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3917.9,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 5.633520449077786,
|
|
"grad_norm": 0.9096910759644121,
|
|
"learning_rate": 4.480074359536013e-06,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14102259278297424,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2312.6,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 5.641539695268644,
|
|
"grad_norm": 0.7248396656648093,
|
|
"learning_rate": 4.429790863860934e-06,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17408156394958496,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3632.6,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 5.649558941459503,
|
|
"grad_norm": 0.7149084191703611,
|
|
"learning_rate": 4.3797559833041146e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14481118321418762,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4124.4,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 5.657578187650361,
|
|
"grad_norm": 0.6821009433950561,
|
|
"learning_rate": 4.329970516790447e-06,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12271390855312347,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3484.8,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 5.665597433841219,
|
|
"grad_norm": 0.8982050367879706,
|
|
"learning_rate": 4.280435259262363e-06,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10625463724136353,
|
|
"step": 3535,
|
|
"valid_targets_mean": 1793.9,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 5.673616680032077,
|
|
"grad_norm": 0.8223177683921912,
|
|
"learning_rate": 4.231151001667077e-06,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15554620325565338,
|
|
"step": 3540,
|
|
"valid_targets_mean": 6827.8,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 5.681635926222935,
|
|
"grad_norm": 0.643420527087105,
|
|
"learning_rate": 4.182118530944044e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14312048256397247,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4710.2,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 5.689655172413794,
|
|
"grad_norm": 0.6471702234310968,
|
|
"learning_rate": 4.133338630012307e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15536224842071533,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5084.6,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 5.6976744186046515,
|
|
"grad_norm": 0.5528436647261742,
|
|
"learning_rate": 4.0848120777580554e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12030863016843796,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5179.9,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 5.705693664795509,
|
|
"grad_norm": 0.5396611921495026,
|
|
"learning_rate": 4.036539649022182e-06,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2417977899312973,
|
|
"step": 3560,
|
|
"valid_targets_mean": 7620.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 5.713712910986367,
|
|
"grad_norm": 0.650216464185914,
|
|
"learning_rate": 3.988522114587865e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09948883205652237,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3263.5,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 5.721732157177225,
|
|
"grad_norm": 0.5381622998244979,
|
|
"learning_rate": 3.940760241168331e-06,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20080620050430298,
|
|
"step": 3570,
|
|
"valid_targets_mean": 8185.6,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 5.729751403368083,
|
|
"grad_norm": 0.7212188608624658,
|
|
"learning_rate": 3.893254791394541e-06,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20672596991062164,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4884.8,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 5.737770649558941,
|
|
"grad_norm": 0.669092022617526,
|
|
"learning_rate": 3.846006523803074e-06,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1462830901145935,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3797.6,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 5.7457898957498,
|
|
"grad_norm": 0.8029880791009278,
|
|
"learning_rate": 3.799016192823981e-06,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1821311116218567,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4097.9,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 5.753809141940658,
|
|
"grad_norm": 0.5732364301633718,
|
|
"learning_rate": 3.7522845487687276e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13184769451618195,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5341.0,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 5.761828388131516,
|
|
"grad_norm": 0.7644038643363767,
|
|
"learning_rate": 3.7058123378182664e-06,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17732053995132446,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3186.9,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 5.769847634322374,
|
|
"grad_norm": 0.5889080563512636,
|
|
"learning_rate": 3.6596003020110636e-06,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1895439326763153,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4825.1,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 5.777866880513232,
|
|
"grad_norm": 0.7001673848354629,
|
|
"learning_rate": 3.613649179231287e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10094672441482544,
|
|
"step": 3605,
|
|
"valid_targets_mean": 2560.6,
|
|
"valid_targets_min": 1881
|
|
},
|
|
{
|
|
"epoch": 5.7858861267040895,
|
|
"grad_norm": 0.7113191211178155,
|
|
"learning_rate": 3.5679597031970017e-06,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1153012216091156,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3266.9,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 5.7939053728949474,
|
|
"grad_norm": 0.6038915015284441,
|
|
"learning_rate": 3.5225326034484764e-06,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20703008770942688,
|
|
"step": 3615,
|
|
"valid_targets_mean": 5885.2,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 5.801924619085806,
|
|
"grad_norm": 0.7051731320703262,
|
|
"learning_rate": 3.4773686053365197e-06,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10645755380392075,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3316.8,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 5.809943865276664,
|
|
"grad_norm": 0.5541262922082779,
|
|
"learning_rate": 3.4324684300109003e-06,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1505921185016632,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5566.5,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 5.817963111467522,
|
|
"grad_norm": 0.6614777960578949,
|
|
"learning_rate": 3.387832794408832e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10717364400625229,
|
|
"step": 3630,
|
|
"valid_targets_mean": 2447.9,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 5.82598235765838,
|
|
"grad_norm": 0.6530849619363815,
|
|
"learning_rate": 3.3434624112435342e-06,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11250616610050201,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3679.1,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 5.834001603849238,
|
|
"grad_norm": 0.8457840930871934,
|
|
"learning_rate": 3.2993579889928397e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12079623341560364,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2276.9,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 5.842020850040096,
|
|
"grad_norm": 0.5424570068999839,
|
|
"learning_rate": 3.25552023188789e-06,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11634393036365509,
|
|
"step": 3645,
|
|
"valid_targets_mean": 5780.5,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 5.850040096230955,
|
|
"grad_norm": 0.6609099224803072,
|
|
"learning_rate": 3.211949839901889e-06,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07904715836048126,
|
|
"step": 3650,
|
|
"valid_targets_mean": 1853.6,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 5.858059342421813,
|
|
"grad_norm": 0.7054665857775407,
|
|
"learning_rate": 3.168647508738927e-06,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11137621849775314,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3421.8,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 5.8660785886126705,
|
|
"grad_norm": 0.5673837151999918,
|
|
"learning_rate": 3.125613929822866e-06,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16755767166614532,
|
|
"step": 3660,
|
|
"valid_targets_mean": 4874.6,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.874097834803528,
|
|
"grad_norm": 0.6430654552670423,
|
|
"learning_rate": 3.0828497902863106e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14625166356563568,
|
|
"step": 3665,
|
|
"valid_targets_mean": 4878.0,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 5.882117080994386,
|
|
"grad_norm": 0.7773523165873664,
|
|
"learning_rate": 3.0403557729596267e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15384423732757568,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3396.4,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 5.890136327185244,
|
|
"grad_norm": 0.5560053959749172,
|
|
"learning_rate": 2.998132556360038e-06,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09452245384454727,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3702.9,
|
|
"valid_targets_min": 2185
|
|
},
|
|
{
|
|
"epoch": 5.898155573376103,
|
|
"grad_norm": 0.7832758816536532,
|
|
"learning_rate": 2.9561808146808068e-06,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15402548015117645,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3342.1,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 5.906174819566961,
|
|
"grad_norm": 0.5874927264256125,
|
|
"learning_rate": 2.9145012177804476e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1170136108994484,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4333.6,
|
|
"valid_targets_min": 1515
|
|
},
|
|
{
|
|
"epoch": 5.914194065757819,
|
|
"grad_norm": 0.7250685172338484,
|
|
"learning_rate": 2.8730944311720454e-06,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16327306628227234,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4079.8,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 5.922213311948677,
|
|
"grad_norm": 0.6049201069316128,
|
|
"learning_rate": 2.8319611160126226e-06,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10896667838096619,
|
|
"step": 3695,
|
|
"valid_targets_mean": 4256.0,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 5.930232558139535,
|
|
"grad_norm": 0.8074696599106505,
|
|
"learning_rate": 2.791101929092592e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18283414840698242,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3042.2,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 5.938251804330393,
|
|
"grad_norm": 0.6418415192706601,
|
|
"learning_rate": 2.750517522825251e-06,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14782294631004333,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4387.5,
|
|
"valid_targets_min": 2406
|
|
},
|
|
{
|
|
"epoch": 5.946271050521251,
|
|
"grad_norm": 0.7128403749449099,
|
|
"learning_rate": 2.710208545236397e-06,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13058456778526306,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3053.6,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 5.954290296712109,
|
|
"grad_norm": 0.5020374709645875,
|
|
"learning_rate": 2.670175639953929e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09986815601587296,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3981.4,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 5.962309542902967,
|
|
"grad_norm": 0.7439070604641389,
|
|
"learning_rate": 2.6304194461976207e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13553877174854279,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2815.2,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 5.970328789093825,
|
|
"grad_norm": 0.8061327031659907,
|
|
"learning_rate": 2.5909405987688896e-06,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16787466406822205,
|
|
"step": 3725,
|
|
"valid_targets_mean": 2979.1,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 5.978348035284683,
|
|
"grad_norm": 0.7233469829920061,
|
|
"learning_rate": 2.5517397280406565e-06,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1734805703163147,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4583.1,
|
|
"valid_targets_min": 1858
|
|
},
|
|
{
|
|
"epoch": 5.986367281475541,
|
|
"grad_norm": 0.7616891829738133,
|
|
"learning_rate": 2.512817459947312e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11115828156471252,
|
|
"step": 3735,
|
|
"valid_targets_mean": 2511.5,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 5.994386527666399,
|
|
"grad_norm": 0.6285399195870048,
|
|
"learning_rate": 2.4741744159746618e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12156839668750763,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3640.2,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 6.001603849238172,
|
|
"grad_norm": 0.5991692363576295,
|
|
"learning_rate": 2.435811213150079e-06,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12158750742673874,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4298.0,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 6.0096230954290295,
|
|
"grad_norm": 0.5838065514388638,
|
|
"learning_rate": 2.3977284640325805e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1734558641910553,
|
|
"step": 3750,
|
|
"valid_targets_mean": 5849.6,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 6.017642341619887,
|
|
"grad_norm": 0.7515761152464798,
|
|
"learning_rate": 2.359926776703092e-06,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1791701316833496,
|
|
"step": 3755,
|
|
"valid_targets_mean": 4131.6,
|
|
"valid_targets_min": 1807
|
|
},
|
|
{
|
|
"epoch": 6.025661587810745,
|
|
"grad_norm": 0.5615783508002681,
|
|
"learning_rate": 2.3224067547547357e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16611458361148834,
|
|
"step": 3760,
|
|
"valid_targets_mean": 5644.5,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 6.033680834001604,
|
|
"grad_norm": 0.6349112012821538,
|
|
"learning_rate": 2.2851689972831536e-06,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09982097148895264,
|
|
"step": 3765,
|
|
"valid_targets_mean": 3284.1,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 6.041700080192462,
|
|
"grad_norm": 0.4936466599303436,
|
|
"learning_rate": 2.248214098877002e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16967222094535828,
|
|
"step": 3770,
|
|
"valid_targets_mean": 7187.9,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 6.04971932638332,
|
|
"grad_norm": 0.6975506989696647,
|
|
"learning_rate": 2.2115426496083958e-06,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15700647234916687,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3620.6,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 6.057738572574178,
|
|
"grad_norm": 0.5213863781241199,
|
|
"learning_rate": 2.175155235023536e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13770925998687744,
|
|
"step": 3780,
|
|
"valid_targets_mean": 6736.9,
|
|
"valid_targets_min": 2195
|
|
},
|
|
{
|
|
"epoch": 6.065757818765036,
|
|
"grad_norm": 0.7034256718400578,
|
|
"learning_rate": 2.1390524361333355e-06,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13219033181667328,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3660.4,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 6.073777064955894,
|
|
"grad_norm": 0.6915785323589456,
|
|
"learning_rate": 2.1032348294041305e-06,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1072409376502037,
|
|
"step": 3790,
|
|
"valid_targets_mean": 2984.6,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 6.081796311146753,
|
|
"grad_norm": 0.6525266844739303,
|
|
"learning_rate": 2.067702986748521e-06,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1530601680278778,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4701.8,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 6.0898155573376105,
|
|
"grad_norm": 0.6239357292972502,
|
|
"learning_rate": 2.0324574755161764e-06,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11305804550647736,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3578.0,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 6.097834803528468,
|
|
"grad_norm": 0.6645665897198888,
|
|
"learning_rate": 1.9974988584848385e-06,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14500468969345093,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4432.5,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 6.105854049719326,
|
|
"grad_norm": 0.8009084871999578,
|
|
"learning_rate": 1.96282769385129e-06,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16477596759796143,
|
|
"step": 3810,
|
|
"valid_targets_mean": 5450.1,
|
|
"valid_targets_min": 2265
|
|
},
|
|
{
|
|
"epoch": 6.113873295910184,
|
|
"grad_norm": 0.6177586996719383,
|
|
"learning_rate": 1.9284445352224625e-06,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14988410472869873,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3910.0,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 6.121892542101042,
|
|
"grad_norm": 0.893366067941702,
|
|
"learning_rate": 1.894349931606596e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17635749280452728,
|
|
"step": 3820,
|
|
"valid_targets_mean": 2809.9,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 6.129911788291901,
|
|
"grad_norm": 0.7176410005122456,
|
|
"learning_rate": 1.8605444274044493e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12091509997844696,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3264.6,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 6.137931034482759,
|
|
"grad_norm": 0.6593417551637085,
|
|
"learning_rate": 1.827028562400659e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11562930047512054,
|
|
"step": 3830,
|
|
"valid_targets_mean": 2925.2,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 6.145950280673617,
|
|
"grad_norm": 0.7489493000181351,
|
|
"learning_rate": 1.793802871755066e-06,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12282755225896835,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4057.5,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 6.153969526864475,
|
|
"grad_norm": 0.6571208032513868,
|
|
"learning_rate": 1.760867885994202e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11998392641544342,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4374.6,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 6.161988773055333,
|
|
"grad_norm": 0.8039505872062598,
|
|
"learning_rate": 1.7282241310028047e-06,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17138797044754028,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3309.2,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 6.170008019246191,
|
|
"grad_norm": 0.5963452400390175,
|
|
"learning_rate": 1.6958721280154232e-06,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1287102848291397,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4082.4,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 6.1780272654370485,
|
|
"grad_norm": 0.5536501052142992,
|
|
"learning_rate": 1.6638123936081085e-06,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11305131018161774,
|
|
"step": 3855,
|
|
"valid_targets_mean": 5233.0,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 6.186046511627907,
|
|
"grad_norm": 1.1404604574734827,
|
|
"learning_rate": 1.6320454396901463e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1419065296649933,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4052.0,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 6.194065757818765,
|
|
"grad_norm": 0.7161367900264296,
|
|
"learning_rate": 1.6005717734958914e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1022464781999588,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3001.2,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 6.202085004009623,
|
|
"grad_norm": 0.7281191839480282,
|
|
"learning_rate": 1.569391897576671e-06,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11236599832773209,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2805.0,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 6.210104250200481,
|
|
"grad_norm": 0.6886143528876548,
|
|
"learning_rate": 1.5385063097927533e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09367795288562775,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2750.8,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 6.218123496391339,
|
|
"grad_norm": 0.7374096316470432,
|
|
"learning_rate": 1.5079155033054104e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1338837891817093,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3094.1,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 6.226142742582197,
|
|
"grad_norm": 0.6462423085656397,
|
|
"learning_rate": 1.4776199665690239e-06,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09631840884685516,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3532.2,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 6.234161988773056,
|
|
"grad_norm": 0.6543467487593563,
|
|
"learning_rate": 1.4476201833233084e-06,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19178268313407898,
|
|
"step": 3890,
|
|
"valid_targets_mean": 5289.2,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 6.242181234963914,
|
|
"grad_norm": 0.5293959822986096,
|
|
"learning_rate": 1.4179166325855676e-06,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12478166818618774,
|
|
"step": 3895,
|
|
"valid_targets_mean": 5240.5,
|
|
"valid_targets_min": 2239
|
|
},
|
|
{
|
|
"epoch": 6.250200481154772,
|
|
"grad_norm": 0.5543609876923493,
|
|
"learning_rate": 1.3885097886430599e-06,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10484036803245544,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5359.6,
|
|
"valid_targets_min": 2709
|
|
},
|
|
{
|
|
"epoch": 6.2582197273456295,
|
|
"grad_norm": 0.7216036614629309,
|
|
"learning_rate": 1.35940012104542e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10729452222585678,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3561.8,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 6.266238973536487,
|
|
"grad_norm": 0.7478663074770113,
|
|
"learning_rate": 1.3305880945971583e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14511063694953918,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2780.2,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 6.274258219727345,
|
|
"grad_norm": 0.550060865375561,
|
|
"learning_rate": 1.3020741693502403e-06,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15165145695209503,
|
|
"step": 3915,
|
|
"valid_targets_mean": 6547.9,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 6.282277465918204,
|
|
"grad_norm": 0.6615177499929813,
|
|
"learning_rate": 1.27385880059675e-06,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10962480306625366,
|
|
"step": 3920,
|
|
"valid_targets_mean": 4424.0,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 6.290296712109062,
|
|
"grad_norm": 0.694246980988033,
|
|
"learning_rate": 1.245942438861607e-06,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10474206507205963,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3367.9,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 6.29831595829992,
|
|
"grad_norm": 0.7157635807579565,
|
|
"learning_rate": 1.2183255298953788e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1532362550497055,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4046.2,
|
|
"valid_targets_min": 2199
|
|
},
|
|
{
|
|
"epoch": 6.306335204490778,
|
|
"grad_norm": 0.69158602977965,
|
|
"learning_rate": 1.1910085146671645e-06,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11300238966941833,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3600.0,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 6.314354450681636,
|
|
"grad_norm": 0.827641902397557,
|
|
"learning_rate": 1.1639918293575492e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1277027130126953,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2965.0,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 6.322373696872494,
|
|
"grad_norm": 0.6439877820560833,
|
|
"learning_rate": 1.1372759053516536e-06,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14757797122001648,
|
|
"step": 3945,
|
|
"valid_targets_mean": 5060.8,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 6.330392943063352,
|
|
"grad_norm": 0.8237834476733428,
|
|
"learning_rate": 1.1108611692322157e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14925746619701385,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3301.4,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 6.3384121892542105,
|
|
"grad_norm": 0.6933010368323701,
|
|
"learning_rate": 1.0847480427728142e-06,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18430668115615845,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4292.0,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 6.346431435445068,
|
|
"grad_norm": 0.6863177670467828,
|
|
"learning_rate": 1.0589369429311125e-06,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13784122467041016,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4396.8,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 6.354450681635926,
|
|
"grad_norm": 0.6371680102976265,
|
|
"learning_rate": 1.0334282818422037e-06,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15805824100971222,
|
|
"step": 3965,
|
|
"valid_targets_mean": 5949.5,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 6.362469927826784,
|
|
"grad_norm": 0.6568792404928988,
|
|
"learning_rate": 1.008222466812041e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1167311817407608,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3345.5,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 6.370489174017642,
|
|
"grad_norm": 0.6559067333173757,
|
|
"learning_rate": 9.83319900310915e-07,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10906291007995605,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3599.5,
|
|
"valid_targets_min": 1912
|
|
},
|
|
{
|
|
"epoch": 6.3785084202085,
|
|
"grad_norm": 0.7723309861979427,
|
|
"learning_rate": 9.587209799670495e-07,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1628764122724533,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3042.9,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 6.386527666399359,
|
|
"grad_norm": 0.734200484710398,
|
|
"learning_rate": 9.344260985602327e-07,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12474434822797775,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3176.6,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 6.394546912590217,
|
|
"grad_norm": 0.5800212945862554,
|
|
"learning_rate": 9.104356440155526e-07,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1241566613316536,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5577.8,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 6.402566158781075,
|
|
"grad_norm": 0.5664850555279295,
|
|
"learning_rate": 8.867499993972162e-07,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19933608174324036,
|
|
"step": 3995,
|
|
"valid_targets_mean": 8049.4,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 6.410585404971933,
|
|
"grad_norm": 0.7356802978325007,
|
|
"learning_rate": 8.633695429024058e-07,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18359798192977905,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4368.5,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 6.4186046511627906,
|
|
"grad_norm": 0.7546287387709505,
|
|
"learning_rate": 8.402946478552732e-07,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17862293124198914,
|
|
"step": 4005,
|
|
"valid_targets_mean": 3321.8,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 6.4266238973536485,
|
|
"grad_norm": 0.7309273625638524,
|
|
"learning_rate": 8.175256827009392e-07,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12505288422107697,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4053.5,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 6.434643143544507,
|
|
"grad_norm": 0.6264446725047349,
|
|
"learning_rate": 7.95063010999646e-07,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1693299412727356,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4615.9,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 6.442662389735365,
|
|
"grad_norm": 0.7464025775211349,
|
|
"learning_rate": 7.729069914209409e-07,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13851439952850342,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3684.0,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 6.450681635926223,
|
|
"grad_norm": 0.5524150098685408,
|
|
"learning_rate": 7.510579777379345e-07,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21182794868946075,
|
|
"step": 4025,
|
|
"valid_targets_mean": 6848.2,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 6.458700882117081,
|
|
"grad_norm": 0.6643165903460105,
|
|
"learning_rate": 7.295163188216792e-07,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10513627529144287,
|
|
"step": 4030,
|
|
"valid_targets_mean": 3270.2,
|
|
"valid_targets_min": 894
|
|
},
|
|
{
|
|
"epoch": 6.466720128307939,
|
|
"grad_norm": 0.7738257068533779,
|
|
"learning_rate": 7.08282358635568e-07,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13248899579048157,
|
|
"step": 4035,
|
|
"valid_targets_mean": 2989.2,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 6.474739374498797,
|
|
"grad_norm": 0.663638489935353,
|
|
"learning_rate": 6.87356436229869e-07,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08959473669528961,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3430.5,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 6.482758620689655,
|
|
"grad_norm": 0.8146828017163209,
|
|
"learning_rate": 6.667388857362977e-07,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14155611395835876,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3218.6,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 6.490777866880514,
|
|
"grad_norm": 0.6793827790016271,
|
|
"learning_rate": 6.464300363626797e-07,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09733996540307999,
|
|
"step": 4050,
|
|
"valid_targets_mean": 2679.1,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 6.4987971130713715,
|
|
"grad_norm": 0.6285180608759408,
|
|
"learning_rate": 6.264302123877053e-07,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13995882868766785,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5047.8,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 6.5068163592622295,
|
|
"grad_norm": 0.6873892553192886,
|
|
"learning_rate": 6.067397331557412e-07,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11077672243118286,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3205.5,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 6.514835605453087,
|
|
"grad_norm": 0.5517625384903336,
|
|
"learning_rate": 5.873589130717405e-07,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08499126881361008,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3437.4,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 6.522854851643945,
|
|
"grad_norm": 0.6755583113168804,
|
|
"learning_rate": 5.682880615962116e-07,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14547117054462433,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4614.2,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 6.530874097834803,
|
|
"grad_norm": 0.6395651118677016,
|
|
"learning_rate": 5.495274832402841e-07,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12784671783447266,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3884.9,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 6.538893344025661,
|
|
"grad_norm": 0.5770813261830327,
|
|
"learning_rate": 5.310774775608529e-07,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12950290739536285,
|
|
"step": 4080,
|
|
"valid_targets_mean": 4559.6,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 6.54691259021652,
|
|
"grad_norm": 0.6331014040707633,
|
|
"learning_rate": 5.129383391557751e-07,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.165439635515213,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4090.6,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 6.554931836407378,
|
|
"grad_norm": 0.6511365230785408,
|
|
"learning_rate": 4.951103576591876e-07,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13743093609809875,
|
|
"step": 4090,
|
|
"valid_targets_mean": 4554.4,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 6.562951082598236,
|
|
"grad_norm": 0.7987173023729609,
|
|
"learning_rate": 4.7759381773687e-07,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13793396949768066,
|
|
"step": 4095,
|
|
"valid_targets_mean": 2783.2,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 6.570970328789094,
|
|
"grad_norm": 0.7544997486665157,
|
|
"learning_rate": 4.6038899908170234e-07,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12955988943576813,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3326.0,
|
|
"valid_targets_min": 2170
|
|
},
|
|
{
|
|
"epoch": 6.578989574979952,
|
|
"grad_norm": 0.5854788552810375,
|
|
"learning_rate": 4.4349617640920164e-07,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09501869231462479,
|
|
"step": 4105,
|
|
"valid_targets_mean": 2814.2,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 6.58700882117081,
|
|
"grad_norm": 0.7772905310391393,
|
|
"learning_rate": 4.2691561945312764e-07,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1361604928970337,
|
|
"step": 4110,
|
|
"valid_targets_mean": 3158.1,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 6.595028067361668,
|
|
"grad_norm": 0.6055157996600555,
|
|
"learning_rate": 4.106475929611886e-07,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10309708118438721,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3729.2,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 6.603047313552526,
|
|
"grad_norm": 0.7008833517826353,
|
|
"learning_rate": 3.9469235669080007e-07,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12836217880249023,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3563.2,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 6.611066559743384,
|
|
"grad_norm": 0.5659713506979073,
|
|
"learning_rate": 3.7905016540495053e-07,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10678073763847351,
|
|
"step": 4125,
|
|
"valid_targets_mean": 5325.9,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 6.619085805934242,
|
|
"grad_norm": 0.8252214089753425,
|
|
"learning_rate": 3.63721268868118e-07,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13685831427574158,
|
|
"step": 4130,
|
|
"valid_targets_mean": 2919.6,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 6.6271050521251,
|
|
"grad_norm": 0.7225618863258757,
|
|
"learning_rate": 3.487059118422997e-07,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12511911988258362,
|
|
"step": 4135,
|
|
"valid_targets_mean": 3616.0,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 6.635124298315958,
|
|
"grad_norm": 0.7213244827448435,
|
|
"learning_rate": 3.3400433408308895e-07,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11407344788312912,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3371.5,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 6.643143544506817,
|
|
"grad_norm": 0.5863803353807132,
|
|
"learning_rate": 3.196167703358577e-07,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2087329626083374,
|
|
"step": 4145,
|
|
"valid_targets_mean": 6667.9,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 6.651162790697675,
|
|
"grad_norm": 0.7306156531205246,
|
|
"learning_rate": 3.0554345033199985e-07,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10838615149259567,
|
|
"step": 4150,
|
|
"valid_targets_mean": 2848.4,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 6.659182036888533,
|
|
"grad_norm": 0.5978142445674056,
|
|
"learning_rate": 2.917845987852652e-07,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12275230884552002,
|
|
"step": 4155,
|
|
"valid_targets_mean": 5131.2,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 6.6672012830793905,
|
|
"grad_norm": 0.6989850067101168,
|
|
"learning_rate": 2.783404353881758e-07,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12991738319396973,
|
|
"step": 4160,
|
|
"valid_targets_mean": 5254.8,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 6.6752205292702484,
|
|
"grad_norm": 0.6831761000655288,
|
|
"learning_rate": 2.652111748085151e-07,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12421713769435883,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3544.2,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 6.683239775461106,
|
|
"grad_norm": 0.6637736639097456,
|
|
"learning_rate": 2.523970266859044e-07,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11612105369567871,
|
|
"step": 4170,
|
|
"valid_targets_mean": 4202.0,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 6.691259021651964,
|
|
"grad_norm": 0.8052071141988904,
|
|
"learning_rate": 2.398981956284363e-07,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1752295047044754,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3697.4,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 6.699278267842823,
|
|
"grad_norm": 0.771846614986006,
|
|
"learning_rate": 2.2771488120944207e-07,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18034441769123077,
|
|
"step": 4180,
|
|
"valid_targets_mean": 3251.6,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 6.707297514033681,
|
|
"grad_norm": 0.7689430459462362,
|
|
"learning_rate": 2.1584727796427174e-07,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18182602524757385,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3384.4,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 6.715316760224539,
|
|
"grad_norm": 0.6345934861559102,
|
|
"learning_rate": 2.0429557538720556e-07,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0992988795042038,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3541.0,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 6.723336006415397,
|
|
"grad_norm": 0.7310071527515055,
|
|
"learning_rate": 1.930599579284298e-07,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11810122430324554,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3602.1,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 6.731355252606255,
|
|
"grad_norm": 0.5664508946270108,
|
|
"learning_rate": 1.8214060499107679e-07,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1088356077671051,
|
|
"step": 4200,
|
|
"valid_targets_mean": 4533.4,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 6.739374498797113,
|
|
"grad_norm": 0.6657206647984147,
|
|
"learning_rate": 1.7153769092837614e-07,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10410232841968536,
|
|
"step": 4205,
|
|
"valid_targets_mean": 3370.6,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 6.7473937449879715,
|
|
"grad_norm": 0.7416720662788616,
|
|
"learning_rate": 1.6125138504086146e-07,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10707104951143265,
|
|
"step": 4210,
|
|
"valid_targets_mean": 2630.2,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 6.755412991178829,
|
|
"grad_norm": 0.5671208154371713,
|
|
"learning_rate": 1.5128185157367247e-07,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13333214819431305,
|
|
"step": 4215,
|
|
"valid_targets_mean": 4998.6,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 6.763432237369687,
|
|
"grad_norm": 0.7000691617144927,
|
|
"learning_rate": 1.4162924971393044e-07,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16572439670562744,
|
|
"step": 4220,
|
|
"valid_targets_mean": 4097.9,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 6.771451483560545,
|
|
"grad_norm": 1.0427903862301446,
|
|
"learning_rate": 1.322937335881891e-07,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14080201089382172,
|
|
"step": 4225,
|
|
"valid_targets_mean": 4474.2,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 6.779470729751403,
|
|
"grad_norm": 0.604225087803022,
|
|
"learning_rate": 1.2327545225999215e-07,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16035734117031097,
|
|
"step": 4230,
|
|
"valid_targets_mean": 5771.0,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 6.787489975942261,
|
|
"grad_norm": 0.7120534688025915,
|
|
"learning_rate": 1.145745497274664e-07,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16921287775039673,
|
|
"step": 4235,
|
|
"valid_targets_mean": 4174.0,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 6.79550922213312,
|
|
"grad_norm": 0.7648717180045367,
|
|
"learning_rate": 1.061911649210523e-07,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1884380578994751,
|
|
"step": 4240,
|
|
"valid_targets_mean": 5023.6,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 6.803528468323978,
|
|
"grad_norm": 0.6388536724286882,
|
|
"learning_rate": 9.812543170126365e-08,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11434964835643768,
|
|
"step": 4245,
|
|
"valid_targets_mean": 4338.6,
|
|
"valid_targets_min": 2334
|
|
},
|
|
{
|
|
"epoch": 6.811547714514836,
|
|
"grad_norm": 0.7270357721190687,
|
|
"learning_rate": 9.03774788565559e-08,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24188214540481567,
|
|
"step": 4250,
|
|
"valid_targets_mean": 5134.2,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 6.819566960705694,
|
|
"grad_norm": 0.8119218603253432,
|
|
"learning_rate": 8.294743010127448e-08,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15277987718582153,
|
|
"step": 4255,
|
|
"valid_targets_mean": 2758.6,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 6.827586206896552,
|
|
"grad_norm": 0.7089787347354933,
|
|
"learning_rate": 7.583540407367418e-08,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17936328053474426,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3873.8,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 6.8356054530874095,
|
|
"grad_norm": 0.733927681130997,
|
|
"learning_rate": 6.904151433402728e-08,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18088877201080322,
|
|
"step": 4265,
|
|
"valid_targets_mean": 5007.8,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 6.843624699278267,
|
|
"grad_norm": 0.7031485625421123,
|
|
"learning_rate": 6.256586936281172e-08,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10452007502317429,
|
|
"step": 4270,
|
|
"valid_targets_mean": 2666.0,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 6.851643945469126,
|
|
"grad_norm": 0.6652572684887978,
|
|
"learning_rate": 5.6408572558972475e-08,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13122780621051788,
|
|
"step": 4275,
|
|
"valid_targets_mean": 3638.0,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 6.859663191659984,
|
|
"grad_norm": 0.6857482931865787,
|
|
"learning_rate": 5.0569722238280605e-08,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16107389330863953,
|
|
"step": 4280,
|
|
"valid_targets_mean": 4468.1,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 6.867682437850842,
|
|
"grad_norm": 0.72913675672226,
|
|
"learning_rate": 4.504941163175236e-08,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14377540349960327,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3290.5,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 6.8757016840417,
|
|
"grad_norm": 0.6468123069769464,
|
|
"learning_rate": 3.984772888417032e-08,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16589681804180145,
|
|
"step": 4290,
|
|
"valid_targets_mean": 6056.2,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 6.883720930232558,
|
|
"grad_norm": 0.6669737110792908,
|
|
"learning_rate": 3.4964757052671216e-08,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14455217123031616,
|
|
"step": 4295,
|
|
"valid_targets_mean": 4423.9,
|
|
"valid_targets_min": 2216
|
|
},
|
|
{
|
|
"epoch": 6.891740176423416,
|
|
"grad_norm": 0.6447822559671433,
|
|
"learning_rate": 3.0400574105415856e-08,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10480152070522308,
|
|
"step": 4300,
|
|
"valid_targets_mean": 3992.2,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 6.899759422614274,
|
|
"grad_norm": 0.6269333630594336,
|
|
"learning_rate": 2.615525292035459e-08,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20834621787071228,
|
|
"step": 4305,
|
|
"valid_targets_mean": 6044.5,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 6.907778668805133,
|
|
"grad_norm": 0.6801689751096721,
|
|
"learning_rate": 2.222886128405266e-08,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10578888654708862,
|
|
"step": 4310,
|
|
"valid_targets_mean": 2282.5,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 6.9157979149959905,
|
|
"grad_norm": 0.616271937547341,
|
|
"learning_rate": 1.8621461890617752e-08,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0919659286737442,
|
|
"step": 4315,
|
|
"valid_targets_mean": 2920.1,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 6.923817161186848,
|
|
"grad_norm": 0.6251342212375184,
|
|
"learning_rate": 1.5333112340687463e-08,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1263146996498108,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4399.0,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 6.931836407377706,
|
|
"grad_norm": 0.8172684670405291,
|
|
"learning_rate": 1.2363865140518905e-08,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1875896155834198,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3179.0,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 6.939855653568564,
|
|
"grad_norm": 0.6804193695635374,
|
|
"learning_rate": 9.713767701151621e-09,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14937788248062134,
|
|
"step": 4330,
|
|
"valid_targets_mean": 5056.6,
|
|
"valid_targets_min": 1414
|
|
},
|
|
{
|
|
"epoch": 6.947874899759423,
|
|
"grad_norm": 0.7198516572202387,
|
|
"learning_rate": 7.382862337641516e-09,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14979767799377441,
|
|
"step": 4335,
|
|
"valid_targets_mean": 3678.6,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 6.955894145950281,
|
|
"grad_norm": 0.8585805179317972,
|
|
"learning_rate": 5.371186268390283e-09,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378738284111023,
|
|
"step": 4340,
|
|
"valid_targets_mean": 2880.1,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 6.963913392141139,
|
|
"grad_norm": 0.7632442302373305,
|
|
"learning_rate": 3.678771614550325e-09,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21591196954250336,
|
|
"step": 4345,
|
|
"valid_targets_mean": 4265.0,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 6.971932638331997,
|
|
"grad_norm": 0.6597310967858202,
|
|
"learning_rate": 2.3056453995162763e-09,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1997801512479782,
|
|
"step": 4350,
|
|
"valid_targets_mean": 5230.1,
|
|
"valid_targets_min": 2152
|
|
},
|
|
{
|
|
"epoch": 6.979951884522855,
|
|
"grad_norm": 0.6935715453803216,
|
|
"learning_rate": 1.2518295484875708e-09,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13192936778068542,
|
|
"step": 4355,
|
|
"valid_targets_mean": 3180.9,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 6.987971130713713,
|
|
"grad_norm": 0.6257334178571583,
|
|
"learning_rate": 5.173408881198328e-10,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1475430130958557,
|
|
"step": 4360,
|
|
"valid_targets_mean": 5180.1,
|
|
"valid_targets_min": 2119
|
|
},
|
|
{
|
|
"epoch": 6.995990376904571,
|
|
"grad_norm": 0.6404645600840769,
|
|
"learning_rate": 1.0219114625398263e-10,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07926057279109955,
|
|
"step": 4365,
|
|
"valid_targets_mean": 2509.1,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3030981719493866,
|
|
"step": 4368,
|
|
"total_flos": 1.299011914721919e+18,
|
|
"train_loss": 0.3405153105383391,
|
|
"train_runtime": 50827.1401,
|
|
"train_samples_per_second": 1.373,
|
|
"train_steps_per_second": 0.086,
|
|
"valid_targets_mean": 4862.5,
|
|
"valid_targets_min": 1479
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4368,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.299011914721919e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|