9640 lines
267 KiB
JSON
9640 lines
267 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4361,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008025682182985553,
|
|
"grad_norm": 16.365154769037186,
|
|
"learning_rate": 3.661327231121282e-07,
|
|
"loss": 0.9119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8281038999557495,
|
|
"step": 5,
|
|
"valid_targets_mean": 4279.1,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 0.016051364365971106,
|
|
"grad_norm": 17.496116394086645,
|
|
"learning_rate": 8.237986270022884e-07,
|
|
"loss": 0.8874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9568569660186768,
|
|
"step": 10,
|
|
"valid_targets_mean": 2937.4,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 0.024077046548956663,
|
|
"grad_norm": 14.712053866539966,
|
|
"learning_rate": 1.2814645308924487e-06,
|
|
"loss": 0.8615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7440463304519653,
|
|
"step": 15,
|
|
"valid_targets_mean": 4543.9,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 0.03210272873194221,
|
|
"grad_norm": 13.073126992438652,
|
|
"learning_rate": 1.7391304347826088e-06,
|
|
"loss": 0.8439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8706164360046387,
|
|
"step": 20,
|
|
"valid_targets_mean": 3092.1,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 0.04012841091492777,
|
|
"grad_norm": 8.692373813506133,
|
|
"learning_rate": 2.196796338672769e-06,
|
|
"loss": 0.7589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7745897769927979,
|
|
"step": 25,
|
|
"valid_targets_mean": 4186.9,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 0.048154093097913325,
|
|
"grad_norm": 4.548047758420085,
|
|
"learning_rate": 2.654462242562929e-06,
|
|
"loss": 0.715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6869417428970337,
|
|
"step": 30,
|
|
"valid_targets_mean": 4462.3,
|
|
"valid_targets_min": 1930
|
|
},
|
|
{
|
|
"epoch": 0.056179775280898875,
|
|
"grad_norm": 2.8722888618951186,
|
|
"learning_rate": 3.1121281464530894e-06,
|
|
"loss": 0.701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7097717523574829,
|
|
"step": 35,
|
|
"valid_targets_mean": 3311.8,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 0.06420545746388442,
|
|
"grad_norm": 1.7797669184988352,
|
|
"learning_rate": 3.56979405034325e-06,
|
|
"loss": 0.6812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6491912007331848,
|
|
"step": 40,
|
|
"valid_targets_mean": 4286.9,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 0.07223113964686999,
|
|
"grad_norm": 1.5882324795872969,
|
|
"learning_rate": 4.0274599542334094e-06,
|
|
"loss": 0.6737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6690398454666138,
|
|
"step": 45,
|
|
"valid_targets_mean": 3165.4,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 0.08025682182985554,
|
|
"grad_norm": 1.1087886985826176,
|
|
"learning_rate": 4.48512585812357e-06,
|
|
"loss": 0.6209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5860173106193542,
|
|
"step": 50,
|
|
"valid_targets_mean": 4685.1,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 0.08828250401284109,
|
|
"grad_norm": 1.103579540245981,
|
|
"learning_rate": 4.94279176201373e-06,
|
|
"loss": 0.6206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6162932515144348,
|
|
"step": 55,
|
|
"valid_targets_mean": 3165.6,
|
|
"valid_targets_min": 1328
|
|
},
|
|
{
|
|
"epoch": 0.09630818619582665,
|
|
"grad_norm": 1.0038507120186022,
|
|
"learning_rate": 5.400457665903891e-06,
|
|
"loss": 0.6015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6310989856719971,
|
|
"step": 60,
|
|
"valid_targets_mean": 3079.7,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 0.1043338683788122,
|
|
"grad_norm": 0.8090891242032995,
|
|
"learning_rate": 5.858123569794051e-06,
|
|
"loss": 0.5856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5514224767684937,
|
|
"step": 65,
|
|
"valid_targets_mean": 4080.7,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 0.11235955056179775,
|
|
"grad_norm": 0.7678169614668444,
|
|
"learning_rate": 6.31578947368421e-06,
|
|
"loss": 0.5887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5969565510749817,
|
|
"step": 70,
|
|
"valid_targets_mean": 3865.3,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 0.12038523274478331,
|
|
"grad_norm": 0.8267967159124489,
|
|
"learning_rate": 6.773455377574372e-06,
|
|
"loss": 0.6224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5408726930618286,
|
|
"step": 75,
|
|
"valid_targets_mean": 2791.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.12841091492776885,
|
|
"grad_norm": 0.8099409692620032,
|
|
"learning_rate": 7.231121281464531e-06,
|
|
"loss": 0.5759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6248391270637512,
|
|
"step": 80,
|
|
"valid_targets_mean": 3922.9,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 0.13643659711075443,
|
|
"grad_norm": 0.820117437233282,
|
|
"learning_rate": 7.688787185354691e-06,
|
|
"loss": 0.5722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5872689485549927,
|
|
"step": 85,
|
|
"valid_targets_mean": 3152.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 0.14446227929373998,
|
|
"grad_norm": 0.7460578027273762,
|
|
"learning_rate": 8.146453089244852e-06,
|
|
"loss": 0.5711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.579184889793396,
|
|
"step": 90,
|
|
"valid_targets_mean": 3653.5,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 0.15248796147672553,
|
|
"grad_norm": 0.7793678684036389,
|
|
"learning_rate": 8.604118993135013e-06,
|
|
"loss": 0.5729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5987073183059692,
|
|
"step": 95,
|
|
"valid_targets_mean": 3142.2,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 0.16051364365971107,
|
|
"grad_norm": 0.7267414545012443,
|
|
"learning_rate": 9.061784897025172e-06,
|
|
"loss": 0.5492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5588800311088562,
|
|
"step": 100,
|
|
"valid_targets_mean": 3407.6,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 0.16853932584269662,
|
|
"grad_norm": 0.7271759620125251,
|
|
"learning_rate": 9.519450800915333e-06,
|
|
"loss": 0.582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6017678380012512,
|
|
"step": 105,
|
|
"valid_targets_mean": 3842.7,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 0.17656500802568217,
|
|
"grad_norm": 0.6957622967915364,
|
|
"learning_rate": 9.977116704805492e-06,
|
|
"loss": 0.5351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4987550973892212,
|
|
"step": 110,
|
|
"valid_targets_mean": 3521.1,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 0.18459069020866772,
|
|
"grad_norm": 0.7094644784936989,
|
|
"learning_rate": 1.0434782608695653e-05,
|
|
"loss": 0.5101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5332086086273193,
|
|
"step": 115,
|
|
"valid_targets_mean": 3566.1,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 0.1926163723916533,
|
|
"grad_norm": 0.8142007688995723,
|
|
"learning_rate": 1.0892448512585814e-05,
|
|
"loss": 0.5222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5571641325950623,
|
|
"step": 120,
|
|
"valid_targets_mean": 2654.3,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 0.20064205457463885,
|
|
"grad_norm": 0.6461895607541819,
|
|
"learning_rate": 1.1350114416475973e-05,
|
|
"loss": 0.5338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5178383588790894,
|
|
"step": 125,
|
|
"valid_targets_mean": 4079.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 0.2086677367576244,
|
|
"grad_norm": 0.6938331589049856,
|
|
"learning_rate": 1.1807780320366134e-05,
|
|
"loss": 0.508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4668303430080414,
|
|
"step": 130,
|
|
"valid_targets_mean": 3048.4,
|
|
"valid_targets_min": 1114
|
|
},
|
|
{
|
|
"epoch": 0.21669341894060995,
|
|
"grad_norm": 0.7552053167355036,
|
|
"learning_rate": 1.2265446224256295e-05,
|
|
"loss": 0.5199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5271793603897095,
|
|
"step": 135,
|
|
"valid_targets_mean": 2760.9,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 0.2247191011235955,
|
|
"grad_norm": 0.6816108156512145,
|
|
"learning_rate": 1.2723112128146454e-05,
|
|
"loss": 0.5226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5017613172531128,
|
|
"step": 140,
|
|
"valid_targets_mean": 3703.9,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 0.23274478330658105,
|
|
"grad_norm": 0.914483067821005,
|
|
"learning_rate": 1.3180778032036615e-05,
|
|
"loss": 0.5099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5468560457229614,
|
|
"step": 145,
|
|
"valid_targets_mean": 2995.1,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 0.24077046548956663,
|
|
"grad_norm": 0.7243905540498022,
|
|
"learning_rate": 1.3638443935926776e-05,
|
|
"loss": 0.5019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5504482984542847,
|
|
"step": 150,
|
|
"valid_targets_mean": 3361.8,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 0.24879614767255218,
|
|
"grad_norm": 0.7066882159823471,
|
|
"learning_rate": 1.4096109839816933e-05,
|
|
"loss": 0.4977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48558729887008667,
|
|
"step": 155,
|
|
"valid_targets_mean": 3944.9,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 0.2568218298555377,
|
|
"grad_norm": 0.5652474810745806,
|
|
"learning_rate": 1.4553775743707096e-05,
|
|
"loss": 0.5016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49663928151130676,
|
|
"step": 160,
|
|
"valid_targets_mean": 4703.9,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 0.26484751203852325,
|
|
"grad_norm": 1.175945274088405,
|
|
"learning_rate": 1.5011441647597256e-05,
|
|
"loss": 0.4977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5345251560211182,
|
|
"step": 165,
|
|
"valid_targets_mean": 3563.1,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 0.27287319422150885,
|
|
"grad_norm": 0.7928538635224509,
|
|
"learning_rate": 1.5469107551487414e-05,
|
|
"loss": 0.4802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5115028023719788,
|
|
"step": 170,
|
|
"valid_targets_mean": 3172.6,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 0.2808988764044944,
|
|
"grad_norm": 0.7596088988097449,
|
|
"learning_rate": 1.5926773455377575e-05,
|
|
"loss": 0.4961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.438111275434494,
|
|
"step": 175,
|
|
"valid_targets_mean": 2769.4,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 0.28892455858747995,
|
|
"grad_norm": 0.723952274827358,
|
|
"learning_rate": 1.6384439359267736e-05,
|
|
"loss": 0.4837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49589428305625916,
|
|
"step": 180,
|
|
"valid_targets_mean": 3808.0,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 0.2969502407704655,
|
|
"grad_norm": 0.597185056759805,
|
|
"learning_rate": 1.6842105263157896e-05,
|
|
"loss": 0.4937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43518128991127014,
|
|
"step": 185,
|
|
"valid_targets_mean": 4507.8,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 0.30497592295345105,
|
|
"grad_norm": 0.8385841184024647,
|
|
"learning_rate": 1.7299771167048057e-05,
|
|
"loss": 0.4766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49923720955848694,
|
|
"step": 190,
|
|
"valid_targets_mean": 3690.2,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 0.3130016051364366,
|
|
"grad_norm": 0.7398609559084139,
|
|
"learning_rate": 1.7757437070938218e-05,
|
|
"loss": 0.5099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5082335472106934,
|
|
"step": 195,
|
|
"valid_targets_mean": 3146.3,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 0.32102728731942215,
|
|
"grad_norm": 0.8067979712264914,
|
|
"learning_rate": 1.8215102974828376e-05,
|
|
"loss": 0.4807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5590534210205078,
|
|
"step": 200,
|
|
"valid_targets_mean": 2913.6,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 0.3290529695024077,
|
|
"grad_norm": 0.7487603150785928,
|
|
"learning_rate": 1.8672768878718537e-05,
|
|
"loss": 0.4757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4934680759906769,
|
|
"step": 205,
|
|
"valid_targets_mean": 3174.4,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 0.33707865168539325,
|
|
"grad_norm": 0.706511050697889,
|
|
"learning_rate": 1.9130434782608697e-05,
|
|
"loss": 0.4902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5012920498847961,
|
|
"step": 210,
|
|
"valid_targets_mean": 3630.4,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 0.3451043338683788,
|
|
"grad_norm": 0.749323210724116,
|
|
"learning_rate": 1.9588100686498858e-05,
|
|
"loss": 0.4891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5082265734672546,
|
|
"step": 215,
|
|
"valid_targets_mean": 3609.9,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 0.35313001605136435,
|
|
"grad_norm": 0.808613713174193,
|
|
"learning_rate": 2.004576659038902e-05,
|
|
"loss": 0.4723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44331106543540955,
|
|
"step": 220,
|
|
"valid_targets_mean": 2542.7,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 0.3611556982343499,
|
|
"grad_norm": 0.6137009138094073,
|
|
"learning_rate": 2.050343249427918e-05,
|
|
"loss": 0.4692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4351874887943268,
|
|
"step": 225,
|
|
"valid_targets_mean": 4124.8,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 0.36918138041733545,
|
|
"grad_norm": 0.6676385654965055,
|
|
"learning_rate": 2.0961098398169337e-05,
|
|
"loss": 0.4613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4492141902446747,
|
|
"step": 230,
|
|
"valid_targets_mean": 3956.2,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 0.37720706260032105,
|
|
"grad_norm": 0.6867172016696662,
|
|
"learning_rate": 2.14187643020595e-05,
|
|
"loss": 0.482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4772060513496399,
|
|
"step": 235,
|
|
"valid_targets_mean": 3597.8,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 0.3852327447833066,
|
|
"grad_norm": 0.7098058860927564,
|
|
"learning_rate": 2.187643020594966e-05,
|
|
"loss": 0.476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48692435026168823,
|
|
"step": 240,
|
|
"valid_targets_mean": 3771.8,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 0.39325842696629215,
|
|
"grad_norm": 0.744538014479184,
|
|
"learning_rate": 2.2334096109839817e-05,
|
|
"loss": 0.4732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5315950512886047,
|
|
"step": 245,
|
|
"valid_targets_mean": 3194.8,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 0.4012841091492777,
|
|
"grad_norm": 0.7658622686727643,
|
|
"learning_rate": 2.279176201372998e-05,
|
|
"loss": 0.4697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4827132225036621,
|
|
"step": 250,
|
|
"valid_targets_mean": 3035.4,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 0.40930979133226325,
|
|
"grad_norm": 0.9674661347598291,
|
|
"learning_rate": 2.3249427917620138e-05,
|
|
"loss": 0.4572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.422331839799881,
|
|
"step": 255,
|
|
"valid_targets_mean": 4112.4,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 0.4173354735152488,
|
|
"grad_norm": 0.7090473340567546,
|
|
"learning_rate": 2.37070938215103e-05,
|
|
"loss": 0.4409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3879775106906891,
|
|
"step": 260,
|
|
"valid_targets_mean": 3270.5,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 0.42536115569823435,
|
|
"grad_norm": 0.7271558492967691,
|
|
"learning_rate": 2.4164759725400463e-05,
|
|
"loss": 0.454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4516676366329193,
|
|
"step": 265,
|
|
"valid_targets_mean": 3534.4,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 0.4333868378812199,
|
|
"grad_norm": 0.8037053980770589,
|
|
"learning_rate": 2.462242562929062e-05,
|
|
"loss": 0.4486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4696701169013977,
|
|
"step": 270,
|
|
"valid_targets_mean": 2747.1,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 0.44141252006420545,
|
|
"grad_norm": 0.6353295011345187,
|
|
"learning_rate": 2.508009153318078e-05,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4106517434120178,
|
|
"step": 275,
|
|
"valid_targets_mean": 4268.9,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 0.449438202247191,
|
|
"grad_norm": 0.7894056055848692,
|
|
"learning_rate": 2.5537757437070943e-05,
|
|
"loss": 0.4821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5187530517578125,
|
|
"step": 280,
|
|
"valid_targets_mean": 4229.1,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 0.45746388443017655,
|
|
"grad_norm": 0.6259068011820181,
|
|
"learning_rate": 2.59954233409611e-05,
|
|
"loss": 0.4602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43051719665527344,
|
|
"step": 285,
|
|
"valid_targets_mean": 4432.5,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 0.4654895666131621,
|
|
"grad_norm": 0.6864170150264384,
|
|
"learning_rate": 2.645308924485126e-05,
|
|
"loss": 0.4559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47451895475387573,
|
|
"step": 290,
|
|
"valid_targets_mean": 3986.1,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 0.47351524879614765,
|
|
"grad_norm": 0.6711775370002279,
|
|
"learning_rate": 2.6910755148741422e-05,
|
|
"loss": 0.4344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4780963957309723,
|
|
"step": 295,
|
|
"valid_targets_mean": 3856.8,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 0.48154093097913325,
|
|
"grad_norm": 0.8044909105295546,
|
|
"learning_rate": 2.7368421052631583e-05,
|
|
"loss": 0.4806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45625534653663635,
|
|
"step": 300,
|
|
"valid_targets_mean": 2922.5,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 0.4895666131621188,
|
|
"grad_norm": 0.7328912794572924,
|
|
"learning_rate": 2.782608695652174e-05,
|
|
"loss": 0.4589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42535263299942017,
|
|
"step": 305,
|
|
"valid_targets_mean": 3365.4,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 0.49759229534510435,
|
|
"grad_norm": 0.7855153684351185,
|
|
"learning_rate": 2.8283752860411904e-05,
|
|
"loss": 0.4883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5294452905654907,
|
|
"step": 310,
|
|
"valid_targets_mean": 3308.4,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 0.5056179775280899,
|
|
"grad_norm": 0.8612532838709436,
|
|
"learning_rate": 2.8741418764302062e-05,
|
|
"loss": 0.4501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4717407822608948,
|
|
"step": 315,
|
|
"valid_targets_mean": 3046.2,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 0.5136436597110754,
|
|
"grad_norm": 0.9690264714389722,
|
|
"learning_rate": 2.9199084668192223e-05,
|
|
"loss": 0.4441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4594542980194092,
|
|
"step": 320,
|
|
"valid_targets_mean": 3022.1,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 0.521669341894061,
|
|
"grad_norm": 0.7301199940684472,
|
|
"learning_rate": 2.9656750572082384e-05,
|
|
"loss": 0.4401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49303609132766724,
|
|
"step": 325,
|
|
"valid_targets_mean": 3294.7,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 0.5296950240770465,
|
|
"grad_norm": 0.757792019387245,
|
|
"learning_rate": 3.0114416475972544e-05,
|
|
"loss": 0.4275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42807677388191223,
|
|
"step": 330,
|
|
"valid_targets_mean": 2722.2,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 0.5377207062600321,
|
|
"grad_norm": 0.6933196710760249,
|
|
"learning_rate": 3.05720823798627e-05,
|
|
"loss": 0.4587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41805028915405273,
|
|
"step": 335,
|
|
"valid_targets_mean": 3436.4,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 0.5457463884430177,
|
|
"grad_norm": 0.6612816601521995,
|
|
"learning_rate": 3.102974828375286e-05,
|
|
"loss": 0.4253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3453900218009949,
|
|
"step": 340,
|
|
"valid_targets_mean": 5510.3,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 0.5537720706260032,
|
|
"grad_norm": 0.6238075328761105,
|
|
"learning_rate": 3.1487414187643024e-05,
|
|
"loss": 0.4515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42972415685653687,
|
|
"step": 345,
|
|
"valid_targets_mean": 4279.7,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 0.5617977528089888,
|
|
"grad_norm": 0.6219358732243676,
|
|
"learning_rate": 3.1945080091533184e-05,
|
|
"loss": 0.4447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4318506121635437,
|
|
"step": 350,
|
|
"valid_targets_mean": 4692.9,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 0.5698234349919743,
|
|
"grad_norm": 0.7574793329728934,
|
|
"learning_rate": 3.240274599542334e-05,
|
|
"loss": 0.4659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44273805618286133,
|
|
"step": 355,
|
|
"valid_targets_mean": 3120.9,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 0.5778491171749599,
|
|
"grad_norm": 0.6482617816439753,
|
|
"learning_rate": 3.2860411899313506e-05,
|
|
"loss": 0.4544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42581114172935486,
|
|
"step": 360,
|
|
"valid_targets_mean": 4447.8,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 0.5858747993579454,
|
|
"grad_norm": 0.686945381260347,
|
|
"learning_rate": 3.331807780320366e-05,
|
|
"loss": 0.434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4290264844894409,
|
|
"step": 365,
|
|
"valid_targets_mean": 3640.3,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 0.593900481540931,
|
|
"grad_norm": 0.6575481309281616,
|
|
"learning_rate": 3.377574370709382e-05,
|
|
"loss": 0.4787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46496686339378357,
|
|
"step": 370,
|
|
"valid_targets_mean": 3550.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 0.6019261637239165,
|
|
"grad_norm": 0.6914314193349927,
|
|
"learning_rate": 3.423340961098399e-05,
|
|
"loss": 0.4507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44865018129348755,
|
|
"step": 375,
|
|
"valid_targets_mean": 3414.4,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 0.6099518459069021,
|
|
"grad_norm": 0.7046696304014137,
|
|
"learning_rate": 3.469107551487414e-05,
|
|
"loss": 0.4311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40931010246276855,
|
|
"step": 380,
|
|
"valid_targets_mean": 4418.1,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 0.6179775280898876,
|
|
"grad_norm": 0.6254996759997042,
|
|
"learning_rate": 3.5148741418764304e-05,
|
|
"loss": 0.4295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40872251987457275,
|
|
"step": 385,
|
|
"valid_targets_mean": 4027.5,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 0.6260032102728732,
|
|
"grad_norm": 0.611577065098873,
|
|
"learning_rate": 3.5606407322654464e-05,
|
|
"loss": 0.4342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40076303482055664,
|
|
"step": 390,
|
|
"valid_targets_mean": 4520.4,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 0.6340288924558587,
|
|
"grad_norm": 0.7900132785433391,
|
|
"learning_rate": 3.6064073226544625e-05,
|
|
"loss": 0.4523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5017191171646118,
|
|
"step": 395,
|
|
"valid_targets_mean": 3023.3,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 0.6420545746388443,
|
|
"grad_norm": 0.6779419636968553,
|
|
"learning_rate": 3.6521739130434786e-05,
|
|
"loss": 0.4373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4543653130531311,
|
|
"step": 400,
|
|
"valid_targets_mean": 4106.4,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 0.6500802568218299,
|
|
"grad_norm": 0.6254447322254072,
|
|
"learning_rate": 3.697940503432495e-05,
|
|
"loss": 0.4278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.405331015586853,
|
|
"step": 405,
|
|
"valid_targets_mean": 4188.0,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 0.6581059390048154,
|
|
"grad_norm": 0.6303577356944859,
|
|
"learning_rate": 3.743707093821511e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4267370402812958,
|
|
"step": 410,
|
|
"valid_targets_mean": 4161.0,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 0.666131621187801,
|
|
"grad_norm": 0.7030120190506385,
|
|
"learning_rate": 3.789473684210526e-05,
|
|
"loss": 0.4498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42860108613967896,
|
|
"step": 415,
|
|
"valid_targets_mean": 3955.1,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 0.6741573033707865,
|
|
"grad_norm": 0.6581686038569584,
|
|
"learning_rate": 3.835240274599543e-05,
|
|
"loss": 0.4268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4512285590171814,
|
|
"step": 420,
|
|
"valid_targets_mean": 3924.8,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 0.6821829855537721,
|
|
"grad_norm": 0.756628865367184,
|
|
"learning_rate": 3.8810068649885584e-05,
|
|
"loss": 0.4695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4795045852661133,
|
|
"step": 425,
|
|
"valid_targets_mean": 4387.9,
|
|
"valid_targets_min": 1816
|
|
},
|
|
{
|
|
"epoch": 0.6902086677367576,
|
|
"grad_norm": 0.7662636504641192,
|
|
"learning_rate": 3.9267734553775745e-05,
|
|
"loss": 0.4167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4381055533885956,
|
|
"step": 430,
|
|
"valid_targets_mean": 2924.4,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 0.6982343499197432,
|
|
"grad_norm": 1.033320494285786,
|
|
"learning_rate": 3.9725400457665905e-05,
|
|
"loss": 0.4209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3927018642425537,
|
|
"step": 435,
|
|
"valid_targets_mean": 3549.4,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 0.7062600321027287,
|
|
"grad_norm": 0.7062961305913518,
|
|
"learning_rate": 3.99999743609667e-05,
|
|
"loss": 0.4329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4608943462371826,
|
|
"step": 440,
|
|
"valid_targets_mean": 3368.2,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 0.6873006567827332,
|
|
"learning_rate": 3.999968592259695e-05,
|
|
"loss": 0.4356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4301835894584656,
|
|
"step": 445,
|
|
"valid_targets_mean": 4241.4,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 0.7223113964686998,
|
|
"grad_norm": 0.6923914347213874,
|
|
"learning_rate": 3.9999077001703266e-05,
|
|
"loss": 0.4177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4437353014945984,
|
|
"step": 450,
|
|
"valid_targets_mean": 3050.1,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 0.7303370786516854,
|
|
"grad_norm": 0.7665026933063127,
|
|
"learning_rate": 3.999814760804324e-05,
|
|
"loss": 0.4138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46020281314849854,
|
|
"step": 455,
|
|
"valid_targets_mean": 2875.6,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 0.7383627608346709,
|
|
"grad_norm": 0.8187798341878944,
|
|
"learning_rate": 3.9996897756509806e-05,
|
|
"loss": 0.4416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47555121779441833,
|
|
"step": 460,
|
|
"valid_targets_mean": 3060.4,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 0.7463884430176565,
|
|
"grad_norm": 0.720184082971544,
|
|
"learning_rate": 3.9995327467131074e-05,
|
|
"loss": 0.3951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4051716923713684,
|
|
"step": 465,
|
|
"valid_targets_mean": 4154.4,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 0.7544141252006421,
|
|
"grad_norm": 0.6654721189878227,
|
|
"learning_rate": 3.9993436765069954e-05,
|
|
"loss": 0.424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4114856421947479,
|
|
"step": 470,
|
|
"valid_targets_mean": 3218.6,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 0.7624398073836276,
|
|
"grad_norm": 0.5767556280767502,
|
|
"learning_rate": 3.999122568062376e-05,
|
|
"loss": 0.4253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4691894054412842,
|
|
"step": 475,
|
|
"valid_targets_mean": 4412.3,
|
|
"valid_targets_min": 1132
|
|
},
|
|
{
|
|
"epoch": 0.7704654895666132,
|
|
"grad_norm": 0.652279691653588,
|
|
"learning_rate": 3.9988694249223747e-05,
|
|
"loss": 0.4129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40957486629486084,
|
|
"step": 480,
|
|
"valid_targets_mean": 3325.8,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 0.7784911717495987,
|
|
"grad_norm": 0.6860628182858192,
|
|
"learning_rate": 3.9985842511434544e-05,
|
|
"loss": 0.4133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4192296266555786,
|
|
"step": 485,
|
|
"valid_targets_mean": 3688.7,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 0.7865168539325843,
|
|
"grad_norm": 0.6941284023523708,
|
|
"learning_rate": 3.9982670512953446e-05,
|
|
"loss": 0.4065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4399743676185608,
|
|
"step": 490,
|
|
"valid_targets_mean": 3702.2,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 0.7945425361155698,
|
|
"grad_norm": 0.7522864996291869,
|
|
"learning_rate": 3.9979178304609777e-05,
|
|
"loss": 0.4169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42210859060287476,
|
|
"step": 495,
|
|
"valid_targets_mean": 3129.4,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 0.8025682182985554,
|
|
"grad_norm": 0.7374816056862248,
|
|
"learning_rate": 3.9975365942364e-05,
|
|
"loss": 0.4151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43236619234085083,
|
|
"step": 500,
|
|
"valid_targets_mean": 2733.7,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 0.8105939004815409,
|
|
"grad_norm": 0.6238668478340251,
|
|
"learning_rate": 3.997123348730685e-05,
|
|
"loss": 0.425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41368725895881653,
|
|
"step": 505,
|
|
"valid_targets_mean": 4156.4,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 0.8186195826645265,
|
|
"grad_norm": 0.6944175220320967,
|
|
"learning_rate": 3.9966781005658336e-05,
|
|
"loss": 0.4239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45034846663475037,
|
|
"step": 510,
|
|
"valid_targets_mean": 3712.1,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 0.826645264847512,
|
|
"grad_norm": 0.6480486779522427,
|
|
"learning_rate": 3.996200856876671e-05,
|
|
"loss": 0.4309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4665874242782593,
|
|
"step": 515,
|
|
"valid_targets_mean": 3517.8,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 0.8346709470304976,
|
|
"grad_norm": 0.6456407397319328,
|
|
"learning_rate": 3.9956916253107316e-05,
|
|
"loss": 0.4228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39833593368530273,
|
|
"step": 520,
|
|
"valid_targets_mean": 3575.0,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 0.8426966292134831,
|
|
"grad_norm": 0.660111992120521,
|
|
"learning_rate": 3.995150414028134e-05,
|
|
"loss": 0.421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3663018047809601,
|
|
"step": 525,
|
|
"valid_targets_mean": 3260.3,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 0.8507223113964687,
|
|
"grad_norm": 0.717324959452497,
|
|
"learning_rate": 3.994577231701451e-05,
|
|
"loss": 0.4136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4186961054801941,
|
|
"step": 530,
|
|
"valid_targets_mean": 3432.5,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 0.8587479935794543,
|
|
"grad_norm": 0.6764646136408184,
|
|
"learning_rate": 3.993972087515574e-05,
|
|
"loss": 0.4179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47324612736701965,
|
|
"step": 535,
|
|
"valid_targets_mean": 4180.7,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 0.8667736757624398,
|
|
"grad_norm": 0.618031084473096,
|
|
"learning_rate": 3.9933349911675615e-05,
|
|
"loss": 0.4082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40126559138298035,
|
|
"step": 540,
|
|
"valid_targets_mean": 3449.5,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 0.8747993579454254,
|
|
"grad_norm": 0.7265701616636216,
|
|
"learning_rate": 3.9926659528664866e-05,
|
|
"loss": 0.3974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40569987893104553,
|
|
"step": 545,
|
|
"valid_targets_mean": 3038.2,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 0.8828250401284109,
|
|
"grad_norm": 0.7607455350659144,
|
|
"learning_rate": 3.9919649833332715e-05,
|
|
"loss": 0.4404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4864490330219269,
|
|
"step": 550,
|
|
"valid_targets_mean": 2973.1,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 0.8908507223113965,
|
|
"grad_norm": 0.6725826274622788,
|
|
"learning_rate": 3.991232093800517e-05,
|
|
"loss": 0.4146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4315813481807709,
|
|
"step": 555,
|
|
"valid_targets_mean": 3724.4,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 0.898876404494382,
|
|
"grad_norm": 0.6456332131896761,
|
|
"learning_rate": 3.990467296012322e-05,
|
|
"loss": 0.4317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.481413871049881,
|
|
"step": 560,
|
|
"valid_targets_mean": 3815.8,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 0.9069020866773676,
|
|
"grad_norm": 0.7178965888939988,
|
|
"learning_rate": 3.989670602224094e-05,
|
|
"loss": 0.4153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41733279824256897,
|
|
"step": 565,
|
|
"valid_targets_mean": 3019.4,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 0.9149277688603531,
|
|
"grad_norm": 0.7019745183963131,
|
|
"learning_rate": 3.988842025202358e-05,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4073405861854553,
|
|
"step": 570,
|
|
"valid_targets_mean": 3041.6,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 0.9229534510433387,
|
|
"grad_norm": 0.5955171823366848,
|
|
"learning_rate": 3.987981578224542e-05,
|
|
"loss": 0.4053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4253126382827759,
|
|
"step": 575,
|
|
"valid_targets_mean": 4519.7,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 0.9309791332263242,
|
|
"grad_norm": 0.5976910706571698,
|
|
"learning_rate": 3.987089275078776e-05,
|
|
"loss": 0.4151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41478097438812256,
|
|
"step": 580,
|
|
"valid_targets_mean": 4310.6,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 0.9390048154093098,
|
|
"grad_norm": 0.5792907986741818,
|
|
"learning_rate": 3.986165130063662e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4044119715690613,
|
|
"step": 585,
|
|
"valid_targets_mean": 3892.1,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.9470304975922953,
|
|
"grad_norm": 0.6268050352364904,
|
|
"learning_rate": 3.985209157988048e-05,
|
|
"loss": 0.4089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40783968567848206,
|
|
"step": 590,
|
|
"valid_targets_mean": 4071.4,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 0.9550561797752809,
|
|
"grad_norm": 0.7740022439809082,
|
|
"learning_rate": 3.984221374170793e-05,
|
|
"loss": 0.4101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48171159625053406,
|
|
"step": 595,
|
|
"valid_targets_mean": 2603.9,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 0.9630818619582665,
|
|
"grad_norm": 0.6845455553208946,
|
|
"learning_rate": 3.983201794440517e-05,
|
|
"loss": 0.4148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38116294145584106,
|
|
"step": 600,
|
|
"valid_targets_mean": 3388.9,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 0.971107544141252,
|
|
"grad_norm": 0.655403732846823,
|
|
"learning_rate": 3.982150435135353e-05,
|
|
"loss": 0.4193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38999760150909424,
|
|
"step": 605,
|
|
"valid_targets_mean": 3224.8,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 0.9791332263242376,
|
|
"grad_norm": 0.6012237639931528,
|
|
"learning_rate": 3.981067313102677e-05,
|
|
"loss": 0.3884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3408830165863037,
|
|
"step": 610,
|
|
"valid_targets_mean": 3372.9,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 0.9871589085072231,
|
|
"grad_norm": 0.7020674366811395,
|
|
"learning_rate": 3.979952445698847e-05,
|
|
"loss": 0.4212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42504286766052246,
|
|
"step": 615,
|
|
"valid_targets_mean": 3521.2,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 0.9951845906902087,
|
|
"grad_norm": 0.6656972370307107,
|
|
"learning_rate": 3.978805850788919e-05,
|
|
"loss": 0.4059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.474631667137146,
|
|
"step": 620,
|
|
"valid_targets_mean": 3657.8,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 1.0032102728731942,
|
|
"grad_norm": 0.9404864015806014,
|
|
"learning_rate": 3.9776275467463645e-05,
|
|
"loss": 0.4096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40525421500205994,
|
|
"step": 625,
|
|
"valid_targets_mean": 3189.4,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 1.0112359550561798,
|
|
"grad_norm": 0.6054977355925899,
|
|
"learning_rate": 3.9764175524527713e-05,
|
|
"loss": 0.3919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3619139790534973,
|
|
"step": 630,
|
|
"valid_targets_mean": 3977.3,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 1.0192616372391654,
|
|
"grad_norm": 0.7411010254313193,
|
|
"learning_rate": 3.975175887297545e-05,
|
|
"loss": 0.3833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3640092611312866,
|
|
"step": 635,
|
|
"valid_targets_mean": 3272.1,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.0272873194221508,
|
|
"grad_norm": 0.6581125827094578,
|
|
"learning_rate": 3.9739025711775984e-05,
|
|
"loss": 0.3946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39563223719596863,
|
|
"step": 640,
|
|
"valid_targets_mean": 3982.9,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 1.0353130016051364,
|
|
"grad_norm": 0.8480764188307504,
|
|
"learning_rate": 3.972597624497029e-05,
|
|
"loss": 0.4176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4385409355163574,
|
|
"step": 645,
|
|
"valid_targets_mean": 2578.4,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 1.043338683788122,
|
|
"grad_norm": 0.6362167216158906,
|
|
"learning_rate": 3.971261068166796e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3679080009460449,
|
|
"step": 650,
|
|
"valid_targets_mean": 3428.2,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 1.0513643659711076,
|
|
"grad_norm": 0.6446648443520868,
|
|
"learning_rate": 3.969892923604383e-05,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33461201190948486,
|
|
"step": 655,
|
|
"valid_targets_mean": 4312.4,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 1.0593900481540932,
|
|
"grad_norm": 0.539023510451271,
|
|
"learning_rate": 3.9684932127334555e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35893625020980835,
|
|
"step": 660,
|
|
"valid_targets_mean": 4751.7,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 1.0674157303370786,
|
|
"grad_norm": 0.6531106754660415,
|
|
"learning_rate": 3.967061957983509e-05,
|
|
"loss": 0.3728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4052903652191162,
|
|
"step": 665,
|
|
"valid_targets_mean": 4544.1,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 1.0754414125200642,
|
|
"grad_norm": 0.6971890263899199,
|
|
"learning_rate": 3.965599182289511e-05,
|
|
"loss": 0.3804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40138018131256104,
|
|
"step": 670,
|
|
"valid_targets_mean": 3296.6,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 1.0834670947030498,
|
|
"grad_norm": 0.7746504316692098,
|
|
"learning_rate": 3.964104909091531e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36633893847465515,
|
|
"step": 675,
|
|
"valid_targets_mean": 4136.9,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 1.0914927768860354,
|
|
"grad_norm": 0.6269594091342506,
|
|
"learning_rate": 3.962579162334368e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3604351282119751,
|
|
"step": 680,
|
|
"valid_targets_mean": 3954.2,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 1.0995184590690208,
|
|
"grad_norm": 0.7570854561957707,
|
|
"learning_rate": 3.961021966467165e-05,
|
|
"loss": 0.417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42220914363861084,
|
|
"step": 685,
|
|
"valid_targets_mean": 3369.1,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 1.1075441412520064,
|
|
"grad_norm": 0.6945627630666767,
|
|
"learning_rate": 3.9594333464430155e-05,
|
|
"loss": 0.4047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40889281034469604,
|
|
"step": 690,
|
|
"valid_targets_mean": 3068.3,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 1.115569823434992,
|
|
"grad_norm": 0.6150710439135962,
|
|
"learning_rate": 3.957813327718568e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3410935401916504,
|
|
"step": 695,
|
|
"valid_targets_mean": 3559.2,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 1.1235955056179776,
|
|
"grad_norm": 0.6006584543983243,
|
|
"learning_rate": 3.956161936253615e-05,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38565054535865784,
|
|
"step": 700,
|
|
"valid_targets_mean": 4500.0,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 1.131621187800963,
|
|
"grad_norm": 0.6444523877124122,
|
|
"learning_rate": 3.954479198510676e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39199990034103394,
|
|
"step": 705,
|
|
"valid_targets_mean": 4151.9,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 1.1396468699839486,
|
|
"grad_norm": 0.6065762925559409,
|
|
"learning_rate": 3.952765141454578e-05,
|
|
"loss": 0.4059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3749706745147705,
|
|
"step": 710,
|
|
"valid_targets_mean": 3839.2,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 1.1476725521669342,
|
|
"grad_norm": 0.670938806995955,
|
|
"learning_rate": 3.951019792552018e-05,
|
|
"loss": 0.3792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41016289591789246,
|
|
"step": 715,
|
|
"valid_targets_mean": 3589.6,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 1.1556982343499198,
|
|
"grad_norm": 0.6691294584959062,
|
|
"learning_rate": 3.949243179771126e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.362687885761261,
|
|
"step": 720,
|
|
"valid_targets_mean": 3292.9,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 1.1637239165329052,
|
|
"grad_norm": 0.638743797023757,
|
|
"learning_rate": 3.947435331581017e-05,
|
|
"loss": 0.3862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3965623676776886,
|
|
"step": 725,
|
|
"valid_targets_mean": 3694.2,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 1.1717495987158908,
|
|
"grad_norm": 0.7474825354884288,
|
|
"learning_rate": 3.945596276951333e-05,
|
|
"loss": 0.3706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36002659797668457,
|
|
"step": 730,
|
|
"valid_targets_mean": 4809.1,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 1.1797752808988764,
|
|
"grad_norm": 0.6631182902394778,
|
|
"learning_rate": 3.943726045351782e-05,
|
|
"loss": 0.3901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35704171657562256,
|
|
"step": 735,
|
|
"valid_targets_mean": 3717.0,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 1.187800963081862,
|
|
"grad_norm": 0.6400443929847337,
|
|
"learning_rate": 3.941824666751659e-05,
|
|
"loss": 0.3947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35999542474746704,
|
|
"step": 740,
|
|
"valid_targets_mean": 3954.2,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 1.1958266452648476,
|
|
"grad_norm": 0.5733447039598942,
|
|
"learning_rate": 3.939892171619375e-05,
|
|
"loss": 0.3828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36996322870254517,
|
|
"step": 745,
|
|
"valid_targets_mean": 4500.2,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 1.203852327447833,
|
|
"grad_norm": 0.6773664645859516,
|
|
"learning_rate": 3.9379285909219616e-05,
|
|
"loss": 0.3893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41598594188690186,
|
|
"step": 750,
|
|
"valid_targets_mean": 4372.4,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.2118780096308186,
|
|
"grad_norm": 0.6709798684182527,
|
|
"learning_rate": 3.935933956124578e-05,
|
|
"loss": 0.3955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42714834213256836,
|
|
"step": 755,
|
|
"valid_targets_mean": 3476.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 1.2199036918138042,
|
|
"grad_norm": 0.6293384896194132,
|
|
"learning_rate": 3.933908299190006e-05,
|
|
"loss": 0.3747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36231136322021484,
|
|
"step": 760,
|
|
"valid_targets_mean": 3840.1,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 1.2279293739967898,
|
|
"grad_norm": 0.7122264443238809,
|
|
"learning_rate": 3.931851652578137e-05,
|
|
"loss": 0.3944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3876253068447113,
|
|
"step": 765,
|
|
"valid_targets_mean": 3210.1,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 1.2359550561797752,
|
|
"grad_norm": 0.60205507204406,
|
|
"learning_rate": 3.929764049245454e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35130780935287476,
|
|
"step": 770,
|
|
"valid_targets_mean": 4103.5,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 1.2439807383627608,
|
|
"grad_norm": 0.7632997488122586,
|
|
"learning_rate": 3.9276455226445015e-05,
|
|
"loss": 0.3817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3803650438785553,
|
|
"step": 775,
|
|
"valid_targets_mean": 2390.0,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 1.2520064205457464,
|
|
"grad_norm": 0.6854331258948656,
|
|
"learning_rate": 3.92549610672335e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42676568031311035,
|
|
"step": 780,
|
|
"valid_targets_mean": 3807.8,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 1.260032102728732,
|
|
"grad_norm": 0.77323842323909,
|
|
"learning_rate": 3.9233158359250526e-05,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3908432722091675,
|
|
"step": 785,
|
|
"valid_targets_mean": 3338.9,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 1.2680577849117176,
|
|
"grad_norm": 0.6346377970418353,
|
|
"learning_rate": 3.9211047451870925e-05,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43807274103164673,
|
|
"step": 790,
|
|
"valid_targets_mean": 4169.9,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 1.276083467094703,
|
|
"grad_norm": 0.6907459484344647,
|
|
"learning_rate": 3.9188628699408226e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3881453275680542,
|
|
"step": 795,
|
|
"valid_targets_mean": 3487.3,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 1.2841091492776886,
|
|
"grad_norm": 0.676241979356563,
|
|
"learning_rate": 3.916590246110899e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4476223587989807,
|
|
"step": 800,
|
|
"valid_targets_mean": 4228.2,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 1.2921348314606742,
|
|
"grad_norm": 0.5876827663825741,
|
|
"learning_rate": 3.914286910114705e-05,
|
|
"loss": 0.3845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34339797496795654,
|
|
"step": 805,
|
|
"valid_targets_mean": 3932.6,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 1.3001605136436596,
|
|
"grad_norm": 0.6089101023117629,
|
|
"learning_rate": 3.9119528988617676e-05,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.414438396692276,
|
|
"step": 810,
|
|
"valid_targets_mean": 4279.9,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 1.3081861958266452,
|
|
"grad_norm": 0.6470745578400628,
|
|
"learning_rate": 3.909588249753164e-05,
|
|
"loss": 0.3964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3459415137767792,
|
|
"step": 815,
|
|
"valid_targets_mean": 3277.1,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 1.3162118780096308,
|
|
"grad_norm": 0.6650977968815426,
|
|
"learning_rate": 3.907193000680924e-05,
|
|
"loss": 0.3905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36978620290756226,
|
|
"step": 820,
|
|
"valid_targets_mean": 3431.5,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 1.3242375601926164,
|
|
"grad_norm": 0.7298254669566454,
|
|
"learning_rate": 3.9047671900274256e-05,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3846935033798218,
|
|
"step": 825,
|
|
"valid_targets_mean": 2974.6,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 1.332263242375602,
|
|
"grad_norm": 0.6844389816008181,
|
|
"learning_rate": 3.9023108566647746e-05,
|
|
"loss": 0.3685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4156171679496765,
|
|
"step": 830,
|
|
"valid_targets_mean": 2907.9,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 1.3402889245585876,
|
|
"grad_norm": 0.6594790747206619,
|
|
"learning_rate": 3.899824039954185e-05,
|
|
"loss": 0.3715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3709554374217987,
|
|
"step": 835,
|
|
"valid_targets_mean": 3148.1,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 1.348314606741573,
|
|
"grad_norm": 0.60539814447737,
|
|
"learning_rate": 3.897306779745347e-05,
|
|
"loss": 0.3497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3496387302875519,
|
|
"step": 840,
|
|
"valid_targets_mean": 3327.6,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 1.3563402889245586,
|
|
"grad_norm": 0.7786767710588808,
|
|
"learning_rate": 3.89475911637579e-05,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.414520263671875,
|
|
"step": 845,
|
|
"valid_targets_mean": 4100.6,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 1.3643659711075442,
|
|
"grad_norm": 0.8292368991216601,
|
|
"learning_rate": 3.892181090670233e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45649927854537964,
|
|
"step": 850,
|
|
"valid_targets_mean": 2302.5,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 1.3723916532905296,
|
|
"grad_norm": 0.6214971751880546,
|
|
"learning_rate": 3.8895727439399366e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3694024384021759,
|
|
"step": 855,
|
|
"valid_targets_mean": 4056.2,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 1.3804173354735152,
|
|
"grad_norm": 0.5764445893079742,
|
|
"learning_rate": 3.8869341179820314e-05,
|
|
"loss": 0.3727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40497541427612305,
|
|
"step": 860,
|
|
"valid_targets_mean": 5202.4,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 1.3884430176565008,
|
|
"grad_norm": 0.6195152303135256,
|
|
"learning_rate": 3.884265255078859e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38994577527046204,
|
|
"step": 865,
|
|
"valid_targets_mean": 3859.1,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 1.3964686998394864,
|
|
"grad_norm": 0.5529784980062112,
|
|
"learning_rate": 3.881566197997286e-05,
|
|
"loss": 0.3632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3352459669113159,
|
|
"step": 870,
|
|
"valid_targets_mean": 3985.4,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 1.404494382022472,
|
|
"grad_norm": 0.7048937162527715,
|
|
"learning_rate": 3.8788369899880225e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3993859589099884,
|
|
"step": 875,
|
|
"valid_targets_mean": 3015.9,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 1.4125200642054574,
|
|
"grad_norm": 0.5407953326916595,
|
|
"learning_rate": 3.8760776747849303e-05,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37949904799461365,
|
|
"step": 880,
|
|
"valid_targets_mean": 4542.2,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 1.420545746388443,
|
|
"grad_norm": 0.7908712101641008,
|
|
"learning_rate": 3.873288296604317e-05,
|
|
"loss": 0.3745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3987817168235779,
|
|
"step": 885,
|
|
"valid_targets_mean": 2964.9,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.6811572770208552,
|
|
"learning_rate": 3.870468900144233e-05,
|
|
"loss": 0.3921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3631388545036316,
|
|
"step": 890,
|
|
"valid_targets_mean": 2971.6,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 1.4365971107544142,
|
|
"grad_norm": 0.5953705611444196,
|
|
"learning_rate": 3.8676195305837536e-05,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3725372552871704,
|
|
"step": 895,
|
|
"valid_targets_mean": 3718.9,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 1.4446227929373996,
|
|
"grad_norm": 0.642949077391258,
|
|
"learning_rate": 3.864740233582252e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.338529109954834,
|
|
"step": 900,
|
|
"valid_targets_mean": 3453.6,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 1.4526484751203852,
|
|
"grad_norm": 0.6008274296030178,
|
|
"learning_rate": 3.8618310552786715e-05,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3302873373031616,
|
|
"step": 905,
|
|
"valid_targets_mean": 3723.4,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 1.4606741573033708,
|
|
"grad_norm": 0.5675049203049615,
|
|
"learning_rate": 3.858892042290785e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3483647108078003,
|
|
"step": 910,
|
|
"valid_targets_mean": 3920.4,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 1.4686998394863564,
|
|
"grad_norm": 0.6452044287715353,
|
|
"learning_rate": 3.855923241714447e-05,
|
|
"loss": 0.3756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3616899847984314,
|
|
"step": 915,
|
|
"valid_targets_mean": 3063.2,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 1.476725521669342,
|
|
"grad_norm": 0.6383186186065046,
|
|
"learning_rate": 3.852924701122839e-05,
|
|
"loss": 0.4145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.398642361164093,
|
|
"step": 920,
|
|
"valid_targets_mean": 3685.3,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 1.4847512038523274,
|
|
"grad_norm": 0.7221378422572026,
|
|
"learning_rate": 3.8498964685657084e-05,
|
|
"loss": 0.3776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35897132754325867,
|
|
"step": 925,
|
|
"valid_targets_mean": 2436.4,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 1.492776886035313,
|
|
"grad_norm": 0.6891796492080999,
|
|
"learning_rate": 3.846838592568599e-05,
|
|
"loss": 0.3844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4061475992202759,
|
|
"step": 930,
|
|
"valid_targets_mean": 2975.9,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 1.5008025682182986,
|
|
"grad_norm": 0.6054936981722907,
|
|
"learning_rate": 3.84375112213207e-05,
|
|
"loss": 0.376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3591410517692566,
|
|
"step": 935,
|
|
"valid_targets_mean": 3577.8,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 1.508828250401284,
|
|
"grad_norm": 0.6133302423007465,
|
|
"learning_rate": 3.840634106730915e-05,
|
|
"loss": 0.3766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35326242446899414,
|
|
"step": 940,
|
|
"valid_targets_mean": 3537.2,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 1.5168539325842696,
|
|
"grad_norm": 0.7457747300557499,
|
|
"learning_rate": 3.837487596313366e-05,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4203342795372009,
|
|
"step": 945,
|
|
"valid_targets_mean": 2724.6,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 1.5248796147672552,
|
|
"grad_norm": 0.6422054863430279,
|
|
"learning_rate": 3.834311641300296e-05,
|
|
"loss": 0.3778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39546987414360046,
|
|
"step": 950,
|
|
"valid_targets_mean": 3808.1,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 1.5329052969502408,
|
|
"grad_norm": 0.5647865045371976,
|
|
"learning_rate": 3.831106292584406e-05,
|
|
"loss": 0.3838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3573455214500427,
|
|
"step": 955,
|
|
"valid_targets_mean": 4464.1,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 1.5409309791332264,
|
|
"grad_norm": 0.5786397434644119,
|
|
"learning_rate": 3.827871601529417e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33765944838523865,
|
|
"step": 960,
|
|
"valid_targets_mean": 4110.8,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 1.548956661316212,
|
|
"grad_norm": 0.6168349516502579,
|
|
"learning_rate": 3.8246076199692395e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3136134743690491,
|
|
"step": 965,
|
|
"valid_targets_mean": 3021.2,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 1.5569823434991974,
|
|
"grad_norm": 0.6129182059374374,
|
|
"learning_rate": 3.821314400207148e-05,
|
|
"loss": 0.3556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36905360221862793,
|
|
"step": 970,
|
|
"valid_targets_mean": 3719.8,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 1.565008025682183,
|
|
"grad_norm": 0.6795045499467423,
|
|
"learning_rate": 3.817991995014939e-05,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3600926399230957,
|
|
"step": 975,
|
|
"valid_targets_mean": 3732.2,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 1.5730337078651684,
|
|
"grad_norm": 0.5200923990022152,
|
|
"learning_rate": 3.8146404576320894e-05,
|
|
"loss": 0.368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3647575080394745,
|
|
"step": 980,
|
|
"valid_targets_mean": 4750.6,
|
|
"valid_targets_min": 1632
|
|
},
|
|
{
|
|
"epoch": 1.581059390048154,
|
|
"grad_norm": 0.5980270685108303,
|
|
"learning_rate": 3.8112598417649015e-05,
|
|
"loss": 0.4009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4130314886569977,
|
|
"step": 985,
|
|
"valid_targets_mean": 4185.3,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 1.5890850722311396,
|
|
"grad_norm": 0.7207436892179098,
|
|
"learning_rate": 3.80785020158564e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3845211863517761,
|
|
"step": 990,
|
|
"valid_targets_mean": 2864.5,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 1.5971107544141252,
|
|
"grad_norm": 0.6159944435064358,
|
|
"learning_rate": 3.804411591731668e-05,
|
|
"loss": 0.3868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.446266770362854,
|
|
"step": 995,
|
|
"valid_targets_mean": 4245.4,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 1.6051364365971108,
|
|
"grad_norm": 0.6542980430154733,
|
|
"learning_rate": 3.80094406730457e-05,
|
|
"loss": 0.3893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35475975275039673,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3144.3,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 1.6131621187800964,
|
|
"grad_norm": 0.6038313349717512,
|
|
"learning_rate": 3.7974476838692656e-05,
|
|
"loss": 0.3934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.400547057390213,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3896.3,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 1.621187800963082,
|
|
"grad_norm": 0.771290746752353,
|
|
"learning_rate": 3.793922497453124e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4035474956035614,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2708.4,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 1.6292134831460674,
|
|
"grad_norm": 0.7141671312303208,
|
|
"learning_rate": 3.790368564545067e-05,
|
|
"loss": 0.3696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.390037477016449,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3064.1,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 1.637239165329053,
|
|
"grad_norm": 0.6571713725550373,
|
|
"learning_rate": 3.786785942094656e-05,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3853578567504883,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3342.6,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 1.6452648475120384,
|
|
"grad_norm": 0.5401981575389858,
|
|
"learning_rate": 3.783174687511188e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3374444544315338,
|
|
"step": 1025,
|
|
"valid_targets_mean": 4894.0,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 1.653290529695024,
|
|
"grad_norm": 0.7282477698960313,
|
|
"learning_rate": 3.7795348586627696e-05,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4296496510505676,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2842.4,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 1.6613162118780096,
|
|
"grad_norm": 0.5433997762343573,
|
|
"learning_rate": 3.7758665138753943e-05,
|
|
"loss": 0.3726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.331808865070343,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4323.6,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 1.6693418940609952,
|
|
"grad_norm": 0.6398602495358743,
|
|
"learning_rate": 3.7721697119320054e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40326300263404846,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3351.8,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 1.6773675762439808,
|
|
"grad_norm": 0.5866146682121014,
|
|
"learning_rate": 3.768444512071553e-05,
|
|
"loss": 0.3658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3532131016254425,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3730.1,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 1.6853932584269664,
|
|
"grad_norm": 0.7959865183463389,
|
|
"learning_rate": 3.7646909739880486e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3300178050994873,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3227.4,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 1.6934189406099518,
|
|
"grad_norm": 0.598844956900591,
|
|
"learning_rate": 3.760909157829604e-05,
|
|
"loss": 0.3814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3704001307487488,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3879.9,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 1.7014446227929374,
|
|
"grad_norm": 0.6048944363417414,
|
|
"learning_rate": 3.75709912419747e-05,
|
|
"loss": 0.367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38076353073120117,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3856.0,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 1.709470304975923,
|
|
"grad_norm": 0.6892539369630136,
|
|
"learning_rate": 3.7532609341450655e-05,
|
|
"loss": 0.3733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38892602920532227,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2845.4,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 1.7174959871589084,
|
|
"grad_norm": 0.6406662137883938,
|
|
"learning_rate": 3.749394649176998e-05,
|
|
"loss": 0.3749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40383175015449524,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3329.1,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 1.725521669341894,
|
|
"grad_norm": 0.6164899752487527,
|
|
"learning_rate": 3.745500331248078e-05,
|
|
"loss": 0.3626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3861474394798279,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4251.2,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 1.7335473515248796,
|
|
"grad_norm": 0.5777007722618619,
|
|
"learning_rate": 3.741578042762329e-05,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34206151962280273,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4012.4,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 1.7415730337078652,
|
|
"grad_norm": 0.7402664409676605,
|
|
"learning_rate": 3.737627846571982e-05,
|
|
"loss": 0.3937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3692738711833954,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2883.0,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 1.7495987158908508,
|
|
"grad_norm": 0.6087768814918771,
|
|
"learning_rate": 3.733649805976474e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3561427593231201,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3620.4,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 1.7576243980738364,
|
|
"grad_norm": 0.6352734564838006,
|
|
"learning_rate": 3.729643984721431e-05,
|
|
"loss": 0.3766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4255198836326599,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3878.8,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 1.7656500802568218,
|
|
"grad_norm": 0.6180827957716937,
|
|
"learning_rate": 3.725610446997646e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33942580223083496,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3706.7,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 1.7736757624398074,
|
|
"grad_norm": 0.5248422304984852,
|
|
"learning_rate": 3.7215492574400514e-05,
|
|
"loss": 0.3685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33461493253707886,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4423.9,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 1.7817014446227928,
|
|
"grad_norm": 0.5404259900418821,
|
|
"learning_rate": 3.717460481126684e-05,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34109145402908325,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4821.2,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 1.7897271268057784,
|
|
"grad_norm": 1.5528927418929022,
|
|
"learning_rate": 3.7133441835776386e-05,
|
|
"loss": 0.375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40200188755989075,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3680.9,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 1.797752808988764,
|
|
"grad_norm": 0.6712196511384089,
|
|
"learning_rate": 3.7092004307540235e-05,
|
|
"loss": 0.349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36297160387039185,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2912.1,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 1.8057784911717496,
|
|
"grad_norm": 0.7135237262644113,
|
|
"learning_rate": 3.7050292890568995e-05,
|
|
"loss": 0.3421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33697420358657837,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2443.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 1.8138041733547352,
|
|
"grad_norm": 0.6294538703574741,
|
|
"learning_rate": 3.700830825326218e-05,
|
|
"loss": 0.3671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4305475354194641,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3778.8,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 1.8218298555377208,
|
|
"grad_norm": 0.6640879387348214,
|
|
"learning_rate": 3.696605106839747e-05,
|
|
"loss": 0.357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40310508012771606,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3140.6,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 1.8298555377207064,
|
|
"grad_norm": 0.6583944793548265,
|
|
"learning_rate": 3.6923522013119964e-05,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38100820779800415,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3698.3,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 1.8378812199036918,
|
|
"grad_norm": 0.6807406570719405,
|
|
"learning_rate": 3.688072176893132e-05,
|
|
"loss": 0.3706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3822127878665924,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3460.6,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 1.8459069020866774,
|
|
"grad_norm": 0.5163713062958025,
|
|
"learning_rate": 3.683765102167881e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3767138123512268,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5020.5,
|
|
"valid_targets_min": 1762
|
|
},
|
|
{
|
|
"epoch": 1.8539325842696628,
|
|
"grad_norm": 0.6326583506783715,
|
|
"learning_rate": 3.679431046154438e-05,
|
|
"loss": 0.3709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3792263865470886,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3690.9,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 1.8619582664526484,
|
|
"grad_norm": 0.6204021935785907,
|
|
"learning_rate": 3.675070078303352e-05,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41327282786369324,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4179.4,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 1.869983948635634,
|
|
"grad_norm": 0.5560746856286518,
|
|
"learning_rate": 3.670682268496421e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3499162793159485,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3980.8,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 1.8780096308186196,
|
|
"grad_norm": 0.5669482631543975,
|
|
"learning_rate": 3.6662676870455676e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.390192449092865,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3901.8,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 1.8860353130016052,
|
|
"grad_norm": 0.5770966388636284,
|
|
"learning_rate": 3.661826404691712e-05,
|
|
"loss": 0.3836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3734949827194214,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4644.3,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 1.8940609951845908,
|
|
"grad_norm": 0.5899548661087921,
|
|
"learning_rate": 3.657358492603641e-05,
|
|
"loss": 0.3509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3441034257411957,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3685.6,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 1.9020866773675762,
|
|
"grad_norm": 0.814110933581653,
|
|
"learning_rate": 3.652864022376866e-05,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3938433825969696,
|
|
"step": 1185,
|
|
"valid_targets_mean": 2818.9,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 1.9101123595505618,
|
|
"grad_norm": 0.6965842955940912,
|
|
"learning_rate": 3.648343066032476e-05,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35343319177627563,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4491.8,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 1.9181380417335474,
|
|
"grad_norm": 0.5977523443483242,
|
|
"learning_rate": 3.643795696015985e-05,
|
|
"loss": 0.3866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3864300847053528,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4148.3,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 1.9261637239165328,
|
|
"grad_norm": 0.67854603777391,
|
|
"learning_rate": 3.639221985196166e-05,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3579566776752472,
|
|
"step": 1200,
|
|
"valid_targets_mean": 2995.6,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 1.9341894060995184,
|
|
"grad_norm": 0.6343031201318327,
|
|
"learning_rate": 3.634622006863891e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37107375264167786,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4321.7,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 1.942215088282504,
|
|
"grad_norm": 0.6291902252679911,
|
|
"learning_rate": 3.62999583473095e-05,
|
|
"loss": 0.3723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3786380887031555,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3248.1,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 1.9502407704654896,
|
|
"grad_norm": 0.7208964867085378,
|
|
"learning_rate": 3.625343542928874e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34917140007019043,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3308.2,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 1.9582664526484752,
|
|
"grad_norm": 0.5771372331385178,
|
|
"learning_rate": 3.620665206007744e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32531726360321045,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3934.1,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 1.9662921348314608,
|
|
"grad_norm": 0.5931035434063252,
|
|
"learning_rate": 3.615960898934999e-05,
|
|
"loss": 0.3634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33713725209236145,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3671.8,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 1.9743178170144462,
|
|
"grad_norm": 0.5788633054376541,
|
|
"learning_rate": 3.6112306970942334e-05,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33869656920433044,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3771.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.9823434991974318,
|
|
"grad_norm": 0.6435818913586552,
|
|
"learning_rate": 3.606474676283987e-05,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3851108253002167,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3944.9,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 1.9903691813804172,
|
|
"grad_norm": 0.6374574016549951,
|
|
"learning_rate": 3.6016929127165365e-05,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39890849590301514,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3596.4,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 1.9983948635634028,
|
|
"grad_norm": 0.6457539239154649,
|
|
"learning_rate": 3.5968854830166666e-05,
|
|
"loss": 0.3419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3129879832267761,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3169.8,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 2.0064205457463884,
|
|
"grad_norm": 0.6536780042311249,
|
|
"learning_rate": 3.592052464220447e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32612812519073486,
|
|
"step": 1250,
|
|
"valid_targets_mean": 3364.1,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.014446227929374,
|
|
"grad_norm": 0.77647591044912,
|
|
"learning_rate": 3.587193933773998e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32003918290138245,
|
|
"step": 1255,
|
|
"valid_targets_mean": 3151.2,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 2.0224719101123596,
|
|
"grad_norm": 0.6446004964473829,
|
|
"learning_rate": 3.582309969532247e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3277965784072876,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3857.6,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 2.030497592295345,
|
|
"grad_norm": 0.55167111978132,
|
|
"learning_rate": 3.577400649757682e-05,
|
|
"loss": 0.331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3310242295265198,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4918.2,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 2.038523274478331,
|
|
"grad_norm": 0.6822246665629514,
|
|
"learning_rate": 3.572466053119099e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39388754963874817,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3889.1,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 2.0465489566613164,
|
|
"grad_norm": 0.5681686285934089,
|
|
"learning_rate": 3.567506258690338e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29393839836120605,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4283.9,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 2.0545746388443016,
|
|
"grad_norm": 0.5816437641726608,
|
|
"learning_rate": 3.562521345949018e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26834946870803833,
|
|
"step": 1280,
|
|
"valid_targets_mean": 4222.9,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 2.062600321027287,
|
|
"grad_norm": 0.6254462219873159,
|
|
"learning_rate": 3.5575113947752666e-05,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30023327469825745,
|
|
"step": 1285,
|
|
"valid_targets_mean": 3487.6,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 2.070626003210273,
|
|
"grad_norm": 0.6030874590681511,
|
|
"learning_rate": 3.552476485450434e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3220904469490051,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3833.3,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 2.0786516853932584,
|
|
"grad_norm": 0.7125376215068204,
|
|
"learning_rate": 3.547416698655807e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35287854075431824,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3205.2,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 2.086677367576244,
|
|
"grad_norm": 0.6052009911743303,
|
|
"learning_rate": 3.5423321154713245e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3191089630126953,
|
|
"step": 1300,
|
|
"valid_targets_mean": 4253.9,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 2.0947030497592296,
|
|
"grad_norm": 0.585624277379928,
|
|
"learning_rate": 3.537222817374266e-05,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28600168228149414,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3550.0,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 2.102728731942215,
|
|
"grad_norm": 0.6328560462975177,
|
|
"learning_rate": 3.532088886237956e-05,
|
|
"loss": 0.3401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3558768928050995,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4102.1,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 2.110754414125201,
|
|
"grad_norm": 0.6767240406531382,
|
|
"learning_rate": 3.526930404330447e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4191839098930359,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3601.5,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 2.1187800963081864,
|
|
"grad_norm": 0.6450537254039228,
|
|
"learning_rate": 3.521747454313201e-05,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33509010076522827,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3253.1,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 2.1268057784911716,
|
|
"grad_norm": 0.6745965428543366,
|
|
"learning_rate": 3.516540119239769e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3688873052597046,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3753.4,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 2.134831460674157,
|
|
"grad_norm": 0.735190573298086,
|
|
"learning_rate": 3.511308482554454e-05,
|
|
"loss": 0.3543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3532335162162781,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3294.4,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.5336308891625168,
|
|
"learning_rate": 3.506052628090981e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015096187591553,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4669.8,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 2.1508828250401284,
|
|
"grad_norm": 0.583411674198425,
|
|
"learning_rate": 3.500772640071145e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3251872658729553,
|
|
"step": 1340,
|
|
"valid_targets_mean": 4422.6,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 2.158908507223114,
|
|
"grad_norm": 0.6790433284171083,
|
|
"learning_rate": 3.495468603103472e-05,
|
|
"loss": 0.3424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3658939599990845,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3428.2,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 2.1669341894060996,
|
|
"grad_norm": 0.601379304222511,
|
|
"learning_rate": 3.4901406021818526e-05,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3487321734428406,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4336.0,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 2.174959871589085,
|
|
"grad_norm": 0.6987683680889328,
|
|
"learning_rate": 3.4847887226841874e-05,
|
|
"loss": 0.3519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3258175849914551,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3282.4,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 2.182985553772071,
|
|
"grad_norm": 0.6783500880783389,
|
|
"learning_rate": 3.479413050371016e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35359451174736023,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3656.4,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 2.191011235955056,
|
|
"grad_norm": 0.6554761013006223,
|
|
"learning_rate": 3.4740136713841416e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33448517322540283,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3314.9,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 2.1990369181380416,
|
|
"grad_norm": 0.6794920426077007,
|
|
"learning_rate": 3.4685906722452537e-05,
|
|
"loss": 0.3551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3383448123931885,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3193.2,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 2.207062600321027,
|
|
"grad_norm": 0.6032608588459586,
|
|
"learning_rate": 3.4631441398545394e-05,
|
|
"loss": 0.3543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29828694462776184,
|
|
"step": 1375,
|
|
"valid_targets_mean": 4068.5,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 2.215088282504013,
|
|
"grad_norm": 0.6443449977781702,
|
|
"learning_rate": 3.4576741614892925e-05,
|
|
"loss": 0.3408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36095738410949707,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3019.7,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 2.2231139646869984,
|
|
"grad_norm": 0.8539135116729987,
|
|
"learning_rate": 3.452180824802512e-05,
|
|
"loss": 0.3437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3896363377571106,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3035.5,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 2.231139646869984,
|
|
"grad_norm": 0.5560002066255036,
|
|
"learning_rate": 3.4466642178215015e-05,
|
|
"loss": 0.3445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32564520835876465,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4075.1,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 2.2391653290529696,
|
|
"grad_norm": 0.7515581416616692,
|
|
"learning_rate": 3.441124428946455e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32313257455825806,
|
|
"step": 1395,
|
|
"valid_targets_mean": 2598.9,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 2.247191011235955,
|
|
"grad_norm": 0.5840160543214769,
|
|
"learning_rate": 3.435561546949043e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3101240396499634,
|
|
"step": 1400,
|
|
"valid_targets_mean": 4269.2,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 2.255216693418941,
|
|
"grad_norm": 0.610698848131706,
|
|
"learning_rate": 3.429975660970988e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3215124011039734,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3646.6,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 2.263242375601926,
|
|
"grad_norm": 0.7945229805318056,
|
|
"learning_rate": 3.424366860522638e-05,
|
|
"loss": 0.3447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37733930349349976,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3075.1,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 2.2712680577849116,
|
|
"grad_norm": 0.7248815349783063,
|
|
"learning_rate": 3.41873523548153e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3721732497215271,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3615.1,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 2.279293739967897,
|
|
"grad_norm": 0.6389278860468958,
|
|
"learning_rate": 3.413080876090952e-05,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30875110626220703,
|
|
"step": 1420,
|
|
"valid_targets_mean": 3517.1,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 2.287319422150883,
|
|
"grad_norm": 0.7468534504976667,
|
|
"learning_rate": 3.4074038729584955e-05,
|
|
"loss": 0.349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32828766107559204,
|
|
"step": 1425,
|
|
"valid_targets_mean": 2649.4,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 2.2953451043338684,
|
|
"grad_norm": 0.6150485993831529,
|
|
"learning_rate": 3.401704317054604e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35845857858657837,
|
|
"step": 1430,
|
|
"valid_targets_mean": 4093.1,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 2.303370786516854,
|
|
"grad_norm": 0.7098746967306129,
|
|
"learning_rate": 3.395982299711114e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3935844302177429,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3564.3,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 2.3113964686998396,
|
|
"grad_norm": 0.7512963509518076,
|
|
"learning_rate": 3.390237912619793e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39012032747268677,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3086.4,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 2.319422150882825,
|
|
"grad_norm": 0.6876528217880742,
|
|
"learning_rate": 3.384471247830872e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3373757600784302,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3432.4,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 2.3274478330658104,
|
|
"grad_norm": 0.6451068752996163,
|
|
"learning_rate": 3.378682397751565e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3339243531227112,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3446.3,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 2.335473515248796,
|
|
"grad_norm": 0.6983335152539901,
|
|
"learning_rate": 3.372871455144593e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3662850856781006,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3232.6,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 2.3434991974317816,
|
|
"grad_norm": 0.6320578916239832,
|
|
"learning_rate": 3.367038513126697e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3371465802192688,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3747.4,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 2.351524879614767,
|
|
"grad_norm": 0.597911278501767,
|
|
"learning_rate": 3.3611836651671426e-05,
|
|
"loss": 0.3607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3991633951663971,
|
|
"step": 1465,
|
|
"valid_targets_mean": 4103.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 2.359550561797753,
|
|
"grad_norm": 0.641503876883114,
|
|
"learning_rate": 3.355307005086226e-05,
|
|
"loss": 0.3411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302163302898407,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3242.5,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 2.3675762439807384,
|
|
"grad_norm": 0.6314711577546057,
|
|
"learning_rate": 3.34940862705377e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30752742290496826,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3384.4,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 2.375601926163724,
|
|
"grad_norm": 0.6698109444829109,
|
|
"learning_rate": 3.3434886255876106e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3380153477191925,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3436.1,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 2.3836276083467096,
|
|
"grad_norm": 0.5979011395987502,
|
|
"learning_rate": 3.33754709555209e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3353763818740845,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4199.6,
|
|
"valid_targets_min": 2072
|
|
},
|
|
{
|
|
"epoch": 2.391653290529695,
|
|
"grad_norm": 0.6770580171646131,
|
|
"learning_rate": 3.33158413215653e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3340577483177185,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3357.4,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 2.399678972712681,
|
|
"grad_norm": 0.5676571449725292,
|
|
"learning_rate": 3.325599830953708e-05,
|
|
"loss": 0.3701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36025285720825195,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4441.1,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 2.407704654895666,
|
|
"grad_norm": 0.719422333636822,
|
|
"learning_rate": 3.31959428783833e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3539617657661438,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3037.1,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 2.4157303370786516,
|
|
"grad_norm": 0.6855816304379933,
|
|
"learning_rate": 3.3135675990454856e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4219123125076294,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3423.2,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 2.423756019261637,
|
|
"grad_norm": 0.6204790162474024,
|
|
"learning_rate": 3.307519861149114e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31686219573020935,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3626.0,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 2.431781701444623,
|
|
"grad_norm": 0.6879941651406059,
|
|
"learning_rate": 3.3014511710604526e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35248667001724243,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2911.1,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 2.4398073836276084,
|
|
"grad_norm": 0.6240942996397502,
|
|
"learning_rate": 3.295361626026484e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3463127613067627,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3696.6,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 2.447833065810594,
|
|
"grad_norm": 0.6932447910012696,
|
|
"learning_rate": 3.28925132362838e-05,
|
|
"loss": 0.3359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35137391090393066,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3055.4,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 2.4558587479935796,
|
|
"grad_norm": 0.6618716505794552,
|
|
"learning_rate": 3.2831203617799316e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35246729850769043,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3472.0,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 2.4638844301765648,
|
|
"grad_norm": 0.6925119918267985,
|
|
"learning_rate": 3.276968838725992e-05,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3349195718765259,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3108.7,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.4719101123595504,
|
|
"grad_norm": 0.6483971933715901,
|
|
"learning_rate": 3.2707968530408886e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31000465154647827,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3485.7,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 2.479935794542536,
|
|
"grad_norm": 0.5706937100982172,
|
|
"learning_rate": 3.264604503626853e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2994241714477539,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3938.3,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 2.4879614767255216,
|
|
"grad_norm": 0.7220759670745577,
|
|
"learning_rate": 3.2583918897124315e-05,
|
|
"loss": 0.3454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3649590015411377,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3566.5,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 2.495987158908507,
|
|
"grad_norm": 0.6404004525309419,
|
|
"learning_rate": 3.2521591108508965e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3354296088218689,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3605.5,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 2.504012841091493,
|
|
"grad_norm": 0.6106593507559929,
|
|
"learning_rate": 3.245906266918651e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3291773796081543,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3817.6,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 2.5120385232744784,
|
|
"grad_norm": 0.6275924493635835,
|
|
"learning_rate": 3.2396334581136315e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34411320090293884,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3806.2,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 2.520064205457464,
|
|
"grad_norm": 0.6513373426503003,
|
|
"learning_rate": 3.2333407849536936e-05,
|
|
"loss": 0.3319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36360228061676025,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3577.8,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 2.5280898876404496,
|
|
"grad_norm": 0.7015903453412484,
|
|
"learning_rate": 3.2270283482750106e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3667321503162384,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2929.3,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 2.5361155698234352,
|
|
"grad_norm": 0.5284175040044605,
|
|
"learning_rate": 3.220696249230453e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29141658544540405,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4190.7,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 2.5441412520064204,
|
|
"grad_norm": 0.5750188615474039,
|
|
"learning_rate": 3.214344589287969e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3119628429412842,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4027.4,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 2.552166934189406,
|
|
"grad_norm": 0.6136226978025248,
|
|
"learning_rate": 3.2079734702289555e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32553696632385254,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3637.4,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 2.5601926163723916,
|
|
"grad_norm": 0.616328720257945,
|
|
"learning_rate": 3.201582994146633e-05,
|
|
"loss": 0.313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.326008677482605,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3554.6,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 2.568218298555377,
|
|
"grad_norm": 0.6942335025886992,
|
|
"learning_rate": 3.195173263444405e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3514513373374939,
|
|
"step": 1600,
|
|
"valid_targets_mean": 3052.8,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 2.576243980738363,
|
|
"grad_norm": 0.5525678993493612,
|
|
"learning_rate": 3.188744380834218e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279376745223999,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4410.0,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 2.5842696629213484,
|
|
"grad_norm": 0.6832993275387763,
|
|
"learning_rate": 3.182296449334916e-05,
|
|
"loss": 0.3567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37043702602386475,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3094.4,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 2.592295345104334,
|
|
"grad_norm": 0.594719262842279,
|
|
"learning_rate": 3.1758295722705895e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3060463070869446,
|
|
"step": 1615,
|
|
"valid_targets_mean": 3643.6,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 2.600321027287319,
|
|
"grad_norm": 0.5726636560667273,
|
|
"learning_rate": 3.1693438532689196e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946881651878357,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3807.2,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 2.608346709470305,
|
|
"grad_norm": 0.6069036348827563,
|
|
"learning_rate": 3.162839396259519e-05,
|
|
"loss": 0.3439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3479877710342407,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3933.6,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 2.6163723916532904,
|
|
"grad_norm": 0.5972904349684691,
|
|
"learning_rate": 3.156316305472263e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303456574678421,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3562.8,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 2.624398073836276,
|
|
"grad_norm": 0.6351854636602856,
|
|
"learning_rate": 3.149774685435626e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3057955503463745,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3843.1,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 2.6324237560192616,
|
|
"grad_norm": 0.6180387754996765,
|
|
"learning_rate": 3.143214640974996e-05,
|
|
"loss": 0.3205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33589479327201843,
|
|
"step": 1640,
|
|
"valid_targets_mean": 3874.8,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 2.640449438202247,
|
|
"grad_norm": 0.586191097287597,
|
|
"learning_rate": 3.136636277211005e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33181440830230713,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4036.0,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 2.648475120385233,
|
|
"grad_norm": 0.5891419589461283,
|
|
"learning_rate": 3.13003969955784e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37280628085136414,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4139.4,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 2.6565008025682184,
|
|
"grad_norm": 0.6401205260202005,
|
|
"learning_rate": 3.1234250137215546e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3504320979118347,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3556.1,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 2.664526484751204,
|
|
"grad_norm": 0.6602146380944036,
|
|
"learning_rate": 3.1167923256983724e-05,
|
|
"loss": 0.3379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34942203760147095,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3273.7,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 2.6725521669341896,
|
|
"grad_norm": 0.6850430078281599,
|
|
"learning_rate": 3.110141741772994e-05,
|
|
"loss": 0.357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35717523097991943,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3784.8,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 2.6805778491171752,
|
|
"grad_norm": 0.7284579158088035,
|
|
"learning_rate": 3.103473368516887e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3448953628540039,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3194.9,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 2.6886035313001604,
|
|
"grad_norm": 0.6399025354669599,
|
|
"learning_rate": 3.0967873127865856e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3419637680053711,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3699.6,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 2.696629213483146,
|
|
"grad_norm": 0.6033164165292226,
|
|
"learning_rate": 3.090083681721972e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3032964766025543,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3774.6,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 2.7046548956661316,
|
|
"grad_norm": 0.6068079972660507,
|
|
"learning_rate": 3.083362582744564e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34635889530181885,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4177.4,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 2.712680577849117,
|
|
"grad_norm": 0.6743600862053089,
|
|
"learning_rate": 3.0766241235557914e-05,
|
|
"loss": 0.3468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.348676472902298,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4156.9,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 2.720706260032103,
|
|
"grad_norm": 0.6195904950055389,
|
|
"learning_rate": 3.0698684121352704e-05,
|
|
"loss": 0.3482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32866066694259644,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3407.6,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 2.7287319422150884,
|
|
"grad_norm": 0.5877025861002069,
|
|
"learning_rate": 3.0630955567390714e-05,
|
|
"loss": 0.3366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29404670000076294,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4090.5,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 2.7367576243980736,
|
|
"grad_norm": 0.6052990781124924,
|
|
"learning_rate": 3.0563056658979915e-05,
|
|
"loss": 0.3267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3026697635650635,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3652.8,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.744783306581059,
|
|
"grad_norm": 0.623051537406257,
|
|
"learning_rate": 3.0494988484158048e-05,
|
|
"loss": 0.3332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3201594948768616,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3667.8,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 2.752808988764045,
|
|
"grad_norm": 0.6644041037187817,
|
|
"learning_rate": 3.0426752133675274e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3347300589084625,
|
|
"step": 1715,
|
|
"valid_targets_mean": 3578.6,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 2.7608346709470304,
|
|
"grad_norm": 0.642418641943127,
|
|
"learning_rate": 3.035834870097665e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3560801148414612,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3358.9,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 2.768860353130016,
|
|
"grad_norm": 0.576302900274275,
|
|
"learning_rate": 3.0289779282184627e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30237460136413574,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4074.6,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.7768860353130016,
|
|
"grad_norm": 0.5500430293641589,
|
|
"learning_rate": 3.0221044976081484e-05,
|
|
"loss": 0.335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3329146206378937,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4624.4,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 2.784911717495987,
|
|
"grad_norm": 0.6322025669205099,
|
|
"learning_rate": 3.0152146884091714e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3330252766609192,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3348.8,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 2.792937399678973,
|
|
"grad_norm": 0.5514448356332895,
|
|
"learning_rate": 3.0083086110264372e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31193551421165466,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4444.9,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 2.8009630818619584,
|
|
"grad_norm": 0.5728937817110932,
|
|
"learning_rate": 3.0013863761255394e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30753281712532043,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3714.2,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 2.808988764044944,
|
|
"grad_norm": 0.6632606884631363,
|
|
"learning_rate": 2.9944480946309857e-05,
|
|
"loss": 0.3416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3611745834350586,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3497.4,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 2.8170144462279296,
|
|
"grad_norm": 0.6479604803648288,
|
|
"learning_rate": 2.9874938777244203e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32195645570755005,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3638.7,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 2.825040128410915,
|
|
"grad_norm": 0.6886039287932629,
|
|
"learning_rate": 2.9805238368428417e-05,
|
|
"loss": 0.3305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34836989641189575,
|
|
"step": 1760,
|
|
"valid_targets_mean": 2880.3,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 2.8330658105939004,
|
|
"grad_norm": 0.6956406313119212,
|
|
"learning_rate": 2.9735380836768194e-05,
|
|
"loss": 0.3327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36599206924438477,
|
|
"step": 1765,
|
|
"valid_targets_mean": 3019.6,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 2.841091492776886,
|
|
"grad_norm": 0.5785722787350643,
|
|
"learning_rate": 2.966536730168702e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.352041631937027,
|
|
"step": 1770,
|
|
"valid_targets_mean": 3826.6,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 2.8491171749598716,
|
|
"grad_norm": 0.7247657683100378,
|
|
"learning_rate": 2.9595198885108222e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37550970911979675,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2794.8,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.6544105507307673,
|
|
"learning_rate": 2.9524876711437027e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32390421628952026,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3228.1,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 2.865168539325843,
|
|
"grad_norm": 0.6020277433458299,
|
|
"learning_rate": 2.945440190754252e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31498652696609497,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3720.1,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 2.8731942215088284,
|
|
"grad_norm": 0.6365715752148964,
|
|
"learning_rate": 2.9383775602739585e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34130945801734924,
|
|
"step": 1790,
|
|
"valid_targets_mean": 3444.2,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 2.8812199036918136,
|
|
"grad_norm": 0.648158360003602,
|
|
"learning_rate": 2.931299892877081e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3198522925376892,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3627.5,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 2.889245585874799,
|
|
"grad_norm": 0.7031136170500626,
|
|
"learning_rate": 2.924207301978837e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32584348320961,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3243.6,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 2.897271268057785,
|
|
"grad_norm": 0.5669061372540747,
|
|
"learning_rate": 2.917099901233582e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28747546672821045,
|
|
"step": 1805,
|
|
"valid_targets_mean": 3952.2,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 2.9052969502407704,
|
|
"grad_norm": 0.5608337941346294,
|
|
"learning_rate": 2.9099778045329915e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3231768012046814,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4424.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 2.913322632423756,
|
|
"grad_norm": 0.5846035621240445,
|
|
"learning_rate": 2.902841126004234e-05,
|
|
"loss": 0.3412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3509044051170349,
|
|
"step": 1815,
|
|
"valid_targets_mean": 4331.2,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 2.9213483146067416,
|
|
"grad_norm": 2.827733512820143,
|
|
"learning_rate": 2.8956899800081434e-05,
|
|
"loss": 0.3456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31195124983787537,
|
|
"step": 1820,
|
|
"valid_targets_mean": 4123.7,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 2.929373996789727,
|
|
"grad_norm": 0.584139702943244,
|
|
"learning_rate": 2.888524481137384e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28966689109802246,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3555.9,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 2.937399678972713,
|
|
"grad_norm": 0.5440789173780904,
|
|
"learning_rate": 2.8813447442146177e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3283085525035858,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4409.1,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 2.9454253611556984,
|
|
"grad_norm": 0.5710511148757464,
|
|
"learning_rate": 2.8741508842906617e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32850438356399536,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4238.2,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 2.953451043338684,
|
|
"grad_norm": 0.6036799675994731,
|
|
"learning_rate": 2.8669430166426452e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37729257345199585,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4215.8,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 2.961476725521669,
|
|
"grad_norm": 0.6485264273595136,
|
|
"learning_rate": 2.8597212567721638e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3663562834262848,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3570.2,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 2.969502407704655,
|
|
"grad_norm": 0.6320158472183215,
|
|
"learning_rate": 2.8524857204034256e-05,
|
|
"loss": 0.3391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37108752131462097,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3795.7,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 2.9775280898876404,
|
|
"grad_norm": 0.6311382347360888,
|
|
"learning_rate": 2.8452365234813992e-05,
|
|
"loss": 0.3396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3191278278827667,
|
|
"step": 1855,
|
|
"valid_targets_mean": 3059.9,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 2.985553772070626,
|
|
"grad_norm": 0.6262041505291535,
|
|
"learning_rate": 2.8379737821699557e-05,
|
|
"loss": 0.3295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35369980335235596,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3410.2,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 2.9935794542536116,
|
|
"grad_norm": 0.5159276437764774,
|
|
"learning_rate": 2.8306976128500054e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303882896900177,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4231.1,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 3.001605136436597,
|
|
"grad_norm": 0.5294348364209852,
|
|
"learning_rate": 2.823408132117635e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3034864068031311,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4717.5,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 3.009630818619583,
|
|
"grad_norm": 0.6119104915628378,
|
|
"learning_rate": 2.8161054567822375e-05,
|
|
"loss": 0.3209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.343681275844574,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4448.6,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 3.0176565008025684,
|
|
"grad_norm": 0.7233252309423014,
|
|
"learning_rate": 2.8087897038646428e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30574288964271545,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2855.6,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 3.0256821829855536,
|
|
"grad_norm": 0.7563879851360965,
|
|
"learning_rate": 2.801460990595239e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32380393147468567,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2793.2,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.033707865168539,
|
|
"grad_norm": 0.6484777609871455,
|
|
"learning_rate": 2.7941194344120965e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29785701632499695,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3357.6,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 3.041733547351525,
|
|
"grad_norm": 0.5812508985474834,
|
|
"learning_rate": 2.7867651529590864e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25709599256515503,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4839.6,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 3.0497592295345104,
|
|
"grad_norm": 0.6606798219576945,
|
|
"learning_rate": 2.7793982640839933e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32203584909439087,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4152.2,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 3.057784911717496,
|
|
"grad_norm": 0.6222879637095324,
|
|
"learning_rate": 2.7720188858366275e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3056334853172302,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3715.8,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 3.0658105939004816,
|
|
"grad_norm": 0.7071348392961871,
|
|
"learning_rate": 2.7646271364669338e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3370933532714844,
|
|
"step": 1910,
|
|
"valid_targets_mean": 3417.6,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 3.073836276083467,
|
|
"grad_norm": 0.8266077498132695,
|
|
"learning_rate": 2.7572231344230983e-05,
|
|
"loss": 0.3166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33440279960632324,
|
|
"step": 1915,
|
|
"valid_targets_mean": 2568.1,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 3.081861958266453,
|
|
"grad_norm": 0.7331664030746772,
|
|
"learning_rate": 2.7498069983496466e-05,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3464573621749878,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3572.4,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 3.0898876404494384,
|
|
"grad_norm": 0.7007884018992945,
|
|
"learning_rate": 2.7423788470855455e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30028945207595825,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3473.9,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 3.0979133226324236,
|
|
"grad_norm": 0.6449378232302256,
|
|
"learning_rate": 2.734938799662297e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.354037344455719,
|
|
"step": 1930,
|
|
"valid_targets_mean": 4059.1,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 3.105939004815409,
|
|
"grad_norm": 0.7574603407636901,
|
|
"learning_rate": 2.727486975302032e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31982582807540894,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2773.7,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 3.113964686998395,
|
|
"grad_norm": 0.6160010636183304,
|
|
"learning_rate": 2.7200234934156012e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28349655866622925,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3761.6,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 3.1219903691813804,
|
|
"grad_norm": 0.5773674657658079,
|
|
"learning_rate": 2.712548473600657e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.332427442073822,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4628.4,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 3.130016051364366,
|
|
"grad_norm": 0.7014310364777052,
|
|
"learning_rate": 2.7050620356397413e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30048197507858276,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4173.2,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 3.1380417335473516,
|
|
"grad_norm": 0.6895927540679659,
|
|
"learning_rate": 2.6975642994983658e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.324870765209198,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3185.0,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 3.146067415730337,
|
|
"grad_norm": 0.6514370219430948,
|
|
"learning_rate": 2.6900553853230856e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3029561936855316,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3350.9,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 3.154093097913323,
|
|
"grad_norm": 0.6094796632114418,
|
|
"learning_rate": 2.6825354134395804e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28053367137908936,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3813.1,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 3.162118780096308,
|
|
"grad_norm": 0.5777085322987245,
|
|
"learning_rate": 2.6750045043507196e-05,
|
|
"loss": 0.2868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30173203349113464,
|
|
"step": 1970,
|
|
"valid_targets_mean": 4592.2,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 3.1701444622792936,
|
|
"grad_norm": 0.7930867005559484,
|
|
"learning_rate": 2.667462778734637e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3361539840698242,
|
|
"step": 1975,
|
|
"valid_targets_mean": 2789.4,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 3.178170144462279,
|
|
"grad_norm": 0.709595314749721,
|
|
"learning_rate": 2.6599103574427928e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2936401963233948,
|
|
"step": 1980,
|
|
"valid_targets_mean": 2781.2,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 3.186195826645265,
|
|
"grad_norm": 0.6807009159897556,
|
|
"learning_rate": 2.6523473614980395e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138418197631836,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3311.0,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 3.1942215088282504,
|
|
"grad_norm": 0.6187474626704025,
|
|
"learning_rate": 2.6447739120926815e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28726011514663696,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3837.8,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 3.202247191011236,
|
|
"grad_norm": 0.6404191727298474,
|
|
"learning_rate": 2.6371901305865346e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30644625425338745,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3247.6,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 3.2102728731942216,
|
|
"grad_norm": 0.6657135063716464,
|
|
"learning_rate": 2.629596138504978e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33626890182495117,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3491.1,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 3.218298555377207,
|
|
"grad_norm": 0.6316670265098919,
|
|
"learning_rate": 2.6219920575370115e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3001091480255127,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3983.2,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 3.226324237560193,
|
|
"grad_norm": 0.69285135646174,
|
|
"learning_rate": 2.6143780095332996e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3485189378261566,
|
|
"step": 2010,
|
|
"valid_targets_mean": 3461.4,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 3.234349919743178,
|
|
"grad_norm": 0.6143625977890531,
|
|
"learning_rate": 2.6067541165042262e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556491792201996,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4159.9,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 3.2423756019261636,
|
|
"grad_norm": 0.7118579655043377,
|
|
"learning_rate": 2.5991205006179332e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32580727338790894,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3156.9,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 3.250401284109149,
|
|
"grad_norm": 0.5659197660528695,
|
|
"learning_rate": 2.5914772841983653e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289286732673645,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4210.4,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 3.258426966292135,
|
|
"grad_norm": 0.6936146530831123,
|
|
"learning_rate": 2.5838245897233113e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29941755533218384,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3202.0,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 3.2664526484751204,
|
|
"grad_norm": 0.7723656929778846,
|
|
"learning_rate": 2.5761625398224387e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3153168261051178,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2803.0,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 3.274478330658106,
|
|
"grad_norm": 0.7571903888922744,
|
|
"learning_rate": 2.56849125727533e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29117751121520996,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4096.2,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 3.2825040128410916,
|
|
"grad_norm": 0.7540154225685142,
|
|
"learning_rate": 2.560810865009515e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3248542547225952,
|
|
"step": 2045,
|
|
"valid_targets_mean": 2686.6,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 3.290529695024077,
|
|
"grad_norm": 0.7200090044341196,
|
|
"learning_rate": 2.5531214860985015e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3248041570186615,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3316.2,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 3.2985553772070624,
|
|
"grad_norm": 0.6890981019413359,
|
|
"learning_rate": 2.5454232437598032e-05,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3354513645172119,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3204.1,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 3.306581059390048,
|
|
"grad_norm": 0.5690871570207988,
|
|
"learning_rate": 2.537716261352963e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30914515256881714,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4318.7,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 3.3146067415730336,
|
|
"grad_norm": 0.7017171629006755,
|
|
"learning_rate": 2.53000066237758e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3322071433067322,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3062.3,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 3.322632423756019,
|
|
"grad_norm": 0.5943195801639204,
|
|
"learning_rate": 2.5222765704713265e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27359461784362793,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4676.4,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 3.330658105939005,
|
|
"grad_norm": 0.7674425986612798,
|
|
"learning_rate": 2.5145441094079702e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2971890866756439,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2716.6,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 3.3386837881219904,
|
|
"grad_norm": 0.7441598621074355,
|
|
"learning_rate": 2.5068034030953888e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212779760360718,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2951.2,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 3.346709470304976,
|
|
"grad_norm": 0.6071235447252913,
|
|
"learning_rate": 2.499054575573585e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689248323440552,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3951.7,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 3.3547351524879616,
|
|
"grad_norm": 0.6051627662417424,
|
|
"learning_rate": 2.491297751012699e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32054704427719116,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3891.2,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 3.362760834670947,
|
|
"grad_norm": 0.5064262662740953,
|
|
"learning_rate": 2.483533053711018e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24276487529277802,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4906.4,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 3.370786516853933,
|
|
"grad_norm": 0.6148087862267558,
|
|
"learning_rate": 2.4757606080929856e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35952067375183105,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3973.8,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 3.378812199036918,
|
|
"grad_norm": 0.6633335805445548,
|
|
"learning_rate": 2.467980538707207e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3176499605178833,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3617.9,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 3.3868378812199036,
|
|
"grad_norm": 0.6194689891941603,
|
|
"learning_rate": 2.460192970224454e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3110244870185852,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3994.5,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 3.394863563402889,
|
|
"grad_norm": 0.5825528009606489,
|
|
"learning_rate": 2.4523980274356665e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28234344720840454,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4555.2,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 3.402889245585875,
|
|
"grad_norm": 0.7072729825631819,
|
|
"learning_rate": 2.444595835249952e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3438180387020111,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3380.2,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 3.4109149277688604,
|
|
"grad_norm": 0.594527253246516,
|
|
"learning_rate": 2.436786518692587e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28130388259887695,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4428.9,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 3.418940609951846,
|
|
"grad_norm": 0.7299288118582348,
|
|
"learning_rate": 2.428970202903011e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31851768493652344,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3562.2,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 3.4269662921348316,
|
|
"grad_norm": 0.6640775184719934,
|
|
"learning_rate": 2.4211470131328208e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3092896044254303,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3561.9,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 3.4349919743178168,
|
|
"grad_norm": 0.7371896500657646,
|
|
"learning_rate": 2.4133170747437657e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31986141204833984,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2790.1,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 3.4430176565008024,
|
|
"grad_norm": 0.5815235098142043,
|
|
"learning_rate": 2.4054805132057362e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34602972865104675,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4356.5,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 3.451043338683788,
|
|
"grad_norm": 0.670169728101637,
|
|
"learning_rate": 2.397637454094756e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3168160319328308,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3099.5,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 3.4590690208667736,
|
|
"grad_norm": 0.7246435959084999,
|
|
"learning_rate": 2.3897880230909678e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33634674549102783,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3086.9,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 3.467094703049759,
|
|
"grad_norm": 0.5976025268254245,
|
|
"learning_rate": 2.3819323459766194e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2722659707069397,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3841.1,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 3.475120385232745,
|
|
"grad_norm": 0.6767062066233622,
|
|
"learning_rate": 2.3740705486340492e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3141717314720154,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3287.6,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 3.4831460674157304,
|
|
"grad_norm": 0.7116621272080358,
|
|
"learning_rate": 2.366202757043668e-05,
|
|
"loss": 0.3085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2997426986694336,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3052.1,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 3.491171749598716,
|
|
"grad_norm": 0.6433294547139315,
|
|
"learning_rate": 2.358329097281942e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2968630790710449,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3942.4,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 3.4991974317817016,
|
|
"grad_norm": 0.7124566594264524,
|
|
"learning_rate": 2.3504496955193695e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30782055854797363,
|
|
"step": 2180,
|
|
"valid_targets_mean": 2905.1,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 3.5072231139646872,
|
|
"grad_norm": 0.5775098539362448,
|
|
"learning_rate": 2.342564678018462e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3038385510444641,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4300.8,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.515248796147673,
|
|
"grad_norm": 0.6382323162026908,
|
|
"learning_rate": 2.3346741711317185e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2961849272251129,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3535.6,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 3.523274478330658,
|
|
"grad_norm": 0.5564061819330308,
|
|
"learning_rate": 2.326778301299603e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865520417690277,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4475.6,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 3.5313001605136436,
|
|
"grad_norm": 0.5778432970225726,
|
|
"learning_rate": 2.3188771950485172e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30924904346466064,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4674.0,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 3.539325842696629,
|
|
"grad_norm": 0.6261379914534313,
|
|
"learning_rate": 2.3109709789887732e-05,
|
|
"loss": 0.3176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30463993549346924,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4209.2,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 3.547351524879615,
|
|
"grad_norm": 0.7180281584603996,
|
|
"learning_rate": 2.3030597798125642e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3093678057193756,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3064.6,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 3.5553772070626004,
|
|
"grad_norm": 0.5663042313099708,
|
|
"learning_rate": 2.295143724291935e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3240026533603668,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4600.2,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 3.563402889245586,
|
|
"grad_norm": 0.6689627822906121,
|
|
"learning_rate": 2.2872229392767497e-05,
|
|
"loss": 0.318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35001277923583984,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3385.9,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.7896098083528881,
|
|
"learning_rate": 2.2792975516926615e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3305118680000305,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2686.9,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 3.579454253611557,
|
|
"grad_norm": 1.5602072787266907,
|
|
"learning_rate": 2.271367688539075e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3324968218803406,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3841.5,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 3.5874799357945424,
|
|
"grad_norm": 0.6872236922825736,
|
|
"learning_rate": 2.2634334768871133e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3679652810096741,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3434.4,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 3.595505617977528,
|
|
"grad_norm": 0.5814797853457037,
|
|
"learning_rate": 2.2554950438775813e-05,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27784669399261475,
|
|
"step": 2240,
|
|
"valid_targets_mean": 4185.3,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 3.6035313001605136,
|
|
"grad_norm": 0.7385230856050149,
|
|
"learning_rate": 2.2475525167189298e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31691181659698486,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3146.3,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 3.611556982343499,
|
|
"grad_norm": 0.6066455896986883,
|
|
"learning_rate": 2.2396060226852148e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27889129519462585,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3909.2,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 3.619582664526485,
|
|
"grad_norm": 0.6964457809695556,
|
|
"learning_rate": 2.2316556891140594e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29776930809020996,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3086.8,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 3.6276083467094704,
|
|
"grad_norm": 0.6695989405163266,
|
|
"learning_rate": 2.2237016434046124e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111193776130676,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3337.4,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 3.635634028892456,
|
|
"grad_norm": 0.6281046982756264,
|
|
"learning_rate": 2.2157440130155084e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3216857612133026,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3938.3,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 3.6436597110754416,
|
|
"grad_norm": 0.5962832831744961,
|
|
"learning_rate": 2.2077829254628224e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838427722454071,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4565.3,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 3.6516853932584272,
|
|
"grad_norm": 0.647311406446543,
|
|
"learning_rate": 2.1998185083180297e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29513323307037354,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3089.2,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 3.6597110754414124,
|
|
"grad_norm": 0.6290307173792978,
|
|
"learning_rate": 2.1918508892059608e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3228530287742615,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3675.5,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 3.667736757624398,
|
|
"grad_norm": 0.6345580747228391,
|
|
"learning_rate": 2.1838801958027533e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3375920355319977,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3950.9,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 3.6757624398073836,
|
|
"grad_norm": 0.5655972668671305,
|
|
"learning_rate": 2.175906555833811e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2859722375869751,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4356.9,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 3.683788121990369,
|
|
"grad_norm": 0.6962123986239671,
|
|
"learning_rate": 2.1679300970717526e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34465909004211426,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3692.1,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 3.691813804173355,
|
|
"grad_norm": 0.6296140888904415,
|
|
"learning_rate": 2.1599509473343688e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3080332279205322,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4191.6,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 3.6998394863563404,
|
|
"grad_norm": 0.5431914919826406,
|
|
"learning_rate": 2.1519692344825684e-05,
|
|
"loss": 0.3065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953745424747467,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4610.4,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 3.7078651685393256,
|
|
"grad_norm": 0.5181935726181423,
|
|
"learning_rate": 2.1439850864183343e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3181419372558594,
|
|
"step": 2310,
|
|
"valid_targets_mean": 5796.2,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 3.715890850722311,
|
|
"grad_norm": 0.7263970751742419,
|
|
"learning_rate": 2.1359986310826723e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31813183426856995,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2845.2,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 3.723916532905297,
|
|
"grad_norm": 0.7267670395274008,
|
|
"learning_rate": 2.1280099964535594e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3690904378890991,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2955.6,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 3.7319422150882824,
|
|
"grad_norm": 0.543879979163991,
|
|
"learning_rate": 2.120019310543896e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2906736731529236,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4608.6,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 3.739967897271268,
|
|
"grad_norm": 0.6876204923153976,
|
|
"learning_rate": 2.1120267013994527e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32874318957328796,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3443.5,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 3.7479935794542536,
|
|
"grad_norm": 0.7297117601417845,
|
|
"learning_rate": 2.104032297096817e-05,
|
|
"loss": 0.3072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35540735721588135,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3066.7,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 3.756019261637239,
|
|
"grad_norm": 0.692709041110468,
|
|
"learning_rate": 2.096036225741346e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25243479013442993,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2655.6,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 3.764044943820225,
|
|
"grad_norm": 0.615340233848847,
|
|
"learning_rate": 2.088038615465107e-05,
|
|
"loss": 0.3206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31913554668426514,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3971.2,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 3.7720706260032104,
|
|
"grad_norm": 0.5773106255359278,
|
|
"learning_rate": 2.080039594424829e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077567219734192,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4385.9,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 3.780096308186196,
|
|
"grad_norm": 0.5924350583707719,
|
|
"learning_rate": 2.0720392907998473e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.295453816652298,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3621.1,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 3.7881219903691816,
|
|
"grad_norm": 0.7107945335352901,
|
|
"learning_rate": 2.0640378327900507e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37266454100608826,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3357.1,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 3.796147672552167,
|
|
"grad_norm": 0.6773417323122055,
|
|
"learning_rate": 2.0560353486138255e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854129672050476,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2956.8,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 3.8041733547351524,
|
|
"grad_norm": 0.7043682091096448,
|
|
"learning_rate": 2.0480319665060014e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29182106256484985,
|
|
"step": 2370,
|
|
"valid_targets_mean": 2907.1,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 3.812199036918138,
|
|
"grad_norm": 0.7177447932336871,
|
|
"learning_rate": 2.0400278147157973e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33026134967803955,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2862.9,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 3.8202247191011236,
|
|
"grad_norm": 0.6091503048210362,
|
|
"learning_rate": 2.0320230215047667e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3347359895706177,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4621.8,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 3.828250401284109,
|
|
"grad_norm": 0.8268290435161959,
|
|
"learning_rate": 2.0240177151447397e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2996595501899719,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2327.0,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 3.836276083467095,
|
|
"grad_norm": 0.6700903340027047,
|
|
"learning_rate": 2.0160120239157705e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32205504179000854,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3434.1,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 3.8443017656500804,
|
|
"grad_norm": 0.6800096953633628,
|
|
"learning_rate": 2.0080060761040795e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3294561505317688,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3238.8,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 3.8523274478330656,
|
|
"grad_norm": 0.6148537553676106,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3511123061180115,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4205.2,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 3.860353130016051,
|
|
"grad_norm": 0.5742816789614423,
|
|
"learning_rate": 1.991993923895921e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31678056716918945,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4459.7,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 3.868378812199037,
|
|
"grad_norm": 0.7102840837378127,
|
|
"learning_rate": 1.9839879760842302e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3091365098953247,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3083.0,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 3.8764044943820224,
|
|
"grad_norm": 0.5352543858479464,
|
|
"learning_rate": 1.975982284855261e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758001685142517,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4567.4,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 3.884430176565008,
|
|
"grad_norm": 0.5272480348043538,
|
|
"learning_rate": 1.967976978495234e-05,
|
|
"loss": 0.3269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2742311954498291,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5050.8,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 3.8924558587479936,
|
|
"grad_norm": 0.7744860324760398,
|
|
"learning_rate": 1.959972185284203e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3590608835220337,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3540.2,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 3.900481540930979,
|
|
"grad_norm": 0.701400659726649,
|
|
"learning_rate": 1.951968033493999e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3506782650947571,
|
|
"step": 2430,
|
|
"valid_targets_mean": 2976.8,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 3.908507223113965,
|
|
"grad_norm": 0.6993292769774188,
|
|
"learning_rate": 1.9439646513861748e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3178505599498749,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3588.0,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 3.9165329052969504,
|
|
"grad_norm": 0.5694465559674572,
|
|
"learning_rate": 1.93596216720995e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29791536927223206,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4113.2,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 3.924558587479936,
|
|
"grad_norm": 0.6285759961241951,
|
|
"learning_rate": 1.9279607092001534e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29356417059898376,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3790.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 3.932584269662921,
|
|
"grad_norm": 0.542701693428039,
|
|
"learning_rate": 1.9199604055751718e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26342204213142395,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4474.4,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 3.940609951845907,
|
|
"grad_norm": 0.5824458635342638,
|
|
"learning_rate": 1.9119613845348935e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33034420013427734,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4194.2,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 3.9486356340288924,
|
|
"grad_norm": 0.6574485000210932,
|
|
"learning_rate": 1.9039637742586545e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920242249965668,
|
|
"step": 2460,
|
|
"valid_targets_mean": 3502.9,
|
|
"valid_targets_min": 1192
|
|
},
|
|
{
|
|
"epoch": 3.956661316211878,
|
|
"grad_norm": 0.6960977947598095,
|
|
"learning_rate": 1.8959677029031832e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3272545635700226,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3309.7,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 3.9646869983948636,
|
|
"grad_norm": 0.5916748772935494,
|
|
"learning_rate": 1.8879732986005476e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28794342279434204,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3835.8,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 3.972712680577849,
|
|
"grad_norm": 0.672183242447473,
|
|
"learning_rate": 1.8799806894561042e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3604351878166199,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3576.2,
|
|
"valid_targets_min": 1016
|
|
},
|
|
{
|
|
"epoch": 3.980738362760835,
|
|
"grad_norm": 0.6811153811743201,
|
|
"learning_rate": 1.8719900035464412e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955937385559082,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3188.5,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 3.98876404494382,
|
|
"grad_norm": 0.6314167458174202,
|
|
"learning_rate": 1.864001368917328e-05,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31698083877563477,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4282.2,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 3.9967897271268056,
|
|
"grad_norm": 0.576341948267428,
|
|
"learning_rate": 1.856014913581666e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302348792552948,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4706.8,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.004815409309791,
|
|
"grad_norm": 0.6047650432736136,
|
|
"learning_rate": 1.8480307655174322e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897290289402008,
|
|
"step": 2495,
|
|
"valid_targets_mean": 3599.8,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 4.012841091492777,
|
|
"grad_norm": 0.6873106394784395,
|
|
"learning_rate": 1.8400490526656322e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27196377515792847,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3144.9,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 4.020866773675762,
|
|
"grad_norm": 0.6851682795115649,
|
|
"learning_rate": 1.8320699029282477e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2716870903968811,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3586.3,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 4.028892455858748,
|
|
"grad_norm": 0.5506233031613497,
|
|
"learning_rate": 1.8240934441661898e-05,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26533517241477966,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4727.2,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 4.036918138041734,
|
|
"grad_norm": 0.6548141677553122,
|
|
"learning_rate": 1.8161198041972478e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559020519256592,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3472.1,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 4.044943820224719,
|
|
"grad_norm": 0.7236576690236846,
|
|
"learning_rate": 1.8081491107940405e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30744728446006775,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3150.6,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 4.052969502407705,
|
|
"grad_norm": 0.7506664844299054,
|
|
"learning_rate": 1.8001814916819706e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3097061216831207,
|
|
"step": 2525,
|
|
"valid_targets_mean": 2938.5,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 4.06099518459069,
|
|
"grad_norm": 0.7947360137855174,
|
|
"learning_rate": 1.7922170745371782e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3051798641681671,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2646.4,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 4.069020866773676,
|
|
"grad_norm": 0.8016294702709386,
|
|
"learning_rate": 1.7842559869844926e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3093782961368561,
|
|
"step": 2535,
|
|
"valid_targets_mean": 3184.4,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 4.077046548956662,
|
|
"grad_norm": 0.5530459964373236,
|
|
"learning_rate": 1.7762983565953882e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2800839841365814,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4882.8,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 4.085072231139647,
|
|
"grad_norm": 0.7437066520541108,
|
|
"learning_rate": 1.768344310885941e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2729197144508362,
|
|
"step": 2545,
|
|
"valid_targets_mean": 2958.8,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 4.093097913322633,
|
|
"grad_norm": 0.694118690780993,
|
|
"learning_rate": 1.760393977314786e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.245416522026062,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2976.6,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 4.101123595505618,
|
|
"grad_norm": 0.605501563324919,
|
|
"learning_rate": 1.7524474832810715e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622923254966736,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4320.6,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 4.109149277688603,
|
|
"grad_norm": 0.7792497366942087,
|
|
"learning_rate": 1.7445049561224197e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30106616020202637,
|
|
"step": 2560,
|
|
"valid_targets_mean": 2597.1,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 4.117174959871589,
|
|
"grad_norm": 0.5760598885992643,
|
|
"learning_rate": 1.736566523112888e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812322974205017,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5157.0,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 4.125200642054574,
|
|
"grad_norm": 0.6909854690455403,
|
|
"learning_rate": 1.7286323114609264e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30117636919021606,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3695.1,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 4.13322632423756,
|
|
"grad_norm": 0.6389586808078425,
|
|
"learning_rate": 1.7207024483073395e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2964931130409241,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4014.4,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 4.141252006420546,
|
|
"grad_norm": 0.6196695239547542,
|
|
"learning_rate": 1.7127770607232506e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30494269728660583,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4270.8,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 4.149277688603531,
|
|
"grad_norm": 0.6682591706555707,
|
|
"learning_rate": 1.7048562757080658e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29162952303886414,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3661.6,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 4.157303370786517,
|
|
"grad_norm": 0.6802774692782385,
|
|
"learning_rate": 1.6969402201874358e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30426400899887085,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3356.0,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 4.165329052969502,
|
|
"grad_norm": 0.6772272914547192,
|
|
"learning_rate": 1.6890290210112268e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29196813702583313,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3533.8,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 4.173354735152488,
|
|
"grad_norm": 0.6623890896159287,
|
|
"learning_rate": 1.6811228049514828e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2664312422275543,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3161.8,
|
|
"valid_targets_min": 1164
|
|
},
|
|
{
|
|
"epoch": 4.181380417335474,
|
|
"grad_norm": 0.7840467945004942,
|
|
"learning_rate": 1.6732216987003972e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26371073722839355,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3060.9,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 4.189406099518459,
|
|
"grad_norm": 0.8289256705212245,
|
|
"learning_rate": 1.6653258288682815e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32684239745140076,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2775.8,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 4.197431781701445,
|
|
"grad_norm": 0.6808706478573116,
|
|
"learning_rate": 1.6574353219815383e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23801538348197937,
|
|
"step": 2615,
|
|
"valid_targets_mean": 2954.1,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 4.20545746388443,
|
|
"grad_norm": 0.62587608097377,
|
|
"learning_rate": 1.6495503044806305e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3103441894054413,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4234.4,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 4.213483146067416,
|
|
"grad_norm": 0.6409221982967469,
|
|
"learning_rate": 1.641670902718058e-05,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3056131601333618,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3910.8,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 4.221508828250402,
|
|
"grad_norm": 0.5885876492142961,
|
|
"learning_rate": 1.633797242956332e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3021639585494995,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4420.8,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 4.229534510433387,
|
|
"grad_norm": 0.6826900601360031,
|
|
"learning_rate": 1.625929451365951e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2933671176433563,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4272.6,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 4.237560192616373,
|
|
"grad_norm": 0.6874288378582567,
|
|
"learning_rate": 1.618067654023381e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27449920773506165,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3953.1,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 4.245585874799358,
|
|
"grad_norm": 0.6981034090717605,
|
|
"learning_rate": 1.6102119769090325e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708626091480255,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3233.2,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 4.253611556982343,
|
|
"grad_norm": 0.6481074085528771,
|
|
"learning_rate": 1.6023625459052442e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25088566541671753,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3499.5,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 4.261637239165329,
|
|
"grad_norm": 0.6974948565340451,
|
|
"learning_rate": 1.594519486794264e-05,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752339839935303,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3157.4,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 4.269662921348314,
|
|
"grad_norm": 0.5825215539125852,
|
|
"learning_rate": 1.586682925256235e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26275527477264404,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4241.9,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 4.2776886035313,
|
|
"grad_norm": 0.6000700750100424,
|
|
"learning_rate": 1.5788529868671792e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660018801689148,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4947.9,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.6149354416567213,
|
|
"learning_rate": 1.5710297970969893e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27097922563552856,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3808.4,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 4.293739967897271,
|
|
"grad_norm": 0.7405391893650735,
|
|
"learning_rate": 1.563213481307413e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2933095693588257,
|
|
"step": 2675,
|
|
"valid_targets_mean": 2927.8,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 4.301765650080257,
|
|
"grad_norm": 0.6189337068073089,
|
|
"learning_rate": 1.5554041647500485e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28295230865478516,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3911.2,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 4.309791332263242,
|
|
"grad_norm": 0.6106063610792366,
|
|
"learning_rate": 1.5476019725643342e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31785130500793457,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4389.3,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 4.317817014446228,
|
|
"grad_norm": 0.6045793878756607,
|
|
"learning_rate": 1.5398070297755465e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901744544506073,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4333.0,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 4.325842696629214,
|
|
"grad_norm": 0.6121310269518968,
|
|
"learning_rate": 1.5320194612927935e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3235659599304199,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4533.2,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 4.333868378812199,
|
|
"grad_norm": 0.6632471494699812,
|
|
"learning_rate": 1.5242393919070146e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37380480766296387,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3909.4,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 4.341894060995185,
|
|
"grad_norm": 0.6103593907040346,
|
|
"learning_rate": 1.5164669462889825e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843911945819855,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4273.2,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 4.34991974317817,
|
|
"grad_norm": 0.6927533939249583,
|
|
"learning_rate": 1.5087022489873016e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2757907509803772,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4045.8,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 4.357945425361156,
|
|
"grad_norm": 0.7375003474025302,
|
|
"learning_rate": 1.5009454244264156e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2731490731239319,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3280.8,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 4.365971107544142,
|
|
"grad_norm": 0.5498224158436009,
|
|
"learning_rate": 1.4931965969046113e-05,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583663761615753,
|
|
"step": 2720,
|
|
"valid_targets_mean": 5119.2,
|
|
"valid_targets_min": 1782
|
|
},
|
|
{
|
|
"epoch": 4.373996789727126,
|
|
"grad_norm": 0.6140294109733144,
|
|
"learning_rate": 1.4854558905920303e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2864842414855957,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3779.6,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 4.382022471910112,
|
|
"grad_norm": 0.6400165965273884,
|
|
"learning_rate": 1.4777234295286742e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2937953472137451,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4377.2,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 4.390048154093098,
|
|
"grad_norm": 0.5004595331231856,
|
|
"learning_rate": 1.469999337622421e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2171623408794403,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5296.3,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 4.398073836276083,
|
|
"grad_norm": 0.7329575966964067,
|
|
"learning_rate": 1.4622837386470373e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3318957984447479,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3016.0,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 4.406099518459069,
|
|
"grad_norm": 0.6222678960642175,
|
|
"learning_rate": 1.4545767562401973e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935987114906311,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4258.2,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 4.414125200642054,
|
|
"grad_norm": 0.597749830170109,
|
|
"learning_rate": 1.4468785139014986e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925226092338562,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4484.2,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 4.42215088282504,
|
|
"grad_norm": 0.6852632536951936,
|
|
"learning_rate": 1.4391891349904859e-05,
|
|
"loss": 0.2954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2912892997264862,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3215.1,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 4.430176565008026,
|
|
"grad_norm": 0.6883728318326824,
|
|
"learning_rate": 1.4315087427246706e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25406256318092346,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3068.6,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 4.438202247191011,
|
|
"grad_norm": 0.6458698355274707,
|
|
"learning_rate": 1.4238374601775617e-05,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2687876224517822,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3603.2,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 4.446227929373997,
|
|
"grad_norm": 0.7081247753680334,
|
|
"learning_rate": 1.4161754102766892e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27763521671295166,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3080.9,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 4.454253611556982,
|
|
"grad_norm": 0.6262792867872256,
|
|
"learning_rate": 1.4085227158016355e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3188110589981079,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3860.1,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 4.462279293739968,
|
|
"grad_norm": 0.5692895031872488,
|
|
"learning_rate": 1.4008794993820676e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2683429718017578,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4667.8,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 4.470304975922954,
|
|
"grad_norm": 0.6843807792328362,
|
|
"learning_rate": 1.3932458834957745e-05,
|
|
"loss": 0.3015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27892106771469116,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3662.2,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 4.478330658105939,
|
|
"grad_norm": 0.6218533321930769,
|
|
"learning_rate": 1.3856219904667012e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659008502960205,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3989.8,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 4.486356340288925,
|
|
"grad_norm": 0.9926885770155593,
|
|
"learning_rate": 1.3780079424629893e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2910704016685486,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3134.6,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 4.49438202247191,
|
|
"grad_norm": 0.5813734378215742,
|
|
"learning_rate": 1.3704038614950221e-05,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22326640784740448,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3664.1,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 4.502407704654896,
|
|
"grad_norm": 0.6834582486184548,
|
|
"learning_rate": 1.362809869413466e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27819108963012695,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3979.6,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 4.510433386837882,
|
|
"grad_norm": 0.7316500830393926,
|
|
"learning_rate": 1.355226087907319e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.286344438791275,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3819.7,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 4.518459069020867,
|
|
"grad_norm": 0.8079481968564893,
|
|
"learning_rate": 1.3476526385019609e-05,
|
|
"loss": 0.2918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2971421778202057,
|
|
"step": 2815,
|
|
"valid_targets_mean": 2711.0,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 4.526484751203852,
|
|
"grad_norm": 0.5773840114432426,
|
|
"learning_rate": 1.3400896425572078e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26456740498542786,
|
|
"step": 2820,
|
|
"valid_targets_mean": 4277.2,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 4.534510433386838,
|
|
"grad_norm": 0.7125971542242997,
|
|
"learning_rate": 1.3325372212653638e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29959946870803833,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3192.7,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 4.542536115569823,
|
|
"grad_norm": 0.7669758664386392,
|
|
"learning_rate": 1.324995495649281e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28065788745880127,
|
|
"step": 2830,
|
|
"valid_targets_mean": 2871.2,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 4.550561797752809,
|
|
"grad_norm": 0.6470990315188059,
|
|
"learning_rate": 1.3174645865604201e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098318576812744,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3984.9,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 4.558587479935794,
|
|
"grad_norm": 0.7621885510018819,
|
|
"learning_rate": 1.3099446146769146e-05,
|
|
"loss": 0.3058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31331419944763184,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3240.9,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 4.56661316211878,
|
|
"grad_norm": 0.6521886209234062,
|
|
"learning_rate": 1.302435700501635e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640630006790161,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3935.1,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 4.574638844301766,
|
|
"grad_norm": 0.685434878009597,
|
|
"learning_rate": 1.294937964360259e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2975344657897949,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3383.8,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 4.582664526484751,
|
|
"grad_norm": 0.6190164159047746,
|
|
"learning_rate": 1.2874515263993435e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956984341144562,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4099.1,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 4.590690208667737,
|
|
"grad_norm": 0.5675235026540855,
|
|
"learning_rate": 1.2799765065843994e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2910670042037964,
|
|
"step": 2860,
|
|
"valid_targets_mean": 4889.2,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 4.598715890850722,
|
|
"grad_norm": 0.679623971316975,
|
|
"learning_rate": 1.2725130246979683e-05,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3240700960159302,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3311.3,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 4.606741573033708,
|
|
"grad_norm": 0.7728104404734595,
|
|
"learning_rate": 1.2650612003377036e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3312641680240631,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3641.9,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 4.614767255216694,
|
|
"grad_norm": 0.5403729050630909,
|
|
"learning_rate": 1.2576211529144554e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2730843126773834,
|
|
"step": 2875,
|
|
"valid_targets_mean": 4973.5,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 4.622792937399679,
|
|
"grad_norm": 0.64855144351719,
|
|
"learning_rate": 1.2501930016503541e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31970155239105225,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3558.5,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 4.630818619582665,
|
|
"grad_norm": 0.5655839165200601,
|
|
"learning_rate": 1.2427768655769024e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28301215171813965,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4622.1,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 4.63884430176565,
|
|
"grad_norm": 0.6503306622773746,
|
|
"learning_rate": 1.2353728635330667e-05,
|
|
"loss": 0.2923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27559348940849304,
|
|
"step": 2890,
|
|
"valid_targets_mean": 4309.4,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 4.646869983948635,
|
|
"grad_norm": 0.5729399098100818,
|
|
"learning_rate": 1.2279811141633735e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22769290208816528,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4252.6,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 4.654895666131621,
|
|
"grad_norm": 0.7196393710865336,
|
|
"learning_rate": 1.2206017359160075e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28762704133987427,
|
|
"step": 2900,
|
|
"valid_targets_mean": 3014.8,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 4.662921348314606,
|
|
"grad_norm": 0.6303452247696679,
|
|
"learning_rate": 1.2132348470409143e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32175523042678833,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4308.5,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 4.670947030497592,
|
|
"grad_norm": 0.7076896418601729,
|
|
"learning_rate": 1.2058805655879038e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28790634870529175,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3426.9,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 4.678972712680578,
|
|
"grad_norm": 0.7483897177575349,
|
|
"learning_rate": 1.198539009404762e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2777247428894043,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3015.3,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 4.686998394863563,
|
|
"grad_norm": 0.6755480080207942,
|
|
"learning_rate": 1.1912102961353584e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27196720242500305,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3789.8,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.695024077046549,
|
|
"grad_norm": 0.6673367266121275,
|
|
"learning_rate": 1.1838945432177635e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592198848724365,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3203.7,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 4.703049759229534,
|
|
"grad_norm": 0.7098919358193364,
|
|
"learning_rate": 1.1765918678823662e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291146457195282,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3117.5,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 4.71107544141252,
|
|
"grad_norm": 0.6966520726409877,
|
|
"learning_rate": 1.1693023871499957e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28456640243530273,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3993.9,
|
|
"valid_targets_min": 1081
|
|
},
|
|
{
|
|
"epoch": 4.719101123595506,
|
|
"grad_norm": 0.7330913491473154,
|
|
"learning_rate": 1.1620262178300456e-05,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2917308807373047,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3266.5,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 4.727126805778491,
|
|
"grad_norm": 0.7418747285858878,
|
|
"learning_rate": 1.1547634765186016e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29506009817123413,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2939.9,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 4.735152487961477,
|
|
"grad_norm": 0.6598557520086463,
|
|
"learning_rate": 1.1475142795965757e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2781981825828552,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3554.8,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 4.743178170144462,
|
|
"grad_norm": 0.7260058259437949,
|
|
"learning_rate": 1.140278743227837e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30531060695648193,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3263.6,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 4.751203852327448,
|
|
"grad_norm": 0.7361674553299693,
|
|
"learning_rate": 1.1330569833573551e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29504889249801636,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2916.2,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 4.759229534510434,
|
|
"grad_norm": 0.6861365075278192,
|
|
"learning_rate": 1.1258491157093396e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27314841747283936,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3319.3,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 4.767255216693419,
|
|
"grad_norm": 0.6301354650301532,
|
|
"learning_rate": 1.1186552557853825e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26609140634536743,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4180.5,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 4.775280898876405,
|
|
"grad_norm": 0.6556739197835177,
|
|
"learning_rate": 1.1114755188626168e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2980227768421173,
|
|
"step": 2975,
|
|
"valid_targets_mean": 4423.4,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 4.78330658105939,
|
|
"grad_norm": 0.7662988617516535,
|
|
"learning_rate": 1.104310019991857e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3143603801727295,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3178.4,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 4.791332263242376,
|
|
"grad_norm": 0.6604321783368594,
|
|
"learning_rate": 1.0971588739957656e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832232117652893,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3923.4,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 4.799357945425362,
|
|
"grad_norm": 0.684082619283228,
|
|
"learning_rate": 1.0900221954670088e-05,
|
|
"loss": 0.2983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3246185779571533,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3605.9,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 4.807383627608346,
|
|
"grad_norm": 0.5680070804156448,
|
|
"learning_rate": 1.0829000987664182e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782781422138214,
|
|
"step": 2995,
|
|
"valid_targets_mean": 5242.7,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 4.815409309791332,
|
|
"grad_norm": 0.6705197374149907,
|
|
"learning_rate": 1.0757926980211631e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27220720052719116,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3483.9,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 4.823434991974318,
|
|
"grad_norm": 0.5661660054249947,
|
|
"learning_rate": 1.0687001071229193e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2823578119277954,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4847.9,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 4.831460674157303,
|
|
"grad_norm": 0.7375459649734657,
|
|
"learning_rate": 1.0616224397260418e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31219345331192017,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3198.8,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 4.839486356340289,
|
|
"grad_norm": 0.6073977672296423,
|
|
"learning_rate": 1.0545598092457486e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2647751569747925,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3968.2,
|
|
"valid_targets_min": 1755
|
|
},
|
|
{
|
|
"epoch": 4.847512038523274,
|
|
"grad_norm": 0.6145734206210263,
|
|
"learning_rate": 1.0475123288562973e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26080113649368286,
|
|
"step": 3020,
|
|
"valid_targets_mean": 3956.1,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 4.85553772070626,
|
|
"grad_norm": 0.6934639922156027,
|
|
"learning_rate": 1.0404801114891778e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30166614055633545,
|
|
"step": 3025,
|
|
"valid_targets_mean": 3559.4,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 4.863563402889246,
|
|
"grad_norm": 0.7343097239955374,
|
|
"learning_rate": 1.033463269831299e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275170236825943,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3092.5,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 4.871589085072231,
|
|
"grad_norm": 0.6667652221853023,
|
|
"learning_rate": 1.0264619163231806e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2529391050338745,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3286.1,
|
|
"valid_targets_min": 951
|
|
},
|
|
{
|
|
"epoch": 4.879614767255217,
|
|
"grad_norm": 0.7253262792550317,
|
|
"learning_rate": 1.0194761631571582e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711363434791565,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3164.8,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 4.887640449438202,
|
|
"grad_norm": 0.7068160702540172,
|
|
"learning_rate": 1.0125061222755803e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28815412521362305,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3623.1,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 4.895666131621188,
|
|
"grad_norm": 0.751684093005097,
|
|
"learning_rate": 1.0055519053690143e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33063000440597534,
|
|
"step": 3050,
|
|
"valid_targets_mean": 3098.9,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 4.903691813804174,
|
|
"grad_norm": 0.6598742921299661,
|
|
"learning_rate": 9.986136238744611e-06,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29148566722869873,
|
|
"step": 3055,
|
|
"valid_targets_mean": 4101.8,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 4.911717495987159,
|
|
"grad_norm": 0.6900348203525665,
|
|
"learning_rate": 9.916913889735631e-06,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293303519487381,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3191.4,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 4.919743178170145,
|
|
"grad_norm": 0.8273335298986744,
|
|
"learning_rate": 9.847853115908287e-06,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886984646320343,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3264.2,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 4.9277688603531296,
|
|
"grad_norm": 0.6705591631273496,
|
|
"learning_rate": 9.778955023918522e-06,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29090893268585205,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3594.6,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 4.935794542536115,
|
|
"grad_norm": 0.640664997434248,
|
|
"learning_rate": 9.710220717815378e-06,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3294585943222046,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4173.6,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 4.943820224719101,
|
|
"grad_norm": 0.6540285297079634,
|
|
"learning_rate": 9.641651299023356e-06,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3010079264640808,
|
|
"step": 3080,
|
|
"valid_targets_mean": 4036.2,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 4.951845906902086,
|
|
"grad_norm": 0.8547242686318872,
|
|
"learning_rate": 9.573247866324735e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30390411615371704,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2339.9,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 4.959871589085072,
|
|
"grad_norm": 0.6372650310192473,
|
|
"learning_rate": 9.505011515841955e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2598150968551636,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3830.6,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 4.967897271268058,
|
|
"grad_norm": 0.6529668675911442,
|
|
"learning_rate": 9.436943341020086e-06,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28081637620925903,
|
|
"step": 3095,
|
|
"valid_targets_mean": 3915.2,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 4.975922953451043,
|
|
"grad_norm": 0.8561424678927981,
|
|
"learning_rate": 9.369044432609286e-06,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32556384801864624,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2449.5,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 4.983948635634029,
|
|
"grad_norm": 0.696513018012123,
|
|
"learning_rate": 9.301315878647303e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2820906937122345,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3442.9,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 4.991974317817014,
|
|
"grad_norm": 0.5372970323443184,
|
|
"learning_rate": 9.233758764442093e-06,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948618531227112,
|
|
"step": 3110,
|
|
"valid_targets_mean": 5083.3,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.5721522259584688,
|
|
"learning_rate": 9.166374172554362e-06,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717619240283966,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4476.3,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 5.008025682182986,
|
|
"grad_norm": 0.7028196715892202,
|
|
"learning_rate": 9.099163182780284e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26374906301498413,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2963.1,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 5.016051364365971,
|
|
"grad_norm": 0.7696006439733051,
|
|
"learning_rate": 9.032126872134158e-06,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2787320911884308,
|
|
"step": 3125,
|
|
"valid_targets_mean": 2732.6,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 5.024077046548957,
|
|
"grad_norm": 0.7872504261588326,
|
|
"learning_rate": 8.965266314831137e-06,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2909359335899353,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2736.1,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 5.032102728731942,
|
|
"grad_norm": 0.6274466577943987,
|
|
"learning_rate": 8.898582582270068e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2512774169445038,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4189.9,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 5.040128410914928,
|
|
"grad_norm": 0.6556114382291309,
|
|
"learning_rate": 8.83207674301628e-06,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501053810119629,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3795.9,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 5.048154093097914,
|
|
"grad_norm": 0.61009563906796,
|
|
"learning_rate": 8.765749862784455e-06,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29685959219932556,
|
|
"step": 3145,
|
|
"valid_targets_mean": 4718.1,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 5.056179775280899,
|
|
"grad_norm": 0.6491962573724314,
|
|
"learning_rate": 8.699603004421602e-06,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21681779623031616,
|
|
"step": 3150,
|
|
"valid_targets_mean": 4017.5,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 5.064205457463885,
|
|
"grad_norm": 0.6382446301996547,
|
|
"learning_rate": 8.633637227889955e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2507069706916809,
|
|
"step": 3155,
|
|
"valid_targets_mean": 4214.1,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 5.07223113964687,
|
|
"grad_norm": 0.9082089287241969,
|
|
"learning_rate": 8.567853590250045e-06,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2966684401035309,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2296.4,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 5.080256821829855,
|
|
"grad_norm": 0.6891005130401128,
|
|
"learning_rate": 8.502253145643752e-06,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2803407311439514,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3472.6,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 5.088282504012841,
|
|
"grad_norm": 0.6074842704584884,
|
|
"learning_rate": 8.436836945277368e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24347522854804993,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4252.8,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 5.096308186195826,
|
|
"grad_norm": 0.77635219125442,
|
|
"learning_rate": 8.371606037404815e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.322262704372406,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3187.7,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 5.104333868378812,
|
|
"grad_norm": 0.8111524825143072,
|
|
"learning_rate": 8.306561467310812e-06,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34277021884918213,
|
|
"step": 3180,
|
|
"valid_targets_mean": 2792.9,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 5.112359550561798,
|
|
"grad_norm": 0.7183226282012349,
|
|
"learning_rate": 8.241704277294111e-06,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29030734300613403,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3870.0,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 5.120385232744783,
|
|
"grad_norm": 0.7157095212942316,
|
|
"learning_rate": 8.17703550665084e-06,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2843115031719208,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3981.7,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 5.128410914927769,
|
|
"grad_norm": 0.7049255535567401,
|
|
"learning_rate": 8.112556191657824e-06,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728787660598755,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3743.2,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 5.136436597110754,
|
|
"grad_norm": 0.6730398147904004,
|
|
"learning_rate": 8.04826736555595e-06,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29003259539604187,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3665.5,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 5.14446227929374,
|
|
"grad_norm": 0.6407484716160854,
|
|
"learning_rate": 7.984170058533675e-06,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28247955441474915,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4352.7,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 5.152487961476726,
|
|
"grad_norm": 0.7332216128802403,
|
|
"learning_rate": 7.920265297710451e-06,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30390608310699463,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3291.6,
|
|
"valid_targets_min": 1485
|
|
},
|
|
{
|
|
"epoch": 5.160513643659711,
|
|
"grad_norm": 0.6408246952572944,
|
|
"learning_rate": 7.85655410712032e-06,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28235888481140137,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4396.0,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 5.168539325842697,
|
|
"grad_norm": 1.4622303613933785,
|
|
"learning_rate": 7.793037507695478e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28642934560775757,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3732.4,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 5.176565008025682,
|
|
"grad_norm": 0.6261803138910497,
|
|
"learning_rate": 7.729716517249899e-06,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29643821716308594,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4334.1,
|
|
"valid_targets_min": 1816
|
|
},
|
|
{
|
|
"epoch": 5.184590690208668,
|
|
"grad_norm": 0.7971986237143821,
|
|
"learning_rate": 7.666592150463069e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789764404296875,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2786.9,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 5.192616372391654,
|
|
"grad_norm": 0.6329447455212467,
|
|
"learning_rate": 7.603665418863693e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26866644620895386,
|
|
"step": 3235,
|
|
"valid_targets_mean": 4041.2,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 5.200642054574639,
|
|
"grad_norm": 0.624871891648874,
|
|
"learning_rate": 7.540937330813487e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24816308915615082,
|
|
"step": 3240,
|
|
"valid_targets_mean": 4548.3,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 5.208667736757624,
|
|
"grad_norm": 0.7628667533037322,
|
|
"learning_rate": 7.47840889149104e-06,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2734125852584839,
|
|
"step": 3245,
|
|
"valid_targets_mean": 2898.1,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 5.21669341894061,
|
|
"grad_norm": 0.6676189122504462,
|
|
"learning_rate": 7.416081102875696e-06,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677164077758789,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3523.7,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 5.224719101123595,
|
|
"grad_norm": 0.6638910587004355,
|
|
"learning_rate": 7.353954963731475e-06,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26987558603286743,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4011.5,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 5.232744783306581,
|
|
"grad_norm": 0.7441107670421949,
|
|
"learning_rate": 7.292031469591121e-06,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.271876722574234,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3341.5,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 5.240770465489566,
|
|
"grad_norm": 0.6546300553732923,
|
|
"learning_rate": 7.230311612740086e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24306584894657135,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3431.6,
|
|
"valid_targets_min": 1813
|
|
},
|
|
{
|
|
"epoch": 5.248796147672552,
|
|
"grad_norm": 0.650793669220539,
|
|
"learning_rate": 7.168796382200682e-06,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28111374378204346,
|
|
"step": 3270,
|
|
"valid_targets_mean": 4243.4,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 5.256821829855538,
|
|
"grad_norm": 0.8023617413003873,
|
|
"learning_rate": 7.1074867637162185e-06,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25308892130851746,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3021.6,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 5.264847512038523,
|
|
"grad_norm": 0.8073776524379921,
|
|
"learning_rate": 7.046383739735166e-06,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2922864556312561,
|
|
"step": 3280,
|
|
"valid_targets_mean": 2854.3,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 5.272873194221509,
|
|
"grad_norm": 0.5652256288130875,
|
|
"learning_rate": 6.985488289395479e-06,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2350950688123703,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4722.4,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 5.280898876404494,
|
|
"grad_norm": 0.6118362228040427,
|
|
"learning_rate": 6.924801388508868e-06,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29027315974235535,
|
|
"step": 3290,
|
|
"valid_targets_mean": 4790.4,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 5.28892455858748,
|
|
"grad_norm": 0.7157273214802166,
|
|
"learning_rate": 6.864324009545154e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27840450406074524,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3583.9,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 5.296950240770466,
|
|
"grad_norm": 0.7326336173792627,
|
|
"learning_rate": 6.804057121616714e-06,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2584049701690674,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3162.8,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 5.304975922953451,
|
|
"grad_norm": 0.685284656393273,
|
|
"learning_rate": 6.744001690462922e-06,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31883978843688965,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3783.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 5.313001605136437,
|
|
"grad_norm": 0.7199476658903159,
|
|
"learning_rate": 6.684158678434707e-06,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25980404019355774,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3433.7,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 5.321027287319422,
|
|
"grad_norm": 0.6211550645001752,
|
|
"learning_rate": 6.624529044479109e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2359941452741623,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4210.4,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 5.329052969502408,
|
|
"grad_norm": 0.6674472461806916,
|
|
"learning_rate": 6.565113744123901e-06,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23335188627243042,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3725.8,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 5.337078651685394,
|
|
"grad_norm": 0.7038574454769413,
|
|
"learning_rate": 6.50591372946231e-06,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29542288184165955,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3744.2,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 5.345104333868379,
|
|
"grad_norm": 0.7574830654699383,
|
|
"learning_rate": 6.446929949137748e-06,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28295376896858215,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3107.1,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 5.353130016051364,
|
|
"grad_norm": 0.6905364922410734,
|
|
"learning_rate": 6.388163348328582e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604389190673828,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3948.9,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 5.36115569823435,
|
|
"grad_norm": 0.7529014715434292,
|
|
"learning_rate": 6.329614868733038e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660890519618988,
|
|
"step": 3340,
|
|
"valid_targets_mean": 2927.8,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 5.369181380417335,
|
|
"grad_norm": 0.6879865336839671,
|
|
"learning_rate": 6.271285448554072e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26959991455078125,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3519.4,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 5.377207062600321,
|
|
"grad_norm": 0.6145976842321501,
|
|
"learning_rate": 6.213176022484353e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22655436396598816,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4158.6,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 5.385232744783306,
|
|
"grad_norm": 0.7107005253192303,
|
|
"learning_rate": 6.1552875216912866e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2658652663230896,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3421.7,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 5.393258426966292,
|
|
"grad_norm": 0.674811204663308,
|
|
"learning_rate": 6.097620873802068e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2858463227748871,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3763.8,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 5.401284109149278,
|
|
"grad_norm": 0.7146185005402997,
|
|
"learning_rate": 6.0401770028888675e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24183401465415955,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3409.9,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 5.409309791332263,
|
|
"grad_norm": 0.667506370660471,
|
|
"learning_rate": 5.982956829453965e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28192341327667236,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4554.3,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 5.417335473515249,
|
|
"grad_norm": 0.7933415200137237,
|
|
"learning_rate": 5.925961270415042e-06,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2737930417060852,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2729.6,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 5.425361155698234,
|
|
"grad_norm": 0.7079261946700439,
|
|
"learning_rate": 5.8691912390904815e-06,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28683871030807495,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3378.9,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 5.43338683788122,
|
|
"grad_norm": 0.9005005460674107,
|
|
"learning_rate": 5.812647645184704e-06,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2666788697242737,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2767.6,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 5.441412520064206,
|
|
"grad_norm": 0.728211796957506,
|
|
"learning_rate": 5.756331394773624e-06,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518937289714813,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3498.5,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 5.449438202247191,
|
|
"grad_norm": 0.6766610695457769,
|
|
"learning_rate": 5.7002433902901276e-06,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23692555725574493,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3883.9,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 5.457463884430177,
|
|
"grad_norm": 0.7012318409830777,
|
|
"learning_rate": 5.644384530509574e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26356691122055054,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3750.8,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 5.465489566613162,
|
|
"grad_norm": 0.6515901817381305,
|
|
"learning_rate": 5.588755710535452e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29717350006103516,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4879.9,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 5.473515248796148,
|
|
"grad_norm": 0.7334900252139014,
|
|
"learning_rate": 5.533357821784991e-06,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2853018045425415,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3355.4,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 5.481540930979134,
|
|
"grad_norm": 0.8156358520464441,
|
|
"learning_rate": 5.47819175197488e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689981162548065,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2932.2,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 5.489566613162118,
|
|
"grad_norm": 0.867399289497061,
|
|
"learning_rate": 5.42325838510708e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851722240447998,
|
|
"step": 3420,
|
|
"valid_targets_mean": 2509.0,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 5.497592295345104,
|
|
"grad_norm": 0.6788085983123993,
|
|
"learning_rate": 5.3685586014546075e-06,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26143547892570496,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3595.4,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 5.50561797752809,
|
|
"grad_norm": 0.6039374442518733,
|
|
"learning_rate": 5.314093277547465e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24622441828250885,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4332.7,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 5.513643659711075,
|
|
"grad_norm": 0.7661170176793389,
|
|
"learning_rate": 5.259863286158591e-06,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2666742503643036,
|
|
"step": 3435,
|
|
"valid_targets_mean": 2801.1,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 5.521669341894061,
|
|
"grad_norm": 0.7492478963857534,
|
|
"learning_rate": 5.205869496289846e-06,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677985429763794,
|
|
"step": 3440,
|
|
"valid_targets_mean": 3756.2,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 5.529695024077046,
|
|
"grad_norm": 0.6674098552074867,
|
|
"learning_rate": 5.152112773158125e-06,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541694641113281,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3911.6,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 5.537720706260032,
|
|
"grad_norm": 0.7780249786879712,
|
|
"learning_rate": 5.0985939781814765e-06,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29299432039260864,
|
|
"step": 3450,
|
|
"valid_targets_mean": 2987.5,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 5.545746388443018,
|
|
"grad_norm": 0.6693781469369839,
|
|
"learning_rate": 5.045313968965282e-06,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667461633682251,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3851.2,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 5.553772070626003,
|
|
"grad_norm": 0.6821690346020746,
|
|
"learning_rate": 4.992273599288546e-06,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22878427803516388,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3344.8,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 5.561797752808989,
|
|
"grad_norm": 0.7715652626253425,
|
|
"learning_rate": 4.9394737190902e-06,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29785871505737305,
|
|
"step": 3465,
|
|
"valid_targets_mean": 2995.1,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 5.569823434991974,
|
|
"grad_norm": 0.6309471033562661,
|
|
"learning_rate": 4.88691517445546e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23600821197032928,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3899.7,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 5.57784911717496,
|
|
"grad_norm": 0.7080806067596288,
|
|
"learning_rate": 4.834598807602317e-06,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28819161653518677,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3709.2,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 5.585874799357946,
|
|
"grad_norm": 0.6941238036682374,
|
|
"learning_rate": 4.78252545686799e-06,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24807021021842957,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3215.1,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 5.593900481540931,
|
|
"grad_norm": 0.6420402627619426,
|
|
"learning_rate": 4.730695956695532e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711610496044159,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4498.0,
|
|
"valid_targets_min": 1623
|
|
},
|
|
{
|
|
"epoch": 5.601926163723917,
|
|
"grad_norm": 0.6499072659145726,
|
|
"learning_rate": 4.679111137620442e-06,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29854172468185425,
|
|
"step": 3490,
|
|
"valid_targets_mean": 3762.9,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 5.609951845906902,
|
|
"grad_norm": 0.8758809795986889,
|
|
"learning_rate": 4.627771826257341e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851346731185913,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3827.5,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 5.617977528089888,
|
|
"grad_norm": 0.7255536688370324,
|
|
"learning_rate": 4.576678845286757e-06,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24996979534626007,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3492.2,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 5.626003210272874,
|
|
"grad_norm": 0.6678297373489821,
|
|
"learning_rate": 4.525833013441931e-06,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579232454299927,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4029.7,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 5.634028892455858,
|
|
"grad_norm": 0.7428852763091396,
|
|
"learning_rate": 4.475235145495669e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29175353050231934,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3082.1,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 5.642054574638844,
|
|
"grad_norm": 0.8068987863560063,
|
|
"learning_rate": 4.424886052247339e-06,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2745383381843567,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2619.2,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 5.65008025682183,
|
|
"grad_norm": 0.6476617203171715,
|
|
"learning_rate": 4.374786540509821e-06,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25007113814353943,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3936.6,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 5.658105939004815,
|
|
"grad_norm": 0.7154099366086117,
|
|
"learning_rate": 4.324937413096628e-06,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28672540187835693,
|
|
"step": 3525,
|
|
"valid_targets_mean": 4098.1,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 5.666131621187801,
|
|
"grad_norm": 0.6746454484203894,
|
|
"learning_rate": 4.275339468809019e-06,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559202015399933,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3849.5,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 5.674157303370786,
|
|
"grad_norm": 0.6064227015443127,
|
|
"learning_rate": 4.225993502423182e-06,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2597361207008362,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4703.6,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 5.682182985553772,
|
|
"grad_norm": 0.6189592793863808,
|
|
"learning_rate": 4.1769003046775315e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24650642275810242,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 5.690208667736758,
|
|
"grad_norm": 0.7960768691143333,
|
|
"learning_rate": 4.128060662260025e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830795645713806,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3400.8,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 5.698234349919743,
|
|
"grad_norm": 0.6790132294866114,
|
|
"learning_rate": 4.079475357795535e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26166296005249023,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3994.7,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 5.706260032102729,
|
|
"grad_norm": 0.7565417428594704,
|
|
"learning_rate": 4.031145169833342e-06,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28407415747642517,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3173.0,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 5.714285714285714,
|
|
"grad_norm": 0.8628548769265821,
|
|
"learning_rate": 3.9830708728346445e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.328813374042511,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2677.8,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 5.7223113964687,
|
|
"grad_norm": 0.66142096480927,
|
|
"learning_rate": 3.935253237160132e-06,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30078476667404175,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4044.1,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 5.730337078651686,
|
|
"grad_norm": 0.6959435220318307,
|
|
"learning_rate": 3.887693029057675e-06,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812281847000122,
|
|
"step": 3570,
|
|
"valid_targets_mean": 3466.2,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 5.738362760834671,
|
|
"grad_norm": 0.7296943656318424,
|
|
"learning_rate": 3.8403910106500104e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752944231033325,
|
|
"step": 3575,
|
|
"valid_targets_mean": 3409.1,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 5.746388443017657,
|
|
"grad_norm": 0.6418408238723913,
|
|
"learning_rate": 3.7933479399225604e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26858317852020264,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3835.9,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 5.754414125200642,
|
|
"grad_norm": 0.6786406299363769,
|
|
"learning_rate": 3.746564570711266e-06,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22533011436462402,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3330.8,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 5.762439807383627,
|
|
"grad_norm": 0.679761163484394,
|
|
"learning_rate": 3.7000416526905026e-06,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23244601488113403,
|
|
"step": 3590,
|
|
"valid_targets_mean": 3195.7,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 5.770465489566613,
|
|
"grad_norm": 0.6593264777202515,
|
|
"learning_rate": 3.6537799313610946e-06,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28260430693626404,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3717.1,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 5.778491171749598,
|
|
"grad_norm": 0.6516651091372667,
|
|
"learning_rate": 3.607780148038347e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265555739402771,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3660.4,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 5.786516853932584,
|
|
"grad_norm": 0.6346382024013276,
|
|
"learning_rate": 3.5620430398401596e-06,
|
|
"loss": 0.2937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31354761123657227,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3970.5,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 5.79454253611557,
|
|
"grad_norm": 0.7516302706249705,
|
|
"learning_rate": 3.5165693396752443e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29920682311058044,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3449.4,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 5.802568218298555,
|
|
"grad_norm": 0.6600224715003938,
|
|
"learning_rate": 3.471359776231347e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23621638119220734,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3702.8,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 5.810593900481541,
|
|
"grad_norm": 0.7358090427580186,
|
|
"learning_rate": 3.426415073963594e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26070553064346313,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3261.7,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 5.818619582664526,
|
|
"grad_norm": 0.7790272943001081,
|
|
"learning_rate": 3.3817359530828873e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25227174162864685,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3139.7,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 5.826645264847512,
|
|
"grad_norm": 0.7240165506558754,
|
|
"learning_rate": 3.3373231295443277e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30265337228775024,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3238.6,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 5.834670947030498,
|
|
"grad_norm": 0.6469651432991523,
|
|
"learning_rate": 3.2931773150357893e-06,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32214218378067017,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4129.1,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.842696629213483,
|
|
"grad_norm": 0.7628021603456647,
|
|
"learning_rate": 3.2492992169664837e-06,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638700008392334,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3118.8,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 5.850722311396469,
|
|
"grad_norm": 0.6148833480819821,
|
|
"learning_rate": 3.2056895384556275e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24907363951206207,
|
|
"step": 3645,
|
|
"valid_targets_mean": 4532.2,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 5.858747993579454,
|
|
"grad_norm": 0.761972727424089,
|
|
"learning_rate": 3.1623489783211904e-06,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24171032011508942,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2953.6,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 5.86677367576244,
|
|
"grad_norm": 0.7522172802951316,
|
|
"learning_rate": 3.1192782310686874e-06,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26823219656944275,
|
|
"step": 3655,
|
|
"valid_targets_mean": 3377.9,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 5.874799357945426,
|
|
"grad_norm": 0.6617020531618129,
|
|
"learning_rate": 3.0764779868800398e-06,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27875378727912903,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3896.2,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 5.882825040128411,
|
|
"grad_norm": 0.7101118434694875,
|
|
"learning_rate": 3.0339489316025394e-06,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27330130338668823,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3561.6,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 5.890850722311397,
|
|
"grad_norm": 0.6450870863466617,
|
|
"learning_rate": 2.991691746737828e-06,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2615209221839905,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4304.2,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 5.898876404494382,
|
|
"grad_norm": 0.6106099879280161,
|
|
"learning_rate": 2.949707109431006e-06,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23242050409317017,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4171.4,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 5.906902086677368,
|
|
"grad_norm": 0.7091286633875294,
|
|
"learning_rate": 2.907995692459773e-06,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3001560568809509,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3491.3,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 5.914927768860353,
|
|
"grad_norm": 0.5996610497508525,
|
|
"learning_rate": 2.8665581642236227e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2610725462436676,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4341.4,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 5.922953451043338,
|
|
"grad_norm": 0.6674547183322811,
|
|
"learning_rate": 2.825395188733169e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.293914258480072,
|
|
"step": 3690,
|
|
"valid_targets_mean": 3962.7,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 5.930979133226324,
|
|
"grad_norm": 0.7290527327593421,
|
|
"learning_rate": 2.784507425599492e-06,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.268707275390625,
|
|
"step": 3695,
|
|
"valid_targets_mean": 3365.1,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 5.93900481540931,
|
|
"grad_norm": 0.5989130202594268,
|
|
"learning_rate": 2.743895530023544e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2561895251274109,
|
|
"step": 3700,
|
|
"valid_targets_mean": 4537.6,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 5.947030497592295,
|
|
"grad_norm": 0.6225243807868891,
|
|
"learning_rate": 2.7035601527856914e-06,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956143617630005,
|
|
"step": 3705,
|
|
"valid_targets_mean": 4456.6,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 5.955056179775281,
|
|
"grad_norm": 0.6177809570579359,
|
|
"learning_rate": 2.6635019402352645e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24951475858688354,
|
|
"step": 3710,
|
|
"valid_targets_mean": 4016.3,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 5.963081861958266,
|
|
"grad_norm": 0.7003290481241636,
|
|
"learning_rate": 2.6237215342801857e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.264138400554657,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3455.7,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 5.971107544141252,
|
|
"grad_norm": 0.6681624013736531,
|
|
"learning_rate": 2.5842195723767205e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26048558950424194,
|
|
"step": 3720,
|
|
"valid_targets_mean": 3697.0,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 5.979133226324238,
|
|
"grad_norm": 0.6770506025292115,
|
|
"learning_rate": 2.544996687519219e-06,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558070719242096,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4088.9,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 5.987158908507223,
|
|
"grad_norm": 0.65494747170184,
|
|
"learning_rate": 2.5060535082300266e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2641768455505371,
|
|
"step": 3730,
|
|
"valid_targets_mean": 4304.0,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 5.995184590690209,
|
|
"grad_norm": 0.6580663741178295,
|
|
"learning_rate": 2.467390658549349e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25386250019073486,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3931.6,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 6.003210272873194,
|
|
"grad_norm": 0.6609129312105617,
|
|
"learning_rate": 2.429008758025302e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.240494504570961,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3519.3,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 6.01123595505618,
|
|
"grad_norm": 0.6043625332630266,
|
|
"learning_rate": 2.3909084217039634e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2506016492843628,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4015.0,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 6.019261637239166,
|
|
"grad_norm": 0.5950220221820791,
|
|
"learning_rate": 2.353090260119515e-06,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733362913131714,
|
|
"step": 3750,
|
|
"valid_targets_mean": 4690.9,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 6.027287319422151,
|
|
"grad_norm": 0.641564879472514,
|
|
"learning_rate": 2.3155548792844674e-06,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534390091896057,
|
|
"step": 3755,
|
|
"valid_targets_mean": 4232.5,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 6.035313001605137,
|
|
"grad_norm": 0.635615739470399,
|
|
"learning_rate": 2.27830288067995e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25196021795272827,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4085.7,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 6.043338683788122,
|
|
"grad_norm": 0.6293298206499798,
|
|
"learning_rate": 2.241334861246058e-06,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31275883316993713,
|
|
"step": 3765,
|
|
"valid_targets_mean": 4687.8,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 6.051364365971107,
|
|
"grad_norm": 0.5517631931946747,
|
|
"learning_rate": 2.2046514133723075e-06,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21695272624492645,
|
|
"step": 3770,
|
|
"valid_targets_mean": 5017.5,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 6.059390048154093,
|
|
"grad_norm": 0.7825812950414968,
|
|
"learning_rate": 2.1682531248881266e-06,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2954491972923279,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3071.6,
|
|
"valid_targets_min": 1101
|
|
},
|
|
{
|
|
"epoch": 6.067415730337078,
|
|
"grad_norm": 0.6522195845191905,
|
|
"learning_rate": 2.1321405790534412e-06,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25011345744132996,
|
|
"step": 3780,
|
|
"valid_targets_mean": 4214.2,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 6.075441412520064,
|
|
"grad_norm": 0.6684965773301453,
|
|
"learning_rate": 2.096314354549334e-06,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573276162147522,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4025.8,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 6.08346709470305,
|
|
"grad_norm": 0.6843181435064984,
|
|
"learning_rate": 2.0607750254687554e-06,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24609273672103882,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3715.8,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 6.091492776886035,
|
|
"grad_norm": 0.6244190710579253,
|
|
"learning_rate": 2.025523161307348e-06,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23711435496807098,
|
|
"step": 3795,
|
|
"valid_targets_mean": 4353.8,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 6.099518459069021,
|
|
"grad_norm": 0.6767540386031448,
|
|
"learning_rate": 1.990559326954307e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26229235529899597,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3611.9,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 6.107544141252006,
|
|
"grad_norm": 0.6967622626787459,
|
|
"learning_rate": 1.955884082683317e-06,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2815011441707611,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4021.6,
|
|
"valid_targets_min": 1286
|
|
},
|
|
{
|
|
"epoch": 6.115569823434992,
|
|
"grad_norm": 0.7635767856160525,
|
|
"learning_rate": 1.9214979841435967e-06,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640751600265503,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3299.8,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 6.123595505617978,
|
|
"grad_norm": 0.7643734195535864,
|
|
"learning_rate": 1.8874015823509873e-06,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26773497462272644,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3123.9,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 6.131621187800963,
|
|
"grad_norm": 0.7638564305291277,
|
|
"learning_rate": 1.8535954236791044e-06,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928069233894348,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3132.9,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 6.139646869983949,
|
|
"grad_norm": 0.7309852068289207,
|
|
"learning_rate": 1.8200800498506166e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25853630900382996,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3305.2,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 6.147672552166934,
|
|
"grad_norm": 1.0379556227321034,
|
|
"learning_rate": 1.7868559979285293e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24523857235908508,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4801.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 6.15569823434992,
|
|
"grad_norm": 0.7697784910515804,
|
|
"learning_rate": 1.7539238003076087e-06,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2687429189682007,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3305.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 6.163723916532906,
|
|
"grad_norm": 0.7901596755771663,
|
|
"learning_rate": 1.721283984705835e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26297926902770996,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3141.2,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 6.171749598715891,
|
|
"grad_norm": 0.6693595725707855,
|
|
"learning_rate": 1.6889370741559407e-06,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816668152809143,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4197.4,
|
|
"valid_targets_min": 1535
|
|
},
|
|
{
|
|
"epoch": 6.179775280898877,
|
|
"grad_norm": 0.6095767577749887,
|
|
"learning_rate": 1.6568835869970445e-06,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25367146730422974,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4505.4,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 6.187800963081862,
|
|
"grad_norm": 0.6159057363579331,
|
|
"learning_rate": 1.625124036866339e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26072919368743896,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4523.3,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 6.195826645264847,
|
|
"grad_norm": 0.6408480069865946,
|
|
"learning_rate": 1.5936589326908513e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24124114215373993,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4169.6,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 6.203852327447833,
|
|
"grad_norm": 0.7061166318044098,
|
|
"learning_rate": 1.5624887786793008e-06,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24038687348365784,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3499.5,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 6.211878009630818,
|
|
"grad_norm": 0.7681743030242524,
|
|
"learning_rate": 1.531614074314014e-06,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935416102409363,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3272.6,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 6.219903691813804,
|
|
"grad_norm": 0.6617974211034061,
|
|
"learning_rate": 1.501035314342918e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22823978960514069,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4078.3,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 6.22792937399679,
|
|
"grad_norm": 0.7423943281446085,
|
|
"learning_rate": 1.4707529887716177e-06,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26716870069503784,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3395.4,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 6.235955056179775,
|
|
"grad_norm": 0.6557083616318948,
|
|
"learning_rate": 1.4407675828555378e-06,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23446784913539886,
|
|
"step": 3885,
|
|
"valid_targets_mean": 4327.8,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 6.243980738362761,
|
|
"grad_norm": 0.6665031355083085,
|
|
"learning_rate": 1.4110795770921536e-06,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24336105585098267,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3787.2,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 6.252006420545746,
|
|
"grad_norm": 0.8288846905864304,
|
|
"learning_rate": 1.3816894472132903e-06,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.311612606048584,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3689.6,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 6.260032102728732,
|
|
"grad_norm": 0.6191849375535133,
|
|
"learning_rate": 1.3525976641774862e-06,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26539063453674316,
|
|
"step": 3900,
|
|
"valid_targets_mean": 4864.2,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 6.268057784911718,
|
|
"grad_norm": 0.6067706010888021,
|
|
"learning_rate": 1.3238046941624694e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2506489157676697,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4544.1,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 6.276083467094703,
|
|
"grad_norm": 0.6814771720932544,
|
|
"learning_rate": 1.295310998557673e-06,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27432727813720703,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4610.1,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 6.284109149277689,
|
|
"grad_norm": 0.6089953067982075,
|
|
"learning_rate": 1.2671170339568372e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27631884813308716,
|
|
"step": 3915,
|
|
"valid_targets_mean": 4669.3,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 6.292134831460674,
|
|
"grad_norm": 0.8602830266529754,
|
|
"learning_rate": 1.239223252150703e-06,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694093585014343,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3236.1,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 6.30016051364366,
|
|
"grad_norm": 0.6033709534152092,
|
|
"learning_rate": 1.211630100119776e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24608641862869263,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5077.7,
|
|
"valid_targets_min": 2576
|
|
},
|
|
{
|
|
"epoch": 6.308186195826646,
|
|
"grad_norm": 0.6648393163640094,
|
|
"learning_rate": 1.1843380200271426e-06,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29300227761268616,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3978.7,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 6.31621187800963,
|
|
"grad_norm": 0.7874700220841598,
|
|
"learning_rate": 1.1573474492114122e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.286835253238678,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3434.0,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 6.324237560192616,
|
|
"grad_norm": 0.8112919746025927,
|
|
"learning_rate": 1.1306588201796863e-06,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816393971443176,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2846.8,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 6.332263242375602,
|
|
"grad_norm": 0.6249539476748315,
|
|
"learning_rate": 1.1042725606006388e-06,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25709593296051025,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4450.9,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 6.340288924558587,
|
|
"grad_norm": 0.7092838822345757,
|
|
"learning_rate": 1.078189093297668e-06,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616301476955414,
|
|
"step": 3950,
|
|
"valid_targets_mean": 3603.2,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 6.348314606741573,
|
|
"grad_norm": 0.7800403012302811,
|
|
"learning_rate": 1.0524088362421048e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25242942571640015,
|
|
"step": 3955,
|
|
"valid_targets_mean": 2897.1,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 6.356340288924558,
|
|
"grad_norm": 0.6584729674688261,
|
|
"learning_rate": 1.0269322025465334e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2677876949310303,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4395.2,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 6.364365971107544,
|
|
"grad_norm": 0.6582060608620135,
|
|
"learning_rate": 1.0017596004581564e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23136237263679504,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4148.5,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 6.37239165329053,
|
|
"grad_norm": 0.6977166086583847,
|
|
"learning_rate": 9.768914333522584e-07,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25464189052581787,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3374.2,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 6.380417335473515,
|
|
"grad_norm": 0.6799990478070559,
|
|
"learning_rate": 9.5232809972575e-07,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644619643688202,
|
|
"step": 3975,
|
|
"valid_targets_mean": 4050.6,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 6.388443017656501,
|
|
"grad_norm": 0.7140751362434293,
|
|
"learning_rate": 9.280699931907633e-07,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2497372031211853,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3695.1,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 6.396468699839486,
|
|
"grad_norm": 0.7367103158715349,
|
|
"learning_rate": 9.04117502468369e-07,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2470214068889618,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3423.9,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 6.404494382022472,
|
|
"grad_norm": 0.7170355865504934,
|
|
"learning_rate": 8.804710113823311e-07,
|
|
"loss": 0.2484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651573717594147,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3731.3,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 6.412520064205458,
|
|
"grad_norm": 0.7606977154165133,
|
|
"learning_rate": 8.571308988529492e-07,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25213515758514404,
|
|
"step": 3995,
|
|
"valid_targets_mean": 3235.7,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 6.420545746388443,
|
|
"grad_norm": 0.7906278823558095,
|
|
"learning_rate": 8.3409753889101e-07,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560886740684509,
|
|
"step": 4000,
|
|
"valid_targets_mean": 2735.9,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 6.428571428571429,
|
|
"grad_norm": 0.6859682357109737,
|
|
"learning_rate": 8.11371300591779e-07,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22252848744392395,
|
|
"step": 4005,
|
|
"valid_targets_mean": 3720.5,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 6.436597110754414,
|
|
"grad_norm": 0.7524648333805172,
|
|
"learning_rate": 7.889525481290805e-07,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26605695486068726,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3399.4,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 6.4446227929374,
|
|
"grad_norm": 0.7281681154992267,
|
|
"learning_rate": 7.668416407494761e-07,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24970248341560364,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3396.1,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 6.452648475120386,
|
|
"grad_norm": 0.6492470026185418,
|
|
"learning_rate": 7.450389327665042e-07,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24129517376422882,
|
|
"step": 4020,
|
|
"valid_targets_mean": 4176.8,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 6.460674157303371,
|
|
"grad_norm": 0.6861006306441435,
|
|
"learning_rate": 7.235447735549895e-07,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715156674385071,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3901.2,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 6.468699839486356,
|
|
"grad_norm": 0.7147296461165827,
|
|
"learning_rate": 7.02359507545467e-07,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22897732257843018,
|
|
"step": 4030,
|
|
"valid_targets_mean": 2959.2,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 6.476725521669342,
|
|
"grad_norm": 0.7937051817551097,
|
|
"learning_rate": 6.814834742186361e-07,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2752697765827179,
|
|
"step": 4035,
|
|
"valid_targets_mean": 3066.4,
|
|
"valid_targets_min": 371
|
|
},
|
|
{
|
|
"epoch": 6.484751203852327,
|
|
"grad_norm": 0.7410144382798508,
|
|
"learning_rate": 6.60917008099946e-07,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23270457983016968,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3141.5,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 6.492776886035313,
|
|
"grad_norm": 0.800086382192242,
|
|
"learning_rate": 6.406604387542259e-07,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2799673080444336,
|
|
"step": 4045,
|
|
"valid_targets_mean": 2901.5,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 6.500802568218298,
|
|
"grad_norm": 0.7348181040681707,
|
|
"learning_rate": 6.207140907803877e-07,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24244631826877594,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3028.5,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 6.508828250401284,
|
|
"grad_norm": 0.6664106189649117,
|
|
"learning_rate": 6.010782838062534e-07,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25469452142715454,
|
|
"step": 4055,
|
|
"valid_targets_mean": 3765.4,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 6.51685393258427,
|
|
"grad_norm": 0.7685771980561349,
|
|
"learning_rate": 5.817533324834146e-07,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28033727407455444,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3131.8,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 6.524879614767255,
|
|
"grad_norm": 0.7210336167267086,
|
|
"learning_rate": 5.627395464821894e-07,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3079640567302704,
|
|
"step": 4065,
|
|
"valid_targets_mean": 4371.7,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 6.532905296950241,
|
|
"grad_norm": 0.7071104205816832,
|
|
"learning_rate": 5.440372304866692e-07,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25912243127822876,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3736.2,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 6.540930979133226,
|
|
"grad_norm": 0.7443164203559729,
|
|
"learning_rate": 5.256466841898334e-07,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2510913908481598,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3203.0,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 6.548956661316212,
|
|
"grad_norm": 0.6542707200421682,
|
|
"learning_rate": 5.075682022887441e-07,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26009780168533325,
|
|
"step": 4080,
|
|
"valid_targets_mean": 4122.8,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 6.556982343499198,
|
|
"grad_norm": 0.6902735382131953,
|
|
"learning_rate": 4.898020744798282e-07,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2277672290802002,
|
|
"step": 4085,
|
|
"valid_targets_mean": 3521.8,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 6.565008025682183,
|
|
"grad_norm": 0.7952907796319953,
|
|
"learning_rate": 4.7234858545422536e-07,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27155131101608276,
|
|
"step": 4090,
|
|
"valid_targets_mean": 3286.8,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 6.573033707865169,
|
|
"grad_norm": 0.7989789682861794,
|
|
"learning_rate": 4.552080148932425e-07,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2788954973220825,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3179.0,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 6.581059390048154,
|
|
"grad_norm": 0.5939988936419627,
|
|
"learning_rate": 4.383806374638555e-07,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2657738924026489,
|
|
"step": 4100,
|
|
"valid_targets_mean": 5266.7,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 6.589085072231139,
|
|
"grad_norm": 0.6727300799134852,
|
|
"learning_rate": 4.2186672281432363e-07,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23324981331825256,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3770.4,
|
|
"valid_targets_min": 287
|
|
},
|
|
{
|
|
"epoch": 6.597110754414125,
|
|
"grad_norm": 0.6734377745295584,
|
|
"learning_rate": 4.056665355698508e-07,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23244278132915497,
|
|
"step": 4110,
|
|
"valid_targets_mean": 3445.9,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 6.60513643659711,
|
|
"grad_norm": 0.7834105980185365,
|
|
"learning_rate": 3.89780335328358e-07,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27651554346084595,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3127.2,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 6.613162118780096,
|
|
"grad_norm": 0.7333348309941607,
|
|
"learning_rate": 3.7420837665632205e-07,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24929103255271912,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3165.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 6.621187800963082,
|
|
"grad_norm": 0.7104599464303136,
|
|
"learning_rate": 3.5895090908469433e-07,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24504637718200684,
|
|
"step": 4125,
|
|
"valid_targets_mean": 3472.9,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 6.629213483146067,
|
|
"grad_norm": 0.7313425842197019,
|
|
"learning_rate": 3.4400817710489754e-07,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676655650138855,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3394.2,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 6.637239165329053,
|
|
"grad_norm": 0.8292888227000904,
|
|
"learning_rate": 3.293804201649131e-07,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29136985540390015,
|
|
"step": 4135,
|
|
"valid_targets_mean": 3664.8,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 6.645264847512038,
|
|
"grad_norm": 0.5859501361645664,
|
|
"learning_rate": 3.1506787266545104e-07,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24845753610134125,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5134.4,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 6.653290529695024,
|
|
"grad_norm": 0.7281321667517009,
|
|
"learning_rate": 3.010707639561727e-07,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2665691673755646,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3363.2,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 6.66131621187801,
|
|
"grad_norm": 0.7920868353941164,
|
|
"learning_rate": 2.873893183320431e-07,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711898684501648,
|
|
"step": 4150,
|
|
"valid_targets_mean": 2907.1,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 6.669341894060995,
|
|
"grad_norm": 0.6561227126487458,
|
|
"learning_rate": 2.740237550297131e-07,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2264401614665985,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4246.2,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 6.677367576243981,
|
|
"grad_norm": 0.7822630808636535,
|
|
"learning_rate": 2.609742882240207e-07,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2742968797683716,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3012.2,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 6.685393258426966,
|
|
"grad_norm": 0.6996173646361625,
|
|
"learning_rate": 2.482411270245533e-07,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23721328377723694,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3408.8,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 6.693418940609952,
|
|
"grad_norm": 0.6688769831306771,
|
|
"learning_rate": 2.358244754722927e-07,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2499692291021347,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3887.4,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 6.701444622792938,
|
|
"grad_norm": 0.6787400043092254,
|
|
"learning_rate": 2.2372453253635796e-07,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25045540928840637,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3968.2,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 6.709470304975923,
|
|
"grad_norm": 0.6473638464909378,
|
|
"learning_rate": 2.1194149211080982e-07,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2810041308403015,
|
|
"step": 4180,
|
|
"valid_targets_mean": 3957.2,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 6.717495987158909,
|
|
"grad_norm": 0.6197896248687305,
|
|
"learning_rate": 2.004755430115335e-07,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25658589601516724,
|
|
"step": 4185,
|
|
"valid_targets_mean": 4486.0,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 6.725521669341894,
|
|
"grad_norm": 0.7883436500238206,
|
|
"learning_rate": 1.8932686897323417e-07,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25040990114212036,
|
|
"step": 4190,
|
|
"valid_targets_mean": 3039.8,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 6.73354735152488,
|
|
"grad_norm": 0.895767599434398,
|
|
"learning_rate": 1.7849564864647506e-07,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2832660973072052,
|
|
"step": 4195,
|
|
"valid_targets_mean": 3377.8,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 6.741573033707866,
|
|
"grad_norm": 0.8047400855889523,
|
|
"learning_rate": 1.679820555948264e-07,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25083500146865845,
|
|
"step": 4200,
|
|
"valid_targets_mean": 3126.9,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 6.74959871589085,
|
|
"grad_norm": 0.6803760541463084,
|
|
"learning_rate": 1.5778625829207196e-07,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3004777431488037,
|
|
"step": 4205,
|
|
"valid_targets_mean": 4165.6,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 6.757624398073836,
|
|
"grad_norm": 0.6321367609880298,
|
|
"learning_rate": 1.4790842011952023e-07,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2993676960468292,
|
|
"step": 4210,
|
|
"valid_targets_mean": 4401.8,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 6.765650080256822,
|
|
"grad_norm": 0.8119049931711297,
|
|
"learning_rate": 1.3834869936338424e-07,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28349822759628296,
|
|
"step": 4215,
|
|
"valid_targets_mean": 3300.0,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 6.773675762439807,
|
|
"grad_norm": 0.695882137327403,
|
|
"learning_rate": 1.2910724921224139e-07,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2799147367477417,
|
|
"step": 4220,
|
|
"valid_targets_mean": 5068.1,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 6.781701444622793,
|
|
"grad_norm": 0.7725028058938449,
|
|
"learning_rate": 1.2018421775457978e-07,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27902424335479736,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3362.6,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 6.789727126805778,
|
|
"grad_norm": 0.6769490909910145,
|
|
"learning_rate": 1.115797479764269e-07,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25704652070999146,
|
|
"step": 4230,
|
|
"valid_targets_mean": 3682.8,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 6.797752808988764,
|
|
"grad_norm": 0.8594325620743727,
|
|
"learning_rate": 1.0329397775905802e-07,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3076463043689728,
|
|
"step": 4235,
|
|
"valid_targets_mean": 2420.0,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 6.80577849117175,
|
|
"grad_norm": 0.617003296257221,
|
|
"learning_rate": 9.532703987678692e-08,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2473968118429184,
|
|
"step": 4240,
|
|
"valid_targets_mean": 4426.4,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 6.813804173354735,
|
|
"grad_norm": 0.5926557654034222,
|
|
"learning_rate": 8.767906199483422e-08,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2276577353477478,
|
|
"step": 4245,
|
|
"valid_targets_mean": 4371.1,
|
|
"valid_targets_min": 1358
|
|
},
|
|
{
|
|
"epoch": 6.821829855537721,
|
|
"grad_norm": 0.7135348353247951,
|
|
"learning_rate": 8.0350166667289e-08,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2855631411075592,
|
|
"step": 4250,
|
|
"valid_targets_mean": 3677.2,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 6.829855537720706,
|
|
"grad_norm": 0.831693402912546,
|
|
"learning_rate": 7.33404713351371e-08,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2552441358566284,
|
|
"step": 4255,
|
|
"valid_targets_mean": 2769.9,
|
|
"valid_targets_min": 1047
|
|
},
|
|
{
|
|
"epoch": 6.837881219903692,
|
|
"grad_norm": 0.7887719938281736,
|
|
"learning_rate": 6.665008832438923e-08,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538256347179413,
|
|
"step": 4260,
|
|
"valid_targets_mean": 3059.6,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 6.845906902086678,
|
|
"grad_norm": 0.7254856581707254,
|
|
"learning_rate": 6.027912484426468e-08,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26410308480262756,
|
|
"step": 4265,
|
|
"valid_targets_mean": 4334.0,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 6.853932584269663,
|
|
"grad_norm": 0.7106278636161031,
|
|
"learning_rate": 5.4227682985494854e-08,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24151332676410675,
|
|
"step": 4270,
|
|
"valid_targets_mean": 3586.1,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 6.861958266452649,
|
|
"grad_norm": 0.7479307274811522,
|
|
"learning_rate": 4.849585971866688e-08,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22860613465309143,
|
|
"step": 4275,
|
|
"valid_targets_mean": 2862.4,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 6.8699839486356336,
|
|
"grad_norm": 0.8744704395870363,
|
|
"learning_rate": 4.3083746892684796e-08,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31074780225753784,
|
|
"step": 4280,
|
|
"valid_targets_mean": 2433.4,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 6.878009630818619,
|
|
"grad_norm": 0.6710071373279751,
|
|
"learning_rate": 3.7991431233288525e-08,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2323283851146698,
|
|
"step": 4285,
|
|
"valid_targets_mean": 3863.1,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 6.886035313001605,
|
|
"grad_norm": 0.5698356361744851,
|
|
"learning_rate": 3.321899434166831e-08,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23712842166423798,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5432.4,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 6.89406099518459,
|
|
"grad_norm": 0.8722725025796716,
|
|
"learning_rate": 2.876651269315689e-08,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2614433169364929,
|
|
"step": 4295,
|
|
"valid_targets_mean": 3633.1,
|
|
"valid_targets_min": 938
|
|
},
|
|
{
|
|
"epoch": 6.902086677367576,
|
|
"grad_norm": 0.7439561148262642,
|
|
"learning_rate": 2.4634057636001573e-08,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.272623747587204,
|
|
"step": 4300,
|
|
"valid_targets_mean": 3313.8,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 6.910112359550562,
|
|
"grad_norm": 0.817167129068665,
|
|
"learning_rate": 2.082169539022294e-08,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23937168717384338,
|
|
"step": 4305,
|
|
"valid_targets_mean": 2467.1,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 6.918138041733547,
|
|
"grad_norm": 0.6572968011355121,
|
|
"learning_rate": 1.7329487046555683e-08,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2369089424610138,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3833.9,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 6.926163723916533,
|
|
"grad_norm": 1.6819116462806118,
|
|
"learning_rate": 1.4157488565464949e-08,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25402212142944336,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4337.1,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 6.934189406099518,
|
|
"grad_norm": 0.8235897521978418,
|
|
"learning_rate": 1.1305750776253733e-08,
|
|
"loss": 0.2722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28470149636268616,
|
|
"step": 4320,
|
|
"valid_targets_mean": 2744.7,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 6.942215088282504,
|
|
"grad_norm": 0.7720421257367757,
|
|
"learning_rate": 8.774319376245733e-09,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3208145499229431,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3309.5,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 6.95024077046549,
|
|
"grad_norm": 0.6362978433422675,
|
|
"learning_rate": 6.56323493005262e-09,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2465464174747467,
|
|
"step": 4330,
|
|
"valid_targets_mean": 3921.2,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 6.958266452648475,
|
|
"grad_norm": 0.6268067753299079,
|
|
"learning_rate": 4.672532868927882e-09,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24979601800441742,
|
|
"step": 4335,
|
|
"valid_targets_mean": 4359.4,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 6.966292134831461,
|
|
"grad_norm": 0.701875865278465,
|
|
"learning_rate": 3.102243490196166e-09,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2627236545085907,
|
|
"step": 4340,
|
|
"valid_targets_mean": 3739.5,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 6.974317817014446,
|
|
"grad_norm": 0.8516090021132041,
|
|
"learning_rate": 1.8523919567692283e-09,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26574981212615967,
|
|
"step": 4345,
|
|
"valid_targets_mean": 2489.9,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 6.982343499197432,
|
|
"grad_norm": 0.7196532796044037,
|
|
"learning_rate": 9.229982967373652e-10,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694946229457855,
|
|
"step": 4350,
|
|
"valid_targets_mean": 3620.7,
|
|
"valid_targets_min": 384
|
|
},
|
|
{
|
|
"epoch": 6.990369181380418,
|
|
"grad_norm": 0.681758500951631,
|
|
"learning_rate": 3.1407740305633425e-10,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29379069805145264,
|
|
"step": 4355,
|
|
"valid_targets_mean": 4138.7,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 6.998394863563403,
|
|
"grad_norm": 0.7098716365976682,
|
|
"learning_rate": 2.563903330532469e-11,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25322800874710083,
|
|
"step": 4360,
|
|
"valid_targets_mean": 3424.4,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583603262901306,
|
|
"step": 4361,
|
|
"total_flos": 1215876924768256.0,
|
|
"train_loss": 0.3324449480782679,
|
|
"train_runtime": 33281.4443,
|
|
"train_samples_per_second": 2.094,
|
|
"train_steps_per_second": 0.131,
|
|
"valid_targets_mean": 3816.1,
|
|
"valid_targets_min": 1694
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4361,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1215876924768256.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|