8650 lines
240 KiB
JSON
8650 lines
240 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3913,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008952551477170993,
|
|
"grad_norm": 16.49591257441783,
|
|
"learning_rate": 4.0816326530612243e-07,
|
|
"loss": 0.8686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4666103720664978,
|
|
"step": 5,
|
|
"valid_targets_mean": 4519.9,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 0.017905102954341987,
|
|
"grad_norm": 17.298933206431972,
|
|
"learning_rate": 9.183673469387756e-07,
|
|
"loss": 0.8909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43466073274612427,
|
|
"step": 10,
|
|
"valid_targets_mean": 2747.4,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 0.02685765443151298,
|
|
"grad_norm": 16.827321924247315,
|
|
"learning_rate": 1.4285714285714286e-06,
|
|
"loss": 0.8744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3920298218727112,
|
|
"step": 15,
|
|
"valid_targets_mean": 3581.1,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 0.03581020590868397,
|
|
"grad_norm": 12.478016288643031,
|
|
"learning_rate": 1.938775510204082e-06,
|
|
"loss": 0.8919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46270671486854553,
|
|
"step": 20,
|
|
"valid_targets_mean": 2438.8,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 0.044762757385854966,
|
|
"grad_norm": 7.6946355193316345,
|
|
"learning_rate": 2.4489795918367347e-06,
|
|
"loss": 0.8118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36239463090896606,
|
|
"step": 25,
|
|
"valid_targets_mean": 2902.6,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 0.05371530886302596,
|
|
"grad_norm": 4.232599090692266,
|
|
"learning_rate": 2.959183673469388e-06,
|
|
"loss": 0.7486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33849599957466125,
|
|
"step": 30,
|
|
"valid_targets_mean": 3254.2,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 0.06266786034019696,
|
|
"grad_norm": 2.8884711311766234,
|
|
"learning_rate": 3.469387755102041e-06,
|
|
"loss": 0.7087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35638314485549927,
|
|
"step": 35,
|
|
"valid_targets_mean": 2818.5,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 0.07162041181736795,
|
|
"grad_norm": 1.8606253940579665,
|
|
"learning_rate": 3.979591836734694e-06,
|
|
"loss": 0.6355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2763682007789612,
|
|
"step": 40,
|
|
"valid_targets_mean": 2783.8,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 0.08057296329453895,
|
|
"grad_norm": 1.5458228348682868,
|
|
"learning_rate": 4.489795918367348e-06,
|
|
"loss": 0.648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3491474986076355,
|
|
"step": 45,
|
|
"valid_targets_mean": 3077.9,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 0.08952551477170993,
|
|
"grad_norm": 1.2608980315826672,
|
|
"learning_rate": 5e-06,
|
|
"loss": 0.6069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3331415057182312,
|
|
"step": 50,
|
|
"valid_targets_mean": 3091.8,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 0.09847806624888093,
|
|
"grad_norm": 0.942020827015156,
|
|
"learning_rate": 5.510204081632653e-06,
|
|
"loss": 0.6139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3337685167789459,
|
|
"step": 55,
|
|
"valid_targets_mean": 3797.2,
|
|
"valid_targets_min": 2898
|
|
},
|
|
{
|
|
"epoch": 0.10743061772605192,
|
|
"grad_norm": 0.949804363038843,
|
|
"learning_rate": 6.020408163265307e-06,
|
|
"loss": 0.5488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3087773025035858,
|
|
"step": 60,
|
|
"valid_targets_mean": 4610.4,
|
|
"valid_targets_min": 1564
|
|
},
|
|
{
|
|
"epoch": 0.11638316920322292,
|
|
"grad_norm": 0.7193398126181899,
|
|
"learning_rate": 6.530612244897959e-06,
|
|
"loss": 0.6095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22723770141601562,
|
|
"step": 65,
|
|
"valid_targets_mean": 3857.2,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 0.12533572068039392,
|
|
"grad_norm": 0.7588167962315939,
|
|
"learning_rate": 7.0408163265306125e-06,
|
|
"loss": 0.5624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29583364725112915,
|
|
"step": 70,
|
|
"valid_targets_mean": 3903.6,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 0.13428827215756492,
|
|
"grad_norm": 0.7317870592280868,
|
|
"learning_rate": 7.551020408163265e-06,
|
|
"loss": 0.5739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.213375985622406,
|
|
"step": 75,
|
|
"valid_targets_mean": 3182.4,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 0.1432408236347359,
|
|
"grad_norm": 0.758736502420054,
|
|
"learning_rate": 8.06122448979592e-06,
|
|
"loss": 0.5921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.251443475484848,
|
|
"step": 80,
|
|
"valid_targets_mean": 3973.0,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 0.1521933751119069,
|
|
"grad_norm": 0.8048639714424809,
|
|
"learning_rate": 8.571428571428571e-06,
|
|
"loss": 0.5738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732312083244324,
|
|
"step": 85,
|
|
"valid_targets_mean": 2992.2,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 0.1611459265890779,
|
|
"grad_norm": 0.7340125250194033,
|
|
"learning_rate": 9.081632653061225e-06,
|
|
"loss": 0.5579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25547561049461365,
|
|
"step": 90,
|
|
"valid_targets_mean": 2874.4,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 0.1700984780662489,
|
|
"grad_norm": 0.6621673773996177,
|
|
"learning_rate": 9.591836734693878e-06,
|
|
"loss": 0.5464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635806202888489,
|
|
"step": 95,
|
|
"valid_targets_mean": 4164.6,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 0.17905102954341987,
|
|
"grad_norm": 0.5908341411665475,
|
|
"learning_rate": 1.0102040816326531e-05,
|
|
"loss": 0.5235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3442309498786926,
|
|
"step": 100,
|
|
"valid_targets_mean": 6593.6,
|
|
"valid_targets_min": 2032
|
|
},
|
|
{
|
|
"epoch": 0.18800358102059087,
|
|
"grad_norm": 0.6686214031508748,
|
|
"learning_rate": 1.0612244897959186e-05,
|
|
"loss": 0.5113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31256240606307983,
|
|
"step": 105,
|
|
"valid_targets_mean": 5066.4,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 0.19695613249776187,
|
|
"grad_norm": 0.6681187078608829,
|
|
"learning_rate": 1.1122448979591838e-05,
|
|
"loss": 0.5178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890857458114624,
|
|
"step": 110,
|
|
"valid_targets_mean": 4381.5,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 0.20590868397493287,
|
|
"grad_norm": 0.7026152513616436,
|
|
"learning_rate": 1.1632653061224491e-05,
|
|
"loss": 0.5156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23764872550964355,
|
|
"step": 115,
|
|
"valid_targets_mean": 3407.8,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 0.21486123545210384,
|
|
"grad_norm": 0.705302694688467,
|
|
"learning_rate": 1.2142857142857142e-05,
|
|
"loss": 0.5083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2273426502943039,
|
|
"step": 120,
|
|
"valid_targets_mean": 3928.0,
|
|
"valid_targets_min": 1690
|
|
},
|
|
{
|
|
"epoch": 0.22381378692927484,
|
|
"grad_norm": 0.7484839660312749,
|
|
"learning_rate": 1.2653061224489798e-05,
|
|
"loss": 0.5391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18557226657867432,
|
|
"step": 125,
|
|
"valid_targets_mean": 2330.8,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 0.23276633840644584,
|
|
"grad_norm": 0.7160792181517066,
|
|
"learning_rate": 1.316326530612245e-05,
|
|
"loss": 0.5299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3373291790485382,
|
|
"step": 130,
|
|
"valid_targets_mean": 3965.8,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 0.24171888988361684,
|
|
"grad_norm": 0.6476324474732508,
|
|
"learning_rate": 1.3673469387755102e-05,
|
|
"loss": 0.5149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963748574256897,
|
|
"step": 135,
|
|
"valid_targets_mean": 5056.6,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 0.25067144136078784,
|
|
"grad_norm": 0.5706629071327133,
|
|
"learning_rate": 1.4183673469387755e-05,
|
|
"loss": 0.5034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23031815886497498,
|
|
"step": 140,
|
|
"valid_targets_mean": 5887.0,
|
|
"valid_targets_min": 1510
|
|
},
|
|
{
|
|
"epoch": 0.25962399283795884,
|
|
"grad_norm": 0.6087657636188725,
|
|
"learning_rate": 1.469387755102041e-05,
|
|
"loss": 0.4746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000361919403076,
|
|
"step": 145,
|
|
"valid_targets_mean": 5092.6,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 0.26857654431512984,
|
|
"grad_norm": 0.7437663805138169,
|
|
"learning_rate": 1.5204081632653063e-05,
|
|
"loss": 0.4949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728891968727112,
|
|
"step": 150,
|
|
"valid_targets_mean": 3300.6,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 0.2775290957923008,
|
|
"grad_norm": 0.7018877269558821,
|
|
"learning_rate": 1.5714285714285715e-05,
|
|
"loss": 0.5209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2847294509410858,
|
|
"step": 155,
|
|
"valid_targets_mean": 3328.4,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 0.2864816472694718,
|
|
"grad_norm": 0.674193694437514,
|
|
"learning_rate": 1.6224489795918368e-05,
|
|
"loss": 0.5125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37893906235694885,
|
|
"step": 160,
|
|
"valid_targets_mean": 5591.1,
|
|
"valid_targets_min": 2806
|
|
},
|
|
{
|
|
"epoch": 0.2954341987466428,
|
|
"grad_norm": 0.6383105740666587,
|
|
"learning_rate": 1.673469387755102e-05,
|
|
"loss": 0.5008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30907806754112244,
|
|
"step": 165,
|
|
"valid_targets_mean": 5553.2,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 0.3043867502238138,
|
|
"grad_norm": 0.635478910064303,
|
|
"learning_rate": 1.7244897959183674e-05,
|
|
"loss": 0.4626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16394081711769104,
|
|
"step": 170,
|
|
"valid_targets_mean": 2659.8,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 0.3133393017009848,
|
|
"grad_norm": 0.6408016364792337,
|
|
"learning_rate": 1.7755102040816327e-05,
|
|
"loss": 0.501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26969802379608154,
|
|
"step": 175,
|
|
"valid_targets_mean": 4537.2,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 0.3222918531781558,
|
|
"grad_norm": 0.7532590420669183,
|
|
"learning_rate": 1.826530612244898e-05,
|
|
"loss": 0.4535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22142918407917023,
|
|
"step": 180,
|
|
"valid_targets_mean": 3185.2,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 0.3312444046553268,
|
|
"grad_norm": 0.682281377108529,
|
|
"learning_rate": 1.8775510204081636e-05,
|
|
"loss": 0.474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26704490184783936,
|
|
"step": 185,
|
|
"valid_targets_mean": 5240.8,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 0.3401969561324978,
|
|
"grad_norm": 0.7548919483126133,
|
|
"learning_rate": 1.928571428571429e-05,
|
|
"loss": 0.5005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2760850787162781,
|
|
"step": 190,
|
|
"valid_targets_mean": 2447.2,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 0.34914950760966873,
|
|
"grad_norm": 0.5571129919492352,
|
|
"learning_rate": 1.979591836734694e-05,
|
|
"loss": 0.4893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20134209096431732,
|
|
"step": 195,
|
|
"valid_targets_mean": 4296.0,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 0.35810205908683973,
|
|
"grad_norm": 0.7295265109368269,
|
|
"learning_rate": 2.0306122448979594e-05,
|
|
"loss": 0.4687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.209572434425354,
|
|
"step": 200,
|
|
"valid_targets_mean": 3726.4,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 0.36705461056401073,
|
|
"grad_norm": 0.6965350987539844,
|
|
"learning_rate": 2.0816326530612247e-05,
|
|
"loss": 0.4337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25286203622817993,
|
|
"step": 205,
|
|
"valid_targets_mean": 4697.0,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 0.37600716204118173,
|
|
"grad_norm": 0.6480471167428298,
|
|
"learning_rate": 2.13265306122449e-05,
|
|
"loss": 0.4741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2200263887643814,
|
|
"step": 210,
|
|
"valid_targets_mean": 3665.0,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 0.38495971351835273,
|
|
"grad_norm": 0.8564301909823199,
|
|
"learning_rate": 2.1836734693877552e-05,
|
|
"loss": 0.4655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18472859263420105,
|
|
"step": 215,
|
|
"valid_targets_mean": 2090.5,
|
|
"valid_targets_min": 438
|
|
},
|
|
{
|
|
"epoch": 0.39391226499552373,
|
|
"grad_norm": 0.6354279960876454,
|
|
"learning_rate": 2.2346938775510205e-05,
|
|
"loss": 0.4649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23229990899562836,
|
|
"step": 220,
|
|
"valid_targets_mean": 4387.4,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 0.40286481647269473,
|
|
"grad_norm": 0.6789177721083521,
|
|
"learning_rate": 2.2857142857142858e-05,
|
|
"loss": 0.4429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22782085835933685,
|
|
"step": 225,
|
|
"valid_targets_mean": 3793.0,
|
|
"valid_targets_min": 2323
|
|
},
|
|
{
|
|
"epoch": 0.41181736794986573,
|
|
"grad_norm": 0.6516095638644306,
|
|
"learning_rate": 2.336734693877551e-05,
|
|
"loss": 0.4971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30450618267059326,
|
|
"step": 230,
|
|
"valid_targets_mean": 4820.1,
|
|
"valid_targets_min": 1458
|
|
},
|
|
{
|
|
"epoch": 0.42076991942703673,
|
|
"grad_norm": 0.693576705410677,
|
|
"learning_rate": 2.3877551020408167e-05,
|
|
"loss": 0.4478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25775548815727234,
|
|
"step": 235,
|
|
"valid_targets_mean": 4522.9,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 0.4297224709042077,
|
|
"grad_norm": 0.5947860501405103,
|
|
"learning_rate": 2.438775510204082e-05,
|
|
"loss": 0.4666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2969934642314911,
|
|
"step": 240,
|
|
"valid_targets_mean": 6070.8,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 0.4386750223813787,
|
|
"grad_norm": 0.7892029600583624,
|
|
"learning_rate": 2.4897959183673473e-05,
|
|
"loss": 0.4616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2159813940525055,
|
|
"step": 245,
|
|
"valid_targets_mean": 3087.0,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 0.4476275738585497,
|
|
"grad_norm": 0.6788801249806748,
|
|
"learning_rate": 2.5408163265306125e-05,
|
|
"loss": 0.4693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609705924987793,
|
|
"step": 250,
|
|
"valid_targets_mean": 4786.5,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 0.4565801253357207,
|
|
"grad_norm": 0.7361627557491236,
|
|
"learning_rate": 2.5918367346938778e-05,
|
|
"loss": 0.4342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2108224332332611,
|
|
"step": 255,
|
|
"valid_targets_mean": 3732.5,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 0.4655326768128917,
|
|
"grad_norm": 0.7565470090146987,
|
|
"learning_rate": 2.642857142857143e-05,
|
|
"loss": 0.4244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2238048017024994,
|
|
"step": 260,
|
|
"valid_targets_mean": 3423.0,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 0.4744852282900627,
|
|
"grad_norm": 0.6303638496536128,
|
|
"learning_rate": 2.6938775510204084e-05,
|
|
"loss": 0.4441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1890367716550827,
|
|
"step": 265,
|
|
"valid_targets_mean": 4411.1,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 0.4834377797672337,
|
|
"grad_norm": 0.7302565591847386,
|
|
"learning_rate": 2.7448979591836737e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19841700792312622,
|
|
"step": 270,
|
|
"valid_targets_mean": 2944.1,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 0.4923903312444047,
|
|
"grad_norm": 0.5863910054331536,
|
|
"learning_rate": 2.7959183673469393e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18440236151218414,
|
|
"step": 275,
|
|
"valid_targets_mean": 4238.5,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 0.5013428827215757,
|
|
"grad_norm": 0.6774434443668762,
|
|
"learning_rate": 2.8469387755102046e-05,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1615563929080963,
|
|
"step": 280,
|
|
"valid_targets_mean": 2832.2,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 0.5102954341987467,
|
|
"grad_norm": 0.6644483687290574,
|
|
"learning_rate": 2.89795918367347e-05,
|
|
"loss": 0.4356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12459085136651993,
|
|
"step": 285,
|
|
"valid_targets_mean": 2369.5,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 0.5192479856759177,
|
|
"grad_norm": 0.7685920251159404,
|
|
"learning_rate": 2.948979591836735e-05,
|
|
"loss": 0.4356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18276526033878326,
|
|
"step": 290,
|
|
"valid_targets_mean": 2990.8,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 0.5282005371530887,
|
|
"grad_norm": 0.6355310059485909,
|
|
"learning_rate": 3.0000000000000004e-05,
|
|
"loss": 0.4321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1871587336063385,
|
|
"step": 295,
|
|
"valid_targets_mean": 5225.5,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 0.5371530886302597,
|
|
"grad_norm": 1.018574390466646,
|
|
"learning_rate": 3.0510204081632654e-05,
|
|
"loss": 0.4465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808264195919037,
|
|
"step": 300,
|
|
"valid_targets_mean": 4023.5,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 0.5461056401074306,
|
|
"grad_norm": 0.6281786892326823,
|
|
"learning_rate": 3.102040816326531e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20266380906105042,
|
|
"step": 305,
|
|
"valid_targets_mean": 4063.4,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 0.5550581915846016,
|
|
"grad_norm": 0.7808363483204404,
|
|
"learning_rate": 3.153061224489796e-05,
|
|
"loss": 0.4336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23263785243034363,
|
|
"step": 310,
|
|
"valid_targets_mean": 6843.8,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 0.5640107430617726,
|
|
"grad_norm": 0.831519619870683,
|
|
"learning_rate": 3.2040816326530615e-05,
|
|
"loss": 0.4348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19032128155231476,
|
|
"step": 315,
|
|
"valid_targets_mean": 3595.5,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 0.5729632945389436,
|
|
"grad_norm": 1.073552738434753,
|
|
"learning_rate": 3.255102040816327e-05,
|
|
"loss": 0.4172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1755063533782959,
|
|
"step": 320,
|
|
"valid_targets_mean": 3981.4,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 0.5819158460161146,
|
|
"grad_norm": 0.69129535171428,
|
|
"learning_rate": 3.306122448979592e-05,
|
|
"loss": 0.4478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20842593908309937,
|
|
"step": 325,
|
|
"valid_targets_mean": 3756.1,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 0.5908683974932856,
|
|
"grad_norm": 0.6287282438450149,
|
|
"learning_rate": 3.357142857142858e-05,
|
|
"loss": 0.4415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12963131070137024,
|
|
"step": 330,
|
|
"valid_targets_mean": 2827.4,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 0.5998209489704566,
|
|
"grad_norm": 0.5643853229439663,
|
|
"learning_rate": 3.408163265306123e-05,
|
|
"loss": 0.4135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18341860175132751,
|
|
"step": 335,
|
|
"valid_targets_mean": 4425.6,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 0.6087735004476276,
|
|
"grad_norm": 0.7125901635629522,
|
|
"learning_rate": 3.459183673469388e-05,
|
|
"loss": 0.4423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3656802773475647,
|
|
"step": 340,
|
|
"valid_targets_mean": 5335.0,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 0.6177260519247986,
|
|
"grad_norm": 0.620910199580982,
|
|
"learning_rate": 3.510204081632653e-05,
|
|
"loss": 0.428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22268570959568024,
|
|
"step": 345,
|
|
"valid_targets_mean": 5409.2,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 0.6266786034019696,
|
|
"grad_norm": 0.7650139202001973,
|
|
"learning_rate": 3.561224489795918e-05,
|
|
"loss": 0.4633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23281125724315643,
|
|
"step": 350,
|
|
"valid_targets_mean": 4042.8,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 0.6356311548791406,
|
|
"grad_norm": 0.9201532516782224,
|
|
"learning_rate": 3.612244897959184e-05,
|
|
"loss": 0.4281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25236573815345764,
|
|
"step": 355,
|
|
"valid_targets_mean": 3224.9,
|
|
"valid_targets_min": 1933
|
|
},
|
|
{
|
|
"epoch": 0.6445837063563116,
|
|
"grad_norm": 0.7183411085392811,
|
|
"learning_rate": 3.6632653061224494e-05,
|
|
"loss": 0.4701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17252346873283386,
|
|
"step": 360,
|
|
"valid_targets_mean": 2688.4,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 0.6535362578334826,
|
|
"grad_norm": 0.582680287280911,
|
|
"learning_rate": 3.714285714285715e-05,
|
|
"loss": 0.4149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19246900081634521,
|
|
"step": 365,
|
|
"valid_targets_mean": 5347.9,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 0.6624888093106536,
|
|
"grad_norm": 0.9129377165838232,
|
|
"learning_rate": 3.76530612244898e-05,
|
|
"loss": 0.4551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27284592390060425,
|
|
"step": 370,
|
|
"valid_targets_mean": 4036.4,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 0.6714413607878246,
|
|
"grad_norm": 0.6678131909597557,
|
|
"learning_rate": 3.816326530612245e-05,
|
|
"loss": 0.4152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20466414093971252,
|
|
"step": 375,
|
|
"valid_targets_mean": 3872.2,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 0.6803939122649956,
|
|
"grad_norm": 0.8045668374112488,
|
|
"learning_rate": 3.8673469387755105e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22858558595180511,
|
|
"step": 380,
|
|
"valid_targets_mean": 3204.1,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 0.6893464637421666,
|
|
"grad_norm": 0.666520730258989,
|
|
"learning_rate": 3.9183673469387755e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17605772614479065,
|
|
"step": 385,
|
|
"valid_targets_mean": 4473.6,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 0.6982990152193375,
|
|
"grad_norm": 0.6247290424770604,
|
|
"learning_rate": 3.969387755102041e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25795891880989075,
|
|
"step": 390,
|
|
"valid_targets_mean": 4912.9,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.7072515666965085,
|
|
"grad_norm": 0.779100748971421,
|
|
"learning_rate": 3.9999968156003224e-05,
|
|
"loss": 0.4716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44641977548599243,
|
|
"step": 395,
|
|
"valid_targets_mean": 5953.0,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 0.7162041181736795,
|
|
"grad_norm": 0.6495151157923535,
|
|
"learning_rate": 3.999960991220401e-05,
|
|
"loss": 0.3952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20517157018184662,
|
|
"step": 400,
|
|
"valid_targets_mean": 3357.4,
|
|
"valid_targets_min": 1749
|
|
},
|
|
{
|
|
"epoch": 0.7251566696508505,
|
|
"grad_norm": 0.7150419841254799,
|
|
"learning_rate": 3.9998853626763316e-05,
|
|
"loss": 0.4102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18629693984985352,
|
|
"step": 405,
|
|
"valid_targets_mean": 3205.6,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 0.7341092211280215,
|
|
"grad_norm": 0.7390419158141074,
|
|
"learning_rate": 3.999769931473309e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14274990558624268,
|
|
"step": 410,
|
|
"valid_targets_mean": 2136.0,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 0.7430617726051925,
|
|
"grad_norm": 1.259396937429778,
|
|
"learning_rate": 3.999614699908698e-05,
|
|
"loss": 0.438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26997411251068115,
|
|
"step": 415,
|
|
"valid_targets_mean": 4593.8,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 0.7520143240823635,
|
|
"grad_norm": 0.6154121664436549,
|
|
"learning_rate": 3.999419671071993e-05,
|
|
"loss": 0.4231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20233558118343353,
|
|
"step": 420,
|
|
"valid_targets_mean": 4364.0,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 0.7609668755595345,
|
|
"grad_norm": 0.6423223707679937,
|
|
"learning_rate": 3.999184848844746e-05,
|
|
"loss": 0.3865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1704905480146408,
|
|
"step": 425,
|
|
"valid_targets_mean": 3104.9,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 0.7699194270367055,
|
|
"grad_norm": 0.729810705804325,
|
|
"learning_rate": 3.9989102379005026e-05,
|
|
"loss": 0.4225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1957702934741974,
|
|
"step": 430,
|
|
"valid_targets_mean": 3646.9,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 0.7788719785138765,
|
|
"grad_norm": 0.7345669763856479,
|
|
"learning_rate": 3.9985958437046976e-05,
|
|
"loss": 0.453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23787890374660492,
|
|
"step": 435,
|
|
"valid_targets_mean": 3216.0,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 0.7878245299910475,
|
|
"grad_norm": 0.781247168859166,
|
|
"learning_rate": 3.998241672514551e-05,
|
|
"loss": 0.4173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17754238843917847,
|
|
"step": 440,
|
|
"valid_targets_mean": 2567.8,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 0.7967770814682185,
|
|
"grad_norm": 0.5671255551819204,
|
|
"learning_rate": 3.997847731378946e-05,
|
|
"loss": 0.4087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26526498794555664,
|
|
"step": 445,
|
|
"valid_targets_mean": 6251.0,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 0.8057296329453895,
|
|
"grad_norm": 0.6272203363976995,
|
|
"learning_rate": 3.9974140281382844e-05,
|
|
"loss": 0.4155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22401854395866394,
|
|
"step": 450,
|
|
"valid_targets_mean": 4900.2,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 0.8146821844225605,
|
|
"grad_norm": 0.6084346283327203,
|
|
"learning_rate": 3.996940571424331e-05,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2205761969089508,
|
|
"step": 455,
|
|
"valid_targets_mean": 5000.9,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 0.8236347358997315,
|
|
"grad_norm": 0.6541164392281177,
|
|
"learning_rate": 3.996427370660045e-05,
|
|
"loss": 0.4075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18832074105739594,
|
|
"step": 460,
|
|
"valid_targets_mean": 4095.9,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 0.8325872873769025,
|
|
"grad_norm": 0.6460255342842145,
|
|
"learning_rate": 3.995874436059389e-05,
|
|
"loss": 0.4071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18600459396839142,
|
|
"step": 465,
|
|
"valid_targets_mean": 3793.5,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 0.8415398388540735,
|
|
"grad_norm": 0.6200976937245356,
|
|
"learning_rate": 3.9952817786271264e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13933804631233215,
|
|
"step": 470,
|
|
"valid_targets_mean": 3723.6,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 0.8504923903312444,
|
|
"grad_norm": 0.6168712556601921,
|
|
"learning_rate": 3.994649410158605e-05,
|
|
"loss": 0.4072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22264635562896729,
|
|
"step": 475,
|
|
"valid_targets_mean": 4751.8,
|
|
"valid_targets_min": 2191
|
|
},
|
|
{
|
|
"epoch": 0.8594449418084154,
|
|
"grad_norm": 0.6514273070342461,
|
|
"learning_rate": 3.99397734323952e-05,
|
|
"loss": 0.4224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20883207023143768,
|
|
"step": 480,
|
|
"valid_targets_mean": 4464.0,
|
|
"valid_targets_min": 1847
|
|
},
|
|
{
|
|
"epoch": 0.8683974932855864,
|
|
"grad_norm": 0.800906223272931,
|
|
"learning_rate": 3.993265591245662e-05,
|
|
"loss": 0.4075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1863759309053421,
|
|
"step": 485,
|
|
"valid_targets_mean": 2938.4,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 0.8773500447627574,
|
|
"grad_norm": 0.83277145829148,
|
|
"learning_rate": 3.992514168342655e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17269662022590637,
|
|
"step": 490,
|
|
"valid_targets_mean": 2428.8,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 0.8863025962399284,
|
|
"grad_norm": 0.7304828801141144,
|
|
"learning_rate": 3.9917230894856705e-05,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2120320200920105,
|
|
"step": 495,
|
|
"valid_targets_mean": 3160.0,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 0.8952551477170994,
|
|
"grad_norm": 0.7572826126724849,
|
|
"learning_rate": 3.990892370419132e-05,
|
|
"loss": 0.4093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20016592741012573,
|
|
"step": 500,
|
|
"valid_targets_mean": 3175.8,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 0.9042076991942704,
|
|
"grad_norm": 0.5919370272102532,
|
|
"learning_rate": 3.9900220276764013e-05,
|
|
"loss": 0.4074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16611447930335999,
|
|
"step": 505,
|
|
"valid_targets_mean": 3963.9,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 0.9131602506714414,
|
|
"grad_norm": 0.6819403148937269,
|
|
"learning_rate": 3.989112078579449e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2877485752105713,
|
|
"step": 510,
|
|
"valid_targets_mean": 5093.2,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 0.9221128021486124,
|
|
"grad_norm": 0.6769829439772228,
|
|
"learning_rate": 3.988162541238509e-05,
|
|
"loss": 0.4057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3087572157382965,
|
|
"step": 515,
|
|
"valid_targets_mean": 4994.5,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 0.9310653536257834,
|
|
"grad_norm": 0.7318964239583158,
|
|
"learning_rate": 3.98717343455172e-05,
|
|
"loss": 0.3829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.178988978266716,
|
|
"step": 520,
|
|
"valid_targets_mean": 3043.4,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 0.9400179051029544,
|
|
"grad_norm": 0.736170364782059,
|
|
"learning_rate": 3.9861447782047495e-05,
|
|
"loss": 0.426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2085171490907669,
|
|
"step": 525,
|
|
"valid_targets_mean": 3353.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 0.9489704565801254,
|
|
"grad_norm": 0.6775094297571437,
|
|
"learning_rate": 3.9850765926704e-05,
|
|
"loss": 0.4128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21749690175056458,
|
|
"step": 530,
|
|
"valid_targets_mean": 4364.0,
|
|
"valid_targets_min": 1464
|
|
},
|
|
{
|
|
"epoch": 0.9579230080572964,
|
|
"grad_norm": 0.7321608884861143,
|
|
"learning_rate": 3.9839688992082004e-05,
|
|
"loss": 0.3747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20859259366989136,
|
|
"step": 535,
|
|
"valid_targets_mean": 3524.5,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 0.9668755595344674,
|
|
"grad_norm": 0.6153393187558923,
|
|
"learning_rate": 3.9828217198639884e-05,
|
|
"loss": 0.4165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21272610127925873,
|
|
"step": 540,
|
|
"valid_targets_mean": 4658.5,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 0.9758281110116384,
|
|
"grad_norm": 0.798525150352497,
|
|
"learning_rate": 3.981635077469468e-05,
|
|
"loss": 0.4188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15253381431102753,
|
|
"step": 545,
|
|
"valid_targets_mean": 1837.2,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 0.9847806624888094,
|
|
"grad_norm": 0.7395653489663312,
|
|
"learning_rate": 3.980408995641751e-05,
|
|
"loss": 0.4116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2817031741142273,
|
|
"step": 550,
|
|
"valid_targets_mean": 3830.6,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 0.9937332139659804,
|
|
"grad_norm": 0.7202520372221168,
|
|
"learning_rate": 3.979143498782898e-05,
|
|
"loss": 0.4252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20223543047904968,
|
|
"step": 555,
|
|
"valid_targets_mean": 3306.4,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 1.001790510295434,
|
|
"grad_norm": 0.579798364095407,
|
|
"learning_rate": 3.977838612079419e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14297722280025482,
|
|
"step": 560,
|
|
"valid_targets_mean": 3010.2,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 1.0107430617726052,
|
|
"grad_norm": 0.7410636986612479,
|
|
"learning_rate": 3.976494361501786e-05,
|
|
"loss": 0.4051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15187795460224152,
|
|
"step": 565,
|
|
"valid_targets_mean": 2236.6,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 1.019695613249776,
|
|
"grad_norm": 0.692386761584416,
|
|
"learning_rate": 3.975110773803904e-05,
|
|
"loss": 0.373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2205927073955536,
|
|
"step": 570,
|
|
"valid_targets_mean": 4001.6,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 1.0286481647269472,
|
|
"grad_norm": 0.760062922065256,
|
|
"learning_rate": 3.973687876522587e-05,
|
|
"loss": 0.4077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1433718055486679,
|
|
"step": 575,
|
|
"valid_targets_mean": 2319.2,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 1.037600716204118,
|
|
"grad_norm": 0.6231058381003745,
|
|
"learning_rate": 3.9722256979770054e-05,
|
|
"loss": 0.377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25835368037223816,
|
|
"step": 580,
|
|
"valid_targets_mean": 5127.9,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 1.0465532676812892,
|
|
"grad_norm": 0.7798383704851466,
|
|
"learning_rate": 3.970724267268125e-05,
|
|
"loss": 0.4182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18632589280605316,
|
|
"step": 585,
|
|
"valid_targets_mean": 2504.1,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 1.05550581915846,
|
|
"grad_norm": 0.6724122975388895,
|
|
"learning_rate": 3.969183614278125e-05,
|
|
"loss": 0.3795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16075168550014496,
|
|
"step": 590,
|
|
"valid_targets_mean": 2468.0,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 1.0644583706356312,
|
|
"grad_norm": 0.6216207940286806,
|
|
"learning_rate": 3.9676037696698056e-05,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1869785636663437,
|
|
"step": 595,
|
|
"valid_targets_mean": 3637.8,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 1.073410922112802,
|
|
"grad_norm": 0.6038132355180709,
|
|
"learning_rate": 3.9659847648859775e-05,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19958503544330597,
|
|
"step": 600,
|
|
"valid_targets_mean": 5002.8,
|
|
"valid_targets_min": 2207
|
|
},
|
|
{
|
|
"epoch": 1.0823634735899732,
|
|
"grad_norm": 0.6547559144298567,
|
|
"learning_rate": 3.9643266321488354e-05,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1961314082145691,
|
|
"step": 605,
|
|
"valid_targets_mean": 3541.4,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 1.091316025067144,
|
|
"grad_norm": 0.6283731669036798,
|
|
"learning_rate": 3.962629404459317e-05,
|
|
"loss": 0.368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20619988441467285,
|
|
"step": 610,
|
|
"valid_targets_mean": 4151.5,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 1.1002685765443152,
|
|
"grad_norm": 0.733013694119783,
|
|
"learning_rate": 3.960893115596445e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19611495733261108,
|
|
"step": 615,
|
|
"valid_targets_mean": 3181.6,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 1.109221128021486,
|
|
"grad_norm": 0.7451977959813969,
|
|
"learning_rate": 3.959117800116658e-05,
|
|
"loss": 0.3859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2177126407623291,
|
|
"step": 620,
|
|
"valid_targets_mean": 3460.2,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 1.1181736794986572,
|
|
"grad_norm": 0.6425600662981381,
|
|
"learning_rate": 3.9573034933531195e-05,
|
|
"loss": 0.3747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1971556693315506,
|
|
"step": 625,
|
|
"valid_targets_mean": 5205.9,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 1.127126230975828,
|
|
"grad_norm": 0.7396238738527312,
|
|
"learning_rate": 3.955450231415014e-05,
|
|
"loss": 0.3657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18451285362243652,
|
|
"step": 630,
|
|
"valid_targets_mean": 3393.9,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 1.1360787824529992,
|
|
"grad_norm": 0.6577118059689598,
|
|
"learning_rate": 3.953558051186834e-05,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15830595791339874,
|
|
"step": 635,
|
|
"valid_targets_mean": 2799.0,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 1.14503133393017,
|
|
"grad_norm": 0.5395992030023509,
|
|
"learning_rate": 3.95162699032764e-05,
|
|
"loss": 0.3835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20836785435676575,
|
|
"step": 640,
|
|
"valid_targets_mean": 5411.8,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 1.153983885407341,
|
|
"grad_norm": 0.7044513534849349,
|
|
"learning_rate": 3.949657087270313e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1730477213859558,
|
|
"step": 645,
|
|
"valid_targets_mean": 2762.4,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 1.162936436884512,
|
|
"grad_norm": 0.7783993520311729,
|
|
"learning_rate": 3.947648381220789e-05,
|
|
"loss": 0.3658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1869765818119049,
|
|
"step": 650,
|
|
"valid_targets_mean": 2660.5,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 1.1718889883616832,
|
|
"grad_norm": 0.7192368699302448,
|
|
"learning_rate": 3.9456009121572824e-05,
|
|
"loss": 0.3592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1545119732618332,
|
|
"step": 655,
|
|
"valid_targets_mean": 2477.6,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 1.180841539838854,
|
|
"grad_norm": 0.592724596003384,
|
|
"learning_rate": 3.943514720829485e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15250423550605774,
|
|
"step": 660,
|
|
"valid_targets_mean": 3685.5,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 1.189794091316025,
|
|
"grad_norm": 0.8159242881243397,
|
|
"learning_rate": 3.941389848757756e-05,
|
|
"loss": 0.3938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18470154702663422,
|
|
"step": 665,
|
|
"valid_targets_mean": 2337.1,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 1.198746642793196,
|
|
"grad_norm": 0.7782799836744587,
|
|
"learning_rate": 3.9392263382323e-05,
|
|
"loss": 0.3885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20358330011367798,
|
|
"step": 670,
|
|
"valid_targets_mean": 5639.0,
|
|
"valid_targets_min": 2708
|
|
},
|
|
{
|
|
"epoch": 1.207699194270367,
|
|
"grad_norm": 0.8082493752911124,
|
|
"learning_rate": 3.93702423231232e-05,
|
|
"loss": 0.4009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17506203055381775,
|
|
"step": 675,
|
|
"valid_targets_mean": 2361.5,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 1.216651745747538,
|
|
"grad_norm": 0.5418104763492934,
|
|
"learning_rate": 3.9347835748251645e-05,
|
|
"loss": 0.3607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18025299906730652,
|
|
"step": 680,
|
|
"valid_targets_mean": 5253.0,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 1.225604297224709,
|
|
"grad_norm": 0.6442211318310618,
|
|
"learning_rate": 3.9325044103654526e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20891106128692627,
|
|
"step": 685,
|
|
"valid_targets_mean": 4414.8,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 1.23455684870188,
|
|
"grad_norm": 0.6451467432686171,
|
|
"learning_rate": 3.9301867842941867e-05,
|
|
"loss": 0.3718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19293522834777832,
|
|
"step": 690,
|
|
"valid_targets_mean": 3735.8,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 1.243509400179051,
|
|
"grad_norm": 0.5339050160402266,
|
|
"learning_rate": 3.9278307427378495e-05,
|
|
"loss": 0.3661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18412509560585022,
|
|
"step": 695,
|
|
"valid_targets_mean": 6164.5,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 1.252461951656222,
|
|
"grad_norm": 0.62297846690471,
|
|
"learning_rate": 3.92543633258749e-05,
|
|
"loss": 0.3706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13548460602760315,
|
|
"step": 700,
|
|
"valid_targets_mean": 2993.4,
|
|
"valid_targets_min": 1667
|
|
},
|
|
{
|
|
"epoch": 1.261414503133393,
|
|
"grad_norm": 0.6745712113247786,
|
|
"learning_rate": 3.923003601497785e-05,
|
|
"loss": 0.3885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21251079440116882,
|
|
"step": 705,
|
|
"valid_targets_mean": 3604.5,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 1.2703670546105639,
|
|
"grad_norm": 0.6186655781741142,
|
|
"learning_rate": 3.920532597886091e-05,
|
|
"loss": 0.3796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1852312535047531,
|
|
"step": 710,
|
|
"valid_targets_mean": 5199.1,
|
|
"valid_targets_min": 1838
|
|
},
|
|
{
|
|
"epoch": 1.279319606087735,
|
|
"grad_norm": 0.6476873687407971,
|
|
"learning_rate": 3.918023370931485e-05,
|
|
"loss": 0.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16326086223125458,
|
|
"step": 715,
|
|
"valid_targets_mean": 3766.6,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 1.288272157564906,
|
|
"grad_norm": 0.7346554234208614,
|
|
"learning_rate": 3.915475970573782e-05,
|
|
"loss": 0.365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16769498586654663,
|
|
"step": 720,
|
|
"valid_targets_mean": 2661.0,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 1.297224709042077,
|
|
"grad_norm": 0.5311121017298314,
|
|
"learning_rate": 3.9128904475125414e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2087760865688324,
|
|
"step": 725,
|
|
"valid_targets_mean": 5643.4,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 1.3061772605192479,
|
|
"grad_norm": 0.7108382046067068,
|
|
"learning_rate": 3.910266853206058e-05,
|
|
"loss": 0.3929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.326129674911499,
|
|
"step": 730,
|
|
"valid_targets_mean": 3954.4,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 1.315129811996419,
|
|
"grad_norm": 0.739273369261475,
|
|
"learning_rate": 3.907605239870342e-05,
|
|
"loss": 0.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20131108164787292,
|
|
"step": 735,
|
|
"valid_targets_mean": 2797.8,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 1.32408236347359,
|
|
"grad_norm": 0.6341507161751151,
|
|
"learning_rate": 3.904905660478072e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19136598706245422,
|
|
"step": 740,
|
|
"valid_targets_mean": 3837.9,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 1.333034914950761,
|
|
"grad_norm": 0.5325044817359841,
|
|
"learning_rate": 3.9021681687575465e-05,
|
|
"loss": 0.3511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09225505590438843,
|
|
"step": 745,
|
|
"valid_targets_mean": 2742.8,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 1.3419874664279319,
|
|
"grad_norm": 0.6544505821705787,
|
|
"learning_rate": 3.8993928191916134e-05,
|
|
"loss": 0.347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583926320075989,
|
|
"step": 750,
|
|
"valid_targets_mean": 4458.0,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 1.350940017905103,
|
|
"grad_norm": 0.7669301555471535,
|
|
"learning_rate": 3.8965796670165856e-05,
|
|
"loss": 0.3814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12664443254470825,
|
|
"step": 755,
|
|
"valid_targets_mean": 2020.8,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 1.3598925693822739,
|
|
"grad_norm": 0.7129506043666346,
|
|
"learning_rate": 3.893728768221139e-05,
|
|
"loss": 0.3773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25370943546295166,
|
|
"step": 760,
|
|
"valid_targets_mean": 3605.6,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 1.368845120859445,
|
|
"grad_norm": 0.6334981286822261,
|
|
"learning_rate": 3.8908401795452033e-05,
|
|
"loss": 0.3679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24940751492977142,
|
|
"step": 765,
|
|
"valid_targets_mean": 4710.8,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 1.3777976723366159,
|
|
"grad_norm": 0.6085268704857869,
|
|
"learning_rate": 3.8879139584788286e-05,
|
|
"loss": 0.3591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16782809793949127,
|
|
"step": 770,
|
|
"valid_targets_mean": 3777.8,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 1.386750223813787,
|
|
"grad_norm": 0.7858602694978715,
|
|
"learning_rate": 3.884950163261042e-05,
|
|
"loss": 0.373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13924919068813324,
|
|
"step": 775,
|
|
"valid_targets_mean": 2685.4,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 1.3957027752909579,
|
|
"grad_norm": 0.6289344762472306,
|
|
"learning_rate": 3.8819488528786904e-05,
|
|
"loss": 0.3881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18281199038028717,
|
|
"step": 780,
|
|
"valid_targets_mean": 4110.0,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 1.404655326768129,
|
|
"grad_norm": 0.6406051764111861,
|
|
"learning_rate": 3.878910087065264e-05,
|
|
"loss": 0.3444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16362640261650085,
|
|
"step": 785,
|
|
"valid_targets_mean": 2657.2,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 1.4136078782452999,
|
|
"grad_norm": 0.6648737360349115,
|
|
"learning_rate": 3.8758339262997094e-05,
|
|
"loss": 0.3706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13713306188583374,
|
|
"step": 790,
|
|
"valid_targets_mean": 2693.4,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 1.422560429722471,
|
|
"grad_norm": 0.4917049041944973,
|
|
"learning_rate": 3.872720431805224e-05,
|
|
"loss": 0.3745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11397358775138855,
|
|
"step": 795,
|
|
"valid_targets_mean": 3676.1,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 1.4315129811996419,
|
|
"grad_norm": 0.646876600165104,
|
|
"learning_rate": 3.86956966554804e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11982043087482452,
|
|
"step": 800,
|
|
"valid_targets_mean": 2264.4,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 1.440465532676813,
|
|
"grad_norm": 0.5032837505063902,
|
|
"learning_rate": 3.8663816902361896e-05,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17598767578601837,
|
|
"step": 805,
|
|
"valid_targets_mean": 5998.6,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 1.4494180841539839,
|
|
"grad_norm": 0.7075325188332748,
|
|
"learning_rate": 3.863156569318256e-05,
|
|
"loss": 0.3306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22245752811431885,
|
|
"step": 810,
|
|
"valid_targets_mean": 4238.8,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 1.4583706356311548,
|
|
"grad_norm": 0.7140064280888134,
|
|
"learning_rate": 3.8598943669821124e-05,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21389253437519073,
|
|
"step": 815,
|
|
"valid_targets_mean": 2880.5,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 1.4673231871083259,
|
|
"grad_norm": 0.644094370810947,
|
|
"learning_rate": 3.856595148153643e-05,
|
|
"loss": 0.3645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11374405771493912,
|
|
"step": 820,
|
|
"valid_targets_mean": 2398.5,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 1.476275738585497,
|
|
"grad_norm": 0.6911180314953034,
|
|
"learning_rate": 3.853258978495454e-05,
|
|
"loss": 0.3937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20857420563697815,
|
|
"step": 825,
|
|
"valid_targets_mean": 3739.2,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 1.4852282900626679,
|
|
"grad_norm": 0.6730820644303149,
|
|
"learning_rate": 3.8498859244055616e-05,
|
|
"loss": 0.3506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16855305433273315,
|
|
"step": 830,
|
|
"valid_targets_mean": 2936.1,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 1.4941808415398388,
|
|
"grad_norm": 0.6485920300501455,
|
|
"learning_rate": 3.8464760530160756e-05,
|
|
"loss": 0.3849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21470798552036285,
|
|
"step": 835,
|
|
"valid_targets_mean": 3659.1,
|
|
"valid_targets_min": 1797
|
|
},
|
|
{
|
|
"epoch": 1.5031333930170099,
|
|
"grad_norm": 0.6962762200783088,
|
|
"learning_rate": 3.843029432191858e-05,
|
|
"loss": 0.364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18541723489761353,
|
|
"step": 840,
|
|
"valid_targets_mean": 3259.0,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 1.512085944494181,
|
|
"grad_norm": 0.6094253072530402,
|
|
"learning_rate": 3.839546130529179e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13300365209579468,
|
|
"step": 845,
|
|
"valid_targets_mean": 3356.0,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 1.5210384959713519,
|
|
"grad_norm": 0.541921565334437,
|
|
"learning_rate": 3.8360262173543467e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11396744847297668,
|
|
"step": 850,
|
|
"valid_targets_mean": 3406.4,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 1.5299910474485228,
|
|
"grad_norm": 0.7195629206207933,
|
|
"learning_rate": 3.8324697627223263e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14234784245491028,
|
|
"step": 855,
|
|
"valid_targets_mean": 3167.8,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 1.5389435989256937,
|
|
"grad_norm": 1.1044858797443737,
|
|
"learning_rate": 3.828876837415352e-05,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260916531085968,
|
|
"step": 860,
|
|
"valid_targets_mean": 3686.5,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 1.5478961504028648,
|
|
"grad_norm": 0.6173627572346159,
|
|
"learning_rate": 3.8252475129415127e-05,
|
|
"loss": 0.3492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11603335291147232,
|
|
"step": 865,
|
|
"valid_targets_mean": 2601.2,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 1.5568487018800359,
|
|
"grad_norm": 0.612868623270254,
|
|
"learning_rate": 3.82158186153333e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1899331510066986,
|
|
"step": 870,
|
|
"valid_targets_mean": 3989.4,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 1.5658012533572068,
|
|
"grad_norm": 0.8308887319226448,
|
|
"learning_rate": 3.817879956146323e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23491792380809784,
|
|
"step": 875,
|
|
"valid_targets_mean": 2837.4,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 1.5747538048343777,
|
|
"grad_norm": 0.5706204421068442,
|
|
"learning_rate": 3.814141870457553e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21271702647209167,
|
|
"step": 880,
|
|
"valid_targets_mean": 3921.5,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 1.5837063563115488,
|
|
"grad_norm": 0.5905492028618122,
|
|
"learning_rate": 3.810367678864159e-05,
|
|
"loss": 0.347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1883733570575714,
|
|
"step": 885,
|
|
"valid_targets_mean": 4394.5,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 1.5926589077887199,
|
|
"grad_norm": 0.7269147011558954,
|
|
"learning_rate": 3.806557456481878e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1721513271331787,
|
|
"step": 890,
|
|
"valid_targets_mean": 2881.2,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 1.6016114592658908,
|
|
"grad_norm": 0.6068211615448192,
|
|
"learning_rate": 3.8027112791435466e-05,
|
|
"loss": 0.3932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1983214020729065,
|
|
"step": 895,
|
|
"valid_targets_mean": 5167.1,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 1.6105640107430617,
|
|
"grad_norm": 0.6812461385143791,
|
|
"learning_rate": 3.7988292233975947e-05,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13996651768684387,
|
|
"step": 900,
|
|
"valid_targets_mean": 2514.6,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 1.6195165622202328,
|
|
"grad_norm": 0.5697069170093747,
|
|
"learning_rate": 3.7949113665065226e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11300276964902878,
|
|
"step": 905,
|
|
"valid_targets_mean": 2734.5,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 1.6284691136974039,
|
|
"grad_norm": 0.7967680943849905,
|
|
"learning_rate": 3.7909577864453593e-05,
|
|
"loss": 0.3654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19516390562057495,
|
|
"step": 910,
|
|
"valid_targets_mean": 2512.0,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 1.6374216651745748,
|
|
"grad_norm": 0.6714153332367297,
|
|
"learning_rate": 3.786968561900116e-05,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1842774748802185,
|
|
"step": 915,
|
|
"valid_targets_mean": 3405.9,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 1.6463742166517457,
|
|
"grad_norm": 0.6561038510002812,
|
|
"learning_rate": 3.782943772266213e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23451654613018036,
|
|
"step": 920,
|
|
"valid_targets_mean": 4284.4,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 1.6553267681289168,
|
|
"grad_norm": 0.6142681543103801,
|
|
"learning_rate": 3.7788834976469095e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20726540684700012,
|
|
"step": 925,
|
|
"valid_targets_mean": 4569.1,
|
|
"valid_targets_min": 2473
|
|
},
|
|
{
|
|
"epoch": 1.6642793196060879,
|
|
"grad_norm": 0.6213721879990686,
|
|
"learning_rate": 3.7747878188516965e-05,
|
|
"loss": 0.3717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21202799677848816,
|
|
"step": 930,
|
|
"valid_targets_mean": 4032.8,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 1.6732318710832588,
|
|
"grad_norm": 0.5774124752627617,
|
|
"learning_rate": 3.770656817394703e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09195344150066376,
|
|
"step": 935,
|
|
"valid_targets_mean": 2147.5,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 1.6821844225604297,
|
|
"grad_norm": 0.5792812289191612,
|
|
"learning_rate": 3.7664905754930616e-05,
|
|
"loss": 0.3811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2350800782442093,
|
|
"step": 940,
|
|
"valid_targets_mean": 5344.9,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 1.6911369740376008,
|
|
"grad_norm": 0.5250025119182727,
|
|
"learning_rate": 3.762289176065276e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19462850689888,
|
|
"step": 945,
|
|
"valid_targets_mean": 6841.0,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 1.7000895255147717,
|
|
"grad_norm": 0.649613775475992,
|
|
"learning_rate": 3.758052702729576e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17525091767311096,
|
|
"step": 950,
|
|
"valid_targets_mean": 3303.4,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 1.7090420769919428,
|
|
"grad_norm": 0.6480009935206632,
|
|
"learning_rate": 3.753781239802245e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17123568058013916,
|
|
"step": 955,
|
|
"valid_targets_mean": 3213.5,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 1.7179946284691137,
|
|
"grad_norm": 0.5665419742760008,
|
|
"learning_rate": 3.749474872295946e-05,
|
|
"loss": 0.3598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17659839987754822,
|
|
"step": 960,
|
|
"valid_targets_mean": 4176.4,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 1.7269471799462845,
|
|
"grad_norm": 0.4711008135049166,
|
|
"learning_rate": 3.745133685918032e-05,
|
|
"loss": 0.358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13374367356300354,
|
|
"step": 965,
|
|
"valid_targets_mean": 4934.9,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 1.7358997314234557,
|
|
"grad_norm": 0.5941103023388754,
|
|
"learning_rate": 3.740757767068834e-05,
|
|
"loss": 0.353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18420416116714478,
|
|
"step": 970,
|
|
"valid_targets_mean": 4014.4,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 1.7448522829006268,
|
|
"grad_norm": 0.6018670288242158,
|
|
"learning_rate": 3.7363472028399476e-05,
|
|
"loss": 0.3582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20882412791252136,
|
|
"step": 975,
|
|
"valid_targets_mean": 4805.8,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 1.7538048343777977,
|
|
"grad_norm": 0.5684940241609523,
|
|
"learning_rate": 3.7319020810124965e-05,
|
|
"loss": 0.3466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13425326347351074,
|
|
"step": 980,
|
|
"valid_targets_mean": 2943.4,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 1.7627573858549686,
|
|
"grad_norm": 0.578302453704476,
|
|
"learning_rate": 3.727422490055386e-05,
|
|
"loss": 0.3591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16196070611476898,
|
|
"step": 985,
|
|
"valid_targets_mean": 4246.9,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 1.7717099373321397,
|
|
"grad_norm": 0.5126381116701886,
|
|
"learning_rate": 3.72290851912354e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1364893913269043,
|
|
"step": 990,
|
|
"valid_targets_mean": 4596.4,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 1.7806624888093108,
|
|
"grad_norm": 0.6689103493428239,
|
|
"learning_rate": 3.718360258056133e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17874747514724731,
|
|
"step": 995,
|
|
"valid_targets_mean": 3822.0,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 1.7896150402864817,
|
|
"grad_norm": 0.5733552205006976,
|
|
"learning_rate": 3.713777797374794e-05,
|
|
"loss": 0.342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1342494636774063,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3976.6,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 1.7985675917636526,
|
|
"grad_norm": 0.615956841130552,
|
|
"learning_rate": 3.709161228281811e-05,
|
|
"loss": 0.3577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1488458812236786,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3553.6,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 1.8075201432408237,
|
|
"grad_norm": 0.6675507908636208,
|
|
"learning_rate": 3.704510642658314e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10533076524734497,
|
|
"step": 1010,
|
|
"valid_targets_mean": 2549.6,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 1.8164726947179948,
|
|
"grad_norm": 0.5622567774770557,
|
|
"learning_rate": 3.699826133062443e-05,
|
|
"loss": 0.3486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16377931833267212,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4790.0,
|
|
"valid_targets_min": 1564
|
|
},
|
|
{
|
|
"epoch": 1.8254252461951657,
|
|
"grad_norm": 0.7045205939224921,
|
|
"learning_rate": 3.6951077927275126e-05,
|
|
"loss": 0.3626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17814645171165466,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3490.4,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 1.8343777976723366,
|
|
"grad_norm": 0.5623018666792298,
|
|
"learning_rate": 3.6903557155601503e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15948252379894257,
|
|
"step": 1025,
|
|
"valid_targets_mean": 4541.9,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 1.8433303491495077,
|
|
"grad_norm": 0.6793583770529867,
|
|
"learning_rate": 3.685569996138431e-05,
|
|
"loss": 0.3511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18709158897399902,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3235.8,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.8522829006266786,
|
|
"grad_norm": 0.6230901893070521,
|
|
"learning_rate": 3.680750729709993e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20924879610538483,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3889.1,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 1.8612354521038497,
|
|
"grad_norm": 0.6213562591140741,
|
|
"learning_rate": 3.675898012190143e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17583440244197845,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3936.8,
|
|
"valid_targets_min": 1664
|
|
},
|
|
{
|
|
"epoch": 1.8701880035810206,
|
|
"grad_norm": 0.6347145618018462,
|
|
"learning_rate": 3.6710119401599474e-05,
|
|
"loss": 0.3631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1309717744588852,
|
|
"step": 1045,
|
|
"valid_targets_mean": 2106.9,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 1.8791405550581914,
|
|
"grad_norm": 0.58973298577871,
|
|
"learning_rate": 3.6660926108643086e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19553452730178833,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3828.2,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 1.8880931065353626,
|
|
"grad_norm": 0.6896943990427881,
|
|
"learning_rate": 3.661140122210032e-05,
|
|
"loss": 0.3628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12913288176059723,
|
|
"step": 1055,
|
|
"valid_targets_mean": 2498.1,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 1.8970456580125337,
|
|
"grad_norm": 0.623411962311995,
|
|
"learning_rate": 3.656154572763877e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14518050849437714,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4387.0,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 1.9059982094897046,
|
|
"grad_norm": 0.6976422673738203,
|
|
"learning_rate": 3.651136061750592e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1363721787929535,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2462.8,
|
|
"valid_targets_min": 1088
|
|
},
|
|
{
|
|
"epoch": 1.9149507609668754,
|
|
"grad_norm": 0.5544024419127411,
|
|
"learning_rate": 3.646084689050942e-05,
|
|
"loss": 0.3551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23097077012062073,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5681.8,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 1.9239033124440466,
|
|
"grad_norm": 0.5989450206693978,
|
|
"learning_rate": 3.641000555199725e-05,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3325088322162628,
|
|
"step": 1075,
|
|
"valid_targets_mean": 6761.1,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 1.9328558639212177,
|
|
"grad_norm": 0.5099408282063519,
|
|
"learning_rate": 3.6358837613837604e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1250154674053192,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3475.6,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 1.9418084153983886,
|
|
"grad_norm": 0.6114130466606059,
|
|
"learning_rate": 3.630734409439887e-05,
|
|
"loss": 0.3657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18339493870735168,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4119.0,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 1.9507609668755594,
|
|
"grad_norm": 0.7222349042904347,
|
|
"learning_rate": 3.625552601852928e-05,
|
|
"loss": 0.3591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1875397115945816,
|
|
"step": 1090,
|
|
"valid_targets_mean": 2863.2,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 1.9597135183527306,
|
|
"grad_norm": 0.6804750208910717,
|
|
"learning_rate": 3.6203384417536566e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18265077471733093,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3868.4,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 1.9686660698299017,
|
|
"grad_norm": 0.6657262397912295,
|
|
"learning_rate": 3.615092032916736e-05,
|
|
"loss": 0.3675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16759824752807617,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3627.8,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 1.9776186213070726,
|
|
"grad_norm": 0.6073636882366005,
|
|
"learning_rate": 3.6098134797586646e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12195791304111481,
|
|
"step": 1105,
|
|
"valid_targets_mean": 2877.5,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 1.9865711727842434,
|
|
"grad_norm": 0.8596770463224701,
|
|
"learning_rate": 3.604502887335688e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19915196299552917,
|
|
"step": 1110,
|
|
"valid_targets_mean": 5150.5,
|
|
"valid_targets_min": 2758
|
|
},
|
|
{
|
|
"epoch": 1.9955237242614146,
|
|
"grad_norm": 0.5642215443988173,
|
|
"learning_rate": 3.599160361341715e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16652005910873413,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4599.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 2.003581020590868,
|
|
"grad_norm": 0.6052219331868266,
|
|
"learning_rate": 3.59378600810621e-05,
|
|
"loss": 0.3411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20534177124500275,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5202.9,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 2.0125335720680395,
|
|
"grad_norm": 0.6372134090351926,
|
|
"learning_rate": 3.588379934592078e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12786754965782166,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2973.5,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 2.0214861235452104,
|
|
"grad_norm": 0.6723864873622751,
|
|
"learning_rate": 3.5829422483935374e-05,
|
|
"loss": 0.3583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15999622642993927,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3738.4,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 2.0304386750223813,
|
|
"grad_norm": 0.6401323119556832,
|
|
"learning_rate": 3.577473057733975e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12839260697364807,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3227.8,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 2.039391226499552,
|
|
"grad_norm": 0.5527559741075448,
|
|
"learning_rate": 3.571972471463795e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17527739703655243,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5061.2,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.0483437779767235,
|
|
"grad_norm": 0.453223635680156,
|
|
"learning_rate": 3.566440599058253e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0904235690832138,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3904.6,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 2.0572963294538944,
|
|
"grad_norm": 0.7061541670317182,
|
|
"learning_rate": 3.560877550615275e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17458279430866241,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3816.8,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 2.0662488809310653,
|
|
"grad_norm": 0.5763514441348033,
|
|
"learning_rate": 3.555283436853267e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10202936828136444,
|
|
"step": 1155,
|
|
"valid_targets_mean": 2085.8,
|
|
"valid_targets_min": 348
|
|
},
|
|
{
|
|
"epoch": 2.075201432408236,
|
|
"grad_norm": 0.6128115206461063,
|
|
"learning_rate": 3.549658369108911e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07467757165431976,
|
|
"step": 1160,
|
|
"valid_targets_mean": 1714.5,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 2.0841539838854075,
|
|
"grad_norm": 0.6446047657894652,
|
|
"learning_rate": 3.544002459334952e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20798584818840027,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5297.9,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 2.0931065353625784,
|
|
"grad_norm": 0.7059351630467845,
|
|
"learning_rate": 3.5383158200979636e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17275607585906982,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2938.5,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 2.1020590868397493,
|
|
"grad_norm": 0.5826650464806887,
|
|
"learning_rate": 3.532598564576117e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14418719708919525,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4263.2,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 2.11101163831692,
|
|
"grad_norm": 0.6875978539339866,
|
|
"learning_rate": 3.526850806556919e-05,
|
|
"loss": 0.3385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17782482504844666,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3434.2,
|
|
"valid_targets_min": 2377
|
|
},
|
|
{
|
|
"epoch": 2.1199641897940915,
|
|
"grad_norm": 0.5535655704379627,
|
|
"learning_rate": 3.521072660434952e-05,
|
|
"loss": 0.3388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19075921177864075,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4385.1,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 2.1289167412712624,
|
|
"grad_norm": 0.5373177167125995,
|
|
"learning_rate": 3.5152642412095984e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1952834129333496,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4648.5,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 2.1378692927484333,
|
|
"grad_norm": 0.6179979971643651,
|
|
"learning_rate": 3.5094256644827474e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12283094227313995,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3926.1,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 2.146821844225604,
|
|
"grad_norm": 0.6783117172187012,
|
|
"learning_rate": 3.503557046456501e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17674294114112854,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3098.4,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 2.1557743957027755,
|
|
"grad_norm": 0.5363304730541903,
|
|
"learning_rate": 3.4976585039308535e-05,
|
|
"loss": 0.3409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18800324201583862,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5661.8,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 2.1647269471799464,
|
|
"grad_norm": 0.6647851175758868,
|
|
"learning_rate": 3.491730154301372e-05,
|
|
"loss": 0.3397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14593389630317688,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3087.5,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 2.1736794986571173,
|
|
"grad_norm": 0.6195332060911277,
|
|
"learning_rate": 3.485772115556859e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20628832280635834,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5044.6,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 2.182632050134288,
|
|
"grad_norm": 0.6787497059916711,
|
|
"learning_rate": 3.4797845062770045e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903105676174164,
|
|
"step": 1220,
|
|
"valid_targets_mean": 2183.8,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 2.191584601611459,
|
|
"grad_norm": 0.6282562749240457,
|
|
"learning_rate": 3.473767445630022e-05,
|
|
"loss": 0.3268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.142918199300766,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3534.5,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 2.2005371530886304,
|
|
"grad_norm": 0.7211672197526297,
|
|
"learning_rate": 3.467721053370284e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16219381988048553,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2918.1,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 2.2094897045658013,
|
|
"grad_norm": 0.5816702367464138,
|
|
"learning_rate": 3.4616454498359306e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13409647345542908,
|
|
"step": 1235,
|
|
"valid_targets_mean": 3830.8,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 2.218442256042972,
|
|
"grad_norm": 0.5734139252571135,
|
|
"learning_rate": 3.4555407559464825e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18026067316532135,
|
|
"step": 1240,
|
|
"valid_targets_mean": 5342.8,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 2.227394807520143,
|
|
"grad_norm": 0.771243368082431,
|
|
"learning_rate": 3.4494070932004274e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1591377556324005,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2311.4,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 2.2363473589973144,
|
|
"grad_norm": 0.5336435857271173,
|
|
"learning_rate": 3.4432445836728055e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08339603245258331,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2657.4,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 2.2452999104744853,
|
|
"grad_norm": 0.6603765461133814,
|
|
"learning_rate": 3.4370533500127794e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1445946991443634,
|
|
"step": 1255,
|
|
"valid_targets_mean": 2847.9,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 2.254252461951656,
|
|
"grad_norm": 0.562964584770617,
|
|
"learning_rate": 3.430833515441193e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1468866467475891,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4139.5,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 2.263205013428827,
|
|
"grad_norm": 0.5539438164656543,
|
|
"learning_rate": 3.424585203748119e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2521764039993286,
|
|
"step": 1265,
|
|
"valid_targets_mean": 6956.5,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 2.2721575649059984,
|
|
"grad_norm": 0.525859014560588,
|
|
"learning_rate": 3.4183085392903965e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13610875606536865,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4491.5,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 2.2811101163831693,
|
|
"grad_norm": 0.5713317851483184,
|
|
"learning_rate": 3.41200364698915e-05,
|
|
"loss": 0.3171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16015830636024475,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4629.2,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 2.29006266786034,
|
|
"grad_norm": 0.6197825601277361,
|
|
"learning_rate": 3.405670652327313e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12453843653202057,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2702.1,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 2.299015219337511,
|
|
"grad_norm": 0.6081138136553563,
|
|
"learning_rate": 3.399309681347123e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24878063797950745,
|
|
"step": 1285,
|
|
"valid_targets_mean": 5480.4,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 2.307967770814682,
|
|
"grad_norm": 0.5662395930780025,
|
|
"learning_rate": 3.392920860647617e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20692947506904602,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5727.5,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 2.3169203222918533,
|
|
"grad_norm": 0.6357833331434369,
|
|
"learning_rate": 3.3865043173821074e-05,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16444212198257446,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3905.9,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 2.325872873769024,
|
|
"grad_norm": 0.6131522307396422,
|
|
"learning_rate": 3.380060179255656e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1174757182598114,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3201.8,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 2.334825425246195,
|
|
"grad_norm": 0.6863608314070286,
|
|
"learning_rate": 3.37358857452253e-05,
|
|
"loss": 0.3427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14921192824840546,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3493.2,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 2.3437779767233664,
|
|
"grad_norm": 0.5237704728272313,
|
|
"learning_rate": 3.367089631983651e-05,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16521351039409637,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5021.4,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 2.3527305282005373,
|
|
"grad_norm": 0.6996060481011926,
|
|
"learning_rate": 3.360563480984029e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1772424280643463,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3603.8,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.361683079677708,
|
|
"grad_norm": 0.7294480018686996,
|
|
"learning_rate": 3.3540102514101904e-05,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20145997405052185,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2952.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 2.370635631154879,
|
|
"grad_norm": 0.6581096747139857,
|
|
"learning_rate": 3.347430073687592e-05,
|
|
"loss": 0.3387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19283172488212585,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4159.8,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 2.37958818263205,
|
|
"grad_norm": 0.7560342075752016,
|
|
"learning_rate": 3.340823078778024e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13571259379386902,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3687.5,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 2.3885407341092213,
|
|
"grad_norm": 0.6492264139164943,
|
|
"learning_rate": 3.3341893981770086e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17319390177726746,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4212.1,
|
|
"valid_targets_min": 1287
|
|
},
|
|
{
|
|
"epoch": 2.397493285586392,
|
|
"grad_norm": 0.7608633793249342,
|
|
"learning_rate": 3.327529163911174e-05,
|
|
"loss": 0.3399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.180658221244812,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3035.4,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 2.406445837063563,
|
|
"grad_norm": 0.6040569218759534,
|
|
"learning_rate": 3.320842508535636e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09077704697847366,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2779.8,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 2.415398388540734,
|
|
"grad_norm": 0.5775762445982943,
|
|
"learning_rate": 3.314129565131355e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17546844482421875,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4502.0,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 2.424350940017905,
|
|
"grad_norm": 0.6769552471767344,
|
|
"learning_rate": 3.3073904673024854e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15670402348041534,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3034.5,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 2.433303491495076,
|
|
"grad_norm": 0.6316224464390162,
|
|
"learning_rate": 3.300625349173723e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16486474871635437,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3398.2,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 2.442256042972247,
|
|
"grad_norm": 0.5840973689561537,
|
|
"learning_rate": 3.29383434538763e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24210324883460999,
|
|
"step": 1365,
|
|
"valid_targets_mean": 4687.2,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 2.451208594449418,
|
|
"grad_norm": 0.7500647888106254,
|
|
"learning_rate": 3.287017591101957e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13718849420547485,
|
|
"step": 1370,
|
|
"valid_targets_mean": 1851.1,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 2.4601611459265893,
|
|
"grad_norm": 0.5428272494700659,
|
|
"learning_rate": 3.2801752219869536e-05,
|
|
"loss": 0.324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12291097640991211,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3957.2,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 2.46911369740376,
|
|
"grad_norm": 0.7959123302263251,
|
|
"learning_rate": 3.273307374222667e-05,
|
|
"loss": 0.3425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13849949836730957,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2558.8,
|
|
"valid_targets_min": 972
|
|
},
|
|
{
|
|
"epoch": 2.478066248880931,
|
|
"grad_norm": 0.7062680019503911,
|
|
"learning_rate": 3.266414184496233e-05,
|
|
"loss": 0.3342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1923518180847168,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4609.4,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 2.487018800358102,
|
|
"grad_norm": 0.6406251692788578,
|
|
"learning_rate": 3.2594957899991566e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11892961710691452,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2757.5,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 2.495971351835273,
|
|
"grad_norm": 1.2275726101592588,
|
|
"learning_rate": 3.2525523284245766e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11468460410833359,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3384.9,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 2.504923903312444,
|
|
"grad_norm": 0.6884681320356183,
|
|
"learning_rate": 3.245583937964532e-05,
|
|
"loss": 0.3223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134379580616951,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2567.1,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 2.513876454789615,
|
|
"grad_norm": 0.7779458297473585,
|
|
"learning_rate": 3.238590757307206e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15265807509422302,
|
|
"step": 1405,
|
|
"valid_targets_mean": 2609.0,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 2.522829006266786,
|
|
"grad_norm": 0.6640063486633304,
|
|
"learning_rate": 3.2315729256341686e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1987053006887436,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3837.5,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 2.5317815577439573,
|
|
"grad_norm": 0.5553044657523085,
|
|
"learning_rate": 3.2245305826176063e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19759173691272736,
|
|
"step": 1415,
|
|
"valid_targets_mean": 6199.4,
|
|
"valid_targets_min": 2226
|
|
},
|
|
{
|
|
"epoch": 2.5407341092211277,
|
|
"grad_norm": 0.709939055780021,
|
|
"learning_rate": 3.217463868417541e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1832863688468933,
|
|
"step": 1420,
|
|
"valid_targets_mean": 5806.5,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 2.549686660698299,
|
|
"grad_norm": 0.6357425947737642,
|
|
"learning_rate": 3.2103729236790434e-05,
|
|
"loss": 0.3533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.195903941988945,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3483.8,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 2.55863921217547,
|
|
"grad_norm": 0.636197345665908,
|
|
"learning_rate": 3.203257889529428e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11567019671201706,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2667.1,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 2.567591763652641,
|
|
"grad_norm": 0.7235618047532826,
|
|
"learning_rate": 3.196118907575452e-05,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1823103278875351,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3246.8,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 2.576544315129812,
|
|
"grad_norm": 0.7047547532105115,
|
|
"learning_rate": 3.188956119900491e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16611355543136597,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3224.5,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 2.585496866606983,
|
|
"grad_norm": 0.6653266014374837,
|
|
"learning_rate": 3.181769669061713e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1501968652009964,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3479.5,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 2.594449418084154,
|
|
"grad_norm": 0.6844620037691017,
|
|
"learning_rate": 3.174559698087244e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1589304506778717,
|
|
"step": 1450,
|
|
"valid_targets_mean": 2945.6,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 2.603401969561325,
|
|
"grad_norm": 0.7613153397840512,
|
|
"learning_rate": 3.1673263504733136e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18859216570854187,
|
|
"step": 1455,
|
|
"valid_targets_mean": 2624.0,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 2.6123545210384957,
|
|
"grad_norm": 0.5871776640120691,
|
|
"learning_rate": 3.160069770181411e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11848124116659164,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3289.4,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 2.621307072515667,
|
|
"grad_norm": 0.605999766136029,
|
|
"learning_rate": 3.152790101635408e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13618788123130798,
|
|
"step": 1465,
|
|
"valid_targets_mean": 2836.8,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 2.630259623992838,
|
|
"grad_norm": 0.5726662176186619,
|
|
"learning_rate": 3.145487489718692e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15494957566261292,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3869.1,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 2.639212175470009,
|
|
"grad_norm": 0.5365989748453186,
|
|
"learning_rate": 3.138162079771278e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15874773263931274,
|
|
"step": 1475,
|
|
"valid_targets_mean": 4577.9,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 2.64816472694718,
|
|
"grad_norm": 0.5551684662320231,
|
|
"learning_rate": 3.1308140175869216e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14446908235549927,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4658.5,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 2.657117278424351,
|
|
"grad_norm": 0.6388833788450519,
|
|
"learning_rate": 3.123443449410211e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1660110056400299,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3828.0,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 2.666069829901522,
|
|
"grad_norm": 0.7296008634040168,
|
|
"learning_rate": 3.1160505219336594e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19480803608894348,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3797.8,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 2.675022381378693,
|
|
"grad_norm": 0.5872767622862354,
|
|
"learning_rate": 3.108635382294787e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18170863389968872,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4497.4,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 2.6839749328558637,
|
|
"grad_norm": 0.6001385480022825,
|
|
"learning_rate": 3.101198178073189e-05,
|
|
"loss": 0.3295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15100830793380737,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3492.2,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 2.692927484333035,
|
|
"grad_norm": 0.5942570457251561,
|
|
"learning_rate": 3.093739057287603e-05,
|
|
"loss": 0.3549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2285933643579483,
|
|
"step": 1505,
|
|
"valid_targets_mean": 5245.6,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 2.701880035810206,
|
|
"grad_norm": 0.5518275448844328,
|
|
"learning_rate": 3.086258168392957e-05,
|
|
"loss": 0.3334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13938848674297333,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3998.1,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 2.710832587287377,
|
|
"grad_norm": 0.5695709347856279,
|
|
"learning_rate": 3.0787556602774195e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11261847615242004,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3775.1,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 2.7197851387645477,
|
|
"grad_norm": 0.6491649502754991,
|
|
"learning_rate": 3.071231682259437e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0938911885023117,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2205.0,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 2.7287376902417186,
|
|
"grad_norm": 0.7019210724410154,
|
|
"learning_rate": 3.063686384084756e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19694435596466064,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3414.0,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 2.73769024171889,
|
|
"grad_norm": 0.6690446047856118,
|
|
"learning_rate": 3.05611991592345e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1392868608236313,
|
|
"step": 1530,
|
|
"valid_targets_mean": 2480.8,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 2.746642793196061,
|
|
"grad_norm": 0.522232064697356,
|
|
"learning_rate": 3.0485324283669288e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13069400191307068,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4926.6,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 2.7555953446732318,
|
|
"grad_norm": 0.6183033586621423,
|
|
"learning_rate": 3.0409240724249334e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1853337287902832,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4915.0,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 2.764547896150403,
|
|
"grad_norm": 0.4868431778426567,
|
|
"learning_rate": 3.033294999522545e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15458467602729797,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4923.0,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 2.773500447627574,
|
|
"grad_norm": 0.7871239617122384,
|
|
"learning_rate": 3.0256453614971594e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17956718802452087,
|
|
"step": 1550,
|
|
"valid_targets_mean": 2356.8,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 2.782452999104745,
|
|
"grad_norm": 0.6070057091754848,
|
|
"learning_rate": 3.017975310595469e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1523509919643402,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3920.9,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 2.7914055505819158,
|
|
"grad_norm": 0.5835084934822887,
|
|
"learning_rate": 3.0102849994704343e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24419042468070984,
|
|
"step": 1560,
|
|
"valid_targets_mean": 6938.5,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 2.8003581020590866,
|
|
"grad_norm": 0.5723181812585326,
|
|
"learning_rate": 3.0025745811782444e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16458186507225037,
|
|
"step": 1565,
|
|
"valid_targets_mean": 6416.8,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 2.809310653536258,
|
|
"grad_norm": 0.8168745210965721,
|
|
"learning_rate": 2.994844209175269e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2504898011684418,
|
|
"step": 1570,
|
|
"valid_targets_mean": 6881.8,
|
|
"valid_targets_min": 2422
|
|
},
|
|
{
|
|
"epoch": 2.818263205013429,
|
|
"grad_norm": 0.6134084352950554,
|
|
"learning_rate": 2.987094037315008e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10477450489997864,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2753.1,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 2.8272157564905998,
|
|
"grad_norm": 0.6017942096883853,
|
|
"learning_rate": 2.9793242198450258e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22128619253635406,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4756.5,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 2.836168307967771,
|
|
"grad_norm": 0.6881016094467826,
|
|
"learning_rate": 2.9715349114038825e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2594001293182373,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4732.9,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 2.845120859444942,
|
|
"grad_norm": 0.6892520214493385,
|
|
"learning_rate": 2.9637262670180597e-05,
|
|
"loss": 0.3321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15683342516422272,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4689.9,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 2.854073410922113,
|
|
"grad_norm": 0.5987162236177794,
|
|
"learning_rate": 2.955898442098869e-05,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.131935715675354,
|
|
"step": 1595,
|
|
"valid_targets_mean": 3541.9,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 2.8630259623992838,
|
|
"grad_norm": 0.6058561074570699,
|
|
"learning_rate": 2.948051592439363e-05,
|
|
"loss": 0.3157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15223629772663116,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4136.6,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 2.8719785138764546,
|
|
"grad_norm": 0.643496513498291,
|
|
"learning_rate": 2.9401858742112334e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1517014503479004,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3460.1,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 2.880931065353626,
|
|
"grad_norm": 0.6081337292995073,
|
|
"learning_rate": 2.9323014439617044e-05,
|
|
"loss": 0.3244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1477939486503601,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3606.8,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 2.889883616830797,
|
|
"grad_norm": 0.7519232605972647,
|
|
"learning_rate": 2.924398458610414e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16121116280555725,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2771.6,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 2.8988361683079678,
|
|
"grad_norm": 0.7600425688063651,
|
|
"learning_rate": 2.9164770754462926e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11213885247707367,
|
|
"step": 1620,
|
|
"valid_targets_mean": 1783.6,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.9077887197851386,
|
|
"grad_norm": 0.7521189124061843,
|
|
"learning_rate": 2.9085374521244333e-05,
|
|
"loss": 0.3579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12697280943393707,
|
|
"step": 1625,
|
|
"valid_targets_mean": 2481.1,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 2.9167412712623095,
|
|
"grad_norm": 0.6453096854133333,
|
|
"learning_rate": 2.900579746662954e-05,
|
|
"loss": 0.3219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2347714602947235,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4646.9,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 2.925693822739481,
|
|
"grad_norm": 0.6077027161283227,
|
|
"learning_rate": 2.8926041174398496e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14254099130630493,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2738.0,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 2.9346463742166518,
|
|
"grad_norm": 0.6255429232553751,
|
|
"learning_rate": 2.8846107231898445e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11949039995670319,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2887.8,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 2.9435989256938226,
|
|
"grad_norm": 0.5375741825450264,
|
|
"learning_rate": 2.8765997230012295e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16180676221847534,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4740.6,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 2.952551477170994,
|
|
"grad_norm": 0.5606317759146799,
|
|
"learning_rate": 2.868571276312698e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09235462546348572,
|
|
"step": 1650,
|
|
"valid_targets_mean": 2538.4,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 2.961504028648165,
|
|
"grad_norm": 0.495487570027274,
|
|
"learning_rate": 2.860525542910171e-05,
|
|
"loss": 0.3625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15310625731945038,
|
|
"step": 1655,
|
|
"valid_targets_mean": 6490.4,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 2.9704565801253358,
|
|
"grad_norm": 0.6035435039933449,
|
|
"learning_rate": 2.852462682923619e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11764907091856003,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3602.6,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 2.9794091316025066,
|
|
"grad_norm": 0.5800177987722784,
|
|
"learning_rate": 2.844382856823872e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17618133127689362,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4074.4,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 2.9883616830796775,
|
|
"grad_norm": 0.5646098547959599,
|
|
"learning_rate": 2.8362862254194298e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1157437115907669,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4417.1,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 2.997314234556849,
|
|
"grad_norm": 0.5842366355959351,
|
|
"learning_rate": 2.8281729498532574e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11269823461771011,
|
|
"step": 1675,
|
|
"valid_targets_mean": 2794.1,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 3.0053715308863027,
|
|
"grad_norm": 0.7896580577617582,
|
|
"learning_rate": 2.8200431915995805e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18021970987319946,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2496.4,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 3.0143240823634736,
|
|
"grad_norm": 0.6135743160512117,
|
|
"learning_rate": 2.8118971124606712e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11502575874328613,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3006.2,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 3.0232766338406445,
|
|
"grad_norm": 0.778235505268862,
|
|
"learning_rate": 2.8037348745636274e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16493195295333862,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3107.1,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 3.0322291853178154,
|
|
"grad_norm": 0.6754797455330058,
|
|
"learning_rate": 2.7955566403571464e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10485086590051651,
|
|
"step": 1695,
|
|
"valid_targets_mean": 2169.6,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 3.0411817367949867,
|
|
"grad_norm": 0.6283660091763017,
|
|
"learning_rate": 2.78736257260829e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18023446202278137,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4900.4,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 3.0501342882721576,
|
|
"grad_norm": 0.613476158397626,
|
|
"learning_rate": 2.7791528343992494e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14587222039699554,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4158.8,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 3.0590868397493285,
|
|
"grad_norm": 0.7294903187312021,
|
|
"learning_rate": 2.7709275891240936e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1322895586490631,
|
|
"step": 1710,
|
|
"valid_targets_mean": 2585.0,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 3.0680393912264994,
|
|
"grad_norm": 0.6572500474612717,
|
|
"learning_rate": 2.7626870004855236e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15101733803749084,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4297.8,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 3.0769919427036707,
|
|
"grad_norm": 0.6803417140783832,
|
|
"learning_rate": 2.7544312324916088e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14546285569667816,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3869.4,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 3.0859444941808416,
|
|
"grad_norm": 0.6317442286776777,
|
|
"learning_rate": 2.7461604494525257e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1800333857536316,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4861.0,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 3.0948970456580125,
|
|
"grad_norm": 0.7419679306586767,
|
|
"learning_rate": 2.7378748159772888e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15141788125038147,
|
|
"step": 1730,
|
|
"valid_targets_mean": 2425.8,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 3.1038495971351834,
|
|
"grad_norm": 0.7184181395352648,
|
|
"learning_rate": 2.7295744969704725e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19635578989982605,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3549.2,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 3.1128021486123547,
|
|
"grad_norm": 0.5637059424050045,
|
|
"learning_rate": 2.7212596576289264e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0870228111743927,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2423.9,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 3.1217547000895256,
|
|
"grad_norm": 0.6187756682224378,
|
|
"learning_rate": 2.712930463438496e-05,
|
|
"loss": 0.3054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1919945776462555,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4811.8,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 3.1307072515666965,
|
|
"grad_norm": 0.59176036605801,
|
|
"learning_rate": 2.7045870801707194e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11655890196561813,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2882.5,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 3.1396598030438674,
|
|
"grad_norm": 0.6273416731577955,
|
|
"learning_rate": 2.6962296738795344e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1495613157749176,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5002.0,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 3.1486123545210383,
|
|
"grad_norm": 0.645728944689026,
|
|
"learning_rate": 2.687858410897971e-05,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1856779158115387,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3773.8,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 3.1575649059982096,
|
|
"grad_norm": 0.7516562586294452,
|
|
"learning_rate": 2.679473457834842e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14939184486865997,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2931.6,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 3.1665174574753805,
|
|
"grad_norm": 0.6046394108437115,
|
|
"learning_rate": 2.6710749815714262e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13386991620063782,
|
|
"step": 1770,
|
|
"valid_targets_mean": 3748.0,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 3.1754700089525514,
|
|
"grad_norm": 0.7118151612384511,
|
|
"learning_rate": 2.6626631492581475e-05,
|
|
"loss": 0.3022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16341659426689148,
|
|
"step": 1775,
|
|
"valid_targets_mean": 2723.5,
|
|
"valid_targets_min": 601
|
|
},
|
|
{
|
|
"epoch": 3.1844225604297223,
|
|
"grad_norm": 0.6155300408382743,
|
|
"learning_rate": 2.654238128311249e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15156874060630798,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3313.4,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 3.1933751119068936,
|
|
"grad_norm": 0.8067424596360118,
|
|
"learning_rate": 2.645800086409458e-05,
|
|
"loss": 0.3205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20328623056411743,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3177.9,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 3.2023276633840645,
|
|
"grad_norm": 0.641857203118475,
|
|
"learning_rate": 2.637349191490654e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2052251100540161,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4034.6,
|
|
"valid_targets_min": 668
|
|
},
|
|
{
|
|
"epoch": 3.2112802148612354,
|
|
"grad_norm": 0.6368261308999402,
|
|
"learning_rate": 2.6288856117485216e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17902475595474243,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4138.2,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 3.2202327663384063,
|
|
"grad_norm": 0.5548069381687681,
|
|
"learning_rate": 2.6204095156292048e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14810031652450562,
|
|
"step": 1800,
|
|
"valid_targets_mean": 5104.6,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 3.2291853178155776,
|
|
"grad_norm": 0.753660257908115,
|
|
"learning_rate": 2.6119210718279538e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11428888142108917,
|
|
"step": 1805,
|
|
"valid_targets_mean": 2015.9,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 3.2381378692927485,
|
|
"grad_norm": 0.5660494935503344,
|
|
"learning_rate": 2.60342044928577e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13116174936294556,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4031.4,
|
|
"valid_targets_min": 1595
|
|
},
|
|
{
|
|
"epoch": 3.2470904207699194,
|
|
"grad_norm": 0.533929651082135,
|
|
"learning_rate": 2.5949078171860395e-05,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12700650095939636,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5214.2,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 3.2560429722470903,
|
|
"grad_norm": 0.5416322919386064,
|
|
"learning_rate": 2.5863833449511706e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17714446783065796,
|
|
"step": 1820,
|
|
"valid_targets_mean": 6259.0,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 3.2649955237242616,
|
|
"grad_norm": 0.43344116835180807,
|
|
"learning_rate": 2.5778472022392184e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21672791242599487,
|
|
"step": 1825,
|
|
"valid_targets_mean": 10914.1,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 3.2739480752014325,
|
|
"grad_norm": 0.5802891194562758,
|
|
"learning_rate": 2.5692995589405087e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.127014622092247,
|
|
"step": 1830,
|
|
"valid_targets_mean": 3817.8,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 3.2829006266786034,
|
|
"grad_norm": 0.64587786194205,
|
|
"learning_rate": 2.5607405851742578e-05,
|
|
"loss": 0.2982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1963973343372345,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4352.9,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 3.2918531781557743,
|
|
"grad_norm": 0.5684432148710743,
|
|
"learning_rate": 2.5521704512851884e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15847089886665344,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4296.1,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 3.3008057296329456,
|
|
"grad_norm": 0.6666687517731704,
|
|
"learning_rate": 2.5435893278401328e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1337079405784607,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3256.2,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.3097582811101165,
|
|
"grad_norm": 0.7717906084958367,
|
|
"learning_rate": 2.534997385624647e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12368161976337433,
|
|
"step": 1850,
|
|
"valid_targets_mean": 2424.4,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 3.3187108325872874,
|
|
"grad_norm": 0.735202247533644,
|
|
"learning_rate": 2.5263947956396043e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.112772636115551,
|
|
"step": 1855,
|
|
"valid_targets_mean": 2231.6,
|
|
"valid_targets_min": 1053
|
|
},
|
|
{
|
|
"epoch": 3.3276633840644583,
|
|
"grad_norm": 0.7702737075049189,
|
|
"learning_rate": 2.5177817290977967e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14182201027870178,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2349.6,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 3.336615935541629,
|
|
"grad_norm": 0.6119811350195906,
|
|
"learning_rate": 2.5091583574205247e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14366689324378967,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4402.0,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.3455684870188005,
|
|
"grad_norm": 0.6505709086023806,
|
|
"learning_rate": 2.5005248522341868e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13018080592155457,
|
|
"step": 1870,
|
|
"valid_targets_mean": 2860.2,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 3.3545210384959714,
|
|
"grad_norm": 0.7074105189549208,
|
|
"learning_rate": 2.4918813853668632e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19293466210365295,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4045.8,
|
|
"valid_targets_min": 1493
|
|
},
|
|
{
|
|
"epoch": 3.3634735899731423,
|
|
"grad_norm": 0.5807379486607306,
|
|
"learning_rate": 2.483228128844896e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1795765608549118,
|
|
"step": 1880,
|
|
"valid_targets_mean": 5007.6,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 3.372426141450313,
|
|
"grad_norm": 0.6968229011978457,
|
|
"learning_rate": 2.4745652548894654e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14516671001911163,
|
|
"step": 1885,
|
|
"valid_targets_mean": 2635.5,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 3.3813786929274845,
|
|
"grad_norm": 0.5800079788765263,
|
|
"learning_rate": 2.4658929359131634e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09096183627843857,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2528.5,
|
|
"valid_targets_min": 1193
|
|
},
|
|
{
|
|
"epoch": 3.3903312444046554,
|
|
"grad_norm": 0.6408890861943591,
|
|
"learning_rate": 2.4572113445165603e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13080674409866333,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3830.5,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 3.3992837958818263,
|
|
"grad_norm": 0.5575769883321318,
|
|
"learning_rate": 2.4485206534847706e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18095198273658752,
|
|
"step": 1900,
|
|
"valid_targets_mean": 5400.2,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 3.408236347358997,
|
|
"grad_norm": 0.6243600107033797,
|
|
"learning_rate": 2.439821035784014e-05,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12245135009288788,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3295.4,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 3.4171888988361685,
|
|
"grad_norm": 0.7984110210507213,
|
|
"learning_rate": 2.431112664558173e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12867091596126556,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2177.6,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 3.4261414503133394,
|
|
"grad_norm": 0.7626136272645896,
|
|
"learning_rate": 2.4223957131253467e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16945910453796387,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4280.2,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 3.4350940017905103,
|
|
"grad_norm": 0.879960636032773,
|
|
"learning_rate": 2.4136703549744028e-05,
|
|
"loss": 0.3266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17531342804431915,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2265.9,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 3.444046553267681,
|
|
"grad_norm": 0.6069014751890103,
|
|
"learning_rate": 2.4049367637615215e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15128836035728455,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3898.9,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 3.452999104744852,
|
|
"grad_norm": 0.6683394762004051,
|
|
"learning_rate": 2.396195113306742e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11693670600652695,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2843.2,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 3.4619516562220234,
|
|
"grad_norm": 0.5527522960821272,
|
|
"learning_rate": 2.3874455775905036e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10419487953186035,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2983.6,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 3.4709042076991943,
|
|
"grad_norm": 0.5252735073672182,
|
|
"learning_rate": 2.3786883307501794e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10443928092718124,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3916.2,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 3.479856759176365,
|
|
"grad_norm": 0.635207514509153,
|
|
"learning_rate": 2.3699235470766147e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1575881689786911,
|
|
"step": 1945,
|
|
"valid_targets_mean": 4008.0,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 3.4888093106535365,
|
|
"grad_norm": 0.6106698152368751,
|
|
"learning_rate": 2.3611514010106564e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09959106892347336,
|
|
"step": 1950,
|
|
"valid_targets_mean": 2759.2,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 3.4977618621307074,
|
|
"grad_norm": 0.673225130627237,
|
|
"learning_rate": 2.35237206713968e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14371871948242188,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3085.5,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 3.5067144136078783,
|
|
"grad_norm": 0.6968504081569306,
|
|
"learning_rate": 2.3435857201941172e-05,
|
|
"loss": 0.3298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17559152841567993,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3212.4,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 3.515666965085049,
|
|
"grad_norm": 0.819564689611184,
|
|
"learning_rate": 2.3347925350439765e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08735944330692291,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2583.0,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 3.52461951656222,
|
|
"grad_norm": 0.6468820188123998,
|
|
"learning_rate": 2.3259926866953636e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1301145851612091,
|
|
"step": 1970,
|
|
"valid_targets_mean": 2990.2,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 3.5335720680393914,
|
|
"grad_norm": 0.6244370600947371,
|
|
"learning_rate": 2.3171863502869993e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1787623018026352,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4180.9,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 3.5425246195165623,
|
|
"grad_norm": 0.5967752262455601,
|
|
"learning_rate": 2.3083737010867316e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1309892237186432,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4325.4,
|
|
"valid_targets_min": 2600
|
|
},
|
|
{
|
|
"epoch": 3.551477170993733,
|
|
"grad_norm": 0.6563237980167774,
|
|
"learning_rate": 2.2995549144880487e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21774521470069885,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5259.1,
|
|
"valid_targets_min": 500
|
|
},
|
|
{
|
|
"epoch": 3.560429722470904,
|
|
"grad_norm": 0.7265770510021908,
|
|
"learning_rate": 2.290730166006589e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371801793575287,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2824.9,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 3.569382273948075,
|
|
"grad_norm": 0.5905568901366419,
|
|
"learning_rate": 2.2818996312766474e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12975779175758362,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3795.4,
|
|
"valid_targets_min": 1783
|
|
},
|
|
{
|
|
"epoch": 3.5783348254252463,
|
|
"grad_norm": 0.9382745533338204,
|
|
"learning_rate": 2.2730634860476778e-05,
|
|
"loss": 0.303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15109074115753174,
|
|
"step": 2000,
|
|
"valid_targets_mean": 2627.1,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 3.587287376902417,
|
|
"grad_norm": 0.6451958349593174,
|
|
"learning_rate": 2.264221906180798e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13331124186515808,
|
|
"step": 2005,
|
|
"valid_targets_mean": 4238.4,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 3.596239928379588,
|
|
"grad_norm": 0.5218084865899012,
|
|
"learning_rate": 2.255375067645289e-05,
|
|
"loss": 0.2895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12357210367918015,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4533.5,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 3.6051924798567594,
|
|
"grad_norm": 0.6280492068387579,
|
|
"learning_rate": 2.2465231465150902e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15180960297584534,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4229.2,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 3.6141450313339303,
|
|
"grad_norm": 0.5459403625133381,
|
|
"learning_rate": 2.2376663189653002e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14584055542945862,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4946.4,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 3.623097582811101,
|
|
"grad_norm": 0.9835261973194854,
|
|
"learning_rate": 2.2288047612686655e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17734041810035706,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2308.6,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 3.632050134288272,
|
|
"grad_norm": 0.6974222571881543,
|
|
"learning_rate": 2.219938649792074e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1139960065484047,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2714.5,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 3.641002685765443,
|
|
"grad_norm": 0.7573070540679137,
|
|
"learning_rate": 2.2110681609930458e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10645341873168945,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2737.5,
|
|
"valid_targets_min": 1408
|
|
},
|
|
{
|
|
"epoch": 3.6499552372426143,
|
|
"grad_norm": 0.5556169607261522,
|
|
"learning_rate": 2.2021934714162212e-05,
|
|
"loss": 0.2987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12965692579746246,
|
|
"step": 2040,
|
|
"valid_targets_mean": 5005.0,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 3.658907788719785,
|
|
"grad_norm": 0.5206042195827867,
|
|
"learning_rate": 2.1933147576898447e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11286148428916931,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3836.8,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 3.667860340196956,
|
|
"grad_norm": 0.7793043605399187,
|
|
"learning_rate": 2.1844321965222525e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22073161602020264,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3054.5,
|
|
"valid_targets_min": 994
|
|
},
|
|
{
|
|
"epoch": 3.6768128916741274,
|
|
"grad_norm": 0.5769817589721981,
|
|
"learning_rate": 2.1755459646983545e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10495971888303757,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3625.0,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 3.6857654431512983,
|
|
"grad_norm": 0.6481517125690924,
|
|
"learning_rate": 2.1666562390761147e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18431368470191956,
|
|
"step": 2060,
|
|
"valid_targets_mean": 5741.5,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 3.694717994628469,
|
|
"grad_norm": 0.7275087755723013,
|
|
"learning_rate": 2.1577631965830324e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12168044596910477,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2084.9,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 3.70367054610564,
|
|
"grad_norm": 0.6423090431771338,
|
|
"learning_rate": 2.1488670142126234e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1942213922739029,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4032.9,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.712623097582811,
|
|
"grad_norm": 0.5518641128616621,
|
|
"learning_rate": 2.13996786902089e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1275741159915924,
|
|
"step": 2075,
|
|
"valid_targets_mean": 3884.5,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 3.7215756490599823,
|
|
"grad_norm": 0.8036840000440701,
|
|
"learning_rate": 2.1310659381228066e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11641264706850052,
|
|
"step": 2080,
|
|
"valid_targets_mean": 1787.6,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 3.730528200537153,
|
|
"grad_norm": 0.6250950737248954,
|
|
"learning_rate": 2.122161398688788e-05,
|
|
"loss": 0.294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16850410401821136,
|
|
"step": 2085,
|
|
"valid_targets_mean": 5024.2,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 3.739480752014324,
|
|
"grad_norm": 0.7620593924231298,
|
|
"learning_rate": 2.1132544279411655e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15078432857990265,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2521.5,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 3.748433303491495,
|
|
"grad_norm": 0.6376558624151332,
|
|
"learning_rate": 2.1043452031506578e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18131475150585175,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4089.2,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 3.757385854968666,
|
|
"grad_norm": 0.6510696932710319,
|
|
"learning_rate": 2.0954339016328485e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15922397375106812,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4445.5,
|
|
"valid_targets_min": 2170
|
|
},
|
|
{
|
|
"epoch": 3.766338406445837,
|
|
"grad_norm": 0.577097450206775,
|
|
"learning_rate": 2.08652070074465e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09382876753807068,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3492.4,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 3.775290957923008,
|
|
"grad_norm": 0.5921188375445776,
|
|
"learning_rate": 2.077605777880778e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10202471911907196,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2826.9,
|
|
"valid_targets_min": 1268
|
|
},
|
|
{
|
|
"epoch": 3.784243509400179,
|
|
"grad_norm": 0.5115559628016806,
|
|
"learning_rate": 2.0686893104702198e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10683996230363846,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5196.5,
|
|
"valid_targets_min": 1911
|
|
},
|
|
{
|
|
"epoch": 3.7931960608773503,
|
|
"grad_norm": 0.7148038157802632,
|
|
"learning_rate": 2.0597714759727028e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17320764064788818,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3600.4,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 3.802148612354521,
|
|
"grad_norm": 0.568336718540444,
|
|
"learning_rate": 2.050852451875163e-05,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10769255459308624,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3134.1,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 3.811101163831692,
|
|
"grad_norm": 2.2889402597376063,
|
|
"learning_rate": 2.041932415688212e-05,
|
|
"loss": 0.3295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24901776015758514,
|
|
"step": 2130,
|
|
"valid_targets_mean": 5238.8,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 3.820053715308863,
|
|
"grad_norm": 0.7498566172955531,
|
|
"learning_rate": 2.0330115449426054e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17251518368721008,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3523.2,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 3.829006266786034,
|
|
"grad_norm": 0.6454834290568767,
|
|
"learning_rate": 2.0240900171857064e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13242211937904358,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2924.2,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 3.837958818263205,
|
|
"grad_norm": 0.7545116802419165,
|
|
"learning_rate": 2.0151680099779574e-05,
|
|
"loss": 0.2945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14533577859401703,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2128.8,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 3.846911369740376,
|
|
"grad_norm": 0.5716602619910933,
|
|
"learning_rate": 2.0062457008893408e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13911041617393494,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5648.6,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 3.855863921217547,
|
|
"grad_norm": 0.719492062343642,
|
|
"learning_rate": 1.9973232674958487e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18894167244434357,
|
|
"step": 2155,
|
|
"valid_targets_mean": 2912.6,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 3.864816472694718,
|
|
"grad_norm": 0.6512482583559067,
|
|
"learning_rate": 1.9884008873759446e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13799162209033966,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3115.5,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 3.8737690241718887,
|
|
"grad_norm": 0.5445724516346668,
|
|
"learning_rate": 1.979478738107035e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20787352323532104,
|
|
"step": 2165,
|
|
"valid_targets_mean": 6374.6,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 3.88272157564906,
|
|
"grad_norm": 0.5513455230236962,
|
|
"learning_rate": 1.9705569972619295e-05,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14444196224212646,
|
|
"step": 2170,
|
|
"valid_targets_mean": 5843.2,
|
|
"valid_targets_min": 1690
|
|
},
|
|
{
|
|
"epoch": 3.891674127126231,
|
|
"grad_norm": 1.1283522903092538,
|
|
"learning_rate": 1.961635842405311e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14688581228256226,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4568.0,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 3.900626678603402,
|
|
"grad_norm": 0.46615888239311576,
|
|
"learning_rate": 1.9527154510901972e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12086974084377289,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5431.4,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 3.909579230080573,
|
|
"grad_norm": 0.6815721768826772,
|
|
"learning_rate": 1.9437960008544126e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11174547672271729,
|
|
"step": 2185,
|
|
"valid_targets_mean": 2458.1,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 3.918531781557744,
|
|
"grad_norm": 0.6300320312371228,
|
|
"learning_rate": 1.9348776692170494e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311410665512085,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3613.5,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 3.927484333034915,
|
|
"grad_norm": 0.6036465789016668,
|
|
"learning_rate": 1.9259606336749374e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13302668929100037,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3842.6,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 3.936436884512086,
|
|
"grad_norm": 0.5896110554577646,
|
|
"learning_rate": 1.9170450716991122e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12744395434856415,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2985.9,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 3.9453894359892567,
|
|
"grad_norm": 0.6582203423205444,
|
|
"learning_rate": 1.90813116073128e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17242342233657837,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4057.6,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 3.954341987466428,
|
|
"grad_norm": 0.6421456714075086,
|
|
"learning_rate": 1.899219078180289e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14306621253490448,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3036.6,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 3.963294538943599,
|
|
"grad_norm": 0.6036407507169996,
|
|
"learning_rate": 1.8903090014185984e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13021019101142883,
|
|
"step": 2215,
|
|
"valid_targets_mean": 3490.6,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 3.97224709042077,
|
|
"grad_norm": 0.6312426539317694,
|
|
"learning_rate": 1.881401107778744e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14264509081840515,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3784.1,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 3.981199641897941,
|
|
"grad_norm": 0.7232138636484131,
|
|
"learning_rate": 1.8724955745498147e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1296793520450592,
|
|
"step": 2225,
|
|
"valid_targets_mean": 2648.1,
|
|
"valid_targets_min": 1959
|
|
},
|
|
{
|
|
"epoch": 3.990152193375112,
|
|
"grad_norm": 0.7135232139570937,
|
|
"learning_rate": 1.863592578973921e-05,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1762787401676178,
|
|
"step": 2230,
|
|
"valid_targets_mean": 2832.0,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 3.999104744852283,
|
|
"grad_norm": 0.6524854149615098,
|
|
"learning_rate": 1.854692298242667e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08761607855558395,
|
|
"step": 2235,
|
|
"valid_targets_mean": 2061.9,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 4.007162041181736,
|
|
"grad_norm": 0.652689916111008,
|
|
"learning_rate": 1.8457949094936265e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1780852973461151,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3954.2,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 4.016114592658908,
|
|
"grad_norm": 0.6939405565326737,
|
|
"learning_rate": 1.8369005898068136e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13696125149726868,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3110.2,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 4.025067144136079,
|
|
"grad_norm": 0.6378249349993657,
|
|
"learning_rate": 1.8280095162011617e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13300368189811707,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4896.8,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 4.0340196956132495,
|
|
"grad_norm": 0.7368942381013488,
|
|
"learning_rate": 1.819121865630998e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15255354344844818,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3039.0,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 4.042972247090421,
|
|
"grad_norm": 0.7766928914583657,
|
|
"learning_rate": 1.8102378149825242e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13009774684906006,
|
|
"step": 2260,
|
|
"valid_targets_mean": 2237.8,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 4.051924798567592,
|
|
"grad_norm": 0.7000134703860864,
|
|
"learning_rate": 1.801357541070294e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389508992433548,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3212.9,
|
|
"valid_targets_min": 1664
|
|
},
|
|
{
|
|
"epoch": 4.060877350044763,
|
|
"grad_norm": 0.63986533138509,
|
|
"learning_rate": 1.7924812206336956e-05,
|
|
"loss": 0.2903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371454894542694,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3442.1,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 4.069829901521934,
|
|
"grad_norm": 0.7149119111892002,
|
|
"learning_rate": 1.7836090303334312e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14737096428871155,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3364.8,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 4.078782452999104,
|
|
"grad_norm": 0.7621996479130542,
|
|
"learning_rate": 1.7747411467480058e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07246645539999008,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2137.5,
|
|
"valid_targets_min": 297
|
|
},
|
|
{
|
|
"epoch": 4.087735004476276,
|
|
"grad_norm": 0.6927270599507354,
|
|
"learning_rate": 1.7658777463702076e-05,
|
|
"loss": 0.2907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13911257684230804,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3724.5,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 4.096687555953447,
|
|
"grad_norm": 0.6592900626100703,
|
|
"learning_rate": 1.7570190056035996e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12096744775772095,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3600.1,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 4.1056401074306175,
|
|
"grad_norm": 0.5529767948719162,
|
|
"learning_rate": 1.7481651007590054e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08802123367786407,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3757.2,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 4.114592658907789,
|
|
"grad_norm": 0.7168183953666155,
|
|
"learning_rate": 1.7393162080510045e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11998733878135681,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3039.2,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 4.12354521038496,
|
|
"grad_norm": 0.6658442031434119,
|
|
"learning_rate": 1.7304725035944194e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18848788738250732,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4259.6,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 4.132497761862131,
|
|
"grad_norm": 0.6199747775481698,
|
|
"learning_rate": 1.7216341634008168e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13999530673027039,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4771.9,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 4.141450313339302,
|
|
"grad_norm": 0.6147583167095588,
|
|
"learning_rate": 1.7128013633749978e-05,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09678598493337631,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3455.9,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 4.150402864816472,
|
|
"grad_norm": 0.6828990726624986,
|
|
"learning_rate": 1.7039742793115025e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21309149265289307,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4329.9,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 4.159355416293644,
|
|
"grad_norm": 0.6398092008512408,
|
|
"learning_rate": 1.6951530868911097e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13850344717502594,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4057.2,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 4.168307967770815,
|
|
"grad_norm": 0.5902243331399586,
|
|
"learning_rate": 1.6863379616773388e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13679037988185883,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5202.8,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 4.1772605192479855,
|
|
"grad_norm": 0.7060895769409575,
|
|
"learning_rate": 1.6775290791129567e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1696832776069641,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3682.5,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 4.186213070725157,
|
|
"grad_norm": 0.7160039942556854,
|
|
"learning_rate": 1.668726614516488e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14439858496189117,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3356.0,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 4.195165622202327,
|
|
"grad_norm": 0.5775126603858747,
|
|
"learning_rate": 1.6599307430787216e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15367811918258667,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4872.6,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 4.204118173679499,
|
|
"grad_norm": 0.5871347654839713,
|
|
"learning_rate": 1.6511416398592264e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1796134114265442,
|
|
"step": 2350,
|
|
"valid_targets_mean": 4855.1,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 4.21307072515667,
|
|
"grad_norm": 0.6986302940453207,
|
|
"learning_rate": 1.64235947978287e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14644259214401245,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3897.0,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 4.22202327663384,
|
|
"grad_norm": 0.6254039148831865,
|
|
"learning_rate": 1.6335844376363295e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13061976432800293,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4755.5,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 4.230975828111012,
|
|
"grad_norm": 0.7819861990910386,
|
|
"learning_rate": 1.6248166880646225e-05,
|
|
"loss": 0.2887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1424158662557602,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2361.4,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.239928379588183,
|
|
"grad_norm": 0.6227715778784336,
|
|
"learning_rate": 1.6160564055676235e-05,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14585551619529724,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3901.1,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 4.2488809310653535,
|
|
"grad_norm": 0.6909008649845587,
|
|
"learning_rate": 1.6073037644965938e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14594003558158875,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4022.0,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 4.257833482542525,
|
|
"grad_norm": 0.46912244899919914,
|
|
"learning_rate": 1.5985589390507117e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06319141387939453,
|
|
"step": 2380,
|
|
"valid_targets_mean": 2577.6,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 4.266786034019695,
|
|
"grad_norm": 0.7369101921583909,
|
|
"learning_rate": 1.589822103273607e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14047077298164368,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2833.1,
|
|
"valid_targets_min": 1702
|
|
},
|
|
{
|
|
"epoch": 4.275738585496867,
|
|
"grad_norm": 0.7399346097155552,
|
|
"learning_rate": 1.5810934310498922e-05,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1556873321533203,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3457.5,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 4.284691136974038,
|
|
"grad_norm": 0.5486188212174534,
|
|
"learning_rate": 1.5723730961017082e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14268964529037476,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5756.1,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 4.293643688451208,
|
|
"grad_norm": 0.5690671174903594,
|
|
"learning_rate": 1.5636612719852615e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12536361813545227,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3819.0,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 4.30259623992838,
|
|
"grad_norm": 0.8433999368027125,
|
|
"learning_rate": 1.5549581320873715e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2086883932352066,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4699.0,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 4.311548791405551,
|
|
"grad_norm": 0.5776608739956453,
|
|
"learning_rate": 1.5462638496220223e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1258239895105362,
|
|
"step": 2410,
|
|
"valid_targets_mean": 5195.8,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 4.3205013428827215,
|
|
"grad_norm": 0.6302947068102072,
|
|
"learning_rate": 1.537578597626912e-05,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.172328382730484,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4611.9,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 4.329453894359893,
|
|
"grad_norm": 0.5973858707112296,
|
|
"learning_rate": 1.528902548960009e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11022946983575821,
|
|
"step": 2420,
|
|
"valid_targets_mean": 2726.6,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 4.338406445837063,
|
|
"grad_norm": 0.7250095814311094,
|
|
"learning_rate": 1.5202358762961155e-05,
|
|
"loss": 0.2986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334499567747116,
|
|
"step": 2425,
|
|
"valid_targets_mean": 2546.6,
|
|
"valid_targets_min": 607
|
|
},
|
|
{
|
|
"epoch": 4.347358997314235,
|
|
"grad_norm": 0.5687437987785104,
|
|
"learning_rate": 1.511578752123426e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1087578758597374,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3391.1,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 4.356311548791406,
|
|
"grad_norm": 0.6236160085767933,
|
|
"learning_rate": 1.5029313487400958e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15010881423950195,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4927.9,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.365264100268576,
|
|
"grad_norm": 0.6108000626099473,
|
|
"learning_rate": 1.4942938382508147e-05,
|
|
"loss": 0.2711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14772196114063263,
|
|
"step": 2440,
|
|
"valid_targets_mean": 4286.6,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 4.374216651745748,
|
|
"grad_norm": 0.5794759583267514,
|
|
"learning_rate": 1.4856663925633776e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18810175359249115,
|
|
"step": 2445,
|
|
"valid_targets_mean": 6704.9,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 4.383169203222918,
|
|
"grad_norm": 0.5144877574661774,
|
|
"learning_rate": 1.4770491833852663e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10823938250541687,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4218.6,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 4.3921217547000895,
|
|
"grad_norm": 0.6115457695521388,
|
|
"learning_rate": 1.4684423822202297e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1742391586303711,
|
|
"step": 2455,
|
|
"valid_targets_mean": 4900.4,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 4.401074306177261,
|
|
"grad_norm": 0.6603355387772237,
|
|
"learning_rate": 1.4598461603648736e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1439456045627594,
|
|
"step": 2460,
|
|
"valid_targets_mean": 4160.2,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 4.410026857654431,
|
|
"grad_norm": 0.5592975604579535,
|
|
"learning_rate": 1.451260688905245e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12507349252700806,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4971.6,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 4.418979409131603,
|
|
"grad_norm": 0.5425052312698014,
|
|
"learning_rate": 1.4426861387134361e-05,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11168238520622253,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4877.9,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 4.427931960608774,
|
|
"grad_norm": 0.5221747063544983,
|
|
"learning_rate": 1.4341226804441766e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12132534384727478,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5736.0,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 4.436884512085944,
|
|
"grad_norm": 0.6687015942354837,
|
|
"learning_rate": 1.4255704845314406e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08877840638160706,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3548.1,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 4.445837063563116,
|
|
"grad_norm": 0.6764095401280212,
|
|
"learning_rate": 1.4170297211850522e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1928788721561432,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4394.4,
|
|
"valid_targets_min": 2121
|
|
},
|
|
{
|
|
"epoch": 4.454789615040286,
|
|
"grad_norm": 0.6534942221563793,
|
|
"learning_rate": 1.4085005603873004e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1439860314130783,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4347.1,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 4.4637421665174575,
|
|
"grad_norm": 0.6576143650799924,
|
|
"learning_rate": 1.3999831718895545e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14860865473747253,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4357.6,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 4.472694717994629,
|
|
"grad_norm": 0.6216399384728338,
|
|
"learning_rate": 1.3914777252088851e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0829973816871643,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2239.1,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 4.481647269471799,
|
|
"grad_norm": 0.677746287070446,
|
|
"learning_rate": 1.3829843896246917e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10659922659397125,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3080.5,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 4.490599820948971,
|
|
"grad_norm": 0.6894645564542359,
|
|
"learning_rate": 1.3745033341753337e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14530538022518158,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3823.0,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 4.499552372426141,
|
|
"grad_norm": 0.6848821237514404,
|
|
"learning_rate": 1.3660347276547645e-05,
|
|
"loss": 0.2802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1529255509376526,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3820.8,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 4.508504923903312,
|
|
"grad_norm": 0.6641129507383274,
|
|
"learning_rate": 1.3575787386091745e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14307177066802979,
|
|
"step": 2520,
|
|
"valid_targets_mean": 3578.8,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 4.517457475380484,
|
|
"grad_norm": 0.536151610151765,
|
|
"learning_rate": 1.3491355353336338e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12758341431617737,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4726.1,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 4.526410026857654,
|
|
"grad_norm": 0.7168460298420432,
|
|
"learning_rate": 1.3407052858687432e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16223827004432678,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4002.4,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 4.5353625783348255,
|
|
"grad_norm": 0.5770371894689392,
|
|
"learning_rate": 1.3322881579972934e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19758792221546173,
|
|
"step": 2535,
|
|
"valid_targets_mean": 6236.2,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 4.544315129811997,
|
|
"grad_norm": 0.635407424302777,
|
|
"learning_rate": 1.323884319240921e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14273598790168762,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3251.4,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 4.553267681289167,
|
|
"grad_norm": 0.6247115660878144,
|
|
"learning_rate": 1.3154939368567771e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11011286079883575,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3317.2,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 4.562220232766339,
|
|
"grad_norm": 0.7097429777926196,
|
|
"learning_rate": 1.3071171778341975e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11763446033000946,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2547.4,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 4.571172784243509,
|
|
"grad_norm": 0.7507832272507481,
|
|
"learning_rate": 1.2987542088913801e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15912875533103943,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3061.6,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 4.58012533572068,
|
|
"grad_norm": 0.688816376455702,
|
|
"learning_rate": 1.290405196472064e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14911401271820068,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3128.0,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 4.589077887197852,
|
|
"grad_norm": 0.661762219202973,
|
|
"learning_rate": 1.282070306742222e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12021621316671371,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2507.5,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 4.598030438675022,
|
|
"grad_norm": 0.9591904980428351,
|
|
"learning_rate": 1.2737497055867482e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11839226633310318,
|
|
"step": 2570,
|
|
"valid_targets_mean": 1713.8,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 4.6069829901521935,
|
|
"grad_norm": 0.6924345045587993,
|
|
"learning_rate": 1.2654435586061604e-05,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16126775741577148,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3368.1,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 4.615935541629364,
|
|
"grad_norm": 0.6083972711681029,
|
|
"learning_rate": 1.2571520311133012e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23135092854499817,
|
|
"step": 2580,
|
|
"valid_targets_mean": 5297.0,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 4.624888093106535,
|
|
"grad_norm": 0.6780051708511852,
|
|
"learning_rate": 1.2488752881300514e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10475669801235199,
|
|
"step": 2585,
|
|
"valid_targets_mean": 2886.1,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 4.633840644583707,
|
|
"grad_norm": 0.5687322217353462,
|
|
"learning_rate": 1.2406134943840398e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14360423386096954,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4537.2,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 4.642793196060877,
|
|
"grad_norm": 0.6473869261700407,
|
|
"learning_rate": 1.232366814305372e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11344386637210846,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3455.0,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 4.651745747538048,
|
|
"grad_norm": 0.6749099477987037,
|
|
"learning_rate": 1.2241354120233527e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15856057405471802,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4211.4,
|
|
"valid_targets_min": 913
|
|
},
|
|
{
|
|
"epoch": 4.66069829901522,
|
|
"grad_norm": 0.7881154419641004,
|
|
"learning_rate": 1.2159194513632213e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1507546603679657,
|
|
"step": 2605,
|
|
"valid_targets_mean": 2755.9,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 4.66965085049239,
|
|
"grad_norm": 0.7379646376101288,
|
|
"learning_rate": 1.20771909584289e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09531432390213013,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2293.4,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 4.6786034019695615,
|
|
"grad_norm": 0.6830443105031209,
|
|
"learning_rate": 1.1995345086696919e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1766771525144577,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3217.0,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 4.687555953446733,
|
|
"grad_norm": 0.5962665890821673,
|
|
"learning_rate": 1.1913658527371284e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14932385087013245,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5058.2,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 4.696508504923903,
|
|
"grad_norm": 0.47356874610934896,
|
|
"learning_rate": 1.1832132906216314e-05,
|
|
"loss": 0.2873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18485575914382935,
|
|
"step": 2625,
|
|
"valid_targets_mean": 8106.5,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 4.705461056401075,
|
|
"grad_norm": 0.56409332554102,
|
|
"learning_rate": 1.1750769845793268e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16708049178123474,
|
|
"step": 2630,
|
|
"valid_targets_mean": 5391.1,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 4.714413607878245,
|
|
"grad_norm": 0.601411143213395,
|
|
"learning_rate": 1.1669570965428028e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1906755566596985,
|
|
"step": 2635,
|
|
"valid_targets_mean": 5690.4,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 4.723366159355416,
|
|
"grad_norm": 0.6789549439651793,
|
|
"learning_rate": 1.1588537881178902e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11652162671089172,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2992.8,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 4.732318710832587,
|
|
"grad_norm": 0.6723833109915706,
|
|
"learning_rate": 1.150767220580444e-05,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11995535343885422,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3536.5,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 4.741271262309758,
|
|
"grad_norm": 0.6575823926090214,
|
|
"learning_rate": 1.1426975548731329e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10528416931629181,
|
|
"step": 2650,
|
|
"valid_targets_mean": 3233.6,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 4.7502238137869295,
|
|
"grad_norm": 0.8973962588630344,
|
|
"learning_rate": 1.1346449516022395e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13312417268753052,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2539.0,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 4.7591763652641,
|
|
"grad_norm": 0.7406132454253875,
|
|
"learning_rate": 1.1266095710344611e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11993972957134247,
|
|
"step": 2660,
|
|
"valid_targets_mean": 2144.9,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 4.768128916741271,
|
|
"grad_norm": 0.6553369560744424,
|
|
"learning_rate": 1.1185915730937209e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2034987509250641,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4664.8,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 4.777081468218443,
|
|
"grad_norm": 0.6504080770189988,
|
|
"learning_rate": 1.1105911173579831e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13247156143188477,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3853.6,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 4.786034019695613,
|
|
"grad_norm": 0.5396966396231404,
|
|
"learning_rate": 1.1026083630560814e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13336597383022308,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4759.8,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 4.794986571172784,
|
|
"grad_norm": 0.7028031847571087,
|
|
"learning_rate": 1.0946434690645446e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11444665491580963,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2722.8,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 4.803939122649956,
|
|
"grad_norm": 0.7611209196171498,
|
|
"learning_rate": 1.08669659390444e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15461182594299316,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2940.1,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 4.812891674127126,
|
|
"grad_norm": 0.4826017225952945,
|
|
"learning_rate": 1.0787678957382124e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14187920093536377,
|
|
"step": 2690,
|
|
"valid_targets_mean": 7101.1,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 4.8218442256042975,
|
|
"grad_norm": 0.6866006226835454,
|
|
"learning_rate": 1.0708575323665422e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22017309069633484,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4468.1,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 4.830796777081468,
|
|
"grad_norm": 0.5309211399639769,
|
|
"learning_rate": 1.062965661225201e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2241315245628357,
|
|
"step": 2700,
|
|
"valid_targets_mean": 7465.5,
|
|
"valid_targets_min": 2375
|
|
},
|
|
{
|
|
"epoch": 4.839749328558639,
|
|
"grad_norm": 0.4591025984323562,
|
|
"learning_rate": 1.0550924393819204e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11950083076953888,
|
|
"step": 2705,
|
|
"valid_targets_mean": 7293.9,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 4.84870188003581,
|
|
"grad_norm": 0.6619076366623318,
|
|
"learning_rate": 1.0472380235332631e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22019797563552856,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4832.2,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 4.857654431512981,
|
|
"grad_norm": 0.5843763984853266,
|
|
"learning_rate": 1.0394025700015064e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13572683930397034,
|
|
"step": 2715,
|
|
"valid_targets_mean": 5376.1,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 4.866606982990152,
|
|
"grad_norm": 0.7344743693867272,
|
|
"learning_rate": 1.0315862347315314e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16285094618797302,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2709.2,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 4.875559534467323,
|
|
"grad_norm": 0.6503799281847995,
|
|
"learning_rate": 1.0237891732877202e-05,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07052436470985413,
|
|
"step": 2725,
|
|
"valid_targets_mean": 2046.9,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 4.884512085944494,
|
|
"grad_norm": 0.617330707981824,
|
|
"learning_rate": 1.0160115408508533e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12248671799898148,
|
|
"step": 2730,
|
|
"valid_targets_mean": 4232.9,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 4.8934646374216655,
|
|
"grad_norm": 0.7526164512433189,
|
|
"learning_rate": 1.008253492215031e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12305112183094025,
|
|
"step": 2735,
|
|
"valid_targets_mean": 2726.4,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 4.902417188898836,
|
|
"grad_norm": 0.7644756910958415,
|
|
"learning_rate": 1.0005151817845853e-05,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1255050152540207,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3517.9,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 4.911369740376007,
|
|
"grad_norm": 0.697384124011533,
|
|
"learning_rate": 9.927967635710087e-06,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.198576420545578,
|
|
"step": 2745,
|
|
"valid_targets_mean": 3570.9,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 4.920322291853179,
|
|
"grad_norm": 0.6518775685954262,
|
|
"learning_rate": 9.850983911898913e-06,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13477128744125366,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3412.9,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 4.929274843330349,
|
|
"grad_norm": 0.5702427451034175,
|
|
"learning_rate": 9.774202178578592e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13000229001045227,
|
|
"step": 2755,
|
|
"valid_targets_mean": 5218.1,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 4.93822739480752,
|
|
"grad_norm": 0.5856656942790103,
|
|
"learning_rate": 9.69762396389529e-06,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17482590675354004,
|
|
"step": 2760,
|
|
"valid_targets_mean": 6087.2,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 4.947179946284691,
|
|
"grad_norm": 0.5588495480516651,
|
|
"learning_rate": 9.621250791944659e-06,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14949437975883484,
|
|
"step": 2765,
|
|
"valid_targets_mean": 6034.0,
|
|
"valid_targets_min": 2237
|
|
},
|
|
{
|
|
"epoch": 4.956132497761862,
|
|
"grad_norm": 0.6552422528340169,
|
|
"learning_rate": 9.545084182741476e-06,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13703200221061707,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4118.9,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 4.9650850492390335,
|
|
"grad_norm": 0.6357647022187265,
|
|
"learning_rate": 9.469125652189403e-06,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16705867648124695,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4768.6,
|
|
"valid_targets_min": 1812
|
|
},
|
|
{
|
|
"epoch": 4.974037600716204,
|
|
"grad_norm": 0.6226941191762654,
|
|
"learning_rate": 9.393376712050847e-06,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10897770524024963,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3234.6,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 4.982990152193375,
|
|
"grad_norm": 0.6967866479605265,
|
|
"learning_rate": 9.317838869916835e-06,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13590985536575317,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3151.5,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 4.991942703670546,
|
|
"grad_norm": 0.7032441654198661,
|
|
"learning_rate": 9.242513629177033e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09712253510951996,
|
|
"step": 2790,
|
|
"valid_targets_mean": 2091.2,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.8998685532132251,
|
|
"learning_rate": 9.167402488989784e-06,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21871262788772583,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3141.6,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 5.008952551477171,
|
|
"grad_norm": 0.5672203119629013,
|
|
"learning_rate": 9.092506944252344e-06,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09469739347696304,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3623.1,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 5.017905102954342,
|
|
"grad_norm": 0.655519497598404,
|
|
"learning_rate": 9.017828485571044e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17740251123905182,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4623.1,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 5.026857654431513,
|
|
"grad_norm": 0.7020677912715662,
|
|
"learning_rate": 8.943368599231706e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13922971487045288,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3362.6,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 5.035810205908684,
|
|
"grad_norm": 0.825644641170174,
|
|
"learning_rate": 8.869128767169986e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1696254163980484,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3811.8,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 5.044762757385855,
|
|
"grad_norm": 0.8818205726342686,
|
|
"learning_rate": 8.79511046694194e-06,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13734868168830872,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2140.6,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 5.053715308863026,
|
|
"grad_norm": 0.702209345246406,
|
|
"learning_rate": 8.721315171694591e-06,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18483301997184753,
|
|
"step": 2825,
|
|
"valid_targets_mean": 6754.9,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 5.062667860340197,
|
|
"grad_norm": 0.6393303273760027,
|
|
"learning_rate": 8.647744350136612e-06,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11104267835617065,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3685.9,
|
|
"valid_targets_min": 1589
|
|
},
|
|
{
|
|
"epoch": 5.071620411817368,
|
|
"grad_norm": 0.5255206558374389,
|
|
"learning_rate": 8.574399466509085e-06,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10467648506164551,
|
|
"step": 2835,
|
|
"valid_targets_mean": 5690.4,
|
|
"valid_targets_min": 2675
|
|
},
|
|
{
|
|
"epoch": 5.080572963294539,
|
|
"grad_norm": 0.7143778521850589,
|
|
"learning_rate": 8.501281980556369e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10567007958889008,
|
|
"step": 2840,
|
|
"valid_targets_mean": 2356.2,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 5.08952551477171,
|
|
"grad_norm": 0.716682380290392,
|
|
"learning_rate": 8.428393347497057e-06,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16459281742572784,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3752.9,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 5.098478066248881,
|
|
"grad_norm": 0.5273948530519541,
|
|
"learning_rate": 8.355735017995011e-06,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13910041749477386,
|
|
"step": 2850,
|
|
"valid_targets_mean": 6443.0,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 5.107430617726052,
|
|
"grad_norm": 0.6510951941881681,
|
|
"learning_rate": 8.283308438130458e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1511838138103485,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4896.4,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 5.116383169203223,
|
|
"grad_norm": 0.6858690794166993,
|
|
"learning_rate": 8.211115049371266e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12362384796142578,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2373.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 5.125335720680394,
|
|
"grad_norm": 0.5535898753458209,
|
|
"learning_rate": 8.13915628854419e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07326360791921616,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3076.2,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 5.134288272157565,
|
|
"grad_norm": 0.7396227483650418,
|
|
"learning_rate": 8.067433587806336e-06,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11240927875041962,
|
|
"step": 2870,
|
|
"valid_targets_mean": 2971.2,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 5.143240823634736,
|
|
"grad_norm": 0.7846909642291244,
|
|
"learning_rate": 7.995948374616628e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20373129844665527,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3556.4,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 5.1521933751119064,
|
|
"grad_norm": 0.6285062997656536,
|
|
"learning_rate": 7.924702071707386e-06,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14518675208091736,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4632.1,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 5.161145926589078,
|
|
"grad_norm": 0.6039304591242447,
|
|
"learning_rate": 7.853696097056037e-06,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11315350234508514,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4218.2,
|
|
"valid_targets_min": 2653
|
|
},
|
|
{
|
|
"epoch": 5.170098478066249,
|
|
"grad_norm": 0.8183337860784501,
|
|
"learning_rate": 7.782931863856888e-06,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12211750447750092,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2672.9,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 5.17905102954342,
|
|
"grad_norm": 0.7130275214913603,
|
|
"learning_rate": 7.71241078049298e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09092959016561508,
|
|
"step": 2895,
|
|
"valid_targets_mean": 2564.5,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 5.188003581020591,
|
|
"grad_norm": 0.5935134877912135,
|
|
"learning_rate": 7.642134250508069e-06,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15817828476428986,
|
|
"step": 2900,
|
|
"valid_targets_mean": 5829.6,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 5.196956132497762,
|
|
"grad_norm": 0.8005018784062845,
|
|
"learning_rate": 7.57210367257871e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10520976781845093,
|
|
"step": 2905,
|
|
"valid_targets_mean": 1867.4,
|
|
"valid_targets_min": 1007
|
|
},
|
|
{
|
|
"epoch": 5.205908683974933,
|
|
"grad_norm": 0.798936430838001,
|
|
"learning_rate": 7.502320440486399e-06,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1396452784538269,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3387.9,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 5.214861235452104,
|
|
"grad_norm": 0.6016086885660464,
|
|
"learning_rate": 7.4327859430898445e-06,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11716702580451965,
|
|
"step": 2915,
|
|
"valid_targets_mean": 4086.6,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 5.2238137869292744,
|
|
"grad_norm": 0.6393638894017067,
|
|
"learning_rate": 7.363501564297302e-06,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13421833515167236,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3354.6,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 5.232766338406446,
|
|
"grad_norm": 0.8074938423836445,
|
|
"learning_rate": 7.294468683039058e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1262279748916626,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2581.8,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 5.241718889883617,
|
|
"grad_norm": 0.8329732110149718,
|
|
"learning_rate": 7.225688673239981e-06,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16714711487293243,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2932.5,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 5.250671441360788,
|
|
"grad_norm": 0.7395911133371526,
|
|
"learning_rate": 7.1571629037921804e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13582395017147064,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3136.4,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 5.259623992837959,
|
|
"grad_norm": 0.614797462595206,
|
|
"learning_rate": 7.088892738527731e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16834554076194763,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4725.5,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 5.26857654431513,
|
|
"grad_norm": 0.7133096956091478,
|
|
"learning_rate": 7.020879536191571e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.159225195646286,
|
|
"step": 2945,
|
|
"valid_targets_mean": 4072.2,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 5.277529095792301,
|
|
"grad_norm": 0.7046101552726547,
|
|
"learning_rate": 6.953124650414447e-06,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22066622972488403,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4210.6,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 5.286481647269472,
|
|
"grad_norm": 0.669736961464681,
|
|
"learning_rate": 6.885629429685943e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1097680926322937,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3142.9,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 5.2954341987466425,
|
|
"grad_norm": 0.8029811043618871,
|
|
"learning_rate": 6.818395217327696e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14252065122127533,
|
|
"step": 2960,
|
|
"valid_targets_mean": 2978.2,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 5.304386750223814,
|
|
"grad_norm": 0.7781260969246138,
|
|
"learning_rate": 6.751423351466604e-06,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10851228982210159,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2605.2,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 5.313339301700985,
|
|
"grad_norm": 0.6460543913806297,
|
|
"learning_rate": 6.684715165008242e-06,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17115241289138794,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4524.1,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 5.322291853178156,
|
|
"grad_norm": 0.7163531771180065,
|
|
"learning_rate": 6.618271985610316e-06,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08804437518119812,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2492.9,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 5.331244404655327,
|
|
"grad_norm": 0.6507792520937868,
|
|
"learning_rate": 6.552095135656211e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17436163127422333,
|
|
"step": 2980,
|
|
"valid_targets_mean": 4282.0,
|
|
"valid_targets_min": 1098
|
|
},
|
|
{
|
|
"epoch": 5.340196956132498,
|
|
"grad_norm": 0.6625767763231452,
|
|
"learning_rate": 6.486185932228732e-06,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1317645013332367,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3719.6,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 5.349149507609669,
|
|
"grad_norm": 0.8136256888520679,
|
|
"learning_rate": 6.420545687083823e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14313414692878723,
|
|
"step": 2990,
|
|
"valid_targets_mean": 2730.9,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 5.35810205908684,
|
|
"grad_norm": 0.704770853254314,
|
|
"learning_rate": 6.355175706624521e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12149710953235626,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3353.9,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 5.3670546105640105,
|
|
"grad_norm": 0.7794678826905497,
|
|
"learning_rate": 6.290077291874923e-06,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20259982347488403,
|
|
"step": 3000,
|
|
"valid_targets_mean": 3896.9,
|
|
"valid_targets_min": 380
|
|
},
|
|
{
|
|
"epoch": 5.376007162041182,
|
|
"grad_norm": 0.5949025014278967,
|
|
"learning_rate": 6.225251738454281e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09551064670085907,
|
|
"step": 3005,
|
|
"valid_targets_mean": 2741.0,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 5.384959713518353,
|
|
"grad_norm": 0.6274746645978216,
|
|
"learning_rate": 6.160700336551246e-06,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12926769256591797,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3956.5,
|
|
"valid_targets_min": 1077
|
|
},
|
|
{
|
|
"epoch": 5.393912264995524,
|
|
"grad_norm": 0.5995098175946255,
|
|
"learning_rate": 6.096424370898186e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13776710629463196,
|
|
"step": 3015,
|
|
"valid_targets_mean": 5238.2,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 5.402864816472695,
|
|
"grad_norm": 0.8693786800154273,
|
|
"learning_rate": 6.032425120745586e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11495324224233627,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2983.0,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 5.411817367949865,
|
|
"grad_norm": 0.4417681435455645,
|
|
"learning_rate": 5.968703859836615e-06,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08964274823665619,
|
|
"step": 3025,
|
|
"valid_targets_mean": 6241.9,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 5.420769919427037,
|
|
"grad_norm": 0.6451109995686765,
|
|
"learning_rate": 5.905261856381779e-06,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1320190727710724,
|
|
"step": 3030,
|
|
"valid_targets_mean": 3444.6,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 5.429722470904208,
|
|
"grad_norm": 0.5982446796929841,
|
|
"learning_rate": 5.8421003730336635e-06,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19169670343399048,
|
|
"step": 3035,
|
|
"valid_targets_mean": 4772.6,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 5.4386750223813785,
|
|
"grad_norm": 1.0526800510164407,
|
|
"learning_rate": 5.77922066686182e-06,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.129215806722641,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3020.8,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 5.44762757385855,
|
|
"grad_norm": 0.710300937036674,
|
|
"learning_rate": 5.716623989327723e-06,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1642998456954956,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4067.4,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 5.456580125335721,
|
|
"grad_norm": 0.6236049531620286,
|
|
"learning_rate": 5.654311586259882e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14742004871368408,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4697.9,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 5.465532676812892,
|
|
"grad_norm": 0.9100445506977155,
|
|
"learning_rate": 5.592284697829049e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12023288756608963,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2732.1,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 5.474485228290063,
|
|
"grad_norm": 0.7130523821840068,
|
|
"learning_rate": 5.5305445585235315e-06,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11885958164930344,
|
|
"step": 3060,
|
|
"valid_targets_mean": 2787.4,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 5.483437779767233,
|
|
"grad_norm": 0.6155452380583275,
|
|
"learning_rate": 5.469092397124609e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14290834963321686,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5837.5,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 5.492390331244405,
|
|
"grad_norm": 0.6357781709172484,
|
|
"learning_rate": 5.407929436682098e-06,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10490871220827103,
|
|
"step": 3070,
|
|
"valid_targets_mean": 3321.1,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 5.501342882721576,
|
|
"grad_norm": 0.8268851686010698,
|
|
"learning_rate": 5.347056894490006e-06,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14326724410057068,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3260.1,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 5.5102954341987465,
|
|
"grad_norm": 0.5411962844748759,
|
|
"learning_rate": 5.286475982062283e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12881606817245483,
|
|
"step": 3080,
|
|
"valid_targets_mean": 6029.1,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 5.519247985675918,
|
|
"grad_norm": 0.6334671165729598,
|
|
"learning_rate": 5.226187905108746e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12549631297588348,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4009.8,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 5.528200537153088,
|
|
"grad_norm": 0.6692364678732721,
|
|
"learning_rate": 5.166193863511046e-06,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15974244475364685,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4663.0,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 5.53715308863026,
|
|
"grad_norm": 0.6288056109133601,
|
|
"learning_rate": 5.106495051298815e-06,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16460061073303223,
|
|
"step": 3095,
|
|
"valid_targets_mean": 5248.2,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 5.546105640107431,
|
|
"grad_norm": 0.6708522131884734,
|
|
"learning_rate": 5.047092656625887e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1474558711051941,
|
|
"step": 3100,
|
|
"valid_targets_mean": 3772.6,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 5.555058191584601,
|
|
"grad_norm": 0.6267068759154054,
|
|
"learning_rate": 4.98798786174667e-06,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10332757234573364,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3453.0,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 5.564010743061773,
|
|
"grad_norm": 0.6319201392464182,
|
|
"learning_rate": 4.9291818429925585e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10445370525121689,
|
|
"step": 3110,
|
|
"valid_targets_mean": 3067.1,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 5.572963294538944,
|
|
"grad_norm": 0.5676958670427478,
|
|
"learning_rate": 4.8706757707486075e-06,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09762459993362427,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4044.8,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 5.5819158460161145,
|
|
"grad_norm": 0.6600426946715785,
|
|
"learning_rate": 4.812470809430179e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17617090046405792,
|
|
"step": 3120,
|
|
"valid_targets_mean": 5532.6,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 5.590868397493286,
|
|
"grad_norm": 0.72512681265544,
|
|
"learning_rate": 4.754568117459788e-06,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13752184808254242,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3298.5,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 5.599820948970456,
|
|
"grad_norm": 0.7546139947956615,
|
|
"learning_rate": 4.69696884724403e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13179543614387512,
|
|
"step": 3130,
|
|
"valid_targets_mean": 2606.4,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 5.608773500447628,
|
|
"grad_norm": 0.6414231679701241,
|
|
"learning_rate": 4.639674145150679e-06,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13612650334835052,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4690.4,
|
|
"valid_targets_min": 1975
|
|
},
|
|
{
|
|
"epoch": 5.617726051924799,
|
|
"grad_norm": 0.7849198197821255,
|
|
"learning_rate": 4.582685151485826e-06,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12271968275308609,
|
|
"step": 3140,
|
|
"valid_targets_mean": 2440.4,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 5.626678603401969,
|
|
"grad_norm": 0.7190190450522136,
|
|
"learning_rate": 4.526003000471238e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10943076759576797,
|
|
"step": 3145,
|
|
"valid_targets_mean": 3442.4,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 5.635631154879141,
|
|
"grad_norm": 0.6723105509161403,
|
|
"learning_rate": 4.469628820221723e-06,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09454697370529175,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2455.6,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 5.644583706356311,
|
|
"grad_norm": 0.6951817724940638,
|
|
"learning_rate": 4.4135637327227365e-06,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13040387630462646,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3532.4,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 5.6535362578334825,
|
|
"grad_norm": 0.7380519857603212,
|
|
"learning_rate": 4.357808853808014e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2020113617181778,
|
|
"step": 3160,
|
|
"valid_targets_mean": 4735.9,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 5.662488809310654,
|
|
"grad_norm": 0.6194983497856827,
|
|
"learning_rate": 4.302365293137383e-06,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09762684255838394,
|
|
"step": 3165,
|
|
"valid_targets_mean": 4547.6,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 5.671441360787824,
|
|
"grad_norm": 0.6052656519002572,
|
|
"learning_rate": 4.247234154174649e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10471494495868683,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4168.4,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 5.680393912264996,
|
|
"grad_norm": 0.6514165243579992,
|
|
"learning_rate": 4.192416534165666e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12467285245656967,
|
|
"step": 3175,
|
|
"valid_targets_mean": 4404.5,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 5.689346463742167,
|
|
"grad_norm": 0.5202351850880886,
|
|
"learning_rate": 4.137913524116488e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11434194445610046,
|
|
"step": 3180,
|
|
"valid_targets_mean": 4798.5,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 5.698299015219337,
|
|
"grad_norm": 0.5713811257106961,
|
|
"learning_rate": 4.083726208771653e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13401255011558533,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5045.8,
|
|
"valid_targets_min": 2507
|
|
},
|
|
{
|
|
"epoch": 5.707251566696509,
|
|
"grad_norm": 0.6411090621867449,
|
|
"learning_rate": 4.0298556665925996e-06,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14346866309642792,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3961.9,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 5.716204118173679,
|
|
"grad_norm": 0.7831831361437512,
|
|
"learning_rate": 3.976302969736183e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09419474005699158,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2366.5,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 5.7251566696508505,
|
|
"grad_norm": 0.5953201952601799,
|
|
"learning_rate": 3.923069184033357e-06,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14828859269618988,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5864.6,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 5.734109221128022,
|
|
"grad_norm": 0.6184407471177551,
|
|
"learning_rate": 3.870155368967963e-06,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13603097200393677,
|
|
"step": 3205,
|
|
"valid_targets_mean": 4392.2,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 5.743061772605192,
|
|
"grad_norm": 0.7344526439829354,
|
|
"learning_rate": 3.8175625776556354e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14963099360466003,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3856.8,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 5.752014324082364,
|
|
"grad_norm": 0.6323484235013329,
|
|
"learning_rate": 3.76529185682283e-06,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13975292444229126,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4587.0,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 5.760966875559534,
|
|
"grad_norm": 0.7162533875016649,
|
|
"learning_rate": 3.7133442467860215e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15536366403102875,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4356.5,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 5.769919427036705,
|
|
"grad_norm": 0.6697775409439238,
|
|
"learning_rate": 3.6617207814309686e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09813784062862396,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2768.2,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 5.778871978513877,
|
|
"grad_norm": 0.724357058918503,
|
|
"learning_rate": 3.610422488192169e-06,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11302490532398224,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3345.9,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 5.787824529991047,
|
|
"grad_norm": 0.8637753559487381,
|
|
"learning_rate": 3.559450388032355e-06,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09676891565322876,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2141.9,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 5.7967770814682185,
|
|
"grad_norm": 0.5678361887713559,
|
|
"learning_rate": 3.508805495422247e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14245836436748505,
|
|
"step": 3240,
|
|
"valid_targets_mean": 4776.8,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 5.80572963294539,
|
|
"grad_norm": 0.8697062311998607,
|
|
"learning_rate": 3.4584888183203134e-06,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11090271174907684,
|
|
"step": 3245,
|
|
"valid_targets_mean": 2297.5,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 5.81468218442256,
|
|
"grad_norm": 0.7206579145760911,
|
|
"learning_rate": 3.4085013581527295e-06,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08114241808652878,
|
|
"step": 3250,
|
|
"valid_targets_mean": 2197.1,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 5.823634735899732,
|
|
"grad_norm": 0.962494971346814,
|
|
"learning_rate": 3.3588441097934223e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15923663973808289,
|
|
"step": 3255,
|
|
"valid_targets_mean": 2391.8,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 5.832587287376903,
|
|
"grad_norm": 0.6003561256453428,
|
|
"learning_rate": 3.30951806154431e-06,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11105726659297943,
|
|
"step": 3260,
|
|
"valid_targets_mean": 4796.4,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 5.841539838854073,
|
|
"grad_norm": 0.6864067176009239,
|
|
"learning_rate": 3.260524195115595e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10556810349225998,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3109.9,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 5.850492390331245,
|
|
"grad_norm": 0.7633968434678636,
|
|
"learning_rate": 3.2118634856062546e-06,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1667451560497284,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3742.0,
|
|
"valid_targets_min": 2079
|
|
},
|
|
{
|
|
"epoch": 5.859444941808415,
|
|
"grad_norm": 0.901362913650791,
|
|
"learning_rate": 3.163536901484601e-06,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09401094168424606,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2745.1,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 5.8683974932855865,
|
|
"grad_norm": 0.5798302684786609,
|
|
"learning_rate": 3.115545404569045e-06,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1682177037000656,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5518.4,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 5.877350044762757,
|
|
"grad_norm": 0.5757168480468727,
|
|
"learning_rate": 3.0678899500089244e-06,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1087426021695137,
|
|
"step": 3285,
|
|
"valid_targets_mean": 4423.0,
|
|
"valid_targets_min": 2758
|
|
},
|
|
{
|
|
"epoch": 5.886302596239928,
|
|
"grad_norm": 0.6310116283037832,
|
|
"learning_rate": 3.0205714862655135e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06991326808929443,
|
|
"step": 3290,
|
|
"valid_targets_mean": 2434.1,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 5.8952551477171,
|
|
"grad_norm": 0.6840482387720046,
|
|
"learning_rate": 2.973590955093124e-06,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17993587255477905,
|
|
"step": 3295,
|
|
"valid_targets_mean": 4772.0,
|
|
"valid_targets_min": 1583
|
|
},
|
|
{
|
|
"epoch": 5.90420769919427,
|
|
"grad_norm": 0.6503479570111461,
|
|
"learning_rate": 2.9269492915203735e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20532022416591644,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4907.6,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 5.913160250671441,
|
|
"grad_norm": 0.803170965499726,
|
|
"learning_rate": 2.880647423831591e-06,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09620766341686249,
|
|
"step": 3305,
|
|
"valid_targets_mean": 1729.2,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 5.922112802148613,
|
|
"grad_norm": 0.6442650027465544,
|
|
"learning_rate": 2.8346862735483215e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16743940114974976,
|
|
"step": 3310,
|
|
"valid_targets_mean": 5283.6,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 5.931065353625783,
|
|
"grad_norm": 0.6545274581161713,
|
|
"learning_rate": 2.789066755410994e-06,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13862073421478271,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4231.5,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 5.9400179051029545,
|
|
"grad_norm": 0.6389435663416849,
|
|
"learning_rate": 2.7437897773607102e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09975261241197586,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3778.2,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 5.948970456580126,
|
|
"grad_norm": 0.5835511788435559,
|
|
"learning_rate": 2.698856240521173e-06,
|
|
"loss": 0.251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14617975056171417,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5683.6,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 5.957923008057296,
|
|
"grad_norm": 0.5665404863664787,
|
|
"learning_rate": 2.6542670391807667e-06,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1269206702709198,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5419.6,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 5.966875559534468,
|
|
"grad_norm": 0.7472897852320771,
|
|
"learning_rate": 2.610023060774749e-06,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15214401483535767,
|
|
"step": 3335,
|
|
"valid_targets_mean": 3904.2,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 5.975828111011638,
|
|
"grad_norm": 0.7742003305228212,
|
|
"learning_rate": 2.566125185867576e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17470315098762512,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3845.4,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 5.984780662488809,
|
|
"grad_norm": 0.7491756904261838,
|
|
"learning_rate": 2.5225742881354043e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12955431640148163,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3063.2,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 5.993733213965981,
|
|
"grad_norm": 0.6987549042529995,
|
|
"learning_rate": 2.479371234348682e-06,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10832779109477997,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3335.0,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 6.001790510295434,
|
|
"grad_norm": 0.7069990185531398,
|
|
"learning_rate": 2.4365168843549e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08464382588863373,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3527.0,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 6.010743061772605,
|
|
"grad_norm": 0.6685974475107747,
|
|
"learning_rate": 2.3940120910614774e-06,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14645615220069885,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4492.4,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 6.019695613249776,
|
|
"grad_norm": 0.6914934325979221,
|
|
"learning_rate": 2.351857700418807e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11661306023597717,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3499.0,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 6.028648164726947,
|
|
"grad_norm": 0.621813437866408,
|
|
"learning_rate": 2.310054551403391e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08839264512062073,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2995.2,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 6.0376007162041185,
|
|
"grad_norm": 0.7788394346376253,
|
|
"learning_rate": 2.2686034760011656e-06,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12395849823951721,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2545.0,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 6.046553267681289,
|
|
"grad_norm": 0.8526120857062531,
|
|
"learning_rate": 2.2275052991909218e-06,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13557417690753937,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2442.6,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 6.05550581915846,
|
|
"grad_norm": 0.6879366966499265,
|
|
"learning_rate": 2.186760838927897e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12125202268362045,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3931.8,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 6.064458370635631,
|
|
"grad_norm": 0.5909197508211376,
|
|
"learning_rate": 2.146370906127506e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13690954446792603,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3945.5,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 6.073410922112802,
|
|
"grad_norm": 0.669659070570112,
|
|
"learning_rate": 2.1063363046491813e-06,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11727896332740784,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3985.1,
|
|
"valid_targets_min": 1442
|
|
},
|
|
{
|
|
"epoch": 6.082363473589973,
|
|
"grad_norm": 0.7079044777647083,
|
|
"learning_rate": 2.0666578312803942e-06,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11680565029382706,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3181.2,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 6.091316025067144,
|
|
"grad_norm": 0.7267778237592878,
|
|
"learning_rate": 2.0273362757207726e-06,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1440717875957489,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3801.5,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 6.100268576544315,
|
|
"grad_norm": 0.6851230389909834,
|
|
"learning_rate": 1.988372420566416e-06,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10894258320331573,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3692.6,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 6.1092211280214865,
|
|
"grad_norm": 0.723418442049336,
|
|
"learning_rate": 1.9497670412942835e-06,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12403066456317902,
|
|
"step": 3415,
|
|
"valid_targets_mean": 2928.9,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 6.118173679498657,
|
|
"grad_norm": 0.7207144614470365,
|
|
"learning_rate": 1.9115209062467954e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10563968867063522,
|
|
"step": 3420,
|
|
"valid_targets_mean": 3282.1,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 6.127126230975828,
|
|
"grad_norm": 0.8346687848959415,
|
|
"learning_rate": 1.8736347766165086e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11731438338756561,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2362.8,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 6.136078782452999,
|
|
"grad_norm": 0.636727979042535,
|
|
"learning_rate": 1.8361094064309993e-06,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14710702002048492,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4899.9,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 6.14503133393017,
|
|
"grad_norm": 0.7467477214038792,
|
|
"learning_rate": 1.7989455425378266e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1593431681394577,
|
|
"step": 3435,
|
|
"valid_targets_mean": 4329.8,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 6.153983885407341,
|
|
"grad_norm": 0.6264376858160309,
|
|
"learning_rate": 1.7621439245896943e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18382921814918518,
|
|
"step": 3440,
|
|
"valid_targets_mean": 7472.1,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 6.162936436884512,
|
|
"grad_norm": 0.7282562384461457,
|
|
"learning_rate": 1.7257052850296996e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14502188563346863,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3724.5,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 6.171888988361683,
|
|
"grad_norm": 0.7815809174043036,
|
|
"learning_rate": 1.6896303490767829e-06,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11601326614618301,
|
|
"step": 3450,
|
|
"valid_targets_mean": 2493.1,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 6.180841539838854,
|
|
"grad_norm": 0.6086355952564768,
|
|
"learning_rate": 1.6539198347112884e-06,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11718467622995377,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4018.1,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 6.189794091316025,
|
|
"grad_norm": 0.9504972739313073,
|
|
"learning_rate": 1.6185744526606706e-06,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09783005714416504,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3889.9,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 6.198746642793196,
|
|
"grad_norm": 0.724465952354465,
|
|
"learning_rate": 1.583594906385335e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1024479940533638,
|
|
"step": 3465,
|
|
"valid_targets_mean": 2325.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 6.207699194270367,
|
|
"grad_norm": 0.6451374857999173,
|
|
"learning_rate": 1.548981892064665e-06,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07624150067567825,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2470.6,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 6.216651745747538,
|
|
"grad_norm": 0.5455560588187984,
|
|
"learning_rate": 1.5147360985831516e-06,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09515611827373505,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4178.1,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 6.225604297224709,
|
|
"grad_norm": 0.816357841000134,
|
|
"learning_rate": 1.4808582075166778e-06,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09542781114578247,
|
|
"step": 3480,
|
|
"valid_targets_mean": 2079.2,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 6.23455684870188,
|
|
"grad_norm": 0.81830346676513,
|
|
"learning_rate": 1.447348893118965e-06,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12241758406162262,
|
|
"step": 3485,
|
|
"valid_targets_mean": 2740.4,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 6.243509400179051,
|
|
"grad_norm": 0.762453707084996,
|
|
"learning_rate": 1.4142088223081408e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10295930504798889,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2399.1,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 6.252461951656222,
|
|
"grad_norm": 0.7034908425844338,
|
|
"learning_rate": 1.3814386546534864e-06,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15138891339302063,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3894.6,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 6.261414503133393,
|
|
"grad_norm": 0.4626175053772806,
|
|
"learning_rate": 1.3490390423622856e-06,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0836140513420105,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3548.6,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 6.270367054610564,
|
|
"grad_norm": 0.7895264365296455,
|
|
"learning_rate": 1.3170106302668616e-06,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11527480185031891,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3602.1,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 6.279319606087735,
|
|
"grad_norm": 0.8147171086824726,
|
|
"learning_rate": 1.2853540558117272e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15035514533519745,
|
|
"step": 3510,
|
|
"valid_targets_mean": 5440.1,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 6.288272157564906,
|
|
"grad_norm": 0.7116251172852573,
|
|
"learning_rate": 1.254069949040917e-06,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11078624427318573,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3214.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 6.2972247090420765,
|
|
"grad_norm": 0.7297254614713066,
|
|
"learning_rate": 1.2231589325854354e-06,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11363118886947632,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2799.8,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 6.306177260519248,
|
|
"grad_norm": 0.6596134416441236,
|
|
"learning_rate": 1.1926216216508735e-06,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08517421782016754,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2873.4,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 6.315129811996419,
|
|
"grad_norm": 0.69883908604293,
|
|
"learning_rate": 1.162458624005145e-06,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12335070222616196,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3635.4,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 6.32408236347359,
|
|
"grad_norm": 0.6847726574413945,
|
|
"learning_rate": 1.1326705399664207e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09604361653327942,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2974.9,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 6.333034914950761,
|
|
"grad_norm": 0.6519594989356231,
|
|
"learning_rate": 1.103257962391151e-06,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12078134715557098,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4011.9,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 6.341987466427932,
|
|
"grad_norm": 0.5937147175773294,
|
|
"learning_rate": 1.0742214766622938e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08249370008707047,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3290.8,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 6.350940017905103,
|
|
"grad_norm": 0.7381445050429146,
|
|
"learning_rate": 1.0455616606776363e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13618944585323334,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3257.4,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 6.359892569382274,
|
|
"grad_norm": 0.700522577996905,
|
|
"learning_rate": 1.0172790848383141e-06,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1201668530702591,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4017.9,
|
|
"valid_targets_min": 1709
|
|
},
|
|
{
|
|
"epoch": 6.3688451208594445,
|
|
"grad_norm": 0.8464430462952892,
|
|
"learning_rate": 9.893743120374543e-07,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13719336688518524,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2657.0,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 6.377797672336616,
|
|
"grad_norm": 0.6417224127363806,
|
|
"learning_rate": 9.618478976489709e-07,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10005088150501251,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3522.0,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 6.386750223813787,
|
|
"grad_norm": 0.8450055799005987,
|
|
"learning_rate": 9.347003895165052e-07,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12907400727272034,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2274.2,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 6.395702775290958,
|
|
"grad_norm": 0.718865189721249,
|
|
"learning_rate": 9.079323279425245e-07,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15633174777030945,
|
|
"step": 3575,
|
|
"valid_targets_mean": 3628.4,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 6.404655326768129,
|
|
"grad_norm": 0.6318602168953558,
|
|
"learning_rate": 8.815442456775835e-07,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0951131284236908,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3976.5,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 6.4136078782453,
|
|
"grad_norm": 0.6896320314558233,
|
|
"learning_rate": 8.555366679097043e-07,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10344749689102173,
|
|
"step": 3585,
|
|
"valid_targets_mean": 2882.4,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 6.422560429722471,
|
|
"grad_norm": 0.5944068667542841,
|
|
"learning_rate": 8.29910112253931e-07,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12075631320476532,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5318.9,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 6.431512981199642,
|
|
"grad_norm": 0.7009103872992966,
|
|
"learning_rate": 8.046650887420227e-07,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16663353145122528,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4816.9,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 6.4404655326768125,
|
|
"grad_norm": 0.6102090137908595,
|
|
"learning_rate": 7.798020998123101e-07,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09184370934963226,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3440.4,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 6.449418084153984,
|
|
"grad_norm": 0.7312966151837536,
|
|
"learning_rate": 7.55321640299691e-07,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1317036747932434,
|
|
"step": 3605,
|
|
"valid_targets_mean": 3115.0,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 6.458370635631155,
|
|
"grad_norm": 0.6431639047809755,
|
|
"learning_rate": 7.312241974257861e-07,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11406281590461731,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4320.2,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 6.467323187108326,
|
|
"grad_norm": 0.7636759815012041,
|
|
"learning_rate": 7.075102507892295e-07,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10190820693969727,
|
|
"step": 3615,
|
|
"valid_targets_mean": 2279.1,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 6.476275738585497,
|
|
"grad_norm": 0.8129538828808625,
|
|
"learning_rate": 6.841802723561408e-07,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11288426071405411,
|
|
"step": 3620,
|
|
"valid_targets_mean": 2167.2,
|
|
"valid_targets_min": 1163
|
|
},
|
|
{
|
|
"epoch": 6.485228290062668,
|
|
"grad_norm": 0.5596904145823007,
|
|
"learning_rate": 6.612347264507257e-07,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10565987974405289,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5062.5,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 6.494180841539839,
|
|
"grad_norm": 0.5924035256383883,
|
|
"learning_rate": 6.386740697460281e-07,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1311139017343521,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5557.9,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 6.50313339301701,
|
|
"grad_norm": 0.7289701592713733,
|
|
"learning_rate": 6.164987512548415e-07,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15614090859889984,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3911.8,
|
|
"valid_targets_min": 1792
|
|
},
|
|
{
|
|
"epoch": 6.5120859444941805,
|
|
"grad_norm": 0.5887989220028383,
|
|
"learning_rate": 5.947092123207854e-07,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12084167450666428,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4592.4,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 6.521038495971352,
|
|
"grad_norm": 0.866493744978262,
|
|
"learning_rate": 5.733058866095143e-07,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15592843294143677,
|
|
"step": 3645,
|
|
"valid_targets_mean": 2719.5,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 6.529991047448523,
|
|
"grad_norm": 0.6686036394371799,
|
|
"learning_rate": 5.522892001000801e-07,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1646374613046646,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4997.0,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 6.538943598925694,
|
|
"grad_norm": 0.5839134082394074,
|
|
"learning_rate": 5.316595710764594e-07,
|
|
"loss": 0.2834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.193353071808815,
|
|
"step": 3655,
|
|
"valid_targets_mean": 6390.2,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 6.547896150402865,
|
|
"grad_norm": 0.5616135043250162,
|
|
"learning_rate": 5.114174101192282e-07,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14416781067848206,
|
|
"step": 3660,
|
|
"valid_targets_mean": 6041.5,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 6.556848701880035,
|
|
"grad_norm": 0.7640989189195657,
|
|
"learning_rate": 4.915631200973958e-07,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13019560277462006,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3621.5,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 6.565801253357207,
|
|
"grad_norm": 0.6726753158457589,
|
|
"learning_rate": 4.7209709616038214e-07,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15556487441062927,
|
|
"step": 3670,
|
|
"valid_targets_mean": 4878.2,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 6.574753804834378,
|
|
"grad_norm": 0.7209662902786135,
|
|
"learning_rate": 4.5301972573014163e-07,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11224491894245148,
|
|
"step": 3675,
|
|
"valid_targets_mean": 2792.8,
|
|
"valid_targets_min": 1572
|
|
},
|
|
{
|
|
"epoch": 6.5837063563115485,
|
|
"grad_norm": 0.7821872619160591,
|
|
"learning_rate": 4.3433138849347854e-07,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.091234490275383,
|
|
"step": 3680,
|
|
"valid_targets_mean": 1755.8,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 6.59265890778872,
|
|
"grad_norm": 0.7028208056439585,
|
|
"learning_rate": 4.1603245639446845e-07,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13173747062683105,
|
|
"step": 3685,
|
|
"valid_targets_mean": 4195.1,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 6.601611459265891,
|
|
"grad_norm": 0.6866687362574865,
|
|
"learning_rate": 3.981232936270596e-07,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14300760626792908,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4124.4,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 6.610564010743062,
|
|
"grad_norm": 0.7201161780473606,
|
|
"learning_rate": 3.806042566278345e-07,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09710663557052612,
|
|
"step": 3695,
|
|
"valid_targets_mean": 2816.4,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 6.619516562220233,
|
|
"grad_norm": 0.7227544704360982,
|
|
"learning_rate": 3.6347569406890213e-07,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13512368500232697,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3177.6,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 6.628469113697403,
|
|
"grad_norm": 0.7298426483697191,
|
|
"learning_rate": 3.467379468509746e-07,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16440506279468536,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3989.5,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 6.637421665174575,
|
|
"grad_norm": 0.6520707507301194,
|
|
"learning_rate": 3.3039134809656594e-07,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09945560991764069,
|
|
"step": 3710,
|
|
"valid_targets_mean": 3097.5,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 6.646374216651746,
|
|
"grad_norm": 0.7574937610571322,
|
|
"learning_rate": 3.144362231433706e-07,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15985745191574097,
|
|
"step": 3715,
|
|
"valid_targets_mean": 4052.4,
|
|
"valid_targets_min": 1452
|
|
},
|
|
{
|
|
"epoch": 6.6553267681289165,
|
|
"grad_norm": 0.6807781785735898,
|
|
"learning_rate": 2.9887288953778903e-07,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12004221230745316,
|
|
"step": 3720,
|
|
"valid_targets_mean": 3874.6,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 6.664279319606088,
|
|
"grad_norm": 0.7422840275979297,
|
|
"learning_rate": 2.8370165702860553e-07,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16807028651237488,
|
|
"step": 3725,
|
|
"valid_targets_mean": 3694.1,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 6.673231871083258,
|
|
"grad_norm": 0.7450098153423828,
|
|
"learning_rate": 2.6892282756082246e-07,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11174918711185455,
|
|
"step": 3730,
|
|
"valid_targets_mean": 3676.8,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 6.68218442256043,
|
|
"grad_norm": 0.7111965111121455,
|
|
"learning_rate": 2.545366952696582e-07,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10589167475700378,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3334.2,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 6.691136974037601,
|
|
"grad_norm": 0.6293266970949545,
|
|
"learning_rate": 2.405435464746786e-07,
|
|
"loss": 0.2496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14255636930465698,
|
|
"step": 3740,
|
|
"valid_targets_mean": 4811.6,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 6.700089525514771,
|
|
"grad_norm": 0.6652721706334741,
|
|
"learning_rate": 2.2694365967411258e-07,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14178217947483063,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4991.2,
|
|
"valid_targets_min": 1887
|
|
},
|
|
{
|
|
"epoch": 6.709042076991943,
|
|
"grad_norm": 0.7681480091833459,
|
|
"learning_rate": 2.137373055393055e-07,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1362355649471283,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3588.9,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 6.717994628469114,
|
|
"grad_norm": 0.7191832014484213,
|
|
"learning_rate": 2.009247469093234e-07,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16261494159698486,
|
|
"step": 3755,
|
|
"valid_targets_mean": 4700.6,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 6.7269471799462845,
|
|
"grad_norm": 0.6889617601852991,
|
|
"learning_rate": 1.8850623878573505e-07,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19672788679599762,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4380.6,
|
|
"valid_targets_min": 1376
|
|
},
|
|
{
|
|
"epoch": 6.735899731423456,
|
|
"grad_norm": 0.5481225860807925,
|
|
"learning_rate": 1.7648202832752924e-07,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30157795548439026,
|
|
"step": 3765,
|
|
"valid_targets_mean": 7732.9,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 6.744852282900626,
|
|
"grad_norm": 0.710685504394315,
|
|
"learning_rate": 1.6485235484619665e-07,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12485798448324203,
|
|
"step": 3770,
|
|
"valid_targets_mean": 2765.8,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 6.753804834377798,
|
|
"grad_norm": 0.689831853046377,
|
|
"learning_rate": 1.5361744980096905e-07,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10303008556365967,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3598.8,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 6.762757385854969,
|
|
"grad_norm": 0.7124881086751207,
|
|
"learning_rate": 1.427775367942097e-07,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1335175335407257,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3160.1,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 6.771709937332139,
|
|
"grad_norm": 0.6559820335978044,
|
|
"learning_rate": 1.3233283156696142e-07,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1501990109682083,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4496.8,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 6.780662488809311,
|
|
"grad_norm": 0.5956093434224163,
|
|
"learning_rate": 1.222835419946633e-07,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09017609059810638,
|
|
"step": 3790,
|
|
"valid_targets_mean": 3518.2,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 6.789615040286481,
|
|
"grad_norm": 0.5830036783313224,
|
|
"learning_rate": 1.1262986808300291e-07,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10115193575620651,
|
|
"step": 3795,
|
|
"valid_targets_mean": 5107.1,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 6.7985675917636526,
|
|
"grad_norm": 0.7584116947768964,
|
|
"learning_rate": 1.0337200196393505e-07,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1397339403629303,
|
|
"step": 3800,
|
|
"valid_targets_mean": 3640.5,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 6.807520143240824,
|
|
"grad_norm": 0.6528866226497931,
|
|
"learning_rate": 9.451012789186476e-08,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10747568309307098,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3717.6,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 6.816472694717994,
|
|
"grad_norm": 0.790754630686848,
|
|
"learning_rate": 8.604442223998144e-08,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1327494978904724,
|
|
"step": 3810,
|
|
"valid_targets_mean": 3487.0,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 6.825425246195166,
|
|
"grad_norm": 0.6658217086863656,
|
|
"learning_rate": 7.797505349673496e-08,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11113050580024719,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3609.1,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 6.834377797672337,
|
|
"grad_norm": 0.7049353307220068,
|
|
"learning_rate": 7.030218226250051e-08,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10403628647327423,
|
|
"step": 3820,
|
|
"valid_targets_mean": 3243.0,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 6.843330349149507,
|
|
"grad_norm": 0.5860588029996026,
|
|
"learning_rate": 6.302596124636573e-08,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1433483511209488,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4925.4,
|
|
"valid_targets_min": 2027
|
|
},
|
|
{
|
|
"epoch": 6.852282900626679,
|
|
"grad_norm": 0.7879325522414711,
|
|
"learning_rate": 5.6146535263106314e-08,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1332634687423706,
|
|
"step": 3830,
|
|
"valid_targets_mean": 2959.4,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 6.861235452103849,
|
|
"grad_norm": 0.8092109840762858,
|
|
"learning_rate": 4.966404123029289e-08,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11193729937076569,
|
|
"step": 3835,
|
|
"valid_targets_mean": 2531.8,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 6.8701880035810206,
|
|
"grad_norm": 0.5253238898739874,
|
|
"learning_rate": 4.357860816557091e-08,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09079030156135559,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5008.1,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 6.879140555058192,
|
|
"grad_norm": 0.6837265207779236,
|
|
"learning_rate": 3.789035718408718e-08,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1591939926147461,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4355.1,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 6.888093106535362,
|
|
"grad_norm": 0.6377651911445792,
|
|
"learning_rate": 3.259940149609175e-08,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1554187387228012,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4470.0,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 6.897045658012534,
|
|
"grad_norm": 0.6876946100735892,
|
|
"learning_rate": 2.7705846404670888e-08,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10332445800304413,
|
|
"step": 3855,
|
|
"valid_targets_mean": 2918.2,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 6.905998209489704,
|
|
"grad_norm": 0.6433560412596225,
|
|
"learning_rate": 2.3209789303657582e-08,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1013648509979248,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3309.1,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 6.914950760966875,
|
|
"grad_norm": 0.6004214021548672,
|
|
"learning_rate": 1.9111319675699792e-08,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10128806531429291,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3911.9,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 6.923903312444047,
|
|
"grad_norm": 0.6818541228429529,
|
|
"learning_rate": 1.5410519090461872e-08,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10375615209341049,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2862.6,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 6.932855863921217,
|
|
"grad_norm": 0.9019330979790738,
|
|
"learning_rate": 1.210746120302142e-08,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14588722586631775,
|
|
"step": 3875,
|
|
"valid_targets_mean": 3001.8,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 6.9418084153983886,
|
|
"grad_norm": 0.6213345710457211,
|
|
"learning_rate": 9.202211752386003e-09,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10529154539108276,
|
|
"step": 3880,
|
|
"valid_targets_mean": 3572.5,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 6.95076096687556,
|
|
"grad_norm": 0.8144722091986042,
|
|
"learning_rate": 6.694828560200872e-09,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12102942913770676,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2542.0,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 6.95971351835273,
|
|
"grad_norm": 0.6060836910004495,
|
|
"learning_rate": 4.585361529581001e-09,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10048162937164307,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3793.5,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 6.968666069829902,
|
|
"grad_norm": 0.6612027073540584,
|
|
"learning_rate": 2.87385264413631e-09,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09530146420001984,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3492.0,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 6.977618621307073,
|
|
"grad_norm": 0.6346262077642075,
|
|
"learning_rate": 1.560335967114579e-09,
|
|
"loss": 0.2443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10467442870140076,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3068.6,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 6.9865711727842434,
|
|
"grad_norm": 0.7723757881455082,
|
|
"learning_rate": 6.4483764074863e-10,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08849972486495972,
|
|
"step": 3905,
|
|
"valid_targets_mean": 2084.8,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 6.995523724261415,
|
|
"grad_norm": 0.5277717756862081,
|
|
"learning_rate": 1.273758857145424e-10,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13350322842597961,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4850.9,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25713130831718445,
|
|
"step": 3913,
|
|
"total_flos": 1.082631006881579e+18,
|
|
"train_loss": 0.32484868789553917,
|
|
"train_runtime": 42725.2777,
|
|
"train_samples_per_second": 1.463,
|
|
"train_steps_per_second": 0.092,
|
|
"valid_targets_mean": 3542.5,
|
|
"valid_targets_min": 1547
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3913,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.082631006881579e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|