13226 lines
367 KiB
JSON
13226 lines
367 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 5992,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.005841121495327103,
|
|
"grad_norm": 11.479184595707551,
|
|
"learning_rate": 2.666666666666667e-07,
|
|
"loss": 0.8047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8354127407073975,
|
|
"step": 5,
|
|
"valid_targets_mean": 4820.5,
|
|
"valid_targets_min": 1948
|
|
},
|
|
{
|
|
"epoch": 0.011682242990654205,
|
|
"grad_norm": 10.627415747119105,
|
|
"learning_rate": 6.000000000000001e-07,
|
|
"loss": 0.8098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7991741299629211,
|
|
"step": 10,
|
|
"valid_targets_mean": 5528.0,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 0.017523364485981307,
|
|
"grad_norm": 9.708482226660152,
|
|
"learning_rate": 9.333333333333334e-07,
|
|
"loss": 0.7943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.786708652973175,
|
|
"step": 15,
|
|
"valid_targets_mean": 5946.0,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 0.02336448598130841,
|
|
"grad_norm": 8.879739176843549,
|
|
"learning_rate": 1.2666666666666669e-06,
|
|
"loss": 0.7997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7762587070465088,
|
|
"step": 20,
|
|
"valid_targets_mean": 5531.8,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 0.029205607476635514,
|
|
"grad_norm": 6.83285072470726,
|
|
"learning_rate": 1.6000000000000001e-06,
|
|
"loss": 0.795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.810775101184845,
|
|
"step": 25,
|
|
"valid_targets_mean": 5151.7,
|
|
"valid_targets_min": 2580
|
|
},
|
|
{
|
|
"epoch": 0.035046728971962614,
|
|
"grad_norm": 4.970503546524842,
|
|
"learning_rate": 1.9333333333333336e-06,
|
|
"loss": 0.7104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6350469589233398,
|
|
"step": 30,
|
|
"valid_targets_mean": 6109.1,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 0.04088785046728972,
|
|
"grad_norm": 3.6215198853515234,
|
|
"learning_rate": 2.266666666666667e-06,
|
|
"loss": 0.6853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6807482838630676,
|
|
"step": 35,
|
|
"valid_targets_mean": 5918.6,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 0.04672897196261682,
|
|
"grad_norm": 3.0459462625539175,
|
|
"learning_rate": 2.6e-06,
|
|
"loss": 0.6736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6302399635314941,
|
|
"step": 40,
|
|
"valid_targets_mean": 4884.9,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 0.052570093457943924,
|
|
"grad_norm": 1.877690988727815,
|
|
"learning_rate": 2.9333333333333338e-06,
|
|
"loss": 0.6291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6197364330291748,
|
|
"step": 45,
|
|
"valid_targets_mean": 5880.9,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 0.05841121495327103,
|
|
"grad_norm": 1.3899780039378742,
|
|
"learning_rate": 3.266666666666667e-06,
|
|
"loss": 0.6281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6223489046096802,
|
|
"step": 50,
|
|
"valid_targets_mean": 5741.8,
|
|
"valid_targets_min": 2603
|
|
},
|
|
{
|
|
"epoch": 0.06425233644859812,
|
|
"grad_norm": 1.0480142809146582,
|
|
"learning_rate": 3.6000000000000003e-06,
|
|
"loss": 0.5944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.575444757938385,
|
|
"step": 55,
|
|
"valid_targets_mean": 6440.6,
|
|
"valid_targets_min": 2980
|
|
},
|
|
{
|
|
"epoch": 0.07009345794392523,
|
|
"grad_norm": 1.0914226909745381,
|
|
"learning_rate": 3.9333333333333335e-06,
|
|
"loss": 0.6179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6286702156066895,
|
|
"step": 60,
|
|
"valid_targets_mean": 5562.6,
|
|
"valid_targets_min": 1952
|
|
},
|
|
{
|
|
"epoch": 0.07593457943925233,
|
|
"grad_norm": 0.9233697692761986,
|
|
"learning_rate": 4.266666666666668e-06,
|
|
"loss": 0.5893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6319542527198792,
|
|
"step": 65,
|
|
"valid_targets_mean": 5562.5,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 0.08177570093457943,
|
|
"grad_norm": 0.7707493007999066,
|
|
"learning_rate": 4.600000000000001e-06,
|
|
"loss": 0.5561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5523564219474792,
|
|
"step": 70,
|
|
"valid_targets_mean": 6176.3,
|
|
"valid_targets_min": 2947
|
|
},
|
|
{
|
|
"epoch": 0.08761682242990654,
|
|
"grad_norm": 0.6621314511180669,
|
|
"learning_rate": 4.933333333333334e-06,
|
|
"loss": 0.5536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5228535532951355,
|
|
"step": 75,
|
|
"valid_targets_mean": 5612.0,
|
|
"valid_targets_min": 2905
|
|
},
|
|
{
|
|
"epoch": 0.09345794392523364,
|
|
"grad_norm": 0.6252476823179565,
|
|
"learning_rate": 5.2666666666666665e-06,
|
|
"loss": 0.5442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5942779183387756,
|
|
"step": 80,
|
|
"valid_targets_mean": 5825.6,
|
|
"valid_targets_min": 2688
|
|
},
|
|
{
|
|
"epoch": 0.09929906542056074,
|
|
"grad_norm": 0.5463642393595309,
|
|
"learning_rate": 5.600000000000001e-06,
|
|
"loss": 0.5336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48444172739982605,
|
|
"step": 85,
|
|
"valid_targets_mean": 4849.2,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 0.10514018691588785,
|
|
"grad_norm": 0.5512545102313745,
|
|
"learning_rate": 5.933333333333335e-06,
|
|
"loss": 0.5191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5778092741966248,
|
|
"step": 90,
|
|
"valid_targets_mean": 5401.3,
|
|
"valid_targets_min": 2436
|
|
},
|
|
{
|
|
"epoch": 0.11098130841121495,
|
|
"grad_norm": 0.487532002938438,
|
|
"learning_rate": 6.266666666666668e-06,
|
|
"loss": 0.5193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.535785973072052,
|
|
"step": 95,
|
|
"valid_targets_mean": 6065.9,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 0.11682242990654206,
|
|
"grad_norm": 0.5107177685204701,
|
|
"learning_rate": 6.600000000000001e-06,
|
|
"loss": 0.5058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4956916272640228,
|
|
"step": 100,
|
|
"valid_targets_mean": 4907.4,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 0.12266355140186916,
|
|
"grad_norm": 0.47567332807730417,
|
|
"learning_rate": 6.9333333333333344e-06,
|
|
"loss": 0.489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4762095510959625,
|
|
"step": 105,
|
|
"valid_targets_mean": 5346.8,
|
|
"valid_targets_min": 2388
|
|
},
|
|
{
|
|
"epoch": 0.12850467289719625,
|
|
"grad_norm": 0.44232517408758215,
|
|
"learning_rate": 7.266666666666668e-06,
|
|
"loss": 0.4944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44979381561279297,
|
|
"step": 110,
|
|
"valid_targets_mean": 5396.9,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 0.13434579439252337,
|
|
"grad_norm": 0.44510391906165403,
|
|
"learning_rate": 7.600000000000001e-06,
|
|
"loss": 0.515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5045997500419617,
|
|
"step": 115,
|
|
"valid_targets_mean": 6530.5,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 0.14018691588785046,
|
|
"grad_norm": 0.4935979390613404,
|
|
"learning_rate": 7.933333333333334e-06,
|
|
"loss": 0.5083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43278780579566956,
|
|
"step": 120,
|
|
"valid_targets_mean": 5010.7,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 0.14602803738317757,
|
|
"grad_norm": 0.45375366194394606,
|
|
"learning_rate": 8.266666666666667e-06,
|
|
"loss": 0.4883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46848687529563904,
|
|
"step": 125,
|
|
"valid_targets_mean": 6073.9,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 0.15186915887850466,
|
|
"grad_norm": 0.47366383721953453,
|
|
"learning_rate": 8.6e-06,
|
|
"loss": 0.4955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.518092691898346,
|
|
"step": 130,
|
|
"valid_targets_mean": 5669.1,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 0.15771028037383178,
|
|
"grad_norm": 0.49336844269499885,
|
|
"learning_rate": 8.933333333333333e-06,
|
|
"loss": 0.4917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5175104737281799,
|
|
"step": 135,
|
|
"valid_targets_mean": 5734.3,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 0.16355140186915887,
|
|
"grad_norm": 0.48680553855916103,
|
|
"learning_rate": 9.266666666666667e-06,
|
|
"loss": 0.4537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4745837152004242,
|
|
"step": 140,
|
|
"valid_targets_mean": 5279.0,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 0.169392523364486,
|
|
"grad_norm": 0.44346379407512027,
|
|
"learning_rate": 9.600000000000001e-06,
|
|
"loss": 0.4494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4899725019931793,
|
|
"step": 145,
|
|
"valid_targets_mean": 5414.0,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 0.17523364485981308,
|
|
"grad_norm": 0.4873729712591485,
|
|
"learning_rate": 9.933333333333334e-06,
|
|
"loss": 0.4725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.472247451543808,
|
|
"step": 150,
|
|
"valid_targets_mean": 5090.5,
|
|
"valid_targets_min": 2149
|
|
},
|
|
{
|
|
"epoch": 0.1810747663551402,
|
|
"grad_norm": 0.44672632031256115,
|
|
"learning_rate": 1.0266666666666668e-05,
|
|
"loss": 0.4451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4719986915588379,
|
|
"step": 155,
|
|
"valid_targets_mean": 6144.0,
|
|
"valid_targets_min": 2621
|
|
},
|
|
{
|
|
"epoch": 0.18691588785046728,
|
|
"grad_norm": 0.4328885410477105,
|
|
"learning_rate": 1.0600000000000002e-05,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.376224160194397,
|
|
"step": 160,
|
|
"valid_targets_mean": 5749.7,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 0.1927570093457944,
|
|
"grad_norm": 0.4642119637647211,
|
|
"learning_rate": 1.0933333333333334e-05,
|
|
"loss": 0.4481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45913925766944885,
|
|
"step": 165,
|
|
"valid_targets_mean": 5624.1,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 0.1985981308411215,
|
|
"grad_norm": 0.4273461199469453,
|
|
"learning_rate": 1.1266666666666668e-05,
|
|
"loss": 0.4532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4911107122898102,
|
|
"step": 170,
|
|
"valid_targets_mean": 6964.2,
|
|
"valid_targets_min": 2929
|
|
},
|
|
{
|
|
"epoch": 0.2044392523364486,
|
|
"grad_norm": 0.4373262051414203,
|
|
"learning_rate": 1.16e-05,
|
|
"loss": 0.4403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44236335158348083,
|
|
"step": 175,
|
|
"valid_targets_mean": 6391.4,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 0.2102803738317757,
|
|
"grad_norm": 0.4409088760876575,
|
|
"learning_rate": 1.1933333333333335e-05,
|
|
"loss": 0.4419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.455395370721817,
|
|
"step": 180,
|
|
"valid_targets_mean": 6558.7,
|
|
"valid_targets_min": 2388
|
|
},
|
|
{
|
|
"epoch": 0.2161214953271028,
|
|
"grad_norm": 0.44865762808318627,
|
|
"learning_rate": 1.2266666666666667e-05,
|
|
"loss": 0.444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43866419792175293,
|
|
"step": 185,
|
|
"valid_targets_mean": 6358.2,
|
|
"valid_targets_min": 2653
|
|
},
|
|
{
|
|
"epoch": 0.2219626168224299,
|
|
"grad_norm": 0.4598396523764954,
|
|
"learning_rate": 1.2600000000000001e-05,
|
|
"loss": 0.4661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4637612998485565,
|
|
"step": 190,
|
|
"valid_targets_mean": 5285.2,
|
|
"valid_targets_min": 2218
|
|
},
|
|
{
|
|
"epoch": 0.22780373831775702,
|
|
"grad_norm": 0.46553478935280257,
|
|
"learning_rate": 1.2933333333333334e-05,
|
|
"loss": 0.4443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44283318519592285,
|
|
"step": 195,
|
|
"valid_targets_mean": 6229.5,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 0.2336448598130841,
|
|
"grad_norm": 0.453491266867907,
|
|
"learning_rate": 1.3266666666666668e-05,
|
|
"loss": 0.462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42222538590431213,
|
|
"step": 200,
|
|
"valid_targets_mean": 5695.6,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 0.23948598130841123,
|
|
"grad_norm": 0.4361659695712366,
|
|
"learning_rate": 1.3600000000000002e-05,
|
|
"loss": 0.4387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4658869206905365,
|
|
"step": 205,
|
|
"valid_targets_mean": 5514.7,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 0.24532710280373832,
|
|
"grad_norm": 0.4713193210286301,
|
|
"learning_rate": 1.3933333333333334e-05,
|
|
"loss": 0.4417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.456927090883255,
|
|
"step": 210,
|
|
"valid_targets_mean": 6220.2,
|
|
"valid_targets_min": 2516
|
|
},
|
|
{
|
|
"epoch": 0.25116822429906543,
|
|
"grad_norm": 0.45818829668885225,
|
|
"learning_rate": 1.4266666666666668e-05,
|
|
"loss": 0.4472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4341546595096588,
|
|
"step": 215,
|
|
"valid_targets_mean": 6071.2,
|
|
"valid_targets_min": 1960
|
|
},
|
|
{
|
|
"epoch": 0.2570093457943925,
|
|
"grad_norm": 0.46204732967982076,
|
|
"learning_rate": 1.46e-05,
|
|
"loss": 0.4276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44364961981773376,
|
|
"step": 220,
|
|
"valid_targets_mean": 6311.7,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 0.2628504672897196,
|
|
"grad_norm": 0.4714794052403706,
|
|
"learning_rate": 1.4933333333333335e-05,
|
|
"loss": 0.438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.427877813577652,
|
|
"step": 225,
|
|
"valid_targets_mean": 5052.8,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 0.26869158878504673,
|
|
"grad_norm": 0.5169476841582885,
|
|
"learning_rate": 1.5266666666666667e-05,
|
|
"loss": 0.454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43776941299438477,
|
|
"step": 230,
|
|
"valid_targets_mean": 4807.5,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 0.27453271028037385,
|
|
"grad_norm": 0.4881122641046318,
|
|
"learning_rate": 1.5600000000000003e-05,
|
|
"loss": 0.4314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4352596700191498,
|
|
"step": 235,
|
|
"valid_targets_mean": 5091.6,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 0.2803738317757009,
|
|
"grad_norm": 0.419488813319491,
|
|
"learning_rate": 1.5933333333333336e-05,
|
|
"loss": 0.4097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37354040145874023,
|
|
"step": 240,
|
|
"valid_targets_mean": 5649.5,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 0.286214953271028,
|
|
"grad_norm": 0.49792088323163647,
|
|
"learning_rate": 1.6266666666666668e-05,
|
|
"loss": 0.4197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4059886038303375,
|
|
"step": 245,
|
|
"valid_targets_mean": 4584.7,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 0.29205607476635514,
|
|
"grad_norm": 0.4617297456044616,
|
|
"learning_rate": 1.66e-05,
|
|
"loss": 0.4131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42720678448677063,
|
|
"step": 250,
|
|
"valid_targets_mean": 5270.1,
|
|
"valid_targets_min": 2567
|
|
},
|
|
{
|
|
"epoch": 0.29789719626168226,
|
|
"grad_norm": 0.4437969255961,
|
|
"learning_rate": 1.6933333333333336e-05,
|
|
"loss": 0.4115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4137004315853119,
|
|
"step": 255,
|
|
"valid_targets_mean": 6041.2,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 0.3037383177570093,
|
|
"grad_norm": 0.440320753308823,
|
|
"learning_rate": 1.726666666666667e-05,
|
|
"loss": 0.4113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3923732042312622,
|
|
"step": 260,
|
|
"valid_targets_mean": 5562.5,
|
|
"valid_targets_min": 2032
|
|
},
|
|
{
|
|
"epoch": 0.30957943925233644,
|
|
"grad_norm": 0.472414623739308,
|
|
"learning_rate": 1.76e-05,
|
|
"loss": 0.4247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.411083459854126,
|
|
"step": 265,
|
|
"valid_targets_mean": 5372.0,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 0.31542056074766356,
|
|
"grad_norm": 0.45972481489774447,
|
|
"learning_rate": 1.7933333333333333e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.420600563287735,
|
|
"step": 270,
|
|
"valid_targets_mean": 5366.1,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 0.3212616822429907,
|
|
"grad_norm": 0.5218641538122593,
|
|
"learning_rate": 1.826666666666667e-05,
|
|
"loss": 0.4147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38025030493736267,
|
|
"step": 275,
|
|
"valid_targets_mean": 4253.5,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 0.32710280373831774,
|
|
"grad_norm": 0.44076699099879724,
|
|
"learning_rate": 1.86e-05,
|
|
"loss": 0.3984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4041338264942169,
|
|
"step": 280,
|
|
"valid_targets_mean": 5513.0,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 0.33294392523364486,
|
|
"grad_norm": 0.4921855549313766,
|
|
"learning_rate": 1.8933333333333334e-05,
|
|
"loss": 0.4185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4079442024230957,
|
|
"step": 285,
|
|
"valid_targets_mean": 5712.2,
|
|
"valid_targets_min": 2578
|
|
},
|
|
{
|
|
"epoch": 0.338785046728972,
|
|
"grad_norm": 0.45082147801722083,
|
|
"learning_rate": 1.926666666666667e-05,
|
|
"loss": 0.3971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4123026430606842,
|
|
"step": 290,
|
|
"valid_targets_mean": 5110.5,
|
|
"valid_targets_min": 1949
|
|
},
|
|
{
|
|
"epoch": 0.3446261682242991,
|
|
"grad_norm": 0.6508543138871955,
|
|
"learning_rate": 1.9600000000000002e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3836798667907715,
|
|
"step": 295,
|
|
"valid_targets_mean": 5572.0,
|
|
"valid_targets_min": 2216
|
|
},
|
|
{
|
|
"epoch": 0.35046728971962615,
|
|
"grad_norm": 0.45731470347866116,
|
|
"learning_rate": 1.9933333333333334e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37751829624176025,
|
|
"step": 300,
|
|
"valid_targets_mean": 5243.2,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 0.35630841121495327,
|
|
"grad_norm": 0.4962106561712862,
|
|
"learning_rate": 2.026666666666667e-05,
|
|
"loss": 0.4137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4140518605709076,
|
|
"step": 305,
|
|
"valid_targets_mean": 5733.5,
|
|
"valid_targets_min": 2635
|
|
},
|
|
{
|
|
"epoch": 0.3621495327102804,
|
|
"grad_norm": 0.46939750611314474,
|
|
"learning_rate": 2.0600000000000003e-05,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39470720291137695,
|
|
"step": 310,
|
|
"valid_targets_mean": 4945.5,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 0.3679906542056075,
|
|
"grad_norm": 0.4719994148189123,
|
|
"learning_rate": 2.0933333333333335e-05,
|
|
"loss": 0.4038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4157972037792206,
|
|
"step": 315,
|
|
"valid_targets_mean": 6161.2,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 0.37383177570093457,
|
|
"grad_norm": 0.4698933394273725,
|
|
"learning_rate": 2.1266666666666667e-05,
|
|
"loss": 0.3843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3597273826599121,
|
|
"step": 320,
|
|
"valid_targets_mean": 5515.3,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 0.3796728971962617,
|
|
"grad_norm": 0.505339838648519,
|
|
"learning_rate": 2.1600000000000003e-05,
|
|
"loss": 0.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40655389428138733,
|
|
"step": 325,
|
|
"valid_targets_mean": 4660.8,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 0.3855140186915888,
|
|
"grad_norm": 0.4402560594360587,
|
|
"learning_rate": 2.1933333333333336e-05,
|
|
"loss": 0.4205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43622326850891113,
|
|
"step": 330,
|
|
"valid_targets_mean": 6193.7,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 0.39135514018691586,
|
|
"grad_norm": 0.49115603742292524,
|
|
"learning_rate": 2.2266666666666668e-05,
|
|
"loss": 0.4102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4196571409702301,
|
|
"step": 335,
|
|
"valid_targets_mean": 5755.1,
|
|
"valid_targets_min": 1942
|
|
},
|
|
{
|
|
"epoch": 0.397196261682243,
|
|
"grad_norm": 0.5157299427837051,
|
|
"learning_rate": 2.26e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4203086793422699,
|
|
"step": 340,
|
|
"valid_targets_mean": 4767.8,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 0.4030373831775701,
|
|
"grad_norm": 0.42514761577110716,
|
|
"learning_rate": 2.2933333333333336e-05,
|
|
"loss": 0.4014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4157892167568207,
|
|
"step": 345,
|
|
"valid_targets_mean": 6157.1,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 0.4088785046728972,
|
|
"grad_norm": 0.5289165318453317,
|
|
"learning_rate": 2.326666666666667e-05,
|
|
"loss": 0.3926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36945974826812744,
|
|
"step": 350,
|
|
"valid_targets_mean": 5827.6,
|
|
"valid_targets_min": 3072
|
|
},
|
|
{
|
|
"epoch": 0.4147196261682243,
|
|
"grad_norm": 0.4575909795271013,
|
|
"learning_rate": 2.36e-05,
|
|
"loss": 0.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4150479733943939,
|
|
"step": 355,
|
|
"valid_targets_mean": 5435.7,
|
|
"valid_targets_min": 2495
|
|
},
|
|
{
|
|
"epoch": 0.4205607476635514,
|
|
"grad_norm": 0.4266880997469769,
|
|
"learning_rate": 2.3933333333333337e-05,
|
|
"loss": 0.3941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4025033414363861,
|
|
"step": 360,
|
|
"valid_targets_mean": 6013.2,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 0.4264018691588785,
|
|
"grad_norm": 0.437208341533262,
|
|
"learning_rate": 2.426666666666667e-05,
|
|
"loss": 0.4023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42723163962364197,
|
|
"step": 365,
|
|
"valid_targets_mean": 6272.8,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 0.4322429906542056,
|
|
"grad_norm": 0.615906701295091,
|
|
"learning_rate": 2.46e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4113134443759918,
|
|
"step": 370,
|
|
"valid_targets_mean": 5354.6,
|
|
"valid_targets_min": 2634
|
|
},
|
|
{
|
|
"epoch": 0.4380841121495327,
|
|
"grad_norm": 0.6355106442387787,
|
|
"learning_rate": 2.4933333333333334e-05,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36405622959136963,
|
|
"step": 375,
|
|
"valid_targets_mean": 5292.0,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 0.4439252336448598,
|
|
"grad_norm": 0.47967728931668935,
|
|
"learning_rate": 2.526666666666667e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38750597834587097,
|
|
"step": 380,
|
|
"valid_targets_mean": 6394.0,
|
|
"valid_targets_min": 3308
|
|
},
|
|
{
|
|
"epoch": 0.4497663551401869,
|
|
"grad_norm": 0.5204849052735185,
|
|
"learning_rate": 2.5600000000000002e-05,
|
|
"loss": 0.3698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37866243720054626,
|
|
"step": 385,
|
|
"valid_targets_mean": 4464.0,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 0.45560747663551404,
|
|
"grad_norm": 0.4615067572338228,
|
|
"learning_rate": 2.5933333333333335e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3938007056713104,
|
|
"step": 390,
|
|
"valid_targets_mean": 6278.8,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 0.4614485981308411,
|
|
"grad_norm": 0.443358509210325,
|
|
"learning_rate": 2.6266666666666667e-05,
|
|
"loss": 0.3867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37548351287841797,
|
|
"step": 395,
|
|
"valid_targets_mean": 5385.5,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 0.4672897196261682,
|
|
"grad_norm": 0.4634412677908311,
|
|
"learning_rate": 2.6600000000000003e-05,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40691423416137695,
|
|
"step": 400,
|
|
"valid_targets_mean": 5990.2,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 0.47313084112149534,
|
|
"grad_norm": 0.4604163802676528,
|
|
"learning_rate": 2.6933333333333335e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44039955735206604,
|
|
"step": 405,
|
|
"valid_targets_mean": 6043.8,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 0.47897196261682246,
|
|
"grad_norm": 0.42379357123667577,
|
|
"learning_rate": 2.7266666666666668e-05,
|
|
"loss": 0.3801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3857991695404053,
|
|
"step": 410,
|
|
"valid_targets_mean": 6434.7,
|
|
"valid_targets_min": 3038
|
|
},
|
|
{
|
|
"epoch": 0.4848130841121495,
|
|
"grad_norm": 0.5049279770906719,
|
|
"learning_rate": 2.76e-05,
|
|
"loss": 0.4123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42528796195983887,
|
|
"step": 415,
|
|
"valid_targets_mean": 5532.4,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 0.49065420560747663,
|
|
"grad_norm": 0.4241511477225977,
|
|
"learning_rate": 2.7933333333333336e-05,
|
|
"loss": 0.3709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3701103627681732,
|
|
"step": 420,
|
|
"valid_targets_mean": 5978.2,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 0.49649532710280375,
|
|
"grad_norm": 0.4703998965135955,
|
|
"learning_rate": 2.8266666666666668e-05,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3813694417476654,
|
|
"step": 425,
|
|
"valid_targets_mean": 5729.2,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 0.5023364485981309,
|
|
"grad_norm": 0.437735213431304,
|
|
"learning_rate": 2.86e-05,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34693992137908936,
|
|
"step": 430,
|
|
"valid_targets_mean": 5772.6,
|
|
"valid_targets_min": 3156
|
|
},
|
|
{
|
|
"epoch": 0.508177570093458,
|
|
"grad_norm": 0.4208949200311329,
|
|
"learning_rate": 2.8933333333333336e-05,
|
|
"loss": 0.3733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37803253531455994,
|
|
"step": 435,
|
|
"valid_targets_mean": 5941.2,
|
|
"valid_targets_min": 2209
|
|
},
|
|
{
|
|
"epoch": 0.514018691588785,
|
|
"grad_norm": 0.47067283379419833,
|
|
"learning_rate": 2.926666666666667e-05,
|
|
"loss": 0.3747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3605988323688507,
|
|
"step": 440,
|
|
"valid_targets_mean": 5724.1,
|
|
"valid_targets_min": 2677
|
|
},
|
|
{
|
|
"epoch": 0.5198598130841121,
|
|
"grad_norm": 0.4637669266760772,
|
|
"learning_rate": 2.96e-05,
|
|
"loss": 0.3788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3778402507305145,
|
|
"step": 445,
|
|
"valid_targets_mean": 5752.5,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 0.5257009345794392,
|
|
"grad_norm": 0.42663802111417554,
|
|
"learning_rate": 2.9933333333333334e-05,
|
|
"loss": 0.3953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.389392614364624,
|
|
"step": 450,
|
|
"valid_targets_mean": 6022.4,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 0.5315420560747663,
|
|
"grad_norm": 0.43579674442119926,
|
|
"learning_rate": 3.026666666666667e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3771781027317047,
|
|
"step": 455,
|
|
"valid_targets_mean": 5570.6,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 0.5373831775700935,
|
|
"grad_norm": 0.44514265404775333,
|
|
"learning_rate": 3.0600000000000005e-05,
|
|
"loss": 0.3696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3682945966720581,
|
|
"step": 460,
|
|
"valid_targets_mean": 5276.5,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 0.5432242990654206,
|
|
"grad_norm": 0.5171730514004608,
|
|
"learning_rate": 3.093333333333334e-05,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3828747272491455,
|
|
"step": 465,
|
|
"valid_targets_mean": 5954.2,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 0.5490654205607477,
|
|
"grad_norm": 0.45098125232504066,
|
|
"learning_rate": 3.126666666666667e-05,
|
|
"loss": 0.3948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4190347194671631,
|
|
"step": 470,
|
|
"valid_targets_mean": 5736.4,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 0.5549065420560748,
|
|
"grad_norm": 0.4126624408249842,
|
|
"learning_rate": 3.16e-05,
|
|
"loss": 0.393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3972722291946411,
|
|
"step": 475,
|
|
"valid_targets_mean": 7033.6,
|
|
"valid_targets_min": 2186
|
|
},
|
|
{
|
|
"epoch": 0.5607476635514018,
|
|
"grad_norm": 0.44192737598451554,
|
|
"learning_rate": 3.1933333333333335e-05,
|
|
"loss": 0.3878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4062243402004242,
|
|
"step": 480,
|
|
"valid_targets_mean": 6263.4,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 0.5665887850467289,
|
|
"grad_norm": 0.5061341526926898,
|
|
"learning_rate": 3.226666666666667e-05,
|
|
"loss": 0.3997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39719972014427185,
|
|
"step": 485,
|
|
"valid_targets_mean": 5056.1,
|
|
"valid_targets_min": 2095
|
|
},
|
|
{
|
|
"epoch": 0.572429906542056,
|
|
"grad_norm": 0.4675815864486061,
|
|
"learning_rate": 3.26e-05,
|
|
"loss": 0.3723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.406937837600708,
|
|
"step": 490,
|
|
"valid_targets_mean": 5896.6,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 0.5782710280373832,
|
|
"grad_norm": 0.531334167454602,
|
|
"learning_rate": 3.293333333333334e-05,
|
|
"loss": 0.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37766194343566895,
|
|
"step": 495,
|
|
"valid_targets_mean": 4633.9,
|
|
"valid_targets_min": 2506
|
|
},
|
|
{
|
|
"epoch": 0.5841121495327103,
|
|
"grad_norm": 0.4727253995272934,
|
|
"learning_rate": 3.326666666666667e-05,
|
|
"loss": 0.3839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3596220314502716,
|
|
"step": 500,
|
|
"valid_targets_mean": 5422.0,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 0.5899532710280374,
|
|
"grad_norm": 0.44957758073508897,
|
|
"learning_rate": 3.3600000000000004e-05,
|
|
"loss": 0.3863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36213621497154236,
|
|
"step": 505,
|
|
"valid_targets_mean": 5899.2,
|
|
"valid_targets_min": 2624
|
|
},
|
|
{
|
|
"epoch": 0.5957943925233645,
|
|
"grad_norm": 0.496723848524176,
|
|
"learning_rate": 3.3933333333333336e-05,
|
|
"loss": 0.3595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3795908987522125,
|
|
"step": 510,
|
|
"valid_targets_mean": 5120.7,
|
|
"valid_targets_min": 2442
|
|
},
|
|
{
|
|
"epoch": 0.6016355140186916,
|
|
"grad_norm": 0.5674767067292562,
|
|
"learning_rate": 3.426666666666667e-05,
|
|
"loss": 0.3602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3741105794906616,
|
|
"step": 515,
|
|
"valid_targets_mean": 5864.2,
|
|
"valid_targets_min": 2715
|
|
},
|
|
{
|
|
"epoch": 0.6074766355140186,
|
|
"grad_norm": 0.4346552418714086,
|
|
"learning_rate": 3.46e-05,
|
|
"loss": 0.3769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3566761314868927,
|
|
"step": 520,
|
|
"valid_targets_mean": 6245.4,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 0.6133177570093458,
|
|
"grad_norm": 0.4935855816788154,
|
|
"learning_rate": 3.493333333333333e-05,
|
|
"loss": 0.3637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37215232849121094,
|
|
"step": 525,
|
|
"valid_targets_mean": 5310.7,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 0.6191588785046729,
|
|
"grad_norm": 0.45682650223297044,
|
|
"learning_rate": 3.526666666666667e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3885539472103119,
|
|
"step": 530,
|
|
"valid_targets_mean": 5806.2,
|
|
"valid_targets_min": 2943
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 0.435643239198327,
|
|
"learning_rate": 3.5600000000000005e-05,
|
|
"loss": 0.376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38258394598960876,
|
|
"step": 535,
|
|
"valid_targets_mean": 6055.8,
|
|
"valid_targets_min": 2305
|
|
},
|
|
{
|
|
"epoch": 0.6308411214953271,
|
|
"grad_norm": 0.4901904130627175,
|
|
"learning_rate": 3.593333333333334e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3959120810031891,
|
|
"step": 540,
|
|
"valid_targets_mean": 4983.1,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 0.6366822429906542,
|
|
"grad_norm": 0.4724730271917941,
|
|
"learning_rate": 3.626666666666667e-05,
|
|
"loss": 0.3676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35149022936820984,
|
|
"step": 545,
|
|
"valid_targets_mean": 5612.1,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 0.6425233644859814,
|
|
"grad_norm": 0.44606059252473446,
|
|
"learning_rate": 3.66e-05,
|
|
"loss": 0.3647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37455543875694275,
|
|
"step": 550,
|
|
"valid_targets_mean": 5307.3,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 0.6483644859813084,
|
|
"grad_norm": 0.4313134269713389,
|
|
"learning_rate": 3.6933333333333334e-05,
|
|
"loss": 0.3849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.395962119102478,
|
|
"step": 555,
|
|
"valid_targets_mean": 6806.0,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 0.6542056074766355,
|
|
"grad_norm": 0.4390269484725133,
|
|
"learning_rate": 3.726666666666667e-05,
|
|
"loss": 0.3821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37423574924468994,
|
|
"step": 560,
|
|
"valid_targets_mean": 6060.1,
|
|
"valid_targets_min": 2600
|
|
},
|
|
{
|
|
"epoch": 0.6600467289719626,
|
|
"grad_norm": 0.4382071107103676,
|
|
"learning_rate": 3.76e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35634568333625793,
|
|
"step": 565,
|
|
"valid_targets_mean": 5332.8,
|
|
"valid_targets_min": 1813
|
|
},
|
|
{
|
|
"epoch": 0.6658878504672897,
|
|
"grad_norm": 0.47470986751378286,
|
|
"learning_rate": 3.793333333333334e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3669619858264923,
|
|
"step": 570,
|
|
"valid_targets_mean": 5245.0,
|
|
"valid_targets_min": 2192
|
|
},
|
|
{
|
|
"epoch": 0.6717289719626168,
|
|
"grad_norm": 0.46747702146405734,
|
|
"learning_rate": 3.826666666666667e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34138020873069763,
|
|
"step": 575,
|
|
"valid_targets_mean": 5011.5,
|
|
"valid_targets_min": 2415
|
|
},
|
|
{
|
|
"epoch": 0.677570093457944,
|
|
"grad_norm": 0.4854178105463103,
|
|
"learning_rate": 3.86e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3592003285884857,
|
|
"step": 580,
|
|
"valid_targets_mean": 4981.9,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 0.6834112149532711,
|
|
"grad_norm": 0.48241009858113726,
|
|
"learning_rate": 3.8933333333333336e-05,
|
|
"loss": 0.3582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3381718695163727,
|
|
"step": 585,
|
|
"valid_targets_mean": 4464.1,
|
|
"valid_targets_min": 2466
|
|
},
|
|
{
|
|
"epoch": 0.6892523364485982,
|
|
"grad_norm": 0.4986120264287615,
|
|
"learning_rate": 3.926666666666667e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3984701335430145,
|
|
"step": 590,
|
|
"valid_targets_mean": 5060.2,
|
|
"valid_targets_min": 2386
|
|
},
|
|
{
|
|
"epoch": 0.6950934579439252,
|
|
"grad_norm": 0.48547367803552127,
|
|
"learning_rate": 3.96e-05,
|
|
"loss": 0.3539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3148202896118164,
|
|
"step": 595,
|
|
"valid_targets_mean": 5347.3,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 0.7009345794392523,
|
|
"grad_norm": 0.46807773263512037,
|
|
"learning_rate": 3.993333333333333e-05,
|
|
"loss": 0.3754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3767105042934418,
|
|
"step": 600,
|
|
"valid_targets_mean": 5245.2,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 0.7067757009345794,
|
|
"grad_norm": 0.5021912728899418,
|
|
"learning_rate": 3.9999945685002435e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3660831153392792,
|
|
"step": 605,
|
|
"valid_targets_mean": 6047.2,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 0.7126168224299065,
|
|
"grad_norm": 0.5692924271522257,
|
|
"learning_rate": 3.999972503083043e-05,
|
|
"loss": 0.3734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3663847744464874,
|
|
"step": 610,
|
|
"valid_targets_mean": 5881.4,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 0.7184579439252337,
|
|
"grad_norm": 0.4689394287634872,
|
|
"learning_rate": 3.999933464466783e-05,
|
|
"loss": 0.372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3595750033855438,
|
|
"step": 615,
|
|
"valid_targets_mean": 5473.2,
|
|
"valid_targets_min": 3125
|
|
},
|
|
{
|
|
"epoch": 0.7242990654205608,
|
|
"grad_norm": 0.43245767449347183,
|
|
"learning_rate": 3.999877452982773e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38104042410850525,
|
|
"step": 620,
|
|
"valid_targets_mean": 5845.6,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 0.7301401869158879,
|
|
"grad_norm": 0.4180479639375539,
|
|
"learning_rate": 3.999804469106367e-05,
|
|
"loss": 0.3568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3673703372478485,
|
|
"step": 625,
|
|
"valid_targets_mean": 5684.4,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 0.735981308411215,
|
|
"grad_norm": 0.4977280659571296,
|
|
"learning_rate": 3.9997145134569575e-05,
|
|
"loss": 0.3778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3884657323360443,
|
|
"step": 630,
|
|
"valid_targets_mean": 4676.9,
|
|
"valid_targets_min": 2409
|
|
},
|
|
{
|
|
"epoch": 0.741822429906542,
|
|
"grad_norm": 0.49887945106160153,
|
|
"learning_rate": 3.999607586797973e-05,
|
|
"loss": 0.3699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3521784842014313,
|
|
"step": 635,
|
|
"valid_targets_mean": 5383.1,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 0.7476635514018691,
|
|
"grad_norm": 0.4362849890126108,
|
|
"learning_rate": 3.9994836900368705e-05,
|
|
"loss": 0.3721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3162080943584442,
|
|
"step": 640,
|
|
"valid_targets_mean": 5514.6,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 0.7535046728971962,
|
|
"grad_norm": 0.42846974450432385,
|
|
"learning_rate": 3.999342824225125e-05,
|
|
"loss": 0.3582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3437802791595459,
|
|
"step": 645,
|
|
"valid_targets_mean": 5174.5,
|
|
"valid_targets_min": 1903
|
|
},
|
|
{
|
|
"epoch": 0.7593457943925234,
|
|
"grad_norm": 0.4702110814055239,
|
|
"learning_rate": 3.999184990558226e-05,
|
|
"loss": 0.3681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3680754601955414,
|
|
"step": 650,
|
|
"valid_targets_mean": 5935.9,
|
|
"valid_targets_min": 2637
|
|
},
|
|
{
|
|
"epoch": 0.7651869158878505,
|
|
"grad_norm": 0.4521594661962572,
|
|
"learning_rate": 3.9990101903756634e-05,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4023589789867401,
|
|
"step": 655,
|
|
"valid_targets_mean": 6019.6,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 0.7710280373831776,
|
|
"grad_norm": 0.4319123075241656,
|
|
"learning_rate": 3.998818425160916e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3458693325519562,
|
|
"step": 660,
|
|
"valid_targets_mean": 5474.2,
|
|
"valid_targets_min": 2199
|
|
},
|
|
{
|
|
"epoch": 0.7768691588785047,
|
|
"grad_norm": 0.4570743088084169,
|
|
"learning_rate": 3.99860969654144e-05,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34891095757484436,
|
|
"step": 665,
|
|
"valid_targets_mean": 5822.9,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 0.7827102803738317,
|
|
"grad_norm": 0.41615925992202135,
|
|
"learning_rate": 3.9983840062886576e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3347201347351074,
|
|
"step": 670,
|
|
"valid_targets_mean": 5870.9,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 0.7885514018691588,
|
|
"grad_norm": 0.41851022278633343,
|
|
"learning_rate": 3.9981413563179364e-05,
|
|
"loss": 0.3581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35278037190437317,
|
|
"step": 675,
|
|
"valid_targets_mean": 7073.1,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 0.794392523364486,
|
|
"grad_norm": 0.4238054218494749,
|
|
"learning_rate": 3.9978817486885784e-05,
|
|
"loss": 0.3536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3774296045303345,
|
|
"step": 680,
|
|
"valid_targets_mean": 6483.1,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 0.8002336448598131,
|
|
"grad_norm": 0.44252675687651283,
|
|
"learning_rate": 3.9976051856038e-05,
|
|
"loss": 0.3637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.360280841588974,
|
|
"step": 685,
|
|
"valid_targets_mean": 6003.1,
|
|
"valid_targets_min": 3349
|
|
},
|
|
{
|
|
"epoch": 0.8060747663551402,
|
|
"grad_norm": 0.41712681748312275,
|
|
"learning_rate": 3.997311669410715e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38688576221466064,
|
|
"step": 690,
|
|
"valid_targets_mean": 6179.1,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 0.8119158878504673,
|
|
"grad_norm": 0.4514384757877581,
|
|
"learning_rate": 3.997001202600309e-05,
|
|
"loss": 0.3682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3424813747406006,
|
|
"step": 695,
|
|
"valid_targets_mean": 5091.2,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 0.8177570093457944,
|
|
"grad_norm": 0.44609562496934063,
|
|
"learning_rate": 3.996673787807428e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38202551007270813,
|
|
"step": 700,
|
|
"valid_targets_mean": 6012.3,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 0.8235981308411215,
|
|
"grad_norm": 0.44219751318797484,
|
|
"learning_rate": 3.9963294278107474e-05,
|
|
"loss": 0.335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3476925790309906,
|
|
"step": 705,
|
|
"valid_targets_mean": 5624.2,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 0.8294392523364486,
|
|
"grad_norm": 0.47262514198489997,
|
|
"learning_rate": 3.995968125532753e-05,
|
|
"loss": 0.3638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35253891348838806,
|
|
"step": 710,
|
|
"valid_targets_mean": 5460.7,
|
|
"valid_targets_min": 2146
|
|
},
|
|
{
|
|
"epoch": 0.8352803738317757,
|
|
"grad_norm": 0.462847079058649,
|
|
"learning_rate": 3.995589884039715e-05,
|
|
"loss": 0.3777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38792383670806885,
|
|
"step": 715,
|
|
"valid_targets_mean": 5280.8,
|
|
"valid_targets_min": 2326
|
|
},
|
|
{
|
|
"epoch": 0.8411214953271028,
|
|
"grad_norm": 0.473411569616632,
|
|
"learning_rate": 3.995194706541662e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35165858268737793,
|
|
"step": 720,
|
|
"valid_targets_mean": 4774.9,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 0.8469626168224299,
|
|
"grad_norm": 0.4459796393513576,
|
|
"learning_rate": 3.994782596392353e-05,
|
|
"loss": 0.3581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34982171654701233,
|
|
"step": 725,
|
|
"valid_targets_mean": 5147.5,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 0.852803738317757,
|
|
"grad_norm": 0.4867104061360226,
|
|
"learning_rate": 3.9943535570892485e-05,
|
|
"loss": 0.3622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41728559136390686,
|
|
"step": 730,
|
|
"valid_targets_mean": 5970.3,
|
|
"valid_targets_min": 2645
|
|
},
|
|
{
|
|
"epoch": 0.8586448598130841,
|
|
"grad_norm": 0.43328003362899264,
|
|
"learning_rate": 3.993907592273485e-05,
|
|
"loss": 0.3672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3488587439060211,
|
|
"step": 735,
|
|
"valid_targets_mean": 5464.7,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 0.8644859813084113,
|
|
"grad_norm": 0.4223430523338479,
|
|
"learning_rate": 3.993444705729839e-05,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37746739387512207,
|
|
"step": 740,
|
|
"valid_targets_mean": 6541.0,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 0.8703271028037384,
|
|
"grad_norm": 0.45007232041066547,
|
|
"learning_rate": 3.992964901386696e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34460124373435974,
|
|
"step": 745,
|
|
"valid_targets_mean": 5169.9,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 0.8761682242990654,
|
|
"grad_norm": 0.4331622138939006,
|
|
"learning_rate": 3.992468183316021e-05,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38862431049346924,
|
|
"step": 750,
|
|
"valid_targets_mean": 5707.2,
|
|
"valid_targets_min": 2531
|
|
},
|
|
{
|
|
"epoch": 0.8820093457943925,
|
|
"grad_norm": 0.45526668365572714,
|
|
"learning_rate": 3.9919545557333184e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37489041686058044,
|
|
"step": 755,
|
|
"valid_targets_mean": 5518.5,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 0.8878504672897196,
|
|
"grad_norm": 0.4885254875068743,
|
|
"learning_rate": 3.9914240229976e-05,
|
|
"loss": 0.3477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212669789791107,
|
|
"step": 760,
|
|
"valid_targets_mean": 5645.2,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 0.8936915887850467,
|
|
"grad_norm": 0.46606428766077684,
|
|
"learning_rate": 3.990876589611347e-05,
|
|
"loss": 0.369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3618215322494507,
|
|
"step": 765,
|
|
"valid_targets_mean": 5931.8,
|
|
"valid_targets_min": 2240
|
|
},
|
|
{
|
|
"epoch": 0.8995327102803738,
|
|
"grad_norm": 0.4422457408871216,
|
|
"learning_rate": 3.9903122602204695e-05,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3736278712749481,
|
|
"step": 770,
|
|
"valid_targets_mean": 5395.3,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 0.905373831775701,
|
|
"grad_norm": 0.43955551702533535,
|
|
"learning_rate": 3.989731039614273e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32695841789245605,
|
|
"step": 775,
|
|
"valid_targets_mean": 5155.9,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 0.9112149532710281,
|
|
"grad_norm": 0.5033650659639364,
|
|
"learning_rate": 3.989132932725411e-05,
|
|
"loss": 0.3648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38795962929725647,
|
|
"step": 780,
|
|
"valid_targets_mean": 5036.4,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 0.9170560747663551,
|
|
"grad_norm": 0.5157773161189055,
|
|
"learning_rate": 3.9885179446298466e-05,
|
|
"loss": 0.3627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36367368698120117,
|
|
"step": 785,
|
|
"valid_targets_mean": 5329.5,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 0.9228971962616822,
|
|
"grad_norm": 0.4178824154238193,
|
|
"learning_rate": 3.987886080546809e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3362779915332794,
|
|
"step": 790,
|
|
"valid_targets_mean": 6408.0,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 0.9287383177570093,
|
|
"grad_norm": 0.39134628835581486,
|
|
"learning_rate": 3.9872373458387506e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3205227553844452,
|
|
"step": 795,
|
|
"valid_targets_mean": 6081.7,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 0.9345794392523364,
|
|
"grad_norm": 0.3662841426437191,
|
|
"learning_rate": 3.986571746011297e-05,
|
|
"loss": 0.364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35747018456459045,
|
|
"step": 800,
|
|
"valid_targets_mean": 6639.8,
|
|
"valid_targets_min": 2786
|
|
},
|
|
{
|
|
"epoch": 0.9404205607476636,
|
|
"grad_norm": 0.46226199278873664,
|
|
"learning_rate": 3.985889286713207e-05,
|
|
"loss": 0.3669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34330034255981445,
|
|
"step": 805,
|
|
"valid_targets_mean": 5161.7,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 0.9462616822429907,
|
|
"grad_norm": 0.47573925074759443,
|
|
"learning_rate": 3.9851899737363174e-05,
|
|
"loss": 0.366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37278613448143005,
|
|
"step": 810,
|
|
"valid_targets_mean": 4783.4,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 0.9521028037383178,
|
|
"grad_norm": 0.4280929966405482,
|
|
"learning_rate": 3.984473813015499e-05,
|
|
"loss": 0.3446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3326973021030426,
|
|
"step": 815,
|
|
"valid_targets_mean": 5807.5,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 0.9579439252336449,
|
|
"grad_norm": 0.45567980773375083,
|
|
"learning_rate": 3.983740810628606e-05,
|
|
"loss": 0.3523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37095797061920166,
|
|
"step": 820,
|
|
"valid_targets_mean": 5596.7,
|
|
"valid_targets_min": 2605
|
|
},
|
|
{
|
|
"epoch": 0.9637850467289719,
|
|
"grad_norm": 0.4347558891576217,
|
|
"learning_rate": 3.982990972796421e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3300916254520416,
|
|
"step": 825,
|
|
"valid_targets_mean": 4881.4,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 0.969626168224299,
|
|
"grad_norm": 0.4802422108098755,
|
|
"learning_rate": 3.9822243058826056e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3925137519836426,
|
|
"step": 830,
|
|
"valid_targets_mean": 5174.1,
|
|
"valid_targets_min": 1782
|
|
},
|
|
{
|
|
"epoch": 0.9754672897196262,
|
|
"grad_norm": 0.3999996288542558,
|
|
"learning_rate": 3.981440816393642e-05,
|
|
"loss": 0.3502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3279096186161041,
|
|
"step": 835,
|
|
"valid_targets_mean": 6507.6,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 0.9813084112149533,
|
|
"grad_norm": 0.41682740870688123,
|
|
"learning_rate": 3.980640510978785e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3432118892669678,
|
|
"step": 840,
|
|
"valid_targets_mean": 6237.1,
|
|
"valid_targets_min": 2239
|
|
},
|
|
{
|
|
"epoch": 0.9871495327102804,
|
|
"grad_norm": 0.41768051728250977,
|
|
"learning_rate": 3.979823396429999e-05,
|
|
"loss": 0.3555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3321657180786133,
|
|
"step": 845,
|
|
"valid_targets_mean": 5371.2,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 0.9929906542056075,
|
|
"grad_norm": 0.4183373153195408,
|
|
"learning_rate": 3.978989479681903e-05,
|
|
"loss": 0.3614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3531838357448578,
|
|
"step": 850,
|
|
"valid_targets_mean": 6124.1,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 0.9988317757009346,
|
|
"grad_norm": 0.3944407617659571,
|
|
"learning_rate": 3.978138767811713e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3871220350265503,
|
|
"step": 855,
|
|
"valid_targets_mean": 6825.3,
|
|
"valid_targets_min": 2361
|
|
},
|
|
{
|
|
"epoch": 1.0046728971962617,
|
|
"grad_norm": 0.43318721800034454,
|
|
"learning_rate": 3.977271268039178e-05,
|
|
"loss": 0.3339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3945259153842926,
|
|
"step": 860,
|
|
"valid_targets_mean": 6038.5,
|
|
"valid_targets_min": 2148
|
|
},
|
|
{
|
|
"epoch": 1.0105140186915889,
|
|
"grad_norm": 0.45179738877335657,
|
|
"learning_rate": 3.976386987726523e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3208051025867462,
|
|
"step": 865,
|
|
"valid_targets_mean": 5412.8,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 1.016355140186916,
|
|
"grad_norm": 0.4427817555560465,
|
|
"learning_rate": 3.9754859343783835e-05,
|
|
"loss": 0.3506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3336370885372162,
|
|
"step": 870,
|
|
"valid_targets_mean": 5629.0,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 1.022196261682243,
|
|
"grad_norm": 0.4166265596060764,
|
|
"learning_rate": 3.9745681156417444e-05,
|
|
"loss": 0.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3580349385738373,
|
|
"step": 875,
|
|
"valid_targets_mean": 5894.8,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 1.02803738317757,
|
|
"grad_norm": 0.4259089792306084,
|
|
"learning_rate": 3.973633539305872e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3879115879535675,
|
|
"step": 880,
|
|
"valid_targets_mean": 5701.1,
|
|
"valid_targets_min": 2599
|
|
},
|
|
{
|
|
"epoch": 1.033878504672897,
|
|
"grad_norm": 0.4121017364286342,
|
|
"learning_rate": 3.9726822133022514e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3073241412639618,
|
|
"step": 885,
|
|
"valid_targets_mean": 5880.9,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 1.0397196261682242,
|
|
"grad_norm": 1.1184637745895272,
|
|
"learning_rate": 3.9717141457045146e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3247789740562439,
|
|
"step": 890,
|
|
"valid_targets_mean": 4878.8,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 1.0455607476635513,
|
|
"grad_norm": 0.4283376469069668,
|
|
"learning_rate": 3.970729344728379e-05,
|
|
"loss": 0.357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36405470967292786,
|
|
"step": 895,
|
|
"valid_targets_mean": 5853.6,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 1.0514018691588785,
|
|
"grad_norm": 0.4154073838163685,
|
|
"learning_rate": 3.969727818731569e-05,
|
|
"loss": 0.3402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32197728753089905,
|
|
"step": 900,
|
|
"valid_targets_mean": 5883.3,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 1.0572429906542056,
|
|
"grad_norm": 0.4543114379650681,
|
|
"learning_rate": 3.9687095762137526e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3784042298793793,
|
|
"step": 905,
|
|
"valid_targets_mean": 6530.2,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 1.0630841121495327,
|
|
"grad_norm": 0.40752545189584716,
|
|
"learning_rate": 3.967674625816465e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.331394225358963,
|
|
"step": 910,
|
|
"valid_targets_mean": 5818.6,
|
|
"valid_targets_min": 2573
|
|
},
|
|
{
|
|
"epoch": 1.0689252336448598,
|
|
"grad_norm": 0.4173414213491559,
|
|
"learning_rate": 3.966622976323036e-05,
|
|
"loss": 0.3541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3369159996509552,
|
|
"step": 915,
|
|
"valid_targets_mean": 6116.6,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 1.074766355140187,
|
|
"grad_norm": 0.39240171778118543,
|
|
"learning_rate": 3.9655546366585166e-05,
|
|
"loss": 0.3518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32544830441474915,
|
|
"step": 920,
|
|
"valid_targets_mean": 6105.3,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 1.080607476635514,
|
|
"grad_norm": 0.417915530289771,
|
|
"learning_rate": 3.964469615889603e-05,
|
|
"loss": 0.3521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3416964113712311,
|
|
"step": 925,
|
|
"valid_targets_mean": 5165.5,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 1.0864485981308412,
|
|
"grad_norm": 0.46601591071506104,
|
|
"learning_rate": 3.9633679232245574e-05,
|
|
"loss": 0.3534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4093070328235626,
|
|
"step": 930,
|
|
"valid_targets_mean": 5373.2,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 1.0922897196261683,
|
|
"grad_norm": 0.39303614247452445,
|
|
"learning_rate": 3.9622495680131334e-05,
|
|
"loss": 0.3368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31560638546943665,
|
|
"step": 935,
|
|
"valid_targets_mean": 6157.5,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 1.0981308411214954,
|
|
"grad_norm": 0.4135096863747279,
|
|
"learning_rate": 3.961114559746495e-05,
|
|
"loss": 0.3632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35395488142967224,
|
|
"step": 940,
|
|
"valid_targets_mean": 5978.0,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 1.1039719626168225,
|
|
"grad_norm": 0.4539763921457172,
|
|
"learning_rate": 3.959962908057134e-05,
|
|
"loss": 0.3325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34238043427467346,
|
|
"step": 945,
|
|
"valid_targets_mean": 5751.5,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 1.1098130841121496,
|
|
"grad_norm": 0.3986047070154931,
|
|
"learning_rate": 3.958794622718792e-05,
|
|
"loss": 0.3522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.357377290725708,
|
|
"step": 950,
|
|
"valid_targets_mean": 6700.1,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 1.1156542056074765,
|
|
"grad_norm": 0.43530563925092924,
|
|
"learning_rate": 3.957609713646375e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29740583896636963,
|
|
"step": 955,
|
|
"valid_targets_mean": 4419.7,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 1.1214953271028036,
|
|
"grad_norm": 0.4062699969228372,
|
|
"learning_rate": 3.95640819089587e-05,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34295594692230225,
|
|
"step": 960,
|
|
"valid_targets_mean": 5946.9,
|
|
"valid_targets_min": 1600
|
|
},
|
|
{
|
|
"epoch": 1.1273364485981308,
|
|
"grad_norm": 0.3881592276156987,
|
|
"learning_rate": 3.95519006466426e-05,
|
|
"loss": 0.3471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3159753978252411,
|
|
"step": 965,
|
|
"valid_targets_mean": 5441.6,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 1.1331775700934579,
|
|
"grad_norm": 0.4036290025349727,
|
|
"learning_rate": 3.953955345289436e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3714885413646698,
|
|
"step": 970,
|
|
"valid_targets_mean": 6210.5,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 1.139018691588785,
|
|
"grad_norm": 0.5451837505683038,
|
|
"learning_rate": 3.952704043250112e-05,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3832462728023529,
|
|
"step": 975,
|
|
"valid_targets_mean": 5557.0,
|
|
"valid_targets_min": 2665
|
|
},
|
|
{
|
|
"epoch": 1.144859813084112,
|
|
"grad_norm": 0.3977874578512242,
|
|
"learning_rate": 3.9514361691657325e-05,
|
|
"loss": 0.3435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38316288590431213,
|
|
"step": 980,
|
|
"valid_targets_mean": 6318.5,
|
|
"valid_targets_min": 2338
|
|
},
|
|
{
|
|
"epoch": 1.1507009345794392,
|
|
"grad_norm": 0.42321049830234164,
|
|
"learning_rate": 3.950151733796385e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34744253754615784,
|
|
"step": 985,
|
|
"valid_targets_mean": 5776.9,
|
|
"valid_targets_min": 2560
|
|
},
|
|
{
|
|
"epoch": 1.1565420560747663,
|
|
"grad_norm": 0.7141144352887067,
|
|
"learning_rate": 3.9488507480427085e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3438970148563385,
|
|
"step": 990,
|
|
"valid_targets_mean": 5510.0,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 1.1623831775700935,
|
|
"grad_norm": 0.4856666409377243,
|
|
"learning_rate": 3.947533222945799e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3783750534057617,
|
|
"step": 995,
|
|
"valid_targets_mean": 5578.7,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 1.1682242990654206,
|
|
"grad_norm": 0.4109205829169455,
|
|
"learning_rate": 3.946199169687119e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31443047523498535,
|
|
"step": 1000,
|
|
"valid_targets_mean": 5393.5,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 1.1740654205607477,
|
|
"grad_norm": 0.36499637439117044,
|
|
"learning_rate": 3.944848599588399e-05,
|
|
"loss": 0.3575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3235953450202942,
|
|
"step": 1005,
|
|
"valid_targets_mean": 6528.0,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 1.1799065420560748,
|
|
"grad_norm": 0.41345920676784553,
|
|
"learning_rate": 3.943481524111545e-05,
|
|
"loss": 0.3343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32702577114105225,
|
|
"step": 1010,
|
|
"valid_targets_mean": 5629.5,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 1.185747663551402,
|
|
"grad_norm": 0.4135676986998468,
|
|
"learning_rate": 3.9420979548585384e-05,
|
|
"loss": 0.3561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3393076956272125,
|
|
"step": 1015,
|
|
"valid_targets_mean": 5811.6,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 1.191588785046729,
|
|
"grad_norm": 0.41385211730665744,
|
|
"learning_rate": 3.940697903571338e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3284074366092682,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5786.8,
|
|
"valid_targets_min": 2715
|
|
},
|
|
{
|
|
"epoch": 1.1974299065420562,
|
|
"grad_norm": 0.42570071236513835,
|
|
"learning_rate": 3.9392813821317836e-05,
|
|
"loss": 0.3397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34380602836608887,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5812.1,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 1.203271028037383,
|
|
"grad_norm": 0.39114683017913454,
|
|
"learning_rate": 3.9378484025614885e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34245774149894714,
|
|
"step": 1030,
|
|
"valid_targets_mean": 6201.0,
|
|
"valid_targets_min": 1924
|
|
},
|
|
{
|
|
"epoch": 1.2091121495327104,
|
|
"grad_norm": 0.43111338487891676,
|
|
"learning_rate": 3.9363989770217455e-05,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35740748047828674,
|
|
"step": 1035,
|
|
"valid_targets_mean": 5858.0,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 1.2149532710280373,
|
|
"grad_norm": 0.42325713425362455,
|
|
"learning_rate": 3.9349331178134164e-05,
|
|
"loss": 0.3464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34140506386756897,
|
|
"step": 1040,
|
|
"valid_targets_mean": 5434.8,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 1.2207943925233644,
|
|
"grad_norm": 0.458932141383282,
|
|
"learning_rate": 3.933450837376833e-05,
|
|
"loss": 0.359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3603627383708954,
|
|
"step": 1045,
|
|
"valid_targets_mean": 5991.7,
|
|
"valid_targets_min": 2899
|
|
},
|
|
{
|
|
"epoch": 1.2266355140186915,
|
|
"grad_norm": 0.4302446948037724,
|
|
"learning_rate": 3.931952148291689e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3133419454097748,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5302.3,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 1.2324766355140186,
|
|
"grad_norm": 0.6697093567150358,
|
|
"learning_rate": 3.930437063276932e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4095683991909027,
|
|
"step": 1055,
|
|
"valid_targets_mean": 7185.9,
|
|
"valid_targets_min": 2335
|
|
},
|
|
{
|
|
"epoch": 1.2383177570093458,
|
|
"grad_norm": 0.44197206844922454,
|
|
"learning_rate": 3.9289055951906604e-05,
|
|
"loss": 0.3397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30018314719200134,
|
|
"step": 1060,
|
|
"valid_targets_mean": 5560.8,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 1.2441588785046729,
|
|
"grad_norm": 0.37987816345936104,
|
|
"learning_rate": 3.927357757030007e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782149016857147,
|
|
"step": 1065,
|
|
"valid_targets_mean": 5933.6,
|
|
"valid_targets_min": 2644
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 0.41402417923634094,
|
|
"learning_rate": 3.9257935619310374e-05,
|
|
"loss": 0.3586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35787203907966614,
|
|
"step": 1070,
|
|
"valid_targets_mean": 6055.2,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 1.2558411214953271,
|
|
"grad_norm": 0.44515296313142055,
|
|
"learning_rate": 3.924213023168631e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33823737502098083,
|
|
"step": 1075,
|
|
"valid_targets_mean": 5350.6,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 1.2616822429906542,
|
|
"grad_norm": 0.4249823212769211,
|
|
"learning_rate": 3.92261615415637e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3187110126018524,
|
|
"step": 1080,
|
|
"valid_targets_mean": 5280.0,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 1.2675233644859814,
|
|
"grad_norm": 0.3980449617054414,
|
|
"learning_rate": 3.92100296844643e-05,
|
|
"loss": 0.3639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38308826088905334,
|
|
"step": 1085,
|
|
"valid_targets_mean": 6373.6,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 1.2733644859813085,
|
|
"grad_norm": 0.47911804233382815,
|
|
"learning_rate": 3.91937347972946e-05,
|
|
"loss": 0.3451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3377626836299896,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4969.4,
|
|
"valid_targets_min": 2797
|
|
},
|
|
{
|
|
"epoch": 1.2792056074766356,
|
|
"grad_norm": 0.414798447539777,
|
|
"learning_rate": 3.9177277018344686e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31901124119758606,
|
|
"step": 1095,
|
|
"valid_targets_mean": 5805.6,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 1.2850467289719627,
|
|
"grad_norm": 0.41060755500988166,
|
|
"learning_rate": 3.9160656487287056e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3396945893764496,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5726.7,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 1.2908878504672896,
|
|
"grad_norm": 0.4404517558662993,
|
|
"learning_rate": 3.9143873345175436e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31772372126579285,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4700.1,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 1.296728971962617,
|
|
"grad_norm": 0.40851895378229214,
|
|
"learning_rate": 3.912692773444359e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33944520354270935,
|
|
"step": 1110,
|
|
"valid_targets_mean": 5771.9,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 1.3025700934579438,
|
|
"grad_norm": 0.44041481325111587,
|
|
"learning_rate": 3.910981979890411e-05,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35632196068763733,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5605.3,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 1.308411214953271,
|
|
"grad_norm": 0.4970008342969356,
|
|
"learning_rate": 3.9092549683747194e-05,
|
|
"loss": 0.3429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3759516179561615,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4941.7,
|
|
"valid_targets_min": 2316
|
|
},
|
|
{
|
|
"epoch": 1.314252336448598,
|
|
"grad_norm": 0.4077410369413226,
|
|
"learning_rate": 3.90751175355394e-05,
|
|
"loss": 0.3339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3058233857154846,
|
|
"step": 1125,
|
|
"valid_targets_mean": 5731.2,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 1.3200934579439252,
|
|
"grad_norm": 0.4951777160291066,
|
|
"learning_rate": 3.905752350222243e-05,
|
|
"loss": 0.3465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3920963704586029,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4822.8,
|
|
"valid_targets_min": 2288
|
|
},
|
|
{
|
|
"epoch": 1.3259345794392523,
|
|
"grad_norm": 0.38889969779841976,
|
|
"learning_rate": 3.9039767733111856e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32297757267951965,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5938.7,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 1.3317757009345794,
|
|
"grad_norm": 0.480400984468517,
|
|
"learning_rate": 3.902185037889583e-05,
|
|
"loss": 0.3452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36527159810066223,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5351.2,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 1.3376168224299065,
|
|
"grad_norm": 0.8326003527016578,
|
|
"learning_rate": 3.9003771591633874e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808113098144531,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4789.8,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 1.3434579439252337,
|
|
"grad_norm": 0.4867976708451017,
|
|
"learning_rate": 3.8985531524755525e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35758742690086365,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5700.8,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 1.3492990654205608,
|
|
"grad_norm": 0.4580533267169793,
|
|
"learning_rate": 3.896713033305903e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31554511189460754,
|
|
"step": 1155,
|
|
"valid_targets_mean": 5196.8,
|
|
"valid_targets_min": 2416
|
|
},
|
|
{
|
|
"epoch": 1.355140186915888,
|
|
"grad_norm": 0.42742104842784595,
|
|
"learning_rate": 3.8948568172710114e-05,
|
|
"loss": 0.3377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3358675539493561,
|
|
"step": 1160,
|
|
"valid_targets_mean": 5107.1,
|
|
"valid_targets_min": 2401
|
|
},
|
|
{
|
|
"epoch": 1.360981308411215,
|
|
"grad_norm": 0.41305315755401223,
|
|
"learning_rate": 3.892984520124054e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33403030037879944,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5251.5,
|
|
"valid_targets_min": 2347
|
|
},
|
|
{
|
|
"epoch": 1.3668224299065421,
|
|
"grad_norm": 0.44941025723180894,
|
|
"learning_rate": 3.891096157754686e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31698495149612427,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5359.0,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 1.3726635514018692,
|
|
"grad_norm": 0.4462768291354129,
|
|
"learning_rate": 3.889191746188903e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30545803904533386,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4554.8,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 1.3785046728971961,
|
|
"grad_norm": 0.4295197572986724,
|
|
"learning_rate": 3.8872713015889064e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3169842064380646,
|
|
"step": 1180,
|
|
"valid_targets_mean": 5840.5,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 1.3843457943925235,
|
|
"grad_norm": 0.4220006224485507,
|
|
"learning_rate": 3.885334840252963e-05,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3298092782497406,
|
|
"step": 1185,
|
|
"valid_targets_mean": 5287.1,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 1.3901869158878504,
|
|
"grad_norm": 0.45801990811070487,
|
|
"learning_rate": 3.8833823786152715e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33156731724739075,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4786.2,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 1.3960280373831775,
|
|
"grad_norm": 0.42784788917870875,
|
|
"learning_rate": 3.8814139332458194e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38618728518486023,
|
|
"step": 1195,
|
|
"valid_targets_mean": 5711.7,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 1.4018691588785046,
|
|
"grad_norm": 0.43174667765485336,
|
|
"learning_rate": 3.879429520850244e-05,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34327569603919983,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5349.2,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 1.4077102803738317,
|
|
"grad_norm": 0.3932923520586069,
|
|
"learning_rate": 3.87742915826969e-05,
|
|
"loss": 0.3394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3329240083694458,
|
|
"step": 1205,
|
|
"valid_targets_mean": 6588.8,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 1.4135514018691588,
|
|
"grad_norm": 0.42997298433956566,
|
|
"learning_rate": 3.8754128624806676e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29949530959129333,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5046.4,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 1.419392523364486,
|
|
"grad_norm": 0.454274307481997,
|
|
"learning_rate": 3.8733806505949064e-05,
|
|
"loss": 0.3348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3160513937473297,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4237.8,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 1.425233644859813,
|
|
"grad_norm": 0.38961770511128324,
|
|
"learning_rate": 3.871332539859212e-05,
|
|
"loss": 0.3372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3583175241947174,
|
|
"step": 1220,
|
|
"valid_targets_mean": 6431.3,
|
|
"valid_targets_min": 2090
|
|
},
|
|
{
|
|
"epoch": 1.4310747663551402,
|
|
"grad_norm": 0.44521190371021885,
|
|
"learning_rate": 3.8692685476553186e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3136119544506073,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5782.5,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 1.4369158878504673,
|
|
"grad_norm": 0.4478057013300125,
|
|
"learning_rate": 3.8671886914997434e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35310816764831543,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5266.4,
|
|
"valid_targets_min": 3359
|
|
},
|
|
{
|
|
"epoch": 1.4427570093457944,
|
|
"grad_norm": 0.46672526563993627,
|
|
"learning_rate": 3.8650929890436344e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33301374316215515,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5232.6,
|
|
"valid_targets_min": 2945
|
|
},
|
|
{
|
|
"epoch": 1.4485981308411215,
|
|
"grad_norm": 0.46299326882194514,
|
|
"learning_rate": 3.8629814580726235e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35486769676208496,
|
|
"step": 1240,
|
|
"valid_targets_mean": 5428.1,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 1.4544392523364487,
|
|
"grad_norm": 0.39189735736286696,
|
|
"learning_rate": 3.860854116506675e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3296653926372528,
|
|
"step": 1245,
|
|
"valid_targets_mean": 6033.6,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 1.4602803738317758,
|
|
"grad_norm": 0.40072307579431676,
|
|
"learning_rate": 3.858710982399934e-05,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36691975593566895,
|
|
"step": 1250,
|
|
"valid_targets_mean": 5773.2,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 1.4661214953271027,
|
|
"grad_norm": 0.38308045681559,
|
|
"learning_rate": 3.85655207394057e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33937057852745056,
|
|
"step": 1255,
|
|
"valid_targets_mean": 6772.9,
|
|
"valid_targets_min": 2723
|
|
},
|
|
{
|
|
"epoch": 1.47196261682243,
|
|
"grad_norm": 0.4536095778102643,
|
|
"learning_rate": 3.854377409450628e-05,
|
|
"loss": 0.3526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34476378560066223,
|
|
"step": 1260,
|
|
"valid_targets_mean": 5363.3,
|
|
"valid_targets_min": 1755
|
|
},
|
|
{
|
|
"epoch": 1.477803738317757,
|
|
"grad_norm": 0.3924548171011381,
|
|
"learning_rate": 3.852187007385866e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33087560534477234,
|
|
"step": 1265,
|
|
"valid_targets_mean": 5901.8,
|
|
"valid_targets_min": 2935
|
|
},
|
|
{
|
|
"epoch": 1.4836448598130842,
|
|
"grad_norm": 0.3831733990008748,
|
|
"learning_rate": 3.8499808863356074e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3673917353153229,
|
|
"step": 1270,
|
|
"valid_targets_mean": 6423.4,
|
|
"valid_targets_min": 2227
|
|
},
|
|
{
|
|
"epoch": 1.4894859813084111,
|
|
"grad_norm": 0.4282471951566855,
|
|
"learning_rate": 3.8477590650225735e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3192268908023834,
|
|
"step": 1275,
|
|
"valid_targets_mean": 5089.6,
|
|
"valid_targets_min": 2525
|
|
},
|
|
{
|
|
"epoch": 1.4953271028037383,
|
|
"grad_norm": 0.41352976481673875,
|
|
"learning_rate": 3.8455215623027324e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32885506749153137,
|
|
"step": 1280,
|
|
"valid_targets_mean": 6270.8,
|
|
"valid_targets_min": 2945
|
|
},
|
|
{
|
|
"epoch": 1.5011682242990654,
|
|
"grad_norm": 0.39644499981000175,
|
|
"learning_rate": 3.8432683971651344e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33941832184791565,
|
|
"step": 1285,
|
|
"valid_targets_mean": 6487.2,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 1.5070093457943925,
|
|
"grad_norm": 0.4305245778867432,
|
|
"learning_rate": 3.840999588731753e-05,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3260078430175781,
|
|
"step": 1290,
|
|
"valid_targets_mean": 5159.0,
|
|
"valid_targets_min": 2008
|
|
},
|
|
{
|
|
"epoch": 1.5128504672897196,
|
|
"grad_norm": 0.3947093270576276,
|
|
"learning_rate": 3.83871515625732e-05,
|
|
"loss": 0.3241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2994173765182495,
|
|
"step": 1295,
|
|
"valid_targets_mean": 5452.9,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 1.5186915887850467,
|
|
"grad_norm": 0.40963386760632187,
|
|
"learning_rate": 3.836415119129167e-05,
|
|
"loss": 0.3246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3296498954296112,
|
|
"step": 1300,
|
|
"valid_targets_mean": 5888.2,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 1.5245327102803738,
|
|
"grad_norm": 0.43104771588357327,
|
|
"learning_rate": 3.834099496867055e-05,
|
|
"loss": 0.3391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3090157210826874,
|
|
"step": 1305,
|
|
"valid_targets_mean": 5326.5,
|
|
"valid_targets_min": 1815
|
|
},
|
|
{
|
|
"epoch": 1.530373831775701,
|
|
"grad_norm": 0.4318907637958081,
|
|
"learning_rate": 3.831768309123015e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284053772687912,
|
|
"step": 1310,
|
|
"valid_targets_mean": 5142.5,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 1.536214953271028,
|
|
"grad_norm": 0.40472598971757817,
|
|
"learning_rate": 3.8294215756811746e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3039734959602356,
|
|
"step": 1315,
|
|
"valid_targets_mean": 5368.1,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 1.542056074766355,
|
|
"grad_norm": 0.59526148713433,
|
|
"learning_rate": 3.827059316457595e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34755298495292664,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5450.8,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 1.5478971962616823,
|
|
"grad_norm": 0.4506838889718593,
|
|
"learning_rate": 3.824681551500098e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3734554946422577,
|
|
"step": 1325,
|
|
"valid_targets_mean": 5401.4,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 1.5537383177570092,
|
|
"grad_norm": 0.3991112683010186,
|
|
"learning_rate": 3.822288300988103e-05,
|
|
"loss": 0.3457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3018367290496826,
|
|
"step": 1330,
|
|
"valid_targets_mean": 6000.3,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 1.5595794392523366,
|
|
"grad_norm": 0.45176957041992927,
|
|
"learning_rate": 3.8198795852324454e-05,
|
|
"loss": 0.3371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3295012414455414,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4874.6,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 1.5654205607476634,
|
|
"grad_norm": 0.4358235777215621,
|
|
"learning_rate": 3.8174554246752135e-05,
|
|
"loss": 0.3501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34282031655311584,
|
|
"step": 1340,
|
|
"valid_targets_mean": 5978.1,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 1.5712616822429908,
|
|
"grad_norm": 0.43029775042148855,
|
|
"learning_rate": 3.81501583988957e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3698880970478058,
|
|
"step": 1345,
|
|
"valid_targets_mean": 6209.8,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 1.5771028037383177,
|
|
"grad_norm": 0.441785338116025,
|
|
"learning_rate": 3.8125608515795775e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36506715416908264,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5399.0,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 1.582943925233645,
|
|
"grad_norm": 0.41825988230162786,
|
|
"learning_rate": 3.810090480580027e-05,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3644230365753174,
|
|
"step": 1355,
|
|
"valid_targets_mean": 5754.0,
|
|
"valid_targets_min": 3008
|
|
},
|
|
{
|
|
"epoch": 1.588785046728972,
|
|
"grad_norm": 0.4060838840246129,
|
|
"learning_rate": 3.807604747856255e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35247740149497986,
|
|
"step": 1360,
|
|
"valid_targets_mean": 6199.8,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 1.594626168224299,
|
|
"grad_norm": 0.4248354477073209,
|
|
"learning_rate": 3.8051036745039695e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.308304101228714,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5060.8,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 1.6004672897196262,
|
|
"grad_norm": 0.4239863989294774,
|
|
"learning_rate": 3.80258728174907e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077244758605957,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4832.9,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 1.6063084112149533,
|
|
"grad_norm": 0.4601335799146211,
|
|
"learning_rate": 3.8000555909474685e-05,
|
|
"loss": 0.3339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3185292184352875,
|
|
"step": 1375,
|
|
"valid_targets_mean": 5490.9,
|
|
"valid_targets_min": 2394
|
|
},
|
|
{
|
|
"epoch": 1.6121495327102804,
|
|
"grad_norm": 0.4054843681758954,
|
|
"learning_rate": 3.7975086235849063e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34226754307746887,
|
|
"step": 1380,
|
|
"valid_targets_mean": 5719.9,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 1.6179906542056075,
|
|
"grad_norm": 0.3808708375130962,
|
|
"learning_rate": 3.794946401276772e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34809359908103943,
|
|
"step": 1385,
|
|
"valid_targets_mean": 6442.9,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 1.6238317757009346,
|
|
"grad_norm": 0.41365934442004987,
|
|
"learning_rate": 3.7923689457679204e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36455702781677246,
|
|
"step": 1390,
|
|
"valid_targets_mean": 6201.2,
|
|
"valid_targets_min": 2312
|
|
},
|
|
{
|
|
"epoch": 1.6296728971962615,
|
|
"grad_norm": 0.37522916324946515,
|
|
"learning_rate": 3.7897762789324834e-05,
|
|
"loss": 0.3247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29573896527290344,
|
|
"step": 1395,
|
|
"valid_targets_mean": 5950.8,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 1.6355140186915889,
|
|
"grad_norm": 0.4298639851665236,
|
|
"learning_rate": 3.78716842277369e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3457094132900238,
|
|
"step": 1400,
|
|
"valid_targets_mean": 5808.9,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 1.6413551401869158,
|
|
"grad_norm": 0.39842948310822773,
|
|
"learning_rate": 3.784545399423674e-05,
|
|
"loss": 0.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28623780608177185,
|
|
"step": 1405,
|
|
"valid_targets_mean": 5264.2,
|
|
"valid_targets_min": 2373
|
|
},
|
|
{
|
|
"epoch": 1.647196261682243,
|
|
"grad_norm": 0.41953437373224384,
|
|
"learning_rate": 3.7819072311432915e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33276596665382385,
|
|
"step": 1410,
|
|
"valid_targets_mean": 5552.0,
|
|
"valid_targets_min": 2547
|
|
},
|
|
{
|
|
"epoch": 1.65303738317757,
|
|
"grad_norm": 0.4321322589640597,
|
|
"learning_rate": 3.779253940321927e-05,
|
|
"loss": 0.34,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32419800758361816,
|
|
"step": 1415,
|
|
"valid_targets_mean": 5380.8,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 1.6588785046728973,
|
|
"grad_norm": 0.4404623371045658,
|
|
"learning_rate": 3.776585549477307e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3197462856769562,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4894.4,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 1.6647196261682242,
|
|
"grad_norm": 0.4177163885518051,
|
|
"learning_rate": 3.7739020812553076e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34838518500328064,
|
|
"step": 1425,
|
|
"valid_targets_mean": 6660.6,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 1.6705607476635516,
|
|
"grad_norm": 0.4223084951526111,
|
|
"learning_rate": 3.7712035584297615e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3526113033294678,
|
|
"step": 1430,
|
|
"valid_targets_mean": 6112.0,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 1.6764018691588785,
|
|
"grad_norm": 0.7019633038093199,
|
|
"learning_rate": 3.7684900039022675e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2943001687526703,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5781.2,
|
|
"valid_targets_min": 2361
|
|
},
|
|
{
|
|
"epoch": 1.6822429906542056,
|
|
"grad_norm": 0.5245481759987278,
|
|
"learning_rate": 3.7657614407019906e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3568675220012665,
|
|
"step": 1440,
|
|
"valid_targets_mean": 6311.0,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 1.6880841121495327,
|
|
"grad_norm": 0.40339930868259655,
|
|
"learning_rate": 3.7630178919854746e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3306673467159271,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5374.7,
|
|
"valid_targets_min": 2055
|
|
},
|
|
{
|
|
"epoch": 1.6939252336448598,
|
|
"grad_norm": 0.4671772555375747,
|
|
"learning_rate": 3.760259381036437e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3321833312511444,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5553.4,
|
|
"valid_targets_min": 2386
|
|
},
|
|
{
|
|
"epoch": 1.699766355140187,
|
|
"grad_norm": 0.40501130186278894,
|
|
"learning_rate": 3.7574859312655785e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31918981671333313,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5144.3,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 1.705607476635514,
|
|
"grad_norm": 0.4452485823484549,
|
|
"learning_rate": 3.75469756621038e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3718746602535248,
|
|
"step": 1460,
|
|
"valid_targets_mean": 6545.4,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 1.7114485981308412,
|
|
"grad_norm": 0.40663112799425677,
|
|
"learning_rate": 3.751894309534904e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.309970885515213,
|
|
"step": 1465,
|
|
"valid_targets_mean": 5576.5,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 1.7172897196261683,
|
|
"grad_norm": 0.5185586039347374,
|
|
"learning_rate": 3.749076185029595e-05,
|
|
"loss": 0.3278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.368748277425766,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4621.1,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 1.7231308411214954,
|
|
"grad_norm": 0.4292026609332738,
|
|
"learning_rate": 3.746243216611075e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3298785388469696,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5044.2,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 1.7289719626168223,
|
|
"grad_norm": 0.39832304778434585,
|
|
"learning_rate": 3.743395428321944e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31063249707221985,
|
|
"step": 1480,
|
|
"valid_targets_mean": 6521.8,
|
|
"valid_targets_min": 3123
|
|
},
|
|
{
|
|
"epoch": 1.7348130841121496,
|
|
"grad_norm": 0.4156787814336058,
|
|
"learning_rate": 3.740532844330573e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3447030484676361,
|
|
"step": 1485,
|
|
"valid_targets_mean": 6063.5,
|
|
"valid_targets_min": 2010
|
|
},
|
|
{
|
|
"epoch": 1.7406542056074765,
|
|
"grad_norm": 0.4281760548638144,
|
|
"learning_rate": 3.737655488930899e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3490969240665436,
|
|
"step": 1490,
|
|
"valid_targets_mean": 5598.3,
|
|
"valid_targets_min": 2333
|
|
},
|
|
{
|
|
"epoch": 1.7464953271028039,
|
|
"grad_norm": 0.41837184245364056,
|
|
"learning_rate": 3.7347633865422214e-05,
|
|
"loss": 0.3361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36756864190101624,
|
|
"step": 1495,
|
|
"valid_targets_mean": 6193.6,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 1.7523364485981308,
|
|
"grad_norm": 0.42168025323452607,
|
|
"learning_rate": 3.731856561708993e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3437415361404419,
|
|
"step": 1500,
|
|
"valid_targets_mean": 5818.3,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 1.758177570093458,
|
|
"grad_norm": 0.4341076699627194,
|
|
"learning_rate": 3.72893503910061e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3414033353328705,
|
|
"step": 1505,
|
|
"valid_targets_mean": 5668.6,
|
|
"valid_targets_min": 2324
|
|
},
|
|
{
|
|
"epoch": 1.764018691588785,
|
|
"grad_norm": 0.41331391113222693,
|
|
"learning_rate": 3.725998843511206e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35500195622444153,
|
|
"step": 1510,
|
|
"valid_targets_mean": 5526.5,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 1.769859813084112,
|
|
"grad_norm": 0.3927697698841332,
|
|
"learning_rate": 3.72304799985944e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34743452072143555,
|
|
"step": 1515,
|
|
"valid_targets_mean": 6062.0,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 1.7757009345794392,
|
|
"grad_norm": 0.4026705919197804,
|
|
"learning_rate": 3.720082533188286e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3544446527957916,
|
|
"step": 1520,
|
|
"valid_targets_mean": 6325.9,
|
|
"valid_targets_min": 2699
|
|
},
|
|
{
|
|
"epoch": 1.7815420560747663,
|
|
"grad_norm": 0.4023936689744677,
|
|
"learning_rate": 3.7171024686648146e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3159964978694916,
|
|
"step": 1525,
|
|
"valid_targets_mean": 5508.0,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 1.7873831775700935,
|
|
"grad_norm": 0.5002220990065088,
|
|
"learning_rate": 3.7141078315799906e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30780524015426636,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4986.7,
|
|
"valid_targets_min": 2642
|
|
},
|
|
{
|
|
"epoch": 1.7932242990654206,
|
|
"grad_norm": 0.4772626958475521,
|
|
"learning_rate": 3.711098647348448e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33184814453125,
|
|
"step": 1535,
|
|
"valid_targets_mean": 4291.1,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 1.7990654205607477,
|
|
"grad_norm": 0.425200222875786,
|
|
"learning_rate": 3.7080749415082797e-05,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34764203429222107,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5729.4,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 1.8049065420560748,
|
|
"grad_norm": 0.41197501888756877,
|
|
"learning_rate": 3.7050367397208196e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32613980770111084,
|
|
"step": 1545,
|
|
"valid_targets_mean": 5229.5,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 1.810747663551402,
|
|
"grad_norm": 0.45126194699392114,
|
|
"learning_rate": 3.701984067770422e-05,
|
|
"loss": 0.3362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32902419567108154,
|
|
"step": 1550,
|
|
"valid_targets_mean": 5429.2,
|
|
"valid_targets_min": 2186
|
|
},
|
|
{
|
|
"epoch": 1.8165887850467288,
|
|
"grad_norm": 0.4165591124375701,
|
|
"learning_rate": 3.6989169515642504e-05,
|
|
"loss": 0.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31407755613327026,
|
|
"step": 1555,
|
|
"valid_targets_mean": 5314.0,
|
|
"valid_targets_min": 2737
|
|
},
|
|
{
|
|
"epoch": 1.8224299065420562,
|
|
"grad_norm": 0.4562086788079457,
|
|
"learning_rate": 3.695835417132047e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31847354769706726,
|
|
"step": 1560,
|
|
"valid_targets_mean": 5798.8,
|
|
"valid_targets_min": 2341
|
|
},
|
|
{
|
|
"epoch": 1.828271028037383,
|
|
"grad_norm": 0.43942090320378496,
|
|
"learning_rate": 3.692739490625922e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34555545449256897,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5194.0,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 1.8341121495327104,
|
|
"grad_norm": 0.3957827554950053,
|
|
"learning_rate": 3.6896291983201235e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31819960474967957,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5796.9,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 1.8399532710280373,
|
|
"grad_norm": 0.39074275393504254,
|
|
"learning_rate": 3.6865045666108204e-05,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3291510045528412,
|
|
"step": 1575,
|
|
"valid_targets_mean": 6791.7,
|
|
"valid_targets_min": 3106
|
|
},
|
|
{
|
|
"epoch": 1.8457943925233646,
|
|
"grad_norm": 0.3726709408018819,
|
|
"learning_rate": 3.6833656220158756e-05,
|
|
"loss": 0.3207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27397075295448303,
|
|
"step": 1580,
|
|
"valid_targets_mean": 6496.9,
|
|
"valid_targets_min": 2177
|
|
},
|
|
{
|
|
"epoch": 1.8516355140186915,
|
|
"grad_norm": 0.42888350433518846,
|
|
"learning_rate": 3.6802123911746204e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3242558538913727,
|
|
"step": 1585,
|
|
"valid_targets_mean": 5642.2,
|
|
"valid_targets_min": 2437
|
|
},
|
|
{
|
|
"epoch": 1.8574766355140186,
|
|
"grad_norm": 0.408414916129565,
|
|
"learning_rate": 3.67704490084763e-05,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34484943747520447,
|
|
"step": 1590,
|
|
"valid_targets_mean": 5505.8,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 1.8633177570093458,
|
|
"grad_norm": 0.3936278968606128,
|
|
"learning_rate": 3.673863177916497e-05,
|
|
"loss": 0.3373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3572586476802826,
|
|
"step": 1595,
|
|
"valid_targets_mean": 6338.9,
|
|
"valid_targets_min": 2374
|
|
},
|
|
{
|
|
"epoch": 1.8691588785046729,
|
|
"grad_norm": 0.470725364191903,
|
|
"learning_rate": 3.670667249383599e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32906249165534973,
|
|
"step": 1600,
|
|
"valid_targets_mean": 5094.5,
|
|
"valid_targets_min": 1868
|
|
},
|
|
{
|
|
"epoch": 1.875,
|
|
"grad_norm": 0.6232296317820922,
|
|
"learning_rate": 3.667457142371876e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33299294114112854,
|
|
"step": 1605,
|
|
"valid_targets_mean": 5216.9,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 1.8808411214953271,
|
|
"grad_norm": 0.4229764833830271,
|
|
"learning_rate": 3.6642328841245954e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31769677996635437,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4991.8,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 1.8866822429906542,
|
|
"grad_norm": 0.39544759454399814,
|
|
"learning_rate": 3.6609945020051214e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31398651003837585,
|
|
"step": 1615,
|
|
"valid_targets_mean": 5692.5,
|
|
"valid_targets_min": 2563
|
|
},
|
|
{
|
|
"epoch": 1.8925233644859814,
|
|
"grad_norm": 0.417557621113483,
|
|
"learning_rate": 3.657742023496684e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34911048412323,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5877.4,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 1.8983644859813085,
|
|
"grad_norm": 0.4443735359386049,
|
|
"learning_rate": 3.654475476202145e-05,
|
|
"loss": 0.3211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3166656494140625,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4800.9,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 1.9042056074766354,
|
|
"grad_norm": 0.37944826469088444,
|
|
"learning_rate": 3.651194887843765e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3472633361816406,
|
|
"step": 1630,
|
|
"valid_targets_mean": 6965.8,
|
|
"valid_targets_min": 2362
|
|
},
|
|
{
|
|
"epoch": 1.9100467289719627,
|
|
"grad_norm": 0.4479323697627743,
|
|
"learning_rate": 3.6479002862629656e-05,
|
|
"loss": 0.3165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3258894681930542,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5623.9,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 1.9158878504672896,
|
|
"grad_norm": 0.427802267480232,
|
|
"learning_rate": 3.6445916994200964e-05,
|
|
"loss": 0.3292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31744977831840515,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5198.1,
|
|
"valid_targets_min": 2054
|
|
},
|
|
{
|
|
"epoch": 1.921728971962617,
|
|
"grad_norm": 0.3653609207044453,
|
|
"learning_rate": 3.641269155394194e-05,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30866459012031555,
|
|
"step": 1645,
|
|
"valid_targets_mean": 6470.2,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 1.9275700934579438,
|
|
"grad_norm": 0.40632579809334146,
|
|
"learning_rate": 3.6379326823827474e-05,
|
|
"loss": 0.3274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32673850655555725,
|
|
"step": 1650,
|
|
"valid_targets_mean": 5643.8,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 1.9334112149532712,
|
|
"grad_norm": 0.35418145970592574,
|
|
"learning_rate": 3.634582308701455e-05,
|
|
"loss": 0.3211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.311287522315979,
|
|
"step": 1655,
|
|
"valid_targets_mean": 7007.3,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 1.939252336448598,
|
|
"grad_norm": 0.461946554598689,
|
|
"learning_rate": 3.631218062783989e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3473937511444092,
|
|
"step": 1660,
|
|
"valid_targets_mean": 5191.1,
|
|
"valid_targets_min": 1703
|
|
},
|
|
{
|
|
"epoch": 1.9450934579439252,
|
|
"grad_norm": 0.41938658054938355,
|
|
"learning_rate": 3.6278399731817485e-05,
|
|
"loss": 0.3248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32422754168510437,
|
|
"step": 1665,
|
|
"valid_targets_mean": 5044.5,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 1.9509345794392523,
|
|
"grad_norm": 0.4117488291509606,
|
|
"learning_rate": 3.624448068563623e-05,
|
|
"loss": 0.3172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32139334082603455,
|
|
"step": 1670,
|
|
"valid_targets_mean": 5742.4,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 1.9567757009345794,
|
|
"grad_norm": 0.4240879756469647,
|
|
"learning_rate": 3.6210423777157416e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3074384033679962,
|
|
"step": 1675,
|
|
"valid_targets_mean": 5541.8,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 1.9626168224299065,
|
|
"grad_norm": 0.38568729622308373,
|
|
"learning_rate": 3.617622929541239e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31714144349098206,
|
|
"step": 1680,
|
|
"valid_targets_mean": 6190.2,
|
|
"valid_targets_min": 3575
|
|
},
|
|
{
|
|
"epoch": 1.9684579439252337,
|
|
"grad_norm": 0.39923442656652863,
|
|
"learning_rate": 3.614189753059999e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30969682335853577,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5581.9,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 1.9742990654205608,
|
|
"grad_norm": 0.37252892638853374,
|
|
"learning_rate": 3.610742877408418e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3261638283729553,
|
|
"step": 1690,
|
|
"valid_targets_mean": 6634.6,
|
|
"valid_targets_min": 2332
|
|
},
|
|
{
|
|
"epoch": 1.980140186915888,
|
|
"grad_norm": 0.43094451464816136,
|
|
"learning_rate": 3.60728233183915e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3321646749973297,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5953.5,
|
|
"valid_targets_min": 2471
|
|
},
|
|
{
|
|
"epoch": 1.985981308411215,
|
|
"grad_norm": 0.3978824136399198,
|
|
"learning_rate": 3.6038081457208643e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33620867133140564,
|
|
"step": 1700,
|
|
"valid_targets_mean": 5918.6,
|
|
"valid_targets_min": 2312
|
|
},
|
|
{
|
|
"epoch": 1.991822429906542,
|
|
"grad_norm": 0.46008186058308154,
|
|
"learning_rate": 3.600320348537992e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3153410255908966,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4260.7,
|
|
"valid_targets_min": 2043
|
|
},
|
|
{
|
|
"epoch": 1.9976635514018692,
|
|
"grad_norm": 0.4251221690902295,
|
|
"learning_rate": 3.596818969890478e-05,
|
|
"loss": 0.3294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35792016983032227,
|
|
"step": 1710,
|
|
"valid_targets_mean": 5693.0,
|
|
"valid_targets_min": 2572
|
|
},
|
|
{
|
|
"epoch": 2.003504672897196,
|
|
"grad_norm": 0.4138430671176715,
|
|
"learning_rate": 3.59330403949353e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28317880630493164,
|
|
"step": 1715,
|
|
"valid_targets_mean": 5789.4,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 2.0093457943925235,
|
|
"grad_norm": 0.441933225499944,
|
|
"learning_rate": 3.5897755871773625e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3173886835575104,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5032.1,
|
|
"valid_targets_min": 2030
|
|
},
|
|
{
|
|
"epoch": 2.0151869158878504,
|
|
"grad_norm": 0.40464332634487293,
|
|
"learning_rate": 3.5862336428869514e-05,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3424083888530731,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5861.6,
|
|
"valid_targets_min": 1842
|
|
},
|
|
{
|
|
"epoch": 2.0210280373831777,
|
|
"grad_norm": 0.4209466805823949,
|
|
"learning_rate": 3.582678236681772e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3306629955768585,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5832.0,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 2.0268691588785046,
|
|
"grad_norm": 0.4815601918768887,
|
|
"learning_rate": 3.579109398735547e-05,
|
|
"loss": 0.323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33767232298851013,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4804.6,
|
|
"valid_targets_min": 2094
|
|
},
|
|
{
|
|
"epoch": 2.032710280373832,
|
|
"grad_norm": 0.3758784572812967,
|
|
"learning_rate": 3.575527159335992e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3036559820175171,
|
|
"step": 1740,
|
|
"valid_targets_mean": 5856.0,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 2.038551401869159,
|
|
"grad_norm": 0.408802715235981,
|
|
"learning_rate": 3.571931548884556e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31468871235847473,
|
|
"step": 1745,
|
|
"valid_targets_mean": 5491.1,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 2.044392523364486,
|
|
"grad_norm": 0.3850343548034133,
|
|
"learning_rate": 3.568322597896165e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281687468290329,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5751.7,
|
|
"valid_targets_min": 2322
|
|
},
|
|
{
|
|
"epoch": 2.050233644859813,
|
|
"grad_norm": 0.41037620392928953,
|
|
"learning_rate": 3.564700336998961e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946256101131439,
|
|
"step": 1755,
|
|
"valid_targets_mean": 6144.6,
|
|
"valid_targets_min": 2503
|
|
},
|
|
{
|
|
"epoch": 2.05607476635514,
|
|
"grad_norm": 0.3966523237717694,
|
|
"learning_rate": 3.561064796934045e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31302592158317566,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5528.4,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 2.0619158878504673,
|
|
"grad_norm": 0.40313538325161247,
|
|
"learning_rate": 3.557416008555213e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3561849296092987,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5672.8,
|
|
"valid_targets_min": 2362
|
|
},
|
|
{
|
|
"epoch": 2.067757009345794,
|
|
"grad_norm": 0.4026899447905019,
|
|
"learning_rate": 3.553754002828697e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3139074742794037,
|
|
"step": 1770,
|
|
"valid_targets_mean": 5297.2,
|
|
"valid_targets_min": 2010
|
|
},
|
|
{
|
|
"epoch": 2.0735981308411215,
|
|
"grad_norm": 0.3861397220956584,
|
|
"learning_rate": 3.5500788108329e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34651097655296326,
|
|
"step": 1775,
|
|
"valid_targets_mean": 6576.5,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 2.0794392523364484,
|
|
"grad_norm": 0.4217829055483105,
|
|
"learning_rate": 3.5463904637581346e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30645468831062317,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4580.0,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 2.085280373831776,
|
|
"grad_norm": 0.425323498650336,
|
|
"learning_rate": 3.542688992906354e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114953637123108,
|
|
"step": 1785,
|
|
"valid_targets_mean": 5161.0,
|
|
"valid_targets_min": 1836
|
|
},
|
|
{
|
|
"epoch": 2.0911214953271027,
|
|
"grad_norm": 0.3807314192733229,
|
|
"learning_rate": 3.5389744296908926e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30336999893188477,
|
|
"step": 1790,
|
|
"valid_targets_mean": 5606.8,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 2.09696261682243,
|
|
"grad_norm": 0.41014803237122144,
|
|
"learning_rate": 3.535246805636193e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3046761155128479,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5445.6,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 2.102803738317757,
|
|
"grad_norm": 0.5154331480683657,
|
|
"learning_rate": 3.531506152377543e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27760788798332214,
|
|
"step": 1800,
|
|
"valid_targets_mean": 5336.8,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 2.1086448598130842,
|
|
"grad_norm": 0.43855143425804205,
|
|
"learning_rate": 3.527752501660805e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956927716732025,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4711.1,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 2.114485981308411,
|
|
"grad_norm": 0.4147248815231461,
|
|
"learning_rate": 3.523985885342148e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32205697894096375,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5354.2,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 2.1203271028037385,
|
|
"grad_norm": 0.3748328707209332,
|
|
"learning_rate": 3.520206335387775e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2909047603607178,
|
|
"step": 1815,
|
|
"valid_targets_mean": 6360.2,
|
|
"valid_targets_min": 3241
|
|
},
|
|
{
|
|
"epoch": 2.1261682242990654,
|
|
"grad_norm": 0.4451810169484976,
|
|
"learning_rate": 3.516413883873655e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34121760725975037,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5477.1,
|
|
"valid_targets_min": 2613
|
|
},
|
|
{
|
|
"epoch": 2.1320093457943927,
|
|
"grad_norm": 0.4223642615945711,
|
|
"learning_rate": 3.5126085629852474e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3351057767868042,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5422.5,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 2.1378504672897196,
|
|
"grad_norm": 0.396688090201631,
|
|
"learning_rate": 3.508790405017229e-05,
|
|
"loss": 0.3436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3411674499511719,
|
|
"step": 1830,
|
|
"valid_targets_mean": 6312.1,
|
|
"valid_targets_min": 2965
|
|
},
|
|
{
|
|
"epoch": 2.143691588785047,
|
|
"grad_norm": 0.3840810283995289,
|
|
"learning_rate": 3.504959442373225e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31600961089134216,
|
|
"step": 1835,
|
|
"valid_targets_mean": 6162.4,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 2.149532710280374,
|
|
"grad_norm": 0.4138988011306943,
|
|
"learning_rate": 3.501115707565528e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3113376200199127,
|
|
"step": 1840,
|
|
"valid_targets_mean": 5663.0,
|
|
"valid_targets_min": 2476
|
|
},
|
|
{
|
|
"epoch": 2.1553738317757007,
|
|
"grad_norm": 0.39355554549958155,
|
|
"learning_rate": 3.497259233214823e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3274758756160736,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5978.2,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 2.161214953271028,
|
|
"grad_norm": 0.38371254713217673,
|
|
"learning_rate": 3.4933900520499145e-05,
|
|
"loss": 0.3017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2383655160665512,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5135.2,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 2.167056074766355,
|
|
"grad_norm": 0.4041204871877773,
|
|
"learning_rate": 3.489508196907445e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28364819288253784,
|
|
"step": 1855,
|
|
"valid_targets_mean": 5379.9,
|
|
"valid_targets_min": 1888
|
|
},
|
|
{
|
|
"epoch": 2.1728971962616823,
|
|
"grad_norm": 0.452447989466483,
|
|
"learning_rate": 3.485613700731617e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32433584332466125,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4687.0,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 2.178738317757009,
|
|
"grad_norm": 0.4022406389895379,
|
|
"learning_rate": 3.4817065965739146e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077855408191681,
|
|
"step": 1865,
|
|
"valid_targets_mean": 6114.4,
|
|
"valid_targets_min": 2002
|
|
},
|
|
{
|
|
"epoch": 2.1845794392523366,
|
|
"grad_norm": 0.4993571931249473,
|
|
"learning_rate": 3.477786917592823e-05,
|
|
"loss": 0.3231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3498821556568146,
|
|
"step": 1870,
|
|
"valid_targets_mean": 5944.5,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 2.1904205607476634,
|
|
"grad_norm": 0.40174067332255015,
|
|
"learning_rate": 3.473854697053543e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31975337862968445,
|
|
"step": 1875,
|
|
"valid_targets_mean": 6765.0,
|
|
"valid_targets_min": 3157
|
|
},
|
|
{
|
|
"epoch": 2.196261682242991,
|
|
"grad_norm": 0.37836371679981373,
|
|
"learning_rate": 3.469909968327716e-05,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2990609109401703,
|
|
"step": 1880,
|
|
"valid_targets_mean": 6253.4,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 2.2021028037383177,
|
|
"grad_norm": 0.4172683254664366,
|
|
"learning_rate": 3.465952764893135e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3172577917575836,
|
|
"step": 1885,
|
|
"valid_targets_mean": 5279.3,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 2.207943925233645,
|
|
"grad_norm": 0.3963182029976146,
|
|
"learning_rate": 3.4619831203334625e-05,
|
|
"loss": 0.3067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32124534249305725,
|
|
"step": 1890,
|
|
"valid_targets_mean": 5758.6,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 2.213785046728972,
|
|
"grad_norm": 0.42918928737796636,
|
|
"learning_rate": 3.458001068337945e-05,
|
|
"loss": 0.3201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31261077523231506,
|
|
"step": 1895,
|
|
"valid_targets_mean": 5385.5,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 2.2196261682242993,
|
|
"grad_norm": 0.4593728924022728,
|
|
"learning_rate": 3.4540066427011276e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32418185472488403,
|
|
"step": 1900,
|
|
"valid_targets_mean": 5351.3,
|
|
"valid_targets_min": 2447
|
|
},
|
|
{
|
|
"epoch": 2.225467289719626,
|
|
"grad_norm": 0.4359145503010203,
|
|
"learning_rate": 3.449999877322569e-05,
|
|
"loss": 0.3316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3343602120876312,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5808.5,
|
|
"valid_targets_min": 2141
|
|
},
|
|
{
|
|
"epoch": 2.231308411214953,
|
|
"grad_norm": 0.40075335678184776,
|
|
"learning_rate": 3.445980806206547e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3569049835205078,
|
|
"step": 1910,
|
|
"valid_targets_mean": 6125.1,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 2.2371495327102804,
|
|
"grad_norm": 0.40236083835255276,
|
|
"learning_rate": 3.4419494634617794e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28060463070869446,
|
|
"step": 1915,
|
|
"valid_targets_mean": 5098.1,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 2.2429906542056073,
|
|
"grad_norm": 0.4442805650010067,
|
|
"learning_rate": 3.437905883301128e-05,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33275625109672546,
|
|
"step": 1920,
|
|
"valid_targets_mean": 5163.2,
|
|
"valid_targets_min": 2148
|
|
},
|
|
{
|
|
"epoch": 2.2488317757009346,
|
|
"grad_norm": 0.378579231929438,
|
|
"learning_rate": 3.433850100041309e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3530585467815399,
|
|
"step": 1925,
|
|
"valid_targets_mean": 6875.2,
|
|
"valid_targets_min": 2220
|
|
},
|
|
{
|
|
"epoch": 2.2546728971962615,
|
|
"grad_norm": 0.4372680078609299,
|
|
"learning_rate": 3.4297821481026046e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30737733840942383,
|
|
"step": 1930,
|
|
"valid_targets_mean": 5665.7,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 2.260514018691589,
|
|
"grad_norm": 0.41497951677174616,
|
|
"learning_rate": 3.4257020620085684e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.328060120344162,
|
|
"step": 1935,
|
|
"valid_targets_mean": 5262.0,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 2.2663551401869158,
|
|
"grad_norm": 0.3894756587970426,
|
|
"learning_rate": 3.421609876385734e-05,
|
|
"loss": 0.3213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32363179326057434,
|
|
"step": 1940,
|
|
"valid_targets_mean": 6500.0,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 2.272196261682243,
|
|
"grad_norm": 0.40367945382596027,
|
|
"learning_rate": 3.417505625963318e-05,
|
|
"loss": 0.3251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34059906005859375,
|
|
"step": 1945,
|
|
"valid_targets_mean": 6095.9,
|
|
"valid_targets_min": 2214
|
|
},
|
|
{
|
|
"epoch": 2.27803738317757,
|
|
"grad_norm": 0.44162425377672776,
|
|
"learning_rate": 3.41338934557293e-05,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31986796855926514,
|
|
"step": 1950,
|
|
"valid_targets_mean": 5616.3,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 2.2838785046728973,
|
|
"grad_norm": 0.39926149522892707,
|
|
"learning_rate": 3.409261070148273e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3251856863498688,
|
|
"step": 1955,
|
|
"valid_targets_mean": 5680.0,
|
|
"valid_targets_min": 2589
|
|
},
|
|
{
|
|
"epoch": 2.289719626168224,
|
|
"grad_norm": 0.41513490268518854,
|
|
"learning_rate": 3.4051208347248494e-05,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736586630344391,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5405.3,
|
|
"valid_targets_min": 2335
|
|
},
|
|
{
|
|
"epoch": 2.2955607476635516,
|
|
"grad_norm": 0.47840641467242206,
|
|
"learning_rate": 3.400968674439661e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3064308166503906,
|
|
"step": 1965,
|
|
"valid_targets_mean": 4638.1,
|
|
"valid_targets_min": 2191
|
|
},
|
|
{
|
|
"epoch": 2.3014018691588785,
|
|
"grad_norm": 0.4624976523786879,
|
|
"learning_rate": 3.396804624530917e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31720542907714844,
|
|
"step": 1970,
|
|
"valid_targets_mean": 5866.2,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 2.307242990654206,
|
|
"grad_norm": 0.4170783921542984,
|
|
"learning_rate": 3.392628720337723e-05,
|
|
"loss": 0.3111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.302960067987442,
|
|
"step": 1975,
|
|
"valid_targets_mean": 5605.3,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 2.3130841121495327,
|
|
"grad_norm": 0.45088416341601145,
|
|
"learning_rate": 3.3884409972997956e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29619142413139343,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4631.0,
|
|
"valid_targets_min": 2054
|
|
},
|
|
{
|
|
"epoch": 2.31892523364486,
|
|
"grad_norm": 0.4318159900694894,
|
|
"learning_rate": 3.384241490957151e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2959307134151459,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5250.8,
|
|
"valid_targets_min": 1946
|
|
},
|
|
{
|
|
"epoch": 2.324766355140187,
|
|
"grad_norm": 0.43062683815131825,
|
|
"learning_rate": 3.380030236949808e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31163886189460754,
|
|
"step": 1990,
|
|
"valid_targets_mean": 5422.4,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 2.330607476635514,
|
|
"grad_norm": 0.4821918782511483,
|
|
"learning_rate": 3.375807271017484e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2875775396823883,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4843.8,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 2.336448598130841,
|
|
"grad_norm": 0.43563334376902846,
|
|
"learning_rate": 3.371572628999293e-05,
|
|
"loss": 0.3177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3240028917789459,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5654.8,
|
|
"valid_targets_min": 1701
|
|
},
|
|
{
|
|
"epoch": 2.342289719626168,
|
|
"grad_norm": 0.45441229553459656,
|
|
"learning_rate": 3.367326346833438e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3542272746562958,
|
|
"step": 2005,
|
|
"valid_targets_mean": 5261.2,
|
|
"valid_targets_min": 1580
|
|
},
|
|
{
|
|
"epoch": 2.3481308411214954,
|
|
"grad_norm": 0.4381811687957472,
|
|
"learning_rate": 3.363068460556914e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.301166832447052,
|
|
"step": 2010,
|
|
"valid_targets_mean": 5102.1,
|
|
"valid_targets_min": 1641
|
|
},
|
|
{
|
|
"epoch": 2.3539719626168223,
|
|
"grad_norm": 0.4388071511118262,
|
|
"learning_rate": 3.358799006305191e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36202552914619446,
|
|
"step": 2015,
|
|
"valid_targets_mean": 5855.4,
|
|
"valid_targets_min": 1664
|
|
},
|
|
{
|
|
"epoch": 2.3598130841121496,
|
|
"grad_norm": 0.38936966431377584,
|
|
"learning_rate": 3.354518020311916e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285942405462265,
|
|
"step": 2020,
|
|
"valid_targets_mean": 6293.1,
|
|
"valid_targets_min": 3275
|
|
},
|
|
{
|
|
"epoch": 2.3656542056074765,
|
|
"grad_norm": 0.3976424003236878,
|
|
"learning_rate": 3.3502255389086024e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31830695271492004,
|
|
"step": 2025,
|
|
"valid_targets_mean": 6134.0,
|
|
"valid_targets_min": 2070
|
|
},
|
|
{
|
|
"epoch": 2.371495327102804,
|
|
"grad_norm": 0.4920175040066912,
|
|
"learning_rate": 3.3459215985243226e-05,
|
|
"loss": 0.3057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3205171525478363,
|
|
"step": 2030,
|
|
"valid_targets_mean": 5897.0,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 2.3773364485981308,
|
|
"grad_norm": 0.43027497353573996,
|
|
"learning_rate": 3.341606235685397e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2974620759487152,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5775.1,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 2.383177570093458,
|
|
"grad_norm": 0.6970453186505922,
|
|
"learning_rate": 3.337279487015083e-05,
|
|
"loss": 0.3176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31976059079170227,
|
|
"step": 2040,
|
|
"valid_targets_mean": 5283.4,
|
|
"valid_targets_min": 2549
|
|
},
|
|
{
|
|
"epoch": 2.389018691588785,
|
|
"grad_norm": 0.42631300070660416,
|
|
"learning_rate": 3.332941389233272e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32492488622665405,
|
|
"step": 2045,
|
|
"valid_targets_mean": 5331.4,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 2.3948598130841123,
|
|
"grad_norm": 0.41585923241453177,
|
|
"learning_rate": 3.328591979156166e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3212294578552246,
|
|
"step": 2050,
|
|
"valid_targets_mean": 5490.7,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 2.4007009345794392,
|
|
"grad_norm": 0.40820742505941143,
|
|
"learning_rate": 3.324231293695976e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977217435836792,
|
|
"step": 2055,
|
|
"valid_targets_mean": 5691.6,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 2.406542056074766,
|
|
"grad_norm": 0.4115408484003873,
|
|
"learning_rate": 3.3198593698606014e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28937411308288574,
|
|
"step": 2060,
|
|
"valid_targets_mean": 5735.4,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 2.4123831775700935,
|
|
"grad_norm": 0.45025596377179705,
|
|
"learning_rate": 3.3154762447533184e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37173905968666077,
|
|
"step": 2065,
|
|
"valid_targets_mean": 6067.2,
|
|
"valid_targets_min": 2504
|
|
},
|
|
{
|
|
"epoch": 2.418224299065421,
|
|
"grad_norm": 0.42925540668842327,
|
|
"learning_rate": 3.311081955572466e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2943716049194336,
|
|
"step": 2070,
|
|
"valid_targets_mean": 5781.2,
|
|
"valid_targets_min": 2828
|
|
},
|
|
{
|
|
"epoch": 2.4240654205607477,
|
|
"grad_norm": 0.43291327703471155,
|
|
"learning_rate": 3.306676539611131e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31416118144989014,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5756.1,
|
|
"valid_targets_min": 2497
|
|
},
|
|
{
|
|
"epoch": 2.4299065420560746,
|
|
"grad_norm": 0.4110656731479779,
|
|
"learning_rate": 3.3022600342568264e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2756284773349762,
|
|
"step": 2080,
|
|
"valid_targets_mean": 5231.1,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 2.435747663551402,
|
|
"grad_norm": 0.4560428876561674,
|
|
"learning_rate": 3.297832476991181e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31953519582748413,
|
|
"step": 2085,
|
|
"valid_targets_mean": 5687.0,
|
|
"valid_targets_min": 2755
|
|
},
|
|
{
|
|
"epoch": 2.441588785046729,
|
|
"grad_norm": 0.8852128385650728,
|
|
"learning_rate": 3.2933939053896164e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25769922137260437,
|
|
"step": 2090,
|
|
"valid_targets_mean": 5893.7,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 2.447429906542056,
|
|
"grad_norm": 0.3971095689806776,
|
|
"learning_rate": 3.2889443571210297e-05,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128497004508972,
|
|
"step": 2095,
|
|
"valid_targets_mean": 5814.8,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 2.453271028037383,
|
|
"grad_norm": 0.40476908211653045,
|
|
"learning_rate": 3.284483869947476e-05,
|
|
"loss": 0.2984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3076663315296173,
|
|
"step": 2100,
|
|
"valid_targets_mean": 6258.9,
|
|
"valid_targets_min": 2823
|
|
},
|
|
{
|
|
"epoch": 2.4591121495327104,
|
|
"grad_norm": 0.3965887184313558,
|
|
"learning_rate": 3.280012481723842e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2987020015716553,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5638.6,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 2.4649532710280373,
|
|
"grad_norm": 0.4713563463636379,
|
|
"learning_rate": 3.275530230397532e-05,
|
|
"loss": 0.3109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3060981035232544,
|
|
"step": 2110,
|
|
"valid_targets_mean": 5757.5,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 2.4707943925233646,
|
|
"grad_norm": 0.40652266759020195,
|
|
"learning_rate": 3.27103715400814e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32375678420066833,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5691.6,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 2.4766355140186915,
|
|
"grad_norm": 0.4267247140325748,
|
|
"learning_rate": 3.2665332906871307e-05,
|
|
"loss": 0.312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.298218697309494,
|
|
"step": 2120,
|
|
"valid_targets_mean": 5089.9,
|
|
"valid_targets_min": 2032
|
|
},
|
|
{
|
|
"epoch": 2.482476635514019,
|
|
"grad_norm": 0.40027617737895355,
|
|
"learning_rate": 3.262018678657515e-05,
|
|
"loss": 0.3156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33351612091064453,
|
|
"step": 2125,
|
|
"valid_targets_mean": 5791.7,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 2.4883177570093458,
|
|
"grad_norm": 0.5556790237570676,
|
|
"learning_rate": 3.2574933562335215e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26804497838020325,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4788.6,
|
|
"valid_targets_min": 2737
|
|
},
|
|
{
|
|
"epoch": 2.494158878504673,
|
|
"grad_norm": 0.4598709912485897,
|
|
"learning_rate": 3.25295736182028e-05,
|
|
"loss": 0.3018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3119233548641205,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4753.2,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.44752579209390675,
|
|
"learning_rate": 3.2484107339134864e-05,
|
|
"loss": 0.3007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28754010796546936,
|
|
"step": 2140,
|
|
"valid_targets_mean": 5339.3,
|
|
"valid_targets_min": 1925
|
|
},
|
|
{
|
|
"epoch": 2.505841121495327,
|
|
"grad_norm": 0.43312796904146056,
|
|
"learning_rate": 3.2438535110990813e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28001078963279724,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5033.2,
|
|
"valid_targets_min": 2682
|
|
},
|
|
{
|
|
"epoch": 2.5116822429906542,
|
|
"grad_norm": 0.40183148749323416,
|
|
"learning_rate": 3.239285732052922e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2620558440685272,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5101.5,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 2.5175233644859816,
|
|
"grad_norm": 0.3642416670123409,
|
|
"learning_rate": 3.234707435540453e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006448745727539,
|
|
"step": 2155,
|
|
"valid_targets_mean": 7085.8,
|
|
"valid_targets_min": 2547
|
|
},
|
|
{
|
|
"epoch": 2.5233644859813085,
|
|
"grad_norm": 0.4319488883574311,
|
|
"learning_rate": 3.2301186604163776e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.319521963596344,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5333.9,
|
|
"valid_targets_min": 1888
|
|
},
|
|
{
|
|
"epoch": 2.5292056074766354,
|
|
"grad_norm": 0.4556387799595045,
|
|
"learning_rate": 3.225519445624327e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3256971836090088,
|
|
"step": 2165,
|
|
"valid_targets_mean": 5992.4,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 2.5350467289719627,
|
|
"grad_norm": 0.3870941349209684,
|
|
"learning_rate": 3.220909830196533e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28848034143447876,
|
|
"step": 2170,
|
|
"valid_targets_mean": 5701.3,
|
|
"valid_targets_min": 2405
|
|
},
|
|
{
|
|
"epoch": 2.5408878504672896,
|
|
"grad_norm": 0.3986798911894801,
|
|
"learning_rate": 3.216289853253494e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2743896543979645,
|
|
"step": 2175,
|
|
"valid_targets_mean": 5662.8,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 2.546728971962617,
|
|
"grad_norm": 0.4132132089149138,
|
|
"learning_rate": 3.2116595540036416e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29365861415863037,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5416.1,
|
|
"valid_targets_min": 2584
|
|
},
|
|
{
|
|
"epoch": 2.552570093457944,
|
|
"grad_norm": 0.44370799832377533,
|
|
"learning_rate": 3.207018971743013e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30516281723976135,
|
|
"step": 2185,
|
|
"valid_targets_mean": 5229.1,
|
|
"valid_targets_min": 1496
|
|
},
|
|
{
|
|
"epoch": 2.558411214953271,
|
|
"grad_norm": 0.37118639077459037,
|
|
"learning_rate": 3.202368145854912e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2941318452358246,
|
|
"step": 2190,
|
|
"valid_targets_mean": 5785.8,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 2.564252336448598,
|
|
"grad_norm": 0.43555216904417493,
|
|
"learning_rate": 3.1977071158095775e-05,
|
|
"loss": 0.322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3392646014690399,
|
|
"step": 2195,
|
|
"valid_targets_mean": 5928.2,
|
|
"valid_targets_min": 2577
|
|
},
|
|
{
|
|
"epoch": 2.5700934579439254,
|
|
"grad_norm": 0.40234493856928444,
|
|
"learning_rate": 3.1930359211638496e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2882296144962311,
|
|
"step": 2200,
|
|
"valid_targets_mean": 5579.8,
|
|
"valid_targets_min": 2879
|
|
},
|
|
{
|
|
"epoch": 2.5759345794392523,
|
|
"grad_norm": 0.39845315845497875,
|
|
"learning_rate": 3.188354601560833e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29944178462028503,
|
|
"step": 2205,
|
|
"valid_targets_mean": 6076.4,
|
|
"valid_targets_min": 2935
|
|
},
|
|
{
|
|
"epoch": 2.581775700934579,
|
|
"grad_norm": 0.42236763253277865,
|
|
"learning_rate": 3.1836631967295564e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3272985517978668,
|
|
"step": 2210,
|
|
"valid_targets_mean": 6010.0,
|
|
"valid_targets_min": 2467
|
|
},
|
|
{
|
|
"epoch": 2.5876168224299065,
|
|
"grad_norm": 0.3957502141763996,
|
|
"learning_rate": 3.1789617464846414e-05,
|
|
"loss": 0.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32450947165489197,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5652.2,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 2.593457943925234,
|
|
"grad_norm": 0.47751004827882826,
|
|
"learning_rate": 3.1742502907259634e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27879127860069275,
|
|
"step": 2220,
|
|
"valid_targets_mean": 5128.6,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 2.5992990654205608,
|
|
"grad_norm": 0.444604434159396,
|
|
"learning_rate": 3.169528869438309e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587922513484955,
|
|
"step": 2225,
|
|
"valid_targets_mean": 5397.6,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 2.6051401869158877,
|
|
"grad_norm": 0.4263175602055219,
|
|
"learning_rate": 3.164797522691042e-05,
|
|
"loss": 0.3074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30093303322792053,
|
|
"step": 2230,
|
|
"valid_targets_mean": 5701.8,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 2.610981308411215,
|
|
"grad_norm": 0.4200218764911032,
|
|
"learning_rate": 3.1600562906377586e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26697325706481934,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4946.8,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 2.616822429906542,
|
|
"grad_norm": 0.3884756204969027,
|
|
"learning_rate": 3.155305213515949e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2580297887325287,
|
|
"step": 2240,
|
|
"valid_targets_mean": 5740.0,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 2.6226635514018692,
|
|
"grad_norm": 0.42868825134220895,
|
|
"learning_rate": 3.150544331646659e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32735779881477356,
|
|
"step": 2245,
|
|
"valid_targets_mean": 6044.5,
|
|
"valid_targets_min": 2149
|
|
},
|
|
{
|
|
"epoch": 2.628504672897196,
|
|
"grad_norm": 0.40769112294839843,
|
|
"learning_rate": 3.14577368543414e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29262158274650574,
|
|
"step": 2250,
|
|
"valid_targets_mean": 5568.8,
|
|
"valid_targets_min": 2256
|
|
},
|
|
{
|
|
"epoch": 2.6343457943925235,
|
|
"grad_norm": 0.40959706937989504,
|
|
"learning_rate": 3.140993315365513e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27782008051872253,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5704.0,
|
|
"valid_targets_min": 2453
|
|
},
|
|
{
|
|
"epoch": 2.6401869158878504,
|
|
"grad_norm": 0.4115092832292235,
|
|
"learning_rate": 3.136203262010422e-05,
|
|
"loss": 0.3033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591207027435303,
|
|
"step": 2260,
|
|
"valid_targets_mean": 5982.8,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 2.6460280373831777,
|
|
"grad_norm": 0.42686578000888376,
|
|
"learning_rate": 3.1314035660206896e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32638734579086304,
|
|
"step": 2265,
|
|
"valid_targets_mean": 5439.2,
|
|
"valid_targets_min": 2612
|
|
},
|
|
{
|
|
"epoch": 2.6518691588785046,
|
|
"grad_norm": 0.4093841698991089,
|
|
"learning_rate": 3.1265942681299746e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34621158242225647,
|
|
"step": 2270,
|
|
"valid_targets_mean": 6199.3,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 2.6577102803738315,
|
|
"grad_norm": 0.4687045137371953,
|
|
"learning_rate": 3.121775409153424e-05,
|
|
"loss": 0.3052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29042503237724304,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4595.1,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 2.663551401869159,
|
|
"grad_norm": 0.45488288770015084,
|
|
"learning_rate": 3.1169470299873256e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28985312581062317,
|
|
"step": 2280,
|
|
"valid_targets_mean": 5459.7,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 2.669392523364486,
|
|
"grad_norm": 0.43619980701595307,
|
|
"learning_rate": 3.112109171608762e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635429799556732,
|
|
"step": 2285,
|
|
"valid_targets_mean": 4794.6,
|
|
"valid_targets_min": 2070
|
|
},
|
|
{
|
|
"epoch": 2.675233644859813,
|
|
"grad_norm": 0.41860052699694544,
|
|
"learning_rate": 3.1072618750752664e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30349448323249817,
|
|
"step": 2290,
|
|
"valid_targets_mean": 6056.5,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 2.68107476635514,
|
|
"grad_norm": 0.46796942459922386,
|
|
"learning_rate": 3.102405181524468e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3332636058330536,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4867.6,
|
|
"valid_targets_min": 1671
|
|
},
|
|
{
|
|
"epoch": 2.6869158878504673,
|
|
"grad_norm": 0.43594060031965326,
|
|
"learning_rate": 3.097539132173746e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886786460876465,
|
|
"step": 2300,
|
|
"valid_targets_mean": 5635.3,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 2.6927570093457946,
|
|
"grad_norm": 0.4146212791869328,
|
|
"learning_rate": 3.0926637683198816e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31483951210975647,
|
|
"step": 2305,
|
|
"valid_targets_mean": 6523.1,
|
|
"valid_targets_min": 1779
|
|
},
|
|
{
|
|
"epoch": 2.6985981308411215,
|
|
"grad_norm": 0.38274618373598673,
|
|
"learning_rate": 3.0877791313387025e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925668954849243,
|
|
"step": 2310,
|
|
"valid_targets_mean": 6093.2,
|
|
"valid_targets_min": 1658
|
|
},
|
|
{
|
|
"epoch": 2.7044392523364484,
|
|
"grad_norm": 0.4082692622565626,
|
|
"learning_rate": 3.0828852626847384e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2669233977794647,
|
|
"step": 2315,
|
|
"valid_targets_mean": 5367.7,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 2.710280373831776,
|
|
"grad_norm": 0.424505205978606,
|
|
"learning_rate": 3.077982203890862e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.295175164937973,
|
|
"step": 2320,
|
|
"valid_targets_mean": 5184.5,
|
|
"valid_targets_min": 2217
|
|
},
|
|
{
|
|
"epoch": 2.7161214953271027,
|
|
"grad_norm": 0.4276859410434356,
|
|
"learning_rate": 3.0730699965679445e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128901422023773,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5330.0,
|
|
"valid_targets_min": 2169
|
|
},
|
|
{
|
|
"epoch": 2.72196261682243,
|
|
"grad_norm": 0.43755787921213707,
|
|
"learning_rate": 3.068148682404492e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25755974650382996,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5374.5,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 2.727803738317757,
|
|
"grad_norm": 0.4219341328430994,
|
|
"learning_rate": 3.063218303166304e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31067270040512085,
|
|
"step": 2335,
|
|
"valid_targets_mean": 6167.3,
|
|
"valid_targets_min": 2969
|
|
},
|
|
{
|
|
"epoch": 2.7336448598130842,
|
|
"grad_norm": 0.3850314226707135,
|
|
"learning_rate": 3.058278900696108e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29373252391815186,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5818.9,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 2.739485981308411,
|
|
"grad_norm": 0.5948931686349086,
|
|
"learning_rate": 3.053330516913211e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32392436265945435,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5414.1,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 2.7453271028037385,
|
|
"grad_norm": 0.3855953249573375,
|
|
"learning_rate": 3.048373193813144e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30761486291885376,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5771.8,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 2.7511682242990654,
|
|
"grad_norm": 0.41824687288569384,
|
|
"learning_rate": 3.0434069734672996e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27998051047325134,
|
|
"step": 2355,
|
|
"valid_targets_mean": 4909.0,
|
|
"valid_targets_min": 1658
|
|
},
|
|
{
|
|
"epoch": 2.7570093457943923,
|
|
"grad_norm": 0.42052656124274196,
|
|
"learning_rate": 3.0384318980225808e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931758165359497,
|
|
"step": 2360,
|
|
"valid_targets_mean": 5022.9,
|
|
"valid_targets_min": 2108
|
|
},
|
|
{
|
|
"epoch": 2.7628504672897196,
|
|
"grad_norm": 0.391365366935654,
|
|
"learning_rate": 3.03344800970104e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3144325911998749,
|
|
"step": 2365,
|
|
"valid_targets_mean": 6967.4,
|
|
"valid_targets_min": 1707
|
|
},
|
|
{
|
|
"epoch": 2.768691588785047,
|
|
"grad_norm": 0.4108998839227264,
|
|
"learning_rate": 3.0284553507995253e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35110151767730713,
|
|
"step": 2370,
|
|
"valid_targets_mean": 6974.8,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 2.774532710280374,
|
|
"grad_norm": 0.39301134904443413,
|
|
"learning_rate": 3.023453963689314e-05,
|
|
"loss": 0.2857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29636508226394653,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5945.7,
|
|
"valid_targets_min": 2073
|
|
},
|
|
{
|
|
"epoch": 2.7803738317757007,
|
|
"grad_norm": 0.38896439411726336,
|
|
"learning_rate": 3.0184438908157605e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3098658621311188,
|
|
"step": 2380,
|
|
"valid_targets_mean": 6182.0,
|
|
"valid_targets_min": 2169
|
|
},
|
|
{
|
|
"epoch": 2.786214953271028,
|
|
"grad_norm": 0.3866866935688301,
|
|
"learning_rate": 3.01342517469793e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29920798540115356,
|
|
"step": 2385,
|
|
"valid_targets_mean": 6343.6,
|
|
"valid_targets_min": 3001
|
|
},
|
|
{
|
|
"epoch": 2.792056074766355,
|
|
"grad_norm": 0.4385094977681287,
|
|
"learning_rate": 3.008397857928243e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31698358058929443,
|
|
"step": 2390,
|
|
"valid_targets_mean": 5532.2,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 2.7978971962616823,
|
|
"grad_norm": 0.39998173447929175,
|
|
"learning_rate": 3.0033619831721097e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3236994743347168,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5935.3,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 2.803738317757009,
|
|
"grad_norm": 0.45316902414505694,
|
|
"learning_rate": 2.9983175931675706e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28546059131622314,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5509.2,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 2.8095794392523366,
|
|
"grad_norm": 0.4245855072343648,
|
|
"learning_rate": 2.9932647307249313e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3079212009906769,
|
|
"step": 2405,
|
|
"valid_targets_mean": 5650.3,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 2.8154205607476634,
|
|
"grad_norm": 0.41632456028509257,
|
|
"learning_rate": 2.9882034387264025e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3011241853237152,
|
|
"step": 2410,
|
|
"valid_targets_mean": 5383.0,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 2.821261682242991,
|
|
"grad_norm": 0.39334670831603835,
|
|
"learning_rate": 2.983133760125732e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25486207008361816,
|
|
"step": 2415,
|
|
"valid_targets_mean": 5521.0,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 2.8271028037383177,
|
|
"grad_norm": 0.417252044089302,
|
|
"learning_rate": 2.9780557379478438e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.294577419757843,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5942.2,
|
|
"valid_targets_min": 2816
|
|
},
|
|
{
|
|
"epoch": 2.832943925233645,
|
|
"grad_norm": 0.38793853767319914,
|
|
"learning_rate": 2.9729694152884715e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3186401128768921,
|
|
"step": 2425,
|
|
"valid_targets_mean": 6581.5,
|
|
"valid_targets_min": 1781
|
|
},
|
|
{
|
|
"epoch": 2.838785046728972,
|
|
"grad_norm": 0.38919645196571007,
|
|
"learning_rate": 2.967874835313791e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27760031819343567,
|
|
"step": 2430,
|
|
"valid_targets_mean": 5986.4,
|
|
"valid_targets_min": 3236
|
|
},
|
|
{
|
|
"epoch": 2.8446261682242993,
|
|
"grad_norm": 0.40540892435286563,
|
|
"learning_rate": 2.9627720412600573e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3234311044216156,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5792.5,
|
|
"valid_targets_min": 2634
|
|
},
|
|
{
|
|
"epoch": 2.850467289719626,
|
|
"grad_norm": 0.3895589519910875,
|
|
"learning_rate": 2.9576610764332348e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3025819957256317,
|
|
"step": 2440,
|
|
"valid_targets_mean": 5960.0,
|
|
"valid_targets_min": 1809
|
|
},
|
|
{
|
|
"epoch": 2.856308411214953,
|
|
"grad_norm": 0.4150892350158093,
|
|
"learning_rate": 2.952541984208633e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29966676235198975,
|
|
"step": 2445,
|
|
"valid_targets_mean": 5761.8,
|
|
"valid_targets_min": 2712
|
|
},
|
|
{
|
|
"epoch": 2.8621495327102804,
|
|
"grad_norm": 0.4131992293667484,
|
|
"learning_rate": 2.9474148080305335e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3662201464176178,
|
|
"step": 2450,
|
|
"valid_targets_mean": 6808.3,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 2.8679906542056077,
|
|
"grad_norm": 0.4016518380973992,
|
|
"learning_rate": 2.9422795914118264e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30236372351646423,
|
|
"step": 2455,
|
|
"valid_targets_mean": 6181.0,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 2.8738317757009346,
|
|
"grad_norm": 0.3950182022481884,
|
|
"learning_rate": 2.9371363779336373e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3179415464401245,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5895.0,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 2.8796728971962615,
|
|
"grad_norm": 0.3841519830526832,
|
|
"learning_rate": 2.9319852112449596e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2723807990550995,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5459.1,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 2.885514018691589,
|
|
"grad_norm": 0.42666162837534144,
|
|
"learning_rate": 2.9268261350622823e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3082943260669708,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5254.4,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 2.8913551401869158,
|
|
"grad_norm": 0.4404244049322486,
|
|
"learning_rate": 2.9216591931692223e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28562456369400024,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5578.5,
|
|
"valid_targets_min": 2595
|
|
},
|
|
{
|
|
"epoch": 2.897196261682243,
|
|
"grad_norm": 0.398151141070087,
|
|
"learning_rate": 2.9164844294161477e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2878023684024811,
|
|
"step": 2480,
|
|
"valid_targets_mean": 5964.7,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 2.90303738317757,
|
|
"grad_norm": 0.42676334590606824,
|
|
"learning_rate": 2.9113018877198115e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31750181317329407,
|
|
"step": 2485,
|
|
"valid_targets_mean": 5199.8,
|
|
"valid_targets_min": 1986
|
|
},
|
|
{
|
|
"epoch": 2.9088785046728973,
|
|
"grad_norm": 0.500601862336987,
|
|
"learning_rate": 2.9061116120629732e-05,
|
|
"loss": 0.2992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680962383747101,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4563.4,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 2.914719626168224,
|
|
"grad_norm": 0.47168686314112623,
|
|
"learning_rate": 2.900913646494029e-05,
|
|
"loss": 0.3073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28487786650657654,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4612.8,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 2.9205607476635516,
|
|
"grad_norm": 0.3994877280494155,
|
|
"learning_rate": 2.8957080351266386e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3075542747974396,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5794.8,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 2.9264018691588785,
|
|
"grad_norm": 0.4323237129360249,
|
|
"learning_rate": 2.8904948221393477e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3322840929031372,
|
|
"step": 2505,
|
|
"valid_targets_mean": 5915.1,
|
|
"valid_targets_min": 2756
|
|
},
|
|
{
|
|
"epoch": 2.9322429906542054,
|
|
"grad_norm": 0.4379007008645764,
|
|
"learning_rate": 2.8852740517752157e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3063560426235199,
|
|
"step": 2510,
|
|
"valid_targets_mean": 6204.1,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 2.9380841121495327,
|
|
"grad_norm": 0.4378285269250111,
|
|
"learning_rate": 2.8800457683414387e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30580148100852966,
|
|
"step": 2515,
|
|
"valid_targets_mean": 5459.3,
|
|
"valid_targets_min": 2119
|
|
},
|
|
{
|
|
"epoch": 2.94392523364486,
|
|
"grad_norm": 0.4276271609166218,
|
|
"learning_rate": 2.8748100162089753e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3241829574108124,
|
|
"step": 2520,
|
|
"valid_targets_mean": 5585.4,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 2.949766355140187,
|
|
"grad_norm": 0.42024013548756195,
|
|
"learning_rate": 2.869566839812167e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671094536781311,
|
|
"step": 2525,
|
|
"valid_targets_mean": 5605.5,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 2.955607476635514,
|
|
"grad_norm": 0.40387718439848425,
|
|
"learning_rate": 2.864316283648365e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3035791218280792,
|
|
"step": 2530,
|
|
"valid_targets_mean": 5731.5,
|
|
"valid_targets_min": 2651
|
|
},
|
|
{
|
|
"epoch": 2.961448598130841,
|
|
"grad_norm": 0.3928664700050598,
|
|
"learning_rate": 2.8590583922775484e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930658459663391,
|
|
"step": 2535,
|
|
"valid_targets_mean": 6278.0,
|
|
"valid_targets_min": 2723
|
|
},
|
|
{
|
|
"epoch": 2.9672897196261685,
|
|
"grad_norm": 0.4680484377037859,
|
|
"learning_rate": 2.85379321032195e-05,
|
|
"loss": 0.2898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976778447628021,
|
|
"step": 2540,
|
|
"valid_targets_mean": 5083.4,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 2.9731308411214954,
|
|
"grad_norm": 0.407573051086408,
|
|
"learning_rate": 2.8485207824656734e-05,
|
|
"loss": 0.3039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978443503379822,
|
|
"step": 2545,
|
|
"valid_targets_mean": 5559.9,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 2.9789719626168223,
|
|
"grad_norm": 0.3985019477187551,
|
|
"learning_rate": 2.8432411534543193e-05,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28615304827690125,
|
|
"step": 2550,
|
|
"valid_targets_mean": 5777.2,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 2.9848130841121496,
|
|
"grad_norm": 0.44027586267092284,
|
|
"learning_rate": 2.8379543680945994e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2871052920818329,
|
|
"step": 2555,
|
|
"valid_targets_mean": 5053.7,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 2.9906542056074765,
|
|
"grad_norm": 0.4094838222093643,
|
|
"learning_rate": 2.832660471253962e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3204616606235504,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5860.7,
|
|
"valid_targets_min": 1508
|
|
},
|
|
{
|
|
"epoch": 2.996495327102804,
|
|
"grad_norm": 0.4523774037682343,
|
|
"learning_rate": 2.8273595078602065e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3175162374973297,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5465.5,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 3.0023364485981308,
|
|
"grad_norm": 0.4301848753187624,
|
|
"learning_rate": 2.822051522901104e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2638271152973175,
|
|
"step": 2570,
|
|
"valid_targets_mean": 5376.0,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 3.008177570093458,
|
|
"grad_norm": 0.4821307121257306,
|
|
"learning_rate": 2.8167365614240184e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3373398780822754,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4736.7,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 3.014018691588785,
|
|
"grad_norm": 0.41056905524078674,
|
|
"learning_rate": 2.811414668535518e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31374630331993103,
|
|
"step": 2580,
|
|
"valid_targets_mean": 6453.4,
|
|
"valid_targets_min": 3071
|
|
},
|
|
{
|
|
"epoch": 3.0198598130841123,
|
|
"grad_norm": 0.48429128189208803,
|
|
"learning_rate": 2.8060858894009983e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930625081062317,
|
|
"step": 2585,
|
|
"valid_targets_mean": 5356.6,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 3.0257009345794392,
|
|
"grad_norm": 0.4051764135748791,
|
|
"learning_rate": 2.8007502692442944e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31336429715156555,
|
|
"step": 2590,
|
|
"valid_targets_mean": 6048.8,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 3.0315420560747666,
|
|
"grad_norm": 0.38852244246155476,
|
|
"learning_rate": 2.7954078533473016e-05,
|
|
"loss": 0.3202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958604693412781,
|
|
"step": 2595,
|
|
"valid_targets_mean": 5664.9,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 3.0373831775700935,
|
|
"grad_norm": 0.4079234505011854,
|
|
"learning_rate": 2.7900586870495865e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28028610348701477,
|
|
"step": 2600,
|
|
"valid_targets_mean": 5273.4,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 3.0432242990654204,
|
|
"grad_norm": 0.4252969242410287,
|
|
"learning_rate": 2.7847028157480063e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32855701446533203,
|
|
"step": 2605,
|
|
"valid_targets_mean": 5501.9,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 3.0490654205607477,
|
|
"grad_norm": 0.5733385455157712,
|
|
"learning_rate": 2.7793402848963206e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27661633491516113,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4929.2,
|
|
"valid_targets_min": 2849
|
|
},
|
|
{
|
|
"epoch": 3.0549065420560746,
|
|
"grad_norm": 0.45414390475504174,
|
|
"learning_rate": 2.7739711400048062e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3208957612514496,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4579.8,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 3.060747663551402,
|
|
"grad_norm": 0.4234160890588865,
|
|
"learning_rate": 2.7685954266398726e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573545277118683,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5702.2,
|
|
"valid_targets_min": 1953
|
|
},
|
|
{
|
|
"epoch": 3.066588785046729,
|
|
"grad_norm": 0.40406109963347575,
|
|
"learning_rate": 2.7632131904236735e-05,
|
|
"loss": 0.3108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32955726981163025,
|
|
"step": 2625,
|
|
"valid_targets_mean": 5898.6,
|
|
"valid_targets_min": 2047
|
|
},
|
|
{
|
|
"epoch": 3.072429906542056,
|
|
"grad_norm": 0.3909313040362952,
|
|
"learning_rate": 2.7578244770337207e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3030218780040741,
|
|
"step": 2630,
|
|
"valid_targets_mean": 6410.5,
|
|
"valid_targets_min": 2277
|
|
},
|
|
{
|
|
"epoch": 3.078271028037383,
|
|
"grad_norm": 0.40847359144194995,
|
|
"learning_rate": 2.7524293322024943e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29161033034324646,
|
|
"step": 2635,
|
|
"valid_targets_mean": 5736.8,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 3.0841121495327104,
|
|
"grad_norm": 0.41351072928493554,
|
|
"learning_rate": 2.7470278017170588e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842245101928711,
|
|
"step": 2640,
|
|
"valid_targets_mean": 5075.8,
|
|
"valid_targets_min": 2043
|
|
},
|
|
{
|
|
"epoch": 3.0899532710280373,
|
|
"grad_norm": 0.4508632913859823,
|
|
"learning_rate": 2.7416199314186696e-05,
|
|
"loss": 0.3023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27859821915626526,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4748.4,
|
|
"valid_targets_min": 1821
|
|
},
|
|
{
|
|
"epoch": 3.0957943925233646,
|
|
"grad_norm": 0.4062021241890471,
|
|
"learning_rate": 2.736205767202387e-05,
|
|
"loss": 0.3029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30542391538619995,
|
|
"step": 2650,
|
|
"valid_targets_mean": 5953.5,
|
|
"valid_targets_min": 3133
|
|
},
|
|
{
|
|
"epoch": 3.1016355140186915,
|
|
"grad_norm": 0.6072491645119501,
|
|
"learning_rate": 2.7307853550166874e-05,
|
|
"loss": 0.2877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2810097634792328,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5598.1,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 3.107476635514019,
|
|
"grad_norm": 0.37453590360891054,
|
|
"learning_rate": 2.7253587408630697e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3081425130367279,
|
|
"step": 2660,
|
|
"valid_targets_mean": 6146.9,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 3.1133177570093458,
|
|
"grad_norm": 0.37875773629812454,
|
|
"learning_rate": 2.7199259707956683e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963075637817383,
|
|
"step": 2665,
|
|
"valid_targets_mean": 6255.2,
|
|
"valid_targets_min": 2684
|
|
},
|
|
{
|
|
"epoch": 3.119158878504673,
|
|
"grad_norm": 0.4228590075704972,
|
|
"learning_rate": 2.7144870909208614e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3038904368877411,
|
|
"step": 2670,
|
|
"valid_targets_mean": 5233.6,
|
|
"valid_targets_min": 2330
|
|
},
|
|
{
|
|
"epoch": 3.125,
|
|
"grad_norm": 0.42019838820236616,
|
|
"learning_rate": 2.7090421473968786e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32292571663856506,
|
|
"step": 2675,
|
|
"valid_targets_mean": 6352.8,
|
|
"valid_targets_min": 2842
|
|
},
|
|
{
|
|
"epoch": 3.130841121495327,
|
|
"grad_norm": 0.4425194384903545,
|
|
"learning_rate": 2.7035911864334113e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3058614730834961,
|
|
"step": 2680,
|
|
"valid_targets_mean": 5878.6,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 3.1366822429906542,
|
|
"grad_norm": 0.49323516678365686,
|
|
"learning_rate": 2.6981342542912168e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3545699417591095,
|
|
"step": 2685,
|
|
"valid_targets_mean": 5543.0,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 3.142523364485981,
|
|
"grad_norm": 0.39814057316424606,
|
|
"learning_rate": 2.6926713972817306e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28222233057022095,
|
|
"step": 2690,
|
|
"valid_targets_mean": 6342.9,
|
|
"valid_targets_min": 2422
|
|
},
|
|
{
|
|
"epoch": 3.1483644859813085,
|
|
"grad_norm": 0.46539178311341084,
|
|
"learning_rate": 2.68720266176667e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3224466145038605,
|
|
"step": 2695,
|
|
"valid_targets_mean": 5362.3,
|
|
"valid_targets_min": 2133
|
|
},
|
|
{
|
|
"epoch": 3.1542056074766354,
|
|
"grad_norm": 0.4829181991453719,
|
|
"learning_rate": 2.6817280941576403e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3304422199726105,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4640.2,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 3.1600467289719627,
|
|
"grad_norm": 0.38946803224715565,
|
|
"learning_rate": 2.6762477409157436e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25945451855659485,
|
|
"step": 2705,
|
|
"valid_targets_mean": 5402.2,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 3.1658878504672896,
|
|
"grad_norm": 0.40309250365402305,
|
|
"learning_rate": 2.6707616485511823e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2987315356731415,
|
|
"step": 2710,
|
|
"valid_targets_mean": 5825.2,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 3.171728971962617,
|
|
"grad_norm": 0.41228183089568615,
|
|
"learning_rate": 2.665269863622867e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33366140723228455,
|
|
"step": 2715,
|
|
"valid_targets_mean": 6381.8,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 3.177570093457944,
|
|
"grad_norm": 0.40354990940248436,
|
|
"learning_rate": 2.6597724327380155e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29080894589424133,
|
|
"step": 2720,
|
|
"valid_targets_mean": 5897.1,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 3.183411214953271,
|
|
"grad_norm": 0.38727982584654597,
|
|
"learning_rate": 2.654269402551766e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27056458592414856,
|
|
"step": 2725,
|
|
"valid_targets_mean": 5880.4,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 3.189252336448598,
|
|
"grad_norm": 0.41651806797309204,
|
|
"learning_rate": 2.6487608197667717e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789590358734131,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5224.4,
|
|
"valid_targets_min": 1917
|
|
},
|
|
{
|
|
"epoch": 3.1950934579439254,
|
|
"grad_norm": 0.39138005858329544,
|
|
"learning_rate": 2.6432467311328133e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27812659740448,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5978.3,
|
|
"valid_targets_min": 1914
|
|
},
|
|
{
|
|
"epoch": 3.2009345794392523,
|
|
"grad_norm": 0.45311640767854044,
|
|
"learning_rate": 2.6377271834463944e-05,
|
|
"loss": 0.2926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27436068654060364,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4702.4,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 3.2067757009345796,
|
|
"grad_norm": 0.3829484195369102,
|
|
"learning_rate": 2.632202223550351e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24764461815357208,
|
|
"step": 2745,
|
|
"valid_targets_mean": 5754.2,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 3.2126168224299065,
|
|
"grad_norm": 0.43122277561457023,
|
|
"learning_rate": 2.6266718983334504e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3068701922893524,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5600.0,
|
|
"valid_targets_min": 2108
|
|
},
|
|
{
|
|
"epoch": 3.218457943925234,
|
|
"grad_norm": 0.4507423250422832,
|
|
"learning_rate": 2.6211362547299918e-05,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2750387191772461,
|
|
"step": 2755,
|
|
"valid_targets_mean": 5278.5,
|
|
"valid_targets_min": 2123
|
|
},
|
|
{
|
|
"epoch": 3.2242990654205608,
|
|
"grad_norm": 0.41600908119094315,
|
|
"learning_rate": 2.6155953397194128e-05,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3210826516151428,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5975.2,
|
|
"valid_targets_min": 2498
|
|
},
|
|
{
|
|
"epoch": 3.2301401869158877,
|
|
"grad_norm": 0.4036381634460004,
|
|
"learning_rate": 2.610049200325885e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32514798641204834,
|
|
"step": 2765,
|
|
"valid_targets_mean": 6432.9,
|
|
"valid_targets_min": 3357
|
|
},
|
|
{
|
|
"epoch": 3.235981308411215,
|
|
"grad_norm": 0.385370816678723,
|
|
"learning_rate": 2.604497883617921e-05,
|
|
"loss": 0.3299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30732622742652893,
|
|
"step": 2770,
|
|
"valid_targets_mean": 6445.8,
|
|
"valid_targets_min": 2400
|
|
},
|
|
{
|
|
"epoch": 3.241822429906542,
|
|
"grad_norm": 0.39253575263940427,
|
|
"learning_rate": 2.5989414367079695e-05,
|
|
"loss": 0.2964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33682724833488464,
|
|
"step": 2775,
|
|
"valid_targets_mean": 6262.7,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 3.2476635514018692,
|
|
"grad_norm": 0.40368122584724336,
|
|
"learning_rate": 2.593379906752019e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637290358543396,
|
|
"step": 2780,
|
|
"valid_targets_mean": 5206.8,
|
|
"valid_targets_min": 2567
|
|
},
|
|
{
|
|
"epoch": 3.253504672897196,
|
|
"grad_norm": 0.4430820347656125,
|
|
"learning_rate": 2.587813340949196e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2873803377151489,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4972.0,
|
|
"valid_targets_min": 2077
|
|
},
|
|
{
|
|
"epoch": 3.2593457943925235,
|
|
"grad_norm": 0.41053965787771834,
|
|
"learning_rate": 2.5822417865413642e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3109011650085449,
|
|
"step": 2790,
|
|
"valid_targets_mean": 5957.8,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 3.2651869158878504,
|
|
"grad_norm": 0.4563213164775311,
|
|
"learning_rate": 2.5766652908127246e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3083871304988861,
|
|
"step": 2795,
|
|
"valid_targets_mean": 5437.2,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 3.2710280373831777,
|
|
"grad_norm": 0.4064202583171943,
|
|
"learning_rate": 2.5710839010894146e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930283546447754,
|
|
"step": 2800,
|
|
"valid_targets_mean": 5542.5,
|
|
"valid_targets_min": 2474
|
|
},
|
|
{
|
|
"epoch": 3.2768691588785046,
|
|
"grad_norm": 0.39823449179750925,
|
|
"learning_rate": 2.5654976647391044e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29686835408210754,
|
|
"step": 2805,
|
|
"valid_targets_mean": 6508.8,
|
|
"valid_targets_min": 3790
|
|
},
|
|
{
|
|
"epoch": 3.282710280373832,
|
|
"grad_norm": 0.40654121864952214,
|
|
"learning_rate": 2.5599066291705964e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307128369808197,
|
|
"step": 2810,
|
|
"valid_targets_mean": 6496.3,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 3.288551401869159,
|
|
"grad_norm": 0.43302564601203025,
|
|
"learning_rate": 2.5543108418334233e-05,
|
|
"loss": 0.2972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30620256066322327,
|
|
"step": 2815,
|
|
"valid_targets_mean": 5950.8,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 3.294392523364486,
|
|
"grad_norm": 0.40609218372201134,
|
|
"learning_rate": 2.5487103502174444e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590007483959198,
|
|
"step": 2820,
|
|
"valid_targets_mean": 5593.8,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 3.300233644859813,
|
|
"grad_norm": 0.42870937173002693,
|
|
"learning_rate": 2.5431052018524417e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2798367440700531,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4891.1,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 3.30607476635514,
|
|
"grad_norm": 0.40252621677306183,
|
|
"learning_rate": 2.5374954443077196e-05,
|
|
"loss": 0.2965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32747986912727356,
|
|
"step": 2830,
|
|
"valid_targets_mean": 5430.8,
|
|
"valid_targets_min": 2436
|
|
},
|
|
{
|
|
"epoch": 3.3119158878504673,
|
|
"grad_norm": 0.4204145329803901,
|
|
"learning_rate": 2.5318811251916976e-05,
|
|
"loss": 0.3005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2893204689025879,
|
|
"step": 2835,
|
|
"valid_targets_mean": 5146.1,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 3.317757009345794,
|
|
"grad_norm": 0.42184586399846113,
|
|
"learning_rate": 2.526262292151509e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2970890700817108,
|
|
"step": 2840,
|
|
"valid_targets_mean": 5416.8,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 3.3235981308411215,
|
|
"grad_norm": 0.48355273312435465,
|
|
"learning_rate": 2.5206389928725958e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2701546251773834,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4583.7,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 3.3294392523364484,
|
|
"grad_norm": 0.41594620466623644,
|
|
"learning_rate": 2.5150112750783026e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3118717670440674,
|
|
"step": 2850,
|
|
"valid_targets_mean": 5841.0,
|
|
"valid_targets_min": 3415
|
|
},
|
|
{
|
|
"epoch": 3.335280373831776,
|
|
"grad_norm": 0.444788967917164,
|
|
"learning_rate": 2.509379186529473e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28450438380241394,
|
|
"step": 2855,
|
|
"valid_targets_mean": 5576.6,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 3.3411214953271027,
|
|
"grad_norm": 0.42489466755813177,
|
|
"learning_rate": 2.5037427750240445e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2713904082775116,
|
|
"step": 2860,
|
|
"valid_targets_mean": 5176.0,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 3.34696261682243,
|
|
"grad_norm": 0.38071733604500085,
|
|
"learning_rate": 2.4981020883966425e-05,
|
|
"loss": 0.307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2581561505794525,
|
|
"step": 2865,
|
|
"valid_targets_mean": 5820.6,
|
|
"valid_targets_min": 2109
|
|
},
|
|
{
|
|
"epoch": 3.352803738317757,
|
|
"grad_norm": 0.399513309245566,
|
|
"learning_rate": 2.4924571745181724e-05,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26574933528900146,
|
|
"step": 2870,
|
|
"valid_targets_mean": 5735.5,
|
|
"valid_targets_min": 2678
|
|
},
|
|
{
|
|
"epoch": 3.3586448598130842,
|
|
"grad_norm": 0.402622894928941,
|
|
"learning_rate": 2.4868080812954185e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706460654735565,
|
|
"step": 2875,
|
|
"valid_targets_mean": 5029.0,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 3.364485981308411,
|
|
"grad_norm": 0.4033228593360558,
|
|
"learning_rate": 2.4811548566706288e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289938747882843,
|
|
"step": 2880,
|
|
"valid_targets_mean": 5287.6,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 3.3703271028037385,
|
|
"grad_norm": 0.42193021880262604,
|
|
"learning_rate": 2.4754975486211187e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947331368923187,
|
|
"step": 2885,
|
|
"valid_targets_mean": 5212.9,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 3.3761682242990654,
|
|
"grad_norm": 0.41571423031170984,
|
|
"learning_rate": 2.4698362051588555e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950482964515686,
|
|
"step": 2890,
|
|
"valid_targets_mean": 5511.8,
|
|
"valid_targets_min": 2646
|
|
},
|
|
{
|
|
"epoch": 3.3820093457943923,
|
|
"grad_norm": 0.45365923224879867,
|
|
"learning_rate": 2.4641708743300536e-05,
|
|
"loss": 0.2958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3130311071872711,
|
|
"step": 2895,
|
|
"valid_targets_mean": 6427.8,
|
|
"valid_targets_min": 3101
|
|
},
|
|
{
|
|
"epoch": 3.3878504672897196,
|
|
"grad_norm": 0.4199470808735991,
|
|
"learning_rate": 2.458501604214769e-05,
|
|
"loss": 0.3002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3063619136810303,
|
|
"step": 2900,
|
|
"valid_targets_mean": 6109.6,
|
|
"valid_targets_min": 1936
|
|
},
|
|
{
|
|
"epoch": 3.393691588785047,
|
|
"grad_norm": 0.4341812789321833,
|
|
"learning_rate": 2.4528284429264883e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2797603905200958,
|
|
"step": 2905,
|
|
"valid_targets_mean": 5854.7,
|
|
"valid_targets_min": 2468
|
|
},
|
|
{
|
|
"epoch": 3.399532710280374,
|
|
"grad_norm": 0.41188931120840044,
|
|
"learning_rate": 2.4471514386117214e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2890479862689972,
|
|
"step": 2910,
|
|
"valid_targets_mean": 5888.2,
|
|
"valid_targets_min": 2914
|
|
},
|
|
{
|
|
"epoch": 3.4053738317757007,
|
|
"grad_norm": 0.41227912114811227,
|
|
"learning_rate": 2.4414706394495914e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869231700897217,
|
|
"step": 2915,
|
|
"valid_targets_mean": 5944.6,
|
|
"valid_targets_min": 2834
|
|
},
|
|
{
|
|
"epoch": 3.411214953271028,
|
|
"grad_norm": 0.39072787847409035,
|
|
"learning_rate": 2.4357860936514307e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2844496965408325,
|
|
"step": 2920,
|
|
"valid_targets_mean": 5993.4,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 3.417056074766355,
|
|
"grad_norm": 0.39501822174112977,
|
|
"learning_rate": 2.4300978494603652e-05,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.267410546541214,
|
|
"step": 2925,
|
|
"valid_targets_mean": 5802.3,
|
|
"valid_targets_min": 2629
|
|
},
|
|
{
|
|
"epoch": 3.4228971962616823,
|
|
"grad_norm": 0.43403654354105364,
|
|
"learning_rate": 2.424405955150909e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899346649646759,
|
|
"step": 2930,
|
|
"valid_targets_mean": 5880.0,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 3.428738317757009,
|
|
"grad_norm": 0.383225511935292,
|
|
"learning_rate": 2.418710459028554e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27475807070732117,
|
|
"step": 2935,
|
|
"valid_targets_mean": 5982.9,
|
|
"valid_targets_min": 2584
|
|
},
|
|
{
|
|
"epoch": 3.4345794392523366,
|
|
"grad_norm": 0.3997732570779305,
|
|
"learning_rate": 2.4130114094293594e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3027922809123993,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5528.2,
|
|
"valid_targets_min": 2208
|
|
},
|
|
{
|
|
"epoch": 3.4404205607476634,
|
|
"grad_norm": 0.4036880562894883,
|
|
"learning_rate": 2.4073088547195416e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.256763219833374,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5622.1,
|
|
"valid_targets_min": 2134
|
|
},
|
|
{
|
|
"epoch": 3.446261682242991,
|
|
"grad_norm": 0.3822038824231059,
|
|
"learning_rate": 2.4016028432950654e-05,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21580404043197632,
|
|
"step": 2950,
|
|
"valid_targets_mean": 5228.1,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 3.4521028037383177,
|
|
"grad_norm": 0.4370968794464783,
|
|
"learning_rate": 2.3958934235812287e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2583424150943756,
|
|
"step": 2955,
|
|
"valid_targets_mean": 5110.3,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 3.457943925233645,
|
|
"grad_norm": 0.3909926874917433,
|
|
"learning_rate": 2.390180644032257e-05,
|
|
"loss": 0.2836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269477516412735,
|
|
"step": 2960,
|
|
"valid_targets_mean": 5547.4,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 3.463785046728972,
|
|
"grad_norm": 0.37003216538590267,
|
|
"learning_rate": 2.3844645531308892e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26191771030426025,
|
|
"step": 2965,
|
|
"valid_targets_mean": 6164.2,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 3.4696261682242993,
|
|
"grad_norm": 0.40552592020294015,
|
|
"learning_rate": 2.3787451993879663e-05,
|
|
"loss": 0.3064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32133132219314575,
|
|
"step": 2970,
|
|
"valid_targets_mean": 6012.0,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 3.475467289719626,
|
|
"grad_norm": 0.3896825541368539,
|
|
"learning_rate": 2.373022631342021e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28370749950408936,
|
|
"step": 2975,
|
|
"valid_targets_mean": 5712.2,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 3.481308411214953,
|
|
"grad_norm": 0.38694933864050585,
|
|
"learning_rate": 2.3672968975588636e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30645880103111267,
|
|
"step": 2980,
|
|
"valid_targets_mean": 6986.4,
|
|
"valid_targets_min": 1909
|
|
},
|
|
{
|
|
"epoch": 3.4871495327102804,
|
|
"grad_norm": 0.4029014204798469,
|
|
"learning_rate": 2.3615680466311726e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28445541858673096,
|
|
"step": 2985,
|
|
"valid_targets_mean": 5609.7,
|
|
"valid_targets_min": 2513
|
|
},
|
|
{
|
|
"epoch": 3.4929906542056077,
|
|
"grad_norm": 0.4767254851082154,
|
|
"learning_rate": 2.355836127178079e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28997018933296204,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4441.5,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 3.4988317757009346,
|
|
"grad_norm": 0.4048472651549233,
|
|
"learning_rate": 2.3501011878447574e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28445860743522644,
|
|
"step": 2995,
|
|
"valid_targets_mean": 6125.5,
|
|
"valid_targets_min": 2221
|
|
},
|
|
{
|
|
"epoch": 3.5046728971962615,
|
|
"grad_norm": 0.40726251998666385,
|
|
"learning_rate": 2.34436327730201e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2876676619052887,
|
|
"step": 3000,
|
|
"valid_targets_mean": 5461.1,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 3.510514018691589,
|
|
"grad_norm": 0.42915183299561316,
|
|
"learning_rate": 2.3386224442458555e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3253763020038605,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5370.8,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 3.5163551401869158,
|
|
"grad_norm": 0.411218500970574,
|
|
"learning_rate": 2.3328787373971142e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26935479044914246,
|
|
"step": 3010,
|
|
"valid_targets_mean": 5618.0,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 3.522196261682243,
|
|
"grad_norm": 0.4193658712555695,
|
|
"learning_rate": 2.327132205500997e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3451460897922516,
|
|
"step": 3015,
|
|
"valid_targets_mean": 5794.2,
|
|
"valid_targets_min": 1238
|
|
},
|
|
{
|
|
"epoch": 3.52803738317757,
|
|
"grad_norm": 0.565111663677681,
|
|
"learning_rate": 2.321382897326689e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27852508425712585,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4789.2,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 3.5338785046728973,
|
|
"grad_norm": 0.41645063334709964,
|
|
"learning_rate": 2.3156308616669367e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2782963812351227,
|
|
"step": 3025,
|
|
"valid_targets_mean": 5083.9,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 3.539719626168224,
|
|
"grad_norm": 0.42003137614953506,
|
|
"learning_rate": 2.3098761473376346e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2951483726501465,
|
|
"step": 3030,
|
|
"valid_targets_mean": 5812.4,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 3.5455607476635516,
|
|
"grad_norm": 0.41963603495824775,
|
|
"learning_rate": 2.30411880317741e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28758910298347473,
|
|
"step": 3035,
|
|
"valid_targets_mean": 5694.2,
|
|
"valid_targets_min": 2072
|
|
},
|
|
{
|
|
"epoch": 3.5514018691588785,
|
|
"grad_norm": 0.4057172941117195,
|
|
"learning_rate": 2.2983588780472094e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3291498124599457,
|
|
"step": 3040,
|
|
"valid_targets_mean": 5783.3,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 3.5572429906542054,
|
|
"grad_norm": 0.40763756809923407,
|
|
"learning_rate": 2.2925964208298817e-05,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30356550216674805,
|
|
"step": 3045,
|
|
"valid_targets_mean": 5517.3,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 3.5630841121495327,
|
|
"grad_norm": 0.46641393895662314,
|
|
"learning_rate": 2.2868314804297676e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2889040410518646,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4926.5,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 3.56892523364486,
|
|
"grad_norm": 0.37002056739050826,
|
|
"learning_rate": 2.2810641057722785e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24075160920619965,
|
|
"step": 3055,
|
|
"valid_targets_mean": 6217.2,
|
|
"valid_targets_min": 3645
|
|
},
|
|
{
|
|
"epoch": 3.574766355140187,
|
|
"grad_norm": 0.4155850395152606,
|
|
"learning_rate": 2.275294345803487e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29531291127204895,
|
|
"step": 3060,
|
|
"valid_targets_mean": 5556.4,
|
|
"valid_targets_min": 2209
|
|
},
|
|
{
|
|
"epoch": 3.580607476635514,
|
|
"grad_norm": 0.40114015669298797,
|
|
"learning_rate": 2.2695222494897088e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2709489166736603,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5711.8,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 3.586448598130841,
|
|
"grad_norm": 0.3877616907273333,
|
|
"learning_rate": 2.2637478658170876e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3136334717273712,
|
|
"step": 3070,
|
|
"valid_targets_mean": 6670.8,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 3.5922897196261685,
|
|
"grad_norm": 0.41780702356272376,
|
|
"learning_rate": 2.2579712437911784e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2988295257091522,
|
|
"step": 3075,
|
|
"valid_targets_mean": 5617.1,
|
|
"valid_targets_min": 1865
|
|
},
|
|
{
|
|
"epoch": 3.5981308411214954,
|
|
"grad_norm": 0.41144833307860573,
|
|
"learning_rate": 2.252192432436533e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32601210474967957,
|
|
"step": 3080,
|
|
"valid_targets_mean": 6181.0,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 3.6039719626168223,
|
|
"grad_norm": 0.4211739319378303,
|
|
"learning_rate": 2.246411480796283e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30185258388519287,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5384.4,
|
|
"valid_targets_min": 2202
|
|
},
|
|
{
|
|
"epoch": 3.6098130841121496,
|
|
"grad_norm": 0.47364969282145913,
|
|
"learning_rate": 2.2406284379317253e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2871364653110504,
|
|
"step": 3090,
|
|
"valid_targets_mean": 5446.3,
|
|
"valid_targets_min": 2412
|
|
},
|
|
{
|
|
"epoch": 3.6156542056074765,
|
|
"grad_norm": 0.3967841834873997,
|
|
"learning_rate": 2.2348433529219028e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732541859149933,
|
|
"step": 3095,
|
|
"valid_targets_mean": 5750.4,
|
|
"valid_targets_min": 2573
|
|
},
|
|
{
|
|
"epoch": 3.621495327102804,
|
|
"grad_norm": 0.41175592148447676,
|
|
"learning_rate": 2.2290562748631906e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2609311044216156,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5446.2,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 3.6273364485981308,
|
|
"grad_norm": 0.4648583749544435,
|
|
"learning_rate": 2.223267252868878e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3278321921825409,
|
|
"step": 3105,
|
|
"valid_targets_mean": 5188.4,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 3.633177570093458,
|
|
"grad_norm": 0.41659598509854956,
|
|
"learning_rate": 2.217476336068751e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2862778306007385,
|
|
"step": 3110,
|
|
"valid_targets_mean": 5139.3,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 3.639018691588785,
|
|
"grad_norm": 0.4238493313751695,
|
|
"learning_rate": 2.2116835736086773e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29518821835517883,
|
|
"step": 3115,
|
|
"valid_targets_mean": 5911.6,
|
|
"valid_targets_min": 2504
|
|
},
|
|
{
|
|
"epoch": 3.6448598130841123,
|
|
"grad_norm": 0.40832569246131944,
|
|
"learning_rate": 2.205889014650189e-05,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3204886019229889,
|
|
"step": 3120,
|
|
"valid_targets_mean": 6162.2,
|
|
"valid_targets_min": 2329
|
|
},
|
|
{
|
|
"epoch": 3.6507009345794392,
|
|
"grad_norm": 0.40336747909848053,
|
|
"learning_rate": 2.200092708370063e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28418999910354614,
|
|
"step": 3125,
|
|
"valid_targets_mean": 5732.4,
|
|
"valid_targets_min": 2356
|
|
},
|
|
{
|
|
"epoch": 3.656542056074766,
|
|
"grad_norm": 0.3949185992269837,
|
|
"learning_rate": 2.1942947039599046e-05,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2705404460430145,
|
|
"step": 3130,
|
|
"valid_targets_mean": 5589.9,
|
|
"valid_targets_min": 2302
|
|
},
|
|
{
|
|
"epoch": 3.6623831775700935,
|
|
"grad_norm": 0.4111711146269223,
|
|
"learning_rate": 2.1884950506257333e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27517828345298767,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5080.9,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 3.668224299065421,
|
|
"grad_norm": 0.418275372923392,
|
|
"learning_rate": 2.18269379758756e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3062473237514496,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5279.8,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 3.6740654205607477,
|
|
"grad_norm": 0.40044816291781177,
|
|
"learning_rate": 2.1768909940789742e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2771221697330475,
|
|
"step": 3145,
|
|
"valid_targets_mean": 5522.4,
|
|
"valid_targets_min": 2294
|
|
},
|
|
{
|
|
"epoch": 3.6799065420560746,
|
|
"grad_norm": 0.36992584423590225,
|
|
"learning_rate": 2.1710866893467215e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24198798835277557,
|
|
"step": 3150,
|
|
"valid_targets_mean": 6323.7,
|
|
"valid_targets_min": 2731
|
|
},
|
|
{
|
|
"epoch": 3.685747663551402,
|
|
"grad_norm": 0.42498338180119394,
|
|
"learning_rate": 2.165280932650289e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23473167419433594,
|
|
"step": 3155,
|
|
"valid_targets_mean": 5274.5,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 3.691588785046729,
|
|
"grad_norm": 0.4202380986941298,
|
|
"learning_rate": 2.1594737732614862e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501426637172699,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5290.2,
|
|
"valid_targets_min": 2482
|
|
},
|
|
{
|
|
"epoch": 3.697429906542056,
|
|
"grad_norm": 0.4370551929566076,
|
|
"learning_rate": 2.1536652604640272e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28328055143356323,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5228.4,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 3.703271028037383,
|
|
"grad_norm": 0.5798116679652049,
|
|
"learning_rate": 2.1478554435531116e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2639475166797638,
|
|
"step": 3170,
|
|
"valid_targets_mean": 5131.8,
|
|
"valid_targets_min": 2794
|
|
},
|
|
{
|
|
"epoch": 3.7091121495327104,
|
|
"grad_norm": 0.38970225941064546,
|
|
"learning_rate": 2.1420443718350062e-05,
|
|
"loss": 0.2967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2827509641647339,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5701.1,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 3.7149532710280373,
|
|
"grad_norm": 0.43606001334666344,
|
|
"learning_rate": 2.1362320946266288e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605997323989868,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5297.4,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 3.7207943925233646,
|
|
"grad_norm": 0.42539704889247193,
|
|
"learning_rate": 2.1304186612551242e-05,
|
|
"loss": 0.2885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25733837485313416,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5253.3,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 3.7266355140186915,
|
|
"grad_norm": 0.4167958047194669,
|
|
"learning_rate": 2.124604121057454e-05,
|
|
"loss": 0.2733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2676849067211151,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5438.6,
|
|
"valid_targets_min": 2119
|
|
},
|
|
{
|
|
"epoch": 3.7324766355140184,
|
|
"grad_norm": 0.42079611419766655,
|
|
"learning_rate": 2.11878852337997e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3075539767742157,
|
|
"step": 3195,
|
|
"valid_targets_mean": 5747.5,
|
|
"valid_targets_min": 2265
|
|
},
|
|
{
|
|
"epoch": 3.7383177570093458,
|
|
"grad_norm": 0.6513212663661373,
|
|
"learning_rate": 2.1129719175779995e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29054751992225647,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5274.3,
|
|
"valid_targets_min": 1815
|
|
},
|
|
{
|
|
"epoch": 3.744158878504673,
|
|
"grad_norm": 0.43317890287182936,
|
|
"learning_rate": 2.1071543530154243e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27314242720603943,
|
|
"step": 3205,
|
|
"valid_targets_mean": 5371.8,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"grad_norm": 0.3872501523323306,
|
|
"learning_rate": 2.101335879064265e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2475658804178238,
|
|
"step": 3210,
|
|
"valid_targets_mean": 5694.4,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 3.755841121495327,
|
|
"grad_norm": 0.45058837363831183,
|
|
"learning_rate": 2.0955165451042574e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24615828692913055,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4939.7,
|
|
"valid_targets_min": 2230
|
|
},
|
|
{
|
|
"epoch": 3.7616822429906542,
|
|
"grad_norm": 0.390791222951631,
|
|
"learning_rate": 2.089696400522439e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22410106658935547,
|
|
"step": 3220,
|
|
"valid_targets_mean": 5044.1,
|
|
"valid_targets_min": 2220
|
|
},
|
|
{
|
|
"epoch": 3.7675233644859816,
|
|
"grad_norm": 0.39725589356610724,
|
|
"learning_rate": 2.0838754947127243e-05,
|
|
"loss": 0.2951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563037574291229,
|
|
"step": 3225,
|
|
"valid_targets_mean": 5362.0,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 3.7733644859813085,
|
|
"grad_norm": 0.40030300889002707,
|
|
"learning_rate": 2.078053877075489e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24394099414348602,
|
|
"step": 3230,
|
|
"valid_targets_mean": 5376.5,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 3.7792056074766354,
|
|
"grad_norm": 0.41107966737117624,
|
|
"learning_rate": 2.07223159701715e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28964677453041077,
|
|
"step": 3235,
|
|
"valid_targets_mean": 5315.1,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 3.7850467289719627,
|
|
"grad_norm": 0.4265002322949946,
|
|
"learning_rate": 2.0664087039497453e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30202871561050415,
|
|
"step": 3240,
|
|
"valid_targets_mean": 5223.2,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 3.7908878504672896,
|
|
"grad_norm": 0.3680713366162329,
|
|
"learning_rate": 2.0605852472905177e-05,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540769875049591,
|
|
"step": 3245,
|
|
"valid_targets_mean": 6091.8,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 3.796728971962617,
|
|
"grad_norm": 0.44572680639994183,
|
|
"learning_rate": 2.0547612764614885e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717747688293457,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5570.0,
|
|
"valid_targets_min": 2221
|
|
},
|
|
{
|
|
"epoch": 3.802570093457944,
|
|
"grad_norm": 0.4131294841142459,
|
|
"learning_rate": 2.0489368408890474e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22424978017807007,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4926.9,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 3.808411214953271,
|
|
"grad_norm": 0.403183606580533,
|
|
"learning_rate": 2.0431119900035248e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27552691102027893,
|
|
"step": 3260,
|
|
"valid_targets_mean": 6027.2,
|
|
"valid_targets_min": 2315
|
|
},
|
|
{
|
|
"epoch": 3.814252336448598,
|
|
"grad_norm": 0.37723297306598913,
|
|
"learning_rate": 2.0372867732387786e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24837321043014526,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5635.3,
|
|
"valid_targets_min": 2268
|
|
},
|
|
{
|
|
"epoch": 3.8200934579439254,
|
|
"grad_norm": 0.42869182594566424,
|
|
"learning_rate": 2.0314612400317694e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2365128993988037,
|
|
"step": 3270,
|
|
"valid_targets_mean": 5407.6,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 3.8259345794392523,
|
|
"grad_norm": 0.4502337420491974,
|
|
"learning_rate": 2.0256354398221446e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947944104671478,
|
|
"step": 3275,
|
|
"valid_targets_mean": 5284.2,
|
|
"valid_targets_min": 2309
|
|
},
|
|
{
|
|
"epoch": 3.831775700934579,
|
|
"grad_norm": 0.4214061952665872,
|
|
"learning_rate": 2.0198094220518163e-05,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2924845516681671,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5907.5,
|
|
"valid_targets_min": 2081
|
|
},
|
|
{
|
|
"epoch": 3.8376168224299065,
|
|
"grad_norm": 0.39075607001464213,
|
|
"learning_rate": 2.0139832361645454e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24021585285663605,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5629.6,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 3.843457943925234,
|
|
"grad_norm": 0.42305490510053284,
|
|
"learning_rate": 2.008156931605518e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31245508790016174,
|
|
"step": 3290,
|
|
"valid_targets_mean": 5449.0,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 3.8492990654205608,
|
|
"grad_norm": 0.3872850631508317,
|
|
"learning_rate": 2.0023305578209263e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824211120605469,
|
|
"step": 3295,
|
|
"valid_targets_mean": 6288.6,
|
|
"valid_targets_min": 2785
|
|
},
|
|
{
|
|
"epoch": 3.8551401869158877,
|
|
"grad_norm": 0.41951028414693853,
|
|
"learning_rate": 1.9965041642575516e-05,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3180820643901825,
|
|
"step": 3300,
|
|
"valid_targets_mean": 6768.4,
|
|
"valid_targets_min": 2302
|
|
},
|
|
{
|
|
"epoch": 3.860981308411215,
|
|
"grad_norm": 0.5001106982457001,
|
|
"learning_rate": 1.990677800362343e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834451496601105,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4615.2,
|
|
"valid_targets_min": 2113
|
|
},
|
|
{
|
|
"epoch": 3.866822429906542,
|
|
"grad_norm": 0.4609944497631607,
|
|
"learning_rate": 1.9848515155819973e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26194652915000916,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4772.7,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 3.8726635514018692,
|
|
"grad_norm": 0.38730270798845934,
|
|
"learning_rate": 1.9790253593625396e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30897340178489685,
|
|
"step": 3315,
|
|
"valid_targets_mean": 6356.5,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 3.878504672897196,
|
|
"grad_norm": 0.4043437092010342,
|
|
"learning_rate": 1.9731993811489047e-05,
|
|
"loss": 0.2925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2723514139652252,
|
|
"step": 3320,
|
|
"valid_targets_mean": 5484.5,
|
|
"valid_targets_min": 2103
|
|
},
|
|
{
|
|
"epoch": 3.8843457943925235,
|
|
"grad_norm": 0.3952830644371665,
|
|
"learning_rate": 1.9673736303845158e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2781417965888977,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5384.1,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 3.8901869158878504,
|
|
"grad_norm": 0.4254823321110087,
|
|
"learning_rate": 1.9615481565108673e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30224576592445374,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5443.0,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 3.8960280373831777,
|
|
"grad_norm": 0.3954494652918975,
|
|
"learning_rate": 1.9557230089671026e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27612149715423584,
|
|
"step": 3335,
|
|
"valid_targets_mean": 5736.2,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 3.9018691588785046,
|
|
"grad_norm": 0.3670922838249416,
|
|
"learning_rate": 1.9498982371895953e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28239965438842773,
|
|
"step": 3340,
|
|
"valid_targets_mean": 6414.0,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 3.9077102803738315,
|
|
"grad_norm": 0.4239757135665577,
|
|
"learning_rate": 1.9440738906115297e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28301239013671875,
|
|
"step": 3345,
|
|
"valid_targets_mean": 5594.7,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 3.913551401869159,
|
|
"grad_norm": 0.4566397284747995,
|
|
"learning_rate": 1.9382500186624838e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2904594838619232,
|
|
"step": 3350,
|
|
"valid_targets_mean": 4535.2,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 3.919392523364486,
|
|
"grad_norm": 0.39494593162538866,
|
|
"learning_rate": 1.932426670768005e-05,
|
|
"loss": 0.285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31745240092277527,
|
|
"step": 3355,
|
|
"valid_targets_mean": 6291.8,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 3.925233644859813,
|
|
"grad_norm": 0.4934107256418833,
|
|
"learning_rate": 1.9266038963491946e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901515066623688,
|
|
"step": 3360,
|
|
"valid_targets_mean": 5174.6,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 3.93107476635514,
|
|
"grad_norm": 0.43390977538060277,
|
|
"learning_rate": 1.9207817448222875e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31198960542678833,
|
|
"step": 3365,
|
|
"valid_targets_mean": 5237.3,
|
|
"valid_targets_min": 1933
|
|
},
|
|
{
|
|
"epoch": 3.9369158878504673,
|
|
"grad_norm": 0.4142284610448972,
|
|
"learning_rate": 1.9149602655982297e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32645806670188904,
|
|
"step": 3370,
|
|
"valid_targets_mean": 6003.1,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 3.9427570093457946,
|
|
"grad_norm": 0.3910913484892235,
|
|
"learning_rate": 1.909139508082265e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706674635410309,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5626.8,
|
|
"valid_targets_min": 2993
|
|
},
|
|
{
|
|
"epoch": 3.9485981308411215,
|
|
"grad_norm": 0.8977088830195632,
|
|
"learning_rate": 1.9033195216735093e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27054107189178467,
|
|
"step": 3380,
|
|
"valid_targets_mean": 5457.1,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 3.9544392523364484,
|
|
"grad_norm": 0.4365245004829976,
|
|
"learning_rate": 1.897500355764537e-05,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2724755108356476,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5455.5,
|
|
"valid_targets_min": 2407
|
|
},
|
|
{
|
|
"epoch": 3.960280373831776,
|
|
"grad_norm": 0.4027454457289698,
|
|
"learning_rate": 1.8916820597409562e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27773240208625793,
|
|
"step": 3390,
|
|
"valid_targets_mean": 6394.4,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 3.9661214953271027,
|
|
"grad_norm": 0.393171611761046,
|
|
"learning_rate": 1.885864682980995e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22891652584075928,
|
|
"step": 3395,
|
|
"valid_targets_mean": 5167.9,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 3.97196261682243,
|
|
"grad_norm": 0.4001868027335881,
|
|
"learning_rate": 1.8800482748550783e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2518956661224365,
|
|
"step": 3400,
|
|
"valid_targets_mean": 5215.5,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 3.977803738317757,
|
|
"grad_norm": 0.4345105260655604,
|
|
"learning_rate": 1.874232884725412e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2979337275028229,
|
|
"step": 3405,
|
|
"valid_targets_mean": 5040.1,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 3.9836448598130842,
|
|
"grad_norm": 0.4022516142378114,
|
|
"learning_rate": 1.8684185619455613e-05,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27095749974250793,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5603.7,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 3.989485981308411,
|
|
"grad_norm": 0.4232150572502771,
|
|
"learning_rate": 1.862605355860034e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842687964439392,
|
|
"step": 3415,
|
|
"valid_targets_mean": 5814.9,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 3.9953271028037385,
|
|
"grad_norm": 0.4008131379989371,
|
|
"learning_rate": 1.85679331580386e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2540505826473236,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5575.8,
|
|
"valid_targets_min": 2107
|
|
},
|
|
{
|
|
"epoch": 4.001168224299065,
|
|
"grad_norm": 0.44358584360033204,
|
|
"learning_rate": 1.850982491102174e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23419976234436035,
|
|
"step": 3425,
|
|
"valid_targets_mean": 4898.2,
|
|
"valid_targets_min": 2304
|
|
},
|
|
{
|
|
"epoch": 4.007009345794392,
|
|
"grad_norm": 0.4426580905529289,
|
|
"learning_rate": 1.8451729310697964e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2587369382381439,
|
|
"step": 3430,
|
|
"valid_targets_mean": 5773.8,
|
|
"valid_targets_min": 3288
|
|
},
|
|
{
|
|
"epoch": 4.01285046728972,
|
|
"grad_norm": 0.39756921147483276,
|
|
"learning_rate": 1.8393646850108146e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26732444763183594,
|
|
"step": 3435,
|
|
"valid_targets_mean": 5854.5,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 4.018691588785047,
|
|
"grad_norm": 0.391527623419628,
|
|
"learning_rate": 1.8335578022181636e-05,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30110302567481995,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5969.1,
|
|
"valid_targets_min": 2065
|
|
},
|
|
{
|
|
"epoch": 4.024532710280374,
|
|
"grad_norm": 0.45703102430467957,
|
|
"learning_rate": 1.8277523319732107e-05,
|
|
"loss": 0.2819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626097500324249,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4999.4,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 4.030373831775701,
|
|
"grad_norm": 0.5512043495468287,
|
|
"learning_rate": 1.821948323545334e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31768998503685,
|
|
"step": 3450,
|
|
"valid_targets_mean": 5667.5,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 4.036214953271028,
|
|
"grad_norm": 0.4128674068816934,
|
|
"learning_rate": 1.8161458261915056e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624343931674957,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5267.0,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 4.042056074766355,
|
|
"grad_norm": 0.44265026347900754,
|
|
"learning_rate": 1.8103448891558747e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31395700573921204,
|
|
"step": 3460,
|
|
"valid_targets_mean": 5872.9,
|
|
"valid_targets_min": 2078
|
|
},
|
|
{
|
|
"epoch": 4.047897196261682,
|
|
"grad_norm": 0.43561749552773404,
|
|
"learning_rate": 1.8045455616693463e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3131434917449951,
|
|
"step": 3465,
|
|
"valid_targets_mean": 5768.8,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 4.053738317757009,
|
|
"grad_norm": 0.3823618570764507,
|
|
"learning_rate": 1.7987478929491686e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25440332293510437,
|
|
"step": 3470,
|
|
"valid_targets_mean": 5683.8,
|
|
"valid_targets_min": 2613
|
|
},
|
|
{
|
|
"epoch": 4.059579439252336,
|
|
"grad_norm": 0.38329858503883524,
|
|
"learning_rate": 1.7929519321985094e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27852609753608704,
|
|
"step": 3475,
|
|
"valid_targets_mean": 5998.4,
|
|
"valid_targets_min": 2459
|
|
},
|
|
{
|
|
"epoch": 4.065420560747664,
|
|
"grad_norm": 0.4405968680580364,
|
|
"learning_rate": 1.7871577286060435e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077717125415802,
|
|
"step": 3480,
|
|
"valid_targets_mean": 6192.9,
|
|
"valid_targets_min": 2244
|
|
},
|
|
{
|
|
"epoch": 4.071261682242991,
|
|
"grad_norm": 0.40802202482531774,
|
|
"learning_rate": 1.781365331345533e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25366151332855225,
|
|
"step": 3485,
|
|
"valid_targets_mean": 5436.5,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 4.077102803738318,
|
|
"grad_norm": 0.43537791309131574,
|
|
"learning_rate": 1.7755747895754085e-05,
|
|
"loss": 0.2899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2750924229621887,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4995.9,
|
|
"valid_targets_min": 1881
|
|
},
|
|
{
|
|
"epoch": 4.082943925233645,
|
|
"grad_norm": 0.4420237709473812,
|
|
"learning_rate": 1.769786152438354e-05,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25820469856262207,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4875.0,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 4.088785046728972,
|
|
"grad_norm": 0.4076146864307845,
|
|
"learning_rate": 1.763999469060892e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29605168104171753,
|
|
"step": 3500,
|
|
"valid_targets_mean": 5861.2,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 4.094626168224299,
|
|
"grad_norm": 0.4263444099981479,
|
|
"learning_rate": 1.7582147885529613e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3094277083873749,
|
|
"step": 3505,
|
|
"valid_targets_mean": 6264.2,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 4.100467289719626,
|
|
"grad_norm": 0.4679907083685303,
|
|
"learning_rate": 1.752432160007503e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29197028279304504,
|
|
"step": 3510,
|
|
"valid_targets_mean": 5022.2,
|
|
"valid_targets_min": 2677
|
|
},
|
|
{
|
|
"epoch": 4.106308411214953,
|
|
"grad_norm": 0.43177877903259876,
|
|
"learning_rate": 1.7466516325000457e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556833028793335,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4607.8,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 4.11214953271028,
|
|
"grad_norm": 0.42866312529570494,
|
|
"learning_rate": 1.7408732550882852e-05,
|
|
"loss": 0.2929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3144102394580841,
|
|
"step": 3520,
|
|
"valid_targets_mean": 5593.2,
|
|
"valid_targets_min": 1797
|
|
},
|
|
{
|
|
"epoch": 4.117990654205608,
|
|
"grad_norm": 0.4043920236425012,
|
|
"learning_rate": 1.7350970768116712e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533397376537323,
|
|
"step": 3525,
|
|
"valid_targets_mean": 5599.7,
|
|
"valid_targets_min": 2256
|
|
},
|
|
{
|
|
"epoch": 4.123831775700935,
|
|
"grad_norm": 0.3903696761533992,
|
|
"learning_rate": 1.7293231466909897e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2742694616317749,
|
|
"step": 3530,
|
|
"valid_targets_mean": 5844.7,
|
|
"valid_targets_min": 2806
|
|
},
|
|
{
|
|
"epoch": 4.1296728971962615,
|
|
"grad_norm": 0.4145104857229933,
|
|
"learning_rate": 1.7235515137279462e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851099967956543,
|
|
"step": 3535,
|
|
"valid_targets_mean": 5547.1,
|
|
"valid_targets_min": 2434
|
|
},
|
|
{
|
|
"epoch": 4.135514018691588,
|
|
"grad_norm": 0.4354219954297596,
|
|
"learning_rate": 1.717782226904752e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289950966835022,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5555.8,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 4.141355140186916,
|
|
"grad_norm": 0.406692012438645,
|
|
"learning_rate": 1.7120153351837073e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291618674993515,
|
|
"step": 3545,
|
|
"valid_targets_mean": 6243.3,
|
|
"valid_targets_min": 2242
|
|
},
|
|
{
|
|
"epoch": 4.147196261682243,
|
|
"grad_norm": 0.4380849124359527,
|
|
"learning_rate": 1.706250887506785e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.298784077167511,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5167.5,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 4.15303738317757,
|
|
"grad_norm": 0.4462947915434177,
|
|
"learning_rate": 1.7004889327952177e-05,
|
|
"loss": 0.2931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28448617458343506,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5631.5,
|
|
"valid_targets_min": 1592
|
|
},
|
|
{
|
|
"epoch": 4.158878504672897,
|
|
"grad_norm": 0.45530015431832993,
|
|
"learning_rate": 1.6947295199490787e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28277477622032166,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4524.4,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 4.164719626168225,
|
|
"grad_norm": 0.39667892739522503,
|
|
"learning_rate": 1.6889726978468713e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695672810077667,
|
|
"step": 3565,
|
|
"valid_targets_mean": 5895.5,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 4.170560747663552,
|
|
"grad_norm": 0.6603049990454726,
|
|
"learning_rate": 1.6832185153451108e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901749908924103,
|
|
"step": 3570,
|
|
"valid_targets_mean": 5803.2,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 4.1764018691588785,
|
|
"grad_norm": 0.3967469875428986,
|
|
"learning_rate": 1.67746702127791e-05,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535322904586792,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5486.2,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 4.182242990654205,
|
|
"grad_norm": 0.4152366743210126,
|
|
"learning_rate": 1.671718264456569e-05,
|
|
"loss": 0.2852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32268470525741577,
|
|
"step": 3580,
|
|
"valid_targets_mean": 6214.8,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 4.188084112149533,
|
|
"grad_norm": 0.4270680557973105,
|
|
"learning_rate": 1.6659722936691536e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28647327423095703,
|
|
"step": 3585,
|
|
"valid_targets_mean": 6215.5,
|
|
"valid_targets_min": 1930
|
|
},
|
|
{
|
|
"epoch": 4.19392523364486,
|
|
"grad_norm": 0.395118555062425,
|
|
"learning_rate": 1.6602291576800876e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29289209842681885,
|
|
"step": 3590,
|
|
"valid_targets_mean": 6839.2,
|
|
"valid_targets_min": 3136
|
|
},
|
|
{
|
|
"epoch": 4.199766355140187,
|
|
"grad_norm": 0.7548259662350397,
|
|
"learning_rate": 1.654488905229736e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715909779071808,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5511.6,
|
|
"valid_targets_min": 3053
|
|
},
|
|
{
|
|
"epoch": 4.205607476635514,
|
|
"grad_norm": 0.3763250842619912,
|
|
"learning_rate": 1.6487515850339935e-05,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2914019823074341,
|
|
"step": 3600,
|
|
"valid_targets_mean": 7110.7,
|
|
"valid_targets_min": 2383
|
|
},
|
|
{
|
|
"epoch": 4.211448598130841,
|
|
"grad_norm": 0.38866956559742566,
|
|
"learning_rate": 1.6430172457838673e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2806011140346527,
|
|
"step": 3605,
|
|
"valid_targets_mean": 6411.0,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 4.2172897196261685,
|
|
"grad_norm": 0.3939789568213437,
|
|
"learning_rate": 1.6372859361450678e-05,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35683950781822205,
|
|
"step": 3610,
|
|
"valid_targets_mean": 7333.9,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 4.223130841121495,
|
|
"grad_norm": 0.4204564145847554,
|
|
"learning_rate": 1.6315577047575934e-05,
|
|
"loss": 0.2865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28803369402885437,
|
|
"step": 3615,
|
|
"valid_targets_mean": 5763.4,
|
|
"valid_targets_min": 2143
|
|
},
|
|
{
|
|
"epoch": 4.228971962616822,
|
|
"grad_norm": 0.42643309839226406,
|
|
"learning_rate": 1.625832600235318e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618163824081421,
|
|
"step": 3620,
|
|
"valid_targets_mean": 5101.8,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 4.234813084112149,
|
|
"grad_norm": 0.41904099887237145,
|
|
"learning_rate": 1.6201106711655794e-05,
|
|
"loss": 0.3196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34197959303855896,
|
|
"step": 3625,
|
|
"valid_targets_mean": 6342.7,
|
|
"valid_targets_min": 2190
|
|
},
|
|
{
|
|
"epoch": 4.240654205607477,
|
|
"grad_norm": 0.4081002610736401,
|
|
"learning_rate": 1.614391966108766e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950407862663269,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5916.2,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 4.246495327102804,
|
|
"grad_norm": 0.4222883859962455,
|
|
"learning_rate": 1.6086765335979028e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3142858147621155,
|
|
"step": 3635,
|
|
"valid_targets_mean": 5609.0,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 4.252336448598131,
|
|
"grad_norm": 0.4182389354947673,
|
|
"learning_rate": 1.602964422138246e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2604644000530243,
|
|
"step": 3640,
|
|
"valid_targets_mean": 5277.4,
|
|
"valid_targets_min": 2639
|
|
},
|
|
{
|
|
"epoch": 4.258177570093458,
|
|
"grad_norm": 0.4329543586908771,
|
|
"learning_rate": 1.597255680206863e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30379918217658997,
|
|
"step": 3645,
|
|
"valid_targets_mean": 5440.1,
|
|
"valid_targets_min": 2478
|
|
},
|
|
{
|
|
"epoch": 4.264018691588785,
|
|
"grad_norm": 0.43308005754642837,
|
|
"learning_rate": 1.5915503562522263e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31246984004974365,
|
|
"step": 3650,
|
|
"valid_targets_mean": 5595.8,
|
|
"valid_targets_min": 2322
|
|
},
|
|
{
|
|
"epoch": 4.269859813084112,
|
|
"grad_norm": 0.41989652368546604,
|
|
"learning_rate": 1.5858484986938027e-05,
|
|
"loss": 0.2909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2678254544734955,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4999.8,
|
|
"valid_targets_min": 2513
|
|
},
|
|
{
|
|
"epoch": 4.275700934579439,
|
|
"grad_norm": 0.4391111030367495,
|
|
"learning_rate": 1.5801501559216373e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2680926024913788,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5341.6,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 4.281542056074766,
|
|
"grad_norm": 0.427131446636136,
|
|
"learning_rate": 1.57445537629595e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26541757583618164,
|
|
"step": 3665,
|
|
"valid_targets_mean": 5527.2,
|
|
"valid_targets_min": 2665
|
|
},
|
|
{
|
|
"epoch": 4.287383177570094,
|
|
"grad_norm": 0.43964136856036096,
|
|
"learning_rate": 1.5687642081467175e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29628387093544006,
|
|
"step": 3670,
|
|
"valid_targets_mean": 5240.6,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 4.293224299065421,
|
|
"grad_norm": 0.4030416645380175,
|
|
"learning_rate": 1.5630766997732702e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894074022769928,
|
|
"step": 3675,
|
|
"valid_targets_mean": 6064.8,
|
|
"valid_targets_min": 2256
|
|
},
|
|
{
|
|
"epoch": 4.299065420560748,
|
|
"grad_norm": 0.4167842753429368,
|
|
"learning_rate": 1.5573928994438777e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262351930141449,
|
|
"step": 3680,
|
|
"valid_targets_mean": 5514.5,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 4.304906542056075,
|
|
"grad_norm": 0.44339480796025954,
|
|
"learning_rate": 1.5517128553953392e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2504121661186218,
|
|
"step": 3685,
|
|
"valid_targets_mean": 5116.9,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 4.3107476635514015,
|
|
"grad_norm": 0.40981732115219516,
|
|
"learning_rate": 1.5460366158325764e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3088397681713104,
|
|
"step": 3690,
|
|
"valid_targets_mean": 6050.6,
|
|
"valid_targets_min": 2903
|
|
},
|
|
{
|
|
"epoch": 4.316588785046729,
|
|
"grad_norm": 0.4263488998715942,
|
|
"learning_rate": 1.5403642289282254e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27205970883369446,
|
|
"step": 3695,
|
|
"valid_targets_mean": 5364.2,
|
|
"valid_targets_min": 2122
|
|
},
|
|
{
|
|
"epoch": 4.322429906542056,
|
|
"grad_norm": 0.43656657488171446,
|
|
"learning_rate": 1.5346957428222222e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27193698287010193,
|
|
"step": 3700,
|
|
"valid_targets_mean": 5571.8,
|
|
"valid_targets_min": 2395
|
|
},
|
|
{
|
|
"epoch": 4.328271028037383,
|
|
"grad_norm": 0.4387307654249669,
|
|
"learning_rate": 1.529031205621401e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2943962812423706,
|
|
"step": 3705,
|
|
"valid_targets_mean": 5890.2,
|
|
"valid_targets_min": 2567
|
|
},
|
|
{
|
|
"epoch": 4.33411214953271,
|
|
"grad_norm": 0.43978161196470816,
|
|
"learning_rate": 1.5233706653990809e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751840353012085,
|
|
"step": 3710,
|
|
"valid_targets_mean": 4734.6,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 4.339953271028038,
|
|
"grad_norm": 0.4153590340854129,
|
|
"learning_rate": 1.5177141701946603e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25770244002342224,
|
|
"step": 3715,
|
|
"valid_targets_mean": 5333.2,
|
|
"valid_targets_min": 2250
|
|
},
|
|
{
|
|
"epoch": 4.345794392523365,
|
|
"grad_norm": 0.4063646205475939,
|
|
"learning_rate": 1.5120617680132092e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3436930179595947,
|
|
"step": 3720,
|
|
"valid_targets_mean": 6206.8,
|
|
"valid_targets_min": 2435
|
|
},
|
|
{
|
|
"epoch": 4.3516355140186915,
|
|
"grad_norm": 0.4342829788693681,
|
|
"learning_rate": 1.50641350682506e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284504771232605,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5869.9,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 4.357476635514018,
|
|
"grad_norm": 0.4008397644165794,
|
|
"learning_rate": 1.5007694345654045e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27086859941482544,
|
|
"step": 3730,
|
|
"valid_targets_mean": 5524.0,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 4.363317757009346,
|
|
"grad_norm": 0.3931376278398186,
|
|
"learning_rate": 1.4951295991338794e-05,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27761712670326233,
|
|
"step": 3735,
|
|
"valid_targets_mean": 5959.3,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 4.369158878504673,
|
|
"grad_norm": 0.40743844772532395,
|
|
"learning_rate": 1.4894940483941693e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24737940728664398,
|
|
"step": 3740,
|
|
"valid_targets_mean": 5492.9,
|
|
"valid_targets_min": 2840
|
|
},
|
|
{
|
|
"epoch": 4.375,
|
|
"grad_norm": 0.4754356791871312,
|
|
"learning_rate": 1.4838628301735928e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25431880354881287,
|
|
"step": 3745,
|
|
"valid_targets_mean": 4473.2,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 4.380841121495327,
|
|
"grad_norm": 0.4276039880057662,
|
|
"learning_rate": 1.4782359922627025e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3075639307498932,
|
|
"step": 3750,
|
|
"valid_targets_mean": 6025.2,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 4.386682242990654,
|
|
"grad_norm": 0.4538832321936314,
|
|
"learning_rate": 1.4726135824148733e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721441686153412,
|
|
"step": 3755,
|
|
"valid_targets_mean": 5466.0,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 4.392523364485982,
|
|
"grad_norm": 0.4063390729494782,
|
|
"learning_rate": 1.4669956483459028e-05,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651199400424957,
|
|
"step": 3760,
|
|
"valid_targets_mean": 5259.6,
|
|
"valid_targets_min": 1063
|
|
},
|
|
{
|
|
"epoch": 4.3983644859813085,
|
|
"grad_norm": 0.39090653581582036,
|
|
"learning_rate": 1.4613822377336033e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767545282840729,
|
|
"step": 3765,
|
|
"valid_targets_mean": 6316.3,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 4.404205607476635,
|
|
"grad_norm": 0.40936747916418736,
|
|
"learning_rate": 1.4557733982173969e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30963850021362305,
|
|
"step": 3770,
|
|
"valid_targets_mean": 6187.2,
|
|
"valid_targets_min": 2881
|
|
},
|
|
{
|
|
"epoch": 4.410046728971962,
|
|
"grad_norm": 0.43136442251237955,
|
|
"learning_rate": 1.4501691773979136e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24799656867980957,
|
|
"step": 3775,
|
|
"valid_targets_mean": 5457.0,
|
|
"valid_targets_min": 2980
|
|
},
|
|
{
|
|
"epoch": 4.41588785046729,
|
|
"grad_norm": 0.7268939990040836,
|
|
"learning_rate": 1.4445696228365851e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31584587693214417,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5724.0,
|
|
"valid_targets_min": 2015
|
|
},
|
|
{
|
|
"epoch": 4.421728971962617,
|
|
"grad_norm": 0.4456326173686848,
|
|
"learning_rate": 1.438974782055241e-05,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2478577345609665,
|
|
"step": 3785,
|
|
"valid_targets_mean": 4866.3,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 4.427570093457944,
|
|
"grad_norm": 0.3857425476735318,
|
|
"learning_rate": 1.4333847025357077e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2800256311893463,
|
|
"step": 3790,
|
|
"valid_targets_mean": 6008.1,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 4.433411214953271,
|
|
"grad_norm": 0.38605982421298124,
|
|
"learning_rate": 1.427799431719404e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27088865637779236,
|
|
"step": 3795,
|
|
"valid_targets_mean": 6350.2,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 4.4392523364485985,
|
|
"grad_norm": 0.412361210096277,
|
|
"learning_rate": 1.4222190170069374e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275704950094223,
|
|
"step": 3800,
|
|
"valid_targets_mean": 5514.2,
|
|
"valid_targets_min": 2459
|
|
},
|
|
{
|
|
"epoch": 4.445093457943925,
|
|
"grad_norm": 0.41275649603602915,
|
|
"learning_rate": 1.4166435057577049e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2371559888124466,
|
|
"step": 3805,
|
|
"valid_targets_mean": 5076.4,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 4.450934579439252,
|
|
"grad_norm": 0.42910841614199374,
|
|
"learning_rate": 1.4110729452894874e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25244569778442383,
|
|
"step": 3810,
|
|
"valid_targets_mean": 5489.7,
|
|
"valid_targets_min": 2220
|
|
},
|
|
{
|
|
"epoch": 4.456775700934579,
|
|
"grad_norm": 0.40585378302948855,
|
|
"learning_rate": 1.4055073828780514e-05,
|
|
"loss": 0.2686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26677393913269043,
|
|
"step": 3815,
|
|
"valid_targets_mean": 5656.9,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 4.462616822429906,
|
|
"grad_norm": 0.4513184733122387,
|
|
"learning_rate": 1.3999468657567455e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3054150342941284,
|
|
"step": 3820,
|
|
"valid_targets_mean": 5553.2,
|
|
"valid_targets_min": 1653
|
|
},
|
|
{
|
|
"epoch": 4.468457943925234,
|
|
"grad_norm": 0.41714270388390134,
|
|
"learning_rate": 1.3943914411160994e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3024083375930786,
|
|
"step": 3825,
|
|
"valid_targets_mean": 6597.5,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 4.474299065420561,
|
|
"grad_norm": 0.42075059405974846,
|
|
"learning_rate": 1.3888411561034276e-05,
|
|
"loss": 0.2927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24868591129779816,
|
|
"step": 3830,
|
|
"valid_targets_mean": 5788.6,
|
|
"valid_targets_min": 2710
|
|
},
|
|
{
|
|
"epoch": 4.480140186915888,
|
|
"grad_norm": 0.510817366246663,
|
|
"learning_rate": 1.3832960578224221e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27942323684692383,
|
|
"step": 3835,
|
|
"valid_targets_mean": 5117.0,
|
|
"valid_targets_min": 2121
|
|
},
|
|
{
|
|
"epoch": 4.485981308411215,
|
|
"grad_norm": 0.45004438473504027,
|
|
"learning_rate": 1.3777561933327585e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690734267234802,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5270.0,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 4.491822429906542,
|
|
"grad_norm": 0.4394404770224177,
|
|
"learning_rate": 1.3722216096496954e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26824283599853516,
|
|
"step": 3845,
|
|
"valid_targets_mean": 5213.8,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 4.497663551401869,
|
|
"grad_norm": 0.40820184938973003,
|
|
"learning_rate": 1.366692353743673e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2764577567577362,
|
|
"step": 3850,
|
|
"valid_targets_mean": 6074.0,
|
|
"valid_targets_min": 2540
|
|
},
|
|
{
|
|
"epoch": 4.503504672897196,
|
|
"grad_norm": 0.3877462295352374,
|
|
"learning_rate": 1.3611684725399177e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26264891028404236,
|
|
"step": 3855,
|
|
"valid_targets_mean": 6151.8,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 4.509345794392523,
|
|
"grad_norm": 0.4490528334140696,
|
|
"learning_rate": 1.3556500129180412e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27551373839378357,
|
|
"step": 3860,
|
|
"valid_targets_mean": 5614.1,
|
|
"valid_targets_min": 1931
|
|
},
|
|
{
|
|
"epoch": 4.515186915887851,
|
|
"grad_norm": 0.4078399603543392,
|
|
"learning_rate": 1.3501370217116442e-05,
|
|
"loss": 0.2669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950684428215027,
|
|
"step": 3865,
|
|
"valid_targets_mean": 6344.1,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 4.521028037383178,
|
|
"grad_norm": 0.8058674141987873,
|
|
"learning_rate": 1.3446295457079192e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26965174078941345,
|
|
"step": 3870,
|
|
"valid_targets_mean": 5633.1,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 4.526869158878505,
|
|
"grad_norm": 0.44757122225791435,
|
|
"learning_rate": 1.339127631647251e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29099199175834656,
|
|
"step": 3875,
|
|
"valid_targets_mean": 5833.1,
|
|
"valid_targets_min": 2533
|
|
},
|
|
{
|
|
"epoch": 4.5327102803738315,
|
|
"grad_norm": 0.4345216309360046,
|
|
"learning_rate": 1.3336313262228248e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2706601321697235,
|
|
"step": 3880,
|
|
"valid_targets_mean": 5019.8,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 4.538551401869158,
|
|
"grad_norm": 0.4069160465131445,
|
|
"learning_rate": 1.3281406760802223e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913552224636078,
|
|
"step": 3885,
|
|
"valid_targets_mean": 5895.9,
|
|
"valid_targets_min": 2110
|
|
},
|
|
{
|
|
"epoch": 4.544392523364486,
|
|
"grad_norm": 0.45132503482397246,
|
|
"learning_rate": 1.3226557278170345e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25714111328125,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4555.0,
|
|
"valid_targets_min": 2214
|
|
},
|
|
{
|
|
"epoch": 4.550233644859813,
|
|
"grad_norm": 0.4053256634300803,
|
|
"learning_rate": 1.3171765279824604e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3051871061325073,
|
|
"step": 3895,
|
|
"valid_targets_mean": 6052.5,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 4.55607476635514,
|
|
"grad_norm": 0.44980212787733337,
|
|
"learning_rate": 1.3117031230769146e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2857699394226074,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5322.2,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 4.561915887850468,
|
|
"grad_norm": 0.4700454544601915,
|
|
"learning_rate": 1.306235559551631e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29254770278930664,
|
|
"step": 3905,
|
|
"valid_targets_mean": 5031.0,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 4.567757009345795,
|
|
"grad_norm": 0.42654377062880194,
|
|
"learning_rate": 1.3007738838082689e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24933581054210663,
|
|
"step": 3910,
|
|
"valid_targets_mean": 5171.5,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 4.5735981308411215,
|
|
"grad_norm": 0.3886914790235232,
|
|
"learning_rate": 1.295318142198521e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2425815612077713,
|
|
"step": 3915,
|
|
"valid_targets_mean": 6167.8,
|
|
"valid_targets_min": 3048
|
|
},
|
|
{
|
|
"epoch": 4.579439252336448,
|
|
"grad_norm": 0.420213274430817,
|
|
"learning_rate": 1.2898683810237178e-05,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284930557012558,
|
|
"step": 3920,
|
|
"valid_targets_mean": 5620.5,
|
|
"valid_targets_min": 2094
|
|
},
|
|
{
|
|
"epoch": 4.585280373831775,
|
|
"grad_norm": 0.43079662789216666,
|
|
"learning_rate": 1.2844246465344364e-05,
|
|
"loss": 0.2764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26316216588020325,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5403.5,
|
|
"valid_targets_min": 1912
|
|
},
|
|
{
|
|
"epoch": 4.591121495327103,
|
|
"grad_norm": 0.42016952519813644,
|
|
"learning_rate": 1.2789869849301064e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23005062341690063,
|
|
"step": 3930,
|
|
"valid_targets_mean": 5792.9,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 4.59696261682243,
|
|
"grad_norm": 0.42250667745488013,
|
|
"learning_rate": 1.2735554423586179e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23001569509506226,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4903.9,
|
|
"valid_targets_min": 1852
|
|
},
|
|
{
|
|
"epoch": 4.602803738317757,
|
|
"grad_norm": 0.3944016830033981,
|
|
"learning_rate": 1.2681300649159315e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2975599467754364,
|
|
"step": 3940,
|
|
"valid_targets_mean": 6357.0,
|
|
"valid_targets_min": 2232
|
|
},
|
|
{
|
|
"epoch": 4.608644859813084,
|
|
"grad_norm": 0.4094978817954704,
|
|
"learning_rate": 1.2627108986456862e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23677599430084229,
|
|
"step": 3945,
|
|
"valid_targets_mean": 5648.9,
|
|
"valid_targets_min": 2879
|
|
},
|
|
{
|
|
"epoch": 4.614485981308412,
|
|
"grad_norm": 0.4242087860781529,
|
|
"learning_rate": 1.2572979895388082e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2935549318790436,
|
|
"step": 3950,
|
|
"valid_targets_mean": 5951.4,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 4.6203271028037385,
|
|
"grad_norm": 0.3938086709982357,
|
|
"learning_rate": 1.251891383533121e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24639947712421417,
|
|
"step": 3955,
|
|
"valid_targets_mean": 6372.2,
|
|
"valid_targets_min": 2892
|
|
},
|
|
{
|
|
"epoch": 4.626168224299065,
|
|
"grad_norm": 0.41709565510140206,
|
|
"learning_rate": 1.2464911265129549e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547627389431,
|
|
"step": 3960,
|
|
"valid_targets_mean": 6514.5,
|
|
"valid_targets_min": 2387
|
|
},
|
|
{
|
|
"epoch": 4.632009345794392,
|
|
"grad_norm": 0.3796204969951175,
|
|
"learning_rate": 1.2410972643087595e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719391882419586,
|
|
"step": 3965,
|
|
"valid_targets_mean": 6366.7,
|
|
"valid_targets_min": 1987
|
|
},
|
|
{
|
|
"epoch": 4.63785046728972,
|
|
"grad_norm": 0.4174128683055382,
|
|
"learning_rate": 1.2357098426967125e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114030063152313,
|
|
"step": 3970,
|
|
"valid_targets_mean": 6520.2,
|
|
"valid_targets_min": 2298
|
|
},
|
|
{
|
|
"epoch": 4.643691588785047,
|
|
"grad_norm": 0.3984694306923491,
|
|
"learning_rate": 1.2303289073983311e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2598063349723816,
|
|
"step": 3975,
|
|
"valid_targets_mean": 6122.2,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 4.649532710280374,
|
|
"grad_norm": 0.42492239869590254,
|
|
"learning_rate": 1.2249545040800879e-05,
|
|
"loss": 0.2916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27392348647117615,
|
|
"step": 3980,
|
|
"valid_targets_mean": 5436.6,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 4.655373831775701,
|
|
"grad_norm": 0.43996785272080147,
|
|
"learning_rate": 1.2195866783530174e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3115236461162567,
|
|
"step": 3985,
|
|
"valid_targets_mean": 5623.9,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 4.661214953271028,
|
|
"grad_norm": 0.42240840353846126,
|
|
"learning_rate": 1.2142254757723335e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700909376144409,
|
|
"step": 3990,
|
|
"valid_targets_mean": 5701.5,
|
|
"valid_targets_min": 2686
|
|
},
|
|
{
|
|
"epoch": 4.667056074766355,
|
|
"grad_norm": 0.4273608480381604,
|
|
"learning_rate": 1.2088709418370406e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27809008955955505,
|
|
"step": 3995,
|
|
"valid_targets_mean": 5609.1,
|
|
"valid_targets_min": 2591
|
|
},
|
|
{
|
|
"epoch": 4.672897196261682,
|
|
"grad_norm": 0.41870339492492537,
|
|
"learning_rate": 1.2035231219895494e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3221145570278168,
|
|
"step": 4000,
|
|
"valid_targets_mean": 6212.7,
|
|
"valid_targets_min": 2072
|
|
},
|
|
{
|
|
"epoch": 4.678738317757009,
|
|
"grad_norm": 0.4159515456897901,
|
|
"learning_rate": 1.1981820616152887e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2259254902601242,
|
|
"step": 4005,
|
|
"valid_targets_mean": 5036.4,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 4.684579439252336,
|
|
"grad_norm": 0.43202214474189543,
|
|
"learning_rate": 1.1928478060423225e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779081165790558,
|
|
"step": 4010,
|
|
"valid_targets_mean": 5781.0,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 4.690420560747664,
|
|
"grad_norm": 0.46945607529692945,
|
|
"learning_rate": 1.1875204005409637e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556251585483551,
|
|
"step": 4015,
|
|
"valid_targets_mean": 5080.1,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 4.696261682242991,
|
|
"grad_norm": 0.5428775138738806,
|
|
"learning_rate": 1.1821998903233918e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307949036359787,
|
|
"step": 4020,
|
|
"valid_targets_mean": 5736.1,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 4.702102803738318,
|
|
"grad_norm": 0.4225282647404694,
|
|
"learning_rate": 1.1768863205432663e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24387653172016144,
|
|
"step": 4025,
|
|
"valid_targets_mean": 5581.7,
|
|
"valid_targets_min": 2055
|
|
},
|
|
{
|
|
"epoch": 4.707943925233645,
|
|
"grad_norm": 0.42089970683471317,
|
|
"learning_rate": 1.1715797362953464e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32187190651893616,
|
|
"step": 4030,
|
|
"valid_targets_mean": 6036.2,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 4.713785046728972,
|
|
"grad_norm": 0.3896160499377915,
|
|
"learning_rate": 1.166280182615106e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25558310747146606,
|
|
"step": 4035,
|
|
"valid_targets_mean": 5959.1,
|
|
"valid_targets_min": 2116
|
|
},
|
|
{
|
|
"epoch": 4.719626168224299,
|
|
"grad_norm": 0.39307544586031745,
|
|
"learning_rate": 1.1609877044783536e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28174594044685364,
|
|
"step": 4040,
|
|
"valid_targets_mean": 6388.2,
|
|
"valid_targets_min": 3537
|
|
},
|
|
{
|
|
"epoch": 4.725467289719626,
|
|
"grad_norm": 0.3919701499605165,
|
|
"learning_rate": 1.1557023468008496e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2626386880874634,
|
|
"step": 4045,
|
|
"valid_targets_mean": 6123.5,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 4.731308411214953,
|
|
"grad_norm": 0.4438706378629001,
|
|
"learning_rate": 1.150424154437924e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2521232068538666,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4404.8,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 4.73714953271028,
|
|
"grad_norm": 0.38735775627173946,
|
|
"learning_rate": 1.1451531721840973e-05,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2430744171142578,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5572.7,
|
|
"valid_targets_min": 2817
|
|
},
|
|
{
|
|
"epoch": 4.742990654205608,
|
|
"grad_norm": 0.4636372923716385,
|
|
"learning_rate": 1.1398894447727015e-05,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955625355243683,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5410.2,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 4.748831775700935,
|
|
"grad_norm": 0.477977117520862,
|
|
"learning_rate": 1.1346330168754957e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977015972137451,
|
|
"step": 4065,
|
|
"valid_targets_mean": 5727.5,
|
|
"valid_targets_min": 2648
|
|
},
|
|
{
|
|
"epoch": 4.7546728971962615,
|
|
"grad_norm": 0.4192632608032592,
|
|
"learning_rate": 1.1293839331022922e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005378842353821,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5423.5,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 4.760514018691588,
|
|
"grad_norm": 0.4562097359350653,
|
|
"learning_rate": 1.1241422380005757e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24501563608646393,
|
|
"step": 4075,
|
|
"valid_targets_mean": 5600.3,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 4.766355140186916,
|
|
"grad_norm": 0.39299359465483685,
|
|
"learning_rate": 1.1189079760551263e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281164288520813,
|
|
"step": 4080,
|
|
"valid_targets_mean": 6494.1,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 4.772196261682243,
|
|
"grad_norm": 0.4458641333058199,
|
|
"learning_rate": 1.1136811916876365e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2321058064699173,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4922.5,
|
|
"valid_targets_min": 2584
|
|
},
|
|
{
|
|
"epoch": 4.77803738317757,
|
|
"grad_norm": 0.44204267547385173,
|
|
"learning_rate": 1.1084619292563436e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23031234741210938,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5257.7,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 4.783878504672897,
|
|
"grad_norm": 0.4317833868431402,
|
|
"learning_rate": 1.103250233055647e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2659016251564026,
|
|
"step": 4095,
|
|
"valid_targets_mean": 5476.3,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 4.789719626168225,
|
|
"grad_norm": 0.3625374026033937,
|
|
"learning_rate": 1.0980461473157335e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23356233537197113,
|
|
"step": 4100,
|
|
"valid_targets_mean": 6572.5,
|
|
"valid_targets_min": 2357
|
|
},
|
|
{
|
|
"epoch": 4.795560747663552,
|
|
"grad_norm": 0.4231535680276758,
|
|
"learning_rate": 1.0928497162022004e-05,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500455677509308,
|
|
"step": 4105,
|
|
"valid_targets_mean": 5657.2,
|
|
"valid_targets_min": 2424
|
|
},
|
|
{
|
|
"epoch": 4.8014018691588785,
|
|
"grad_norm": 0.38049199088200536,
|
|
"learning_rate": 1.0876609838156855e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21486498415470123,
|
|
"step": 4110,
|
|
"valid_targets_mean": 5687.4,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 4.807242990654205,
|
|
"grad_norm": 0.4103114421887154,
|
|
"learning_rate": 1.0824799941914878e-05,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3114211857318878,
|
|
"step": 4115,
|
|
"valid_targets_mean": 5787.3,
|
|
"valid_targets_min": 1966
|
|
},
|
|
{
|
|
"epoch": 4.813084112149532,
|
|
"grad_norm": 0.471878880670459,
|
|
"learning_rate": 1.0773067912991948e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28342583775520325,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4750.7,
|
|
"valid_targets_min": 2384
|
|
},
|
|
{
|
|
"epoch": 4.81892523364486,
|
|
"grad_norm": 0.40330377170853704,
|
|
"learning_rate": 1.0721414190423132e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2640862762928009,
|
|
"step": 4125,
|
|
"valid_targets_mean": 5796.6,
|
|
"valid_targets_min": 2239
|
|
},
|
|
{
|
|
"epoch": 4.824766355140187,
|
|
"grad_norm": 0.4511545611874466,
|
|
"learning_rate": 1.0669839212578892e-05,
|
|
"loss": 0.2504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26860710978507996,
|
|
"step": 4130,
|
|
"valid_targets_mean": 5683.4,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 4.830607476635514,
|
|
"grad_norm": 0.42169626733243626,
|
|
"learning_rate": 1.0618343417161447e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3053106665611267,
|
|
"step": 4135,
|
|
"valid_targets_mean": 6027.6,
|
|
"valid_targets_min": 2838
|
|
},
|
|
{
|
|
"epoch": 4.836448598130842,
|
|
"grad_norm": 0.4230813182401775,
|
|
"learning_rate": 1.0566927241200986e-05,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2419642210006714,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5682.8,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 4.8422897196261685,
|
|
"grad_norm": 0.46700176330469634,
|
|
"learning_rate": 1.051559112105201e-05,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2690257728099823,
|
|
"step": 4145,
|
|
"valid_targets_mean": 5597.2,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 4.848130841121495,
|
|
"grad_norm": 0.4677494610377651,
|
|
"learning_rate": 1.0464335492389597e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23696915805339813,
|
|
"step": 4150,
|
|
"valid_targets_mean": 4785.5,
|
|
"valid_targets_min": 2556
|
|
},
|
|
{
|
|
"epoch": 4.853971962616822,
|
|
"grad_norm": 0.3997943149093783,
|
|
"learning_rate": 1.0413160790205735e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26244935393333435,
|
|
"step": 4155,
|
|
"valid_targets_mean": 6102.7,
|
|
"valid_targets_min": 3331
|
|
},
|
|
{
|
|
"epoch": 4.859813084112149,
|
|
"grad_norm": 0.42245810266744194,
|
|
"learning_rate": 1.0362067448805577e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558356523513794,
|
|
"step": 4160,
|
|
"valid_targets_mean": 5467.6,
|
|
"valid_targets_min": 2375
|
|
},
|
|
{
|
|
"epoch": 4.865654205607477,
|
|
"grad_norm": 0.42091879708173535,
|
|
"learning_rate": 1.0311055901803828e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.251865953207016,
|
|
"step": 4165,
|
|
"valid_targets_mean": 5414.9,
|
|
"valid_targets_min": 2145
|
|
},
|
|
{
|
|
"epoch": 4.871495327102804,
|
|
"grad_norm": 0.4323729254845641,
|
|
"learning_rate": 1.026012658212101e-05,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.301832914352417,
|
|
"step": 4170,
|
|
"valid_targets_mean": 5339.2,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 4.877336448598131,
|
|
"grad_norm": 0.3796713962350321,
|
|
"learning_rate": 1.0209279921979806e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2729886472225189,
|
|
"step": 4175,
|
|
"valid_targets_mean": 6516.4,
|
|
"valid_targets_min": 1989
|
|
},
|
|
{
|
|
"epoch": 4.883177570093458,
|
|
"grad_norm": 0.3989554379035716,
|
|
"learning_rate": 1.0158516352901375e-05,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601727545261383,
|
|
"step": 4180,
|
|
"valid_targets_mean": 5655.2,
|
|
"valid_targets_min": 2219
|
|
},
|
|
{
|
|
"epoch": 4.8890186915887845,
|
|
"grad_norm": 0.438077514897807,
|
|
"learning_rate": 1.0107836305701736e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2909350097179413,
|
|
"step": 4185,
|
|
"valid_targets_mean": 5307.4,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 4.894859813084112,
|
|
"grad_norm": 0.4263701164951172,
|
|
"learning_rate": 1.005724021048807e-05,
|
|
"loss": 0.283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29434189200401306,
|
|
"step": 4190,
|
|
"valid_targets_mean": 5932.5,
|
|
"valid_targets_min": 2136
|
|
},
|
|
{
|
|
"epoch": 4.900700934579439,
|
|
"grad_norm": 0.45017510249401965,
|
|
"learning_rate": 1.0006728496655067e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27489393949508667,
|
|
"step": 4195,
|
|
"valid_targets_mean": 5981.7,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 4.906542056074766,
|
|
"grad_norm": 0.42764787683541317,
|
|
"learning_rate": 9.956301592881312e-06,
|
|
"loss": 0.2828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30563271045684814,
|
|
"step": 4200,
|
|
"valid_targets_mean": 6126.8,
|
|
"valid_targets_min": 2322
|
|
},
|
|
{
|
|
"epoch": 4.912383177570094,
|
|
"grad_norm": 0.6123892819367832,
|
|
"learning_rate": 9.905959927125627e-06,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2883468270301819,
|
|
"step": 4205,
|
|
"valid_targets_mean": 6399.1,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 4.918224299065421,
|
|
"grad_norm": 0.43619925451635266,
|
|
"learning_rate": 9.855703926623452e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26147788763046265,
|
|
"step": 4210,
|
|
"valid_targets_mean": 5592.4,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 4.924065420560748,
|
|
"grad_norm": 0.3979350018625402,
|
|
"learning_rate": 9.805534017883187e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27453354001045227,
|
|
"step": 4215,
|
|
"valid_targets_mean": 6053.3,
|
|
"valid_targets_min": 2510
|
|
},
|
|
{
|
|
"epoch": 4.929906542056075,
|
|
"grad_norm": 0.44420179254587544,
|
|
"learning_rate": 9.755450626682611e-06,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27016738057136536,
|
|
"step": 4220,
|
|
"valid_targets_mean": 4841.8,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 4.9357476635514015,
|
|
"grad_norm": 0.4565796091561529,
|
|
"learning_rate": 9.705454178065268e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26040318608283997,
|
|
"step": 4225,
|
|
"valid_targets_mean": 4894.6,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 4.941588785046729,
|
|
"grad_norm": 0.4317474478264227,
|
|
"learning_rate": 9.655545096336815e-06,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29813551902770996,
|
|
"step": 4230,
|
|
"valid_targets_mean": 6116.0,
|
|
"valid_targets_min": 2431
|
|
},
|
|
{
|
|
"epoch": 4.947429906542056,
|
|
"grad_norm": 0.41432155633368367,
|
|
"learning_rate": 9.60572380506146e-06,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2804547846317291,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5483.4,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 4.953271028037383,
|
|
"grad_norm": 0.4332184838443979,
|
|
"learning_rate": 9.555990727058368e-06,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25518372654914856,
|
|
"step": 4240,
|
|
"valid_targets_mean": 5369.3,
|
|
"valid_targets_min": 2550
|
|
},
|
|
{
|
|
"epoch": 4.95911214953271,
|
|
"grad_norm": 0.40329701442642457,
|
|
"learning_rate": 9.506346284398071e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704017460346222,
|
|
"step": 4245,
|
|
"valid_targets_mean": 6149.2,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 4.964953271028038,
|
|
"grad_norm": 0.4243004057715675,
|
|
"learning_rate": 9.456790898398846e-06,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31128373742103577,
|
|
"step": 4250,
|
|
"valid_targets_mean": 5881.2,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 4.970794392523365,
|
|
"grad_norm": 0.388141443294983,
|
|
"learning_rate": 9.407324989623198e-06,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3127017915248871,
|
|
"step": 4255,
|
|
"valid_targets_mean": 6945.3,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 4.9766355140186915,
|
|
"grad_norm": 0.4360771137131523,
|
|
"learning_rate": 9.35794897787426e-06,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28903940320014954,
|
|
"step": 4260,
|
|
"valid_targets_mean": 5132.9,
|
|
"valid_targets_min": 2073
|
|
},
|
|
{
|
|
"epoch": 4.982476635514018,
|
|
"grad_norm": 0.43539637874275816,
|
|
"learning_rate": 9.308663282192237e-06,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29845666885375977,
|
|
"step": 4265,
|
|
"valid_targets_mean": 5752.2,
|
|
"valid_targets_min": 2280
|
|
},
|
|
{
|
|
"epoch": 4.988317757009346,
|
|
"grad_norm": 0.4840394559907446,
|
|
"learning_rate": 9.259468320850825e-06,
|
|
"loss": 0.267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2759968340396881,
|
|
"step": 4270,
|
|
"valid_targets_mean": 5128.4,
|
|
"valid_targets_min": 1869
|
|
},
|
|
{
|
|
"epoch": 4.994158878504673,
|
|
"grad_norm": 0.4364826699285254,
|
|
"learning_rate": 9.210364511353715e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965710163116455,
|
|
"step": 4275,
|
|
"valid_targets_mean": 5580.8,
|
|
"valid_targets_min": 1914
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4611264543496623,
|
|
"learning_rate": 9.16135227043099e-06,
|
|
"loss": 0.2745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21842102706432343,
|
|
"step": 4280,
|
|
"valid_targets_mean": 5229.1,
|
|
"valid_targets_min": 2750
|
|
},
|
|
{
|
|
"epoch": 5.005841121495327,
|
|
"grad_norm": 0.4683489727372989,
|
|
"learning_rate": 9.112432014035644e-06,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.268197625875473,
|
|
"step": 4285,
|
|
"valid_targets_mean": 4820.5,
|
|
"valid_targets_min": 1948
|
|
},
|
|
{
|
|
"epoch": 5.011682242990654,
|
|
"grad_norm": 0.43258540675996304,
|
|
"learning_rate": 9.063604157339994e-06,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26144930720329285,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5528.0,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 5.017523364485982,
|
|
"grad_norm": 0.3916867752903766,
|
|
"learning_rate": 9.014869114732213e-06,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616852819919586,
|
|
"step": 4295,
|
|
"valid_targets_mean": 5946.0,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 5.0233644859813085,
|
|
"grad_norm": 0.41312118698011996,
|
|
"learning_rate": 8.966227299812771e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26967307925224304,
|
|
"step": 4300,
|
|
"valid_targets_mean": 5531.8,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 5.029205607476635,
|
|
"grad_norm": 0.4255494802057752,
|
|
"learning_rate": 8.917679125390961e-06,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29777324199676514,
|
|
"step": 4305,
|
|
"valid_targets_mean": 5151.7,
|
|
"valid_targets_min": 2580
|
|
},
|
|
{
|
|
"epoch": 5.035046728971962,
|
|
"grad_norm": 0.38599668006282095,
|
|
"learning_rate": 8.869225003481342e-06,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22807276248931885,
|
|
"step": 4310,
|
|
"valid_targets_mean": 6109.1,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 5.04088785046729,
|
|
"grad_norm": 0.39735683614266176,
|
|
"learning_rate": 8.820865345300308e-06,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26792192459106445,
|
|
"step": 4315,
|
|
"valid_targets_mean": 5918.6,
|
|
"valid_targets_min": 2500
|
|
},
|
|
{
|
|
"epoch": 5.046728971962617,
|
|
"grad_norm": 0.4618135641321975,
|
|
"learning_rate": 8.772600561262559e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24628634750843048,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4884.9,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 5.052570093457944,
|
|
"grad_norm": 0.3989075052962697,
|
|
"learning_rate": 8.72443106097761e-06,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674161493778229,
|
|
"step": 4325,
|
|
"valid_targets_mean": 5880.9,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 5.058411214953271,
|
|
"grad_norm": 0.4164544649769128,
|
|
"learning_rate": 8.676357253246337e-06,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27681827545166016,
|
|
"step": 4330,
|
|
"valid_targets_mean": 5741.8,
|
|
"valid_targets_min": 2603
|
|
},
|
|
{
|
|
"epoch": 5.0642523364485985,
|
|
"grad_norm": 0.39137232666349897,
|
|
"learning_rate": 8.628379546057509e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26229265332221985,
|
|
"step": 4335,
|
|
"valid_targets_mean": 6440.6,
|
|
"valid_targets_min": 2980
|
|
},
|
|
{
|
|
"epoch": 5.070093457943925,
|
|
"grad_norm": 0.4504147247060977,
|
|
"learning_rate": 8.580498346584316e-06,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967561185359955,
|
|
"step": 4340,
|
|
"valid_targets_mean": 5562.6,
|
|
"valid_targets_min": 1952
|
|
},
|
|
{
|
|
"epoch": 5.075934579439252,
|
|
"grad_norm": 0.42295314402864825,
|
|
"learning_rate": 8.532714061180897e-06,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3003305494785309,
|
|
"step": 4345,
|
|
"valid_targets_mean": 5562.5,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 5.081775700934579,
|
|
"grad_norm": 0.4191011455744012,
|
|
"learning_rate": 8.485027095378932e-06,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2694641649723053,
|
|
"step": 4350,
|
|
"valid_targets_mean": 6176.3,
|
|
"valid_targets_min": 2947
|
|
},
|
|
{
|
|
"epoch": 5.087616822429906,
|
|
"grad_norm": 0.42076555094488277,
|
|
"learning_rate": 8.437437853884163e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562076151371002,
|
|
"step": 4355,
|
|
"valid_targets_mean": 5612.0,
|
|
"valid_targets_min": 2905
|
|
},
|
|
{
|
|
"epoch": 5.093457943925234,
|
|
"grad_norm": 0.507269734667298,
|
|
"learning_rate": 8.389946740572984e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30784040689468384,
|
|
"step": 4360,
|
|
"valid_targets_mean": 5825.6,
|
|
"valid_targets_min": 2688
|
|
},
|
|
{
|
|
"epoch": 5.099299065420561,
|
|
"grad_norm": 0.43599873478434875,
|
|
"learning_rate": 8.342554158488985e-06,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23588453233242035,
|
|
"step": 4365,
|
|
"valid_targets_mean": 4849.2,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 5.105140186915888,
|
|
"grad_norm": 0.6173383600179773,
|
|
"learning_rate": 8.295260509839562e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30119559168815613,
|
|
"step": 4370,
|
|
"valid_targets_mean": 5401.3,
|
|
"valid_targets_min": 2436
|
|
},
|
|
{
|
|
"epoch": 5.110981308411215,
|
|
"grad_norm": 0.42490335125220347,
|
|
"learning_rate": 8.248066195992504e-06,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2909223735332489,
|
|
"step": 4375,
|
|
"valid_targets_mean": 6065.9,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 5.116822429906542,
|
|
"grad_norm": 0.5761638051994594,
|
|
"learning_rate": 8.200971617472547e-06,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27119868993759155,
|
|
"step": 4380,
|
|
"valid_targets_mean": 4907.4,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 5.122663551401869,
|
|
"grad_norm": 0.4362184052087601,
|
|
"learning_rate": 8.153977173958008e-06,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558010518550873,
|
|
"step": 4385,
|
|
"valid_targets_mean": 5346.8,
|
|
"valid_targets_min": 2388
|
|
},
|
|
{
|
|
"epoch": 5.128504672897196,
|
|
"grad_norm": 0.40200829311629754,
|
|
"learning_rate": 8.1070832642774e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24114705622196198,
|
|
"step": 4390,
|
|
"valid_targets_mean": 5396.9,
|
|
"valid_targets_min": 1719
|
|
},
|
|
{
|
|
"epoch": 5.134345794392523,
|
|
"grad_norm": 0.39071778802358004,
|
|
"learning_rate": 8.060290286406031e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2901424169540405,
|
|
"step": 4395,
|
|
"valid_targets_mean": 6530.5,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 5.140186915887851,
|
|
"grad_norm": 0.4989140821712292,
|
|
"learning_rate": 8.013598637462617e-06,
|
|
"loss": 0.2858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22212795913219452,
|
|
"step": 4400,
|
|
"valid_targets_mean": 5010.7,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 5.146028037383178,
|
|
"grad_norm": 0.40646365220244335,
|
|
"learning_rate": 7.967008713705933e-06,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2639615833759308,
|
|
"step": 4405,
|
|
"valid_targets_mean": 6073.9,
|
|
"valid_targets_min": 2064
|
|
},
|
|
{
|
|
"epoch": 5.151869158878505,
|
|
"grad_norm": 0.4502747440150536,
|
|
"learning_rate": 7.920520910531443e-06,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3038928210735321,
|
|
"step": 4410,
|
|
"valid_targets_mean": 5669.1,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 5.1577102803738315,
|
|
"grad_norm": 0.4333635335582143,
|
|
"learning_rate": 7.874135622467944e-06,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30569159984588623,
|
|
"step": 4415,
|
|
"valid_targets_mean": 5734.3,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 5.163551401869159,
|
|
"grad_norm": 0.44276032237355006,
|
|
"learning_rate": 7.8278532431742e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711534798145294,
|
|
"step": 4420,
|
|
"valid_targets_mean": 5279.0,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 5.169392523364486,
|
|
"grad_norm": 0.43052301595969106,
|
|
"learning_rate": 7.781674165435644e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2860606610774994,
|
|
"step": 4425,
|
|
"valid_targets_mean": 5414.0,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 5.175233644859813,
|
|
"grad_norm": 0.46246703405026807,
|
|
"learning_rate": 7.735598781160991e-06,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27741578221321106,
|
|
"step": 4430,
|
|
"valid_targets_mean": 5090.5,
|
|
"valid_targets_min": 2149
|
|
},
|
|
{
|
|
"epoch": 5.18107476635514,
|
|
"grad_norm": 0.4214284999855625,
|
|
"learning_rate": 7.68962748137896e-06,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28107988834381104,
|
|
"step": 4435,
|
|
"valid_targets_mean": 6144.0,
|
|
"valid_targets_min": 2621
|
|
},
|
|
{
|
|
"epoch": 5.186915887850467,
|
|
"grad_norm": 0.4203723649827079,
|
|
"learning_rate": 7.64376065623492e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21902942657470703,
|
|
"step": 4440,
|
|
"valid_targets_mean": 5749.7,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 5.192757009345795,
|
|
"grad_norm": 0.41649589041402146,
|
|
"learning_rate": 7.5979986949875986e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2734936475753784,
|
|
"step": 4445,
|
|
"valid_targets_mean": 5624.1,
|
|
"valid_targets_min": 2382
|
|
},
|
|
{
|
|
"epoch": 5.1985981308411215,
|
|
"grad_norm": 0.3944991851972713,
|
|
"learning_rate": 7.5523419860057845e-06,
|
|
"loss": 0.2749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30521130561828613,
|
|
"step": 4450,
|
|
"valid_targets_mean": 6964.2,
|
|
"valid_targets_min": 2929
|
|
},
|
|
{
|
|
"epoch": 5.204439252336448,
|
|
"grad_norm": 0.40138490077279976,
|
|
"learning_rate": 7.5067909167649965e-06,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27103111147880554,
|
|
"step": 4455,
|
|
"valid_targets_mean": 6391.4,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 5.210280373831775,
|
|
"grad_norm": 0.4326472785441912,
|
|
"learning_rate": 7.461345873844239e-06,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28950178623199463,
|
|
"step": 4460,
|
|
"valid_targets_mean": 6558.7,
|
|
"valid_targets_min": 2388
|
|
},
|
|
{
|
|
"epoch": 5.216121495327103,
|
|
"grad_norm": 0.42443524926962173,
|
|
"learning_rate": 7.416007242922694e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27208998799324036,
|
|
"step": 4465,
|
|
"valid_targets_mean": 6358.2,
|
|
"valid_targets_min": 2653
|
|
},
|
|
{
|
|
"epoch": 5.22196261682243,
|
|
"grad_norm": 0.4573719288221464,
|
|
"learning_rate": 7.37077540877646e-06,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28502973914146423,
|
|
"step": 4470,
|
|
"valid_targets_mean": 5285.2,
|
|
"valid_targets_min": 2218
|
|
},
|
|
{
|
|
"epoch": 5.227803738317757,
|
|
"grad_norm": 0.42232249768621266,
|
|
"learning_rate": 7.325650755275276e-06,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28038978576660156,
|
|
"step": 4475,
|
|
"valid_targets_mean": 6229.5,
|
|
"valid_targets_min": 1733
|
|
},
|
|
{
|
|
"epoch": 5.233644859813084,
|
|
"grad_norm": 0.4236545754387443,
|
|
"learning_rate": 7.280633665379257e-06,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26462996006011963,
|
|
"step": 4480,
|
|
"valid_targets_mean": 5695.6,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 5.239485981308412,
|
|
"grad_norm": 0.48126782968127746,
|
|
"learning_rate": 7.235724521135681e-06,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29571735858917236,
|
|
"step": 4485,
|
|
"valid_targets_mean": 5514.7,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 5.2453271028037385,
|
|
"grad_norm": 0.3840063677644065,
|
|
"learning_rate": 7.190923703675714e-06,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.289844274520874,
|
|
"step": 4490,
|
|
"valid_targets_mean": 6220.2,
|
|
"valid_targets_min": 2516
|
|
},
|
|
{
|
|
"epoch": 5.251168224299065,
|
|
"grad_norm": 0.4049229747384855,
|
|
"learning_rate": 7.146231593211168e-06,
|
|
"loss": 0.2848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767803966999054,
|
|
"step": 4495,
|
|
"valid_targets_mean": 6071.2,
|
|
"valid_targets_min": 1960
|
|
},
|
|
{
|
|
"epoch": 5.257009345794392,
|
|
"grad_norm": 0.4236150256241915,
|
|
"learning_rate": 7.10164856903131e-06,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28978127241134644,
|
|
"step": 4500,
|
|
"valid_targets_mean": 6311.7,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 5.26285046728972,
|
|
"grad_norm": 0.4483580783052783,
|
|
"learning_rate": 7.057175009499615e-06,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2645438611507416,
|
|
"step": 4505,
|
|
"valid_targets_mean": 5052.8,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 5.268691588785047,
|
|
"grad_norm": 0.49713299573095937,
|
|
"learning_rate": 7.012811292050552e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2687908709049225,
|
|
"step": 4510,
|
|
"valid_targets_mean": 4807.5,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 5.274532710280374,
|
|
"grad_norm": 0.47556925097824054,
|
|
"learning_rate": 6.9685577931864076e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27352479100227356,
|
|
"step": 4515,
|
|
"valid_targets_mean": 5091.6,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 5.280373831775701,
|
|
"grad_norm": 0.40959955090866595,
|
|
"learning_rate": 6.924414888474069e-06,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22826071083545685,
|
|
"step": 4520,
|
|
"valid_targets_mean": 5649.5,
|
|
"valid_targets_min": 2048
|
|
},
|
|
{
|
|
"epoch": 5.286214953271028,
|
|
"grad_norm": 0.4436010225086012,
|
|
"learning_rate": 6.880382952541844e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24194328486919403,
|
|
"step": 4525,
|
|
"valid_targets_mean": 4584.7,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 5.292056074766355,
|
|
"grad_norm": 0.42455517231059375,
|
|
"learning_rate": 6.8364623590762745e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27113601565361023,
|
|
"step": 4530,
|
|
"valid_targets_mean": 5270.1,
|
|
"valid_targets_min": 2567
|
|
},
|
|
{
|
|
"epoch": 5.297897196261682,
|
|
"grad_norm": 0.4330872131546424,
|
|
"learning_rate": 6.792653480818962e-06,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2699664235115051,
|
|
"step": 4535,
|
|
"valid_targets_mean": 6041.2,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 5.303738317757009,
|
|
"grad_norm": 0.40521424813169155,
|
|
"learning_rate": 6.748956689563435e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24600763618946075,
|
|
"step": 4540,
|
|
"valid_targets_mean": 5562.5,
|
|
"valid_targets_min": 2032
|
|
},
|
|
{
|
|
"epoch": 5.309579439252336,
|
|
"grad_norm": 0.43376360909895234,
|
|
"learning_rate": 6.705372356151962e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25839272141456604,
|
|
"step": 4545,
|
|
"valid_targets_mean": 5372.0,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 5.315420560747664,
|
|
"grad_norm": 0.4353152227520632,
|
|
"learning_rate": 6.661900850472402e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2736652195453644,
|
|
"step": 4550,
|
|
"valid_targets_mean": 5366.1,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 5.321261682242991,
|
|
"grad_norm": 0.4548478300403231,
|
|
"learning_rate": 6.618542541455093e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2330966740846634,
|
|
"step": 4555,
|
|
"valid_targets_mean": 4253.5,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 5.327102803738318,
|
|
"grad_norm": 0.42863632872071106,
|
|
"learning_rate": 6.5752977970697e-06,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637367248535156,
|
|
"step": 4560,
|
|
"valid_targets_mean": 5513.0,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 5.332943925233645,
|
|
"grad_norm": 0.5208691580259405,
|
|
"learning_rate": 6.532166984322099e-06,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27387258410453796,
|
|
"step": 4565,
|
|
"valid_targets_mean": 5712.2,
|
|
"valid_targets_min": 2578
|
|
},
|
|
{
|
|
"epoch": 5.338785046728972,
|
|
"grad_norm": 0.4245023796320968,
|
|
"learning_rate": 6.489150469251249e-06,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2684333622455597,
|
|
"step": 4570,
|
|
"valid_targets_mean": 5110.5,
|
|
"valid_targets_min": 1949
|
|
},
|
|
{
|
|
"epoch": 5.344626168224299,
|
|
"grad_norm": 0.4071767094601245,
|
|
"learning_rate": 6.446248616926114e-06,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24832861125469208,
|
|
"step": 4575,
|
|
"valid_targets_mean": 5572.0,
|
|
"valid_targets_min": 2216
|
|
},
|
|
{
|
|
"epoch": 5.350467289719626,
|
|
"grad_norm": 0.4091789252547195,
|
|
"learning_rate": 6.403461791442529e-06,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2447708398103714,
|
|
"step": 4580,
|
|
"valid_targets_mean": 5243.2,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 5.356308411214953,
|
|
"grad_norm": 0.4632397004598627,
|
|
"learning_rate": 6.360790355920155e-06,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.278586208820343,
|
|
"step": 4585,
|
|
"valid_targets_mean": 5733.5,
|
|
"valid_targets_min": 2635
|
|
},
|
|
{
|
|
"epoch": 5.36214953271028,
|
|
"grad_norm": 0.43597175895312734,
|
|
"learning_rate": 6.318234672499337e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2519454061985016,
|
|
"step": 4590,
|
|
"valid_targets_mean": 4945.5,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 5.367990654205608,
|
|
"grad_norm": 0.408601206597811,
|
|
"learning_rate": 6.2757951023380936e-06,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2848447859287262,
|
|
"step": 4595,
|
|
"valid_targets_mean": 6161.2,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 5.373831775700935,
|
|
"grad_norm": 0.4182728218319651,
|
|
"learning_rate": 6.233472005609021e-06,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23566116392612457,
|
|
"step": 4600,
|
|
"valid_targets_mean": 5515.3,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 5.3796728971962615,
|
|
"grad_norm": 0.46922106732392105,
|
|
"learning_rate": 6.191265741496211e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26454827189445496,
|
|
"step": 4605,
|
|
"valid_targets_mean": 4660.8,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 5.385514018691588,
|
|
"grad_norm": 0.42832779089663414,
|
|
"learning_rate": 6.14917666819226e-06,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982839345932007,
|
|
"step": 4610,
|
|
"valid_targets_mean": 6193.7,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 5.391355140186916,
|
|
"grad_norm": 0.4196168004228742,
|
|
"learning_rate": 6.107205142895189e-06,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2876335084438324,
|
|
"step": 4615,
|
|
"valid_targets_mean": 5755.1,
|
|
"valid_targets_min": 1942
|
|
},
|
|
{
|
|
"epoch": 5.397196261682243,
|
|
"grad_norm": 0.4937332736866036,
|
|
"learning_rate": 6.0653515218054294e-06,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28219643235206604,
|
|
"step": 4620,
|
|
"valid_targets_mean": 4767.8,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 5.40303738317757,
|
|
"grad_norm": 0.3936347192333555,
|
|
"learning_rate": 6.023616160122774e-06,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28621363639831543,
|
|
"step": 4625,
|
|
"valid_targets_mean": 6157.1,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 5.408878504672897,
|
|
"grad_norm": 0.4031899865613844,
|
|
"learning_rate": 5.981999412043393e-06,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2411481887102127,
|
|
"step": 4630,
|
|
"valid_targets_mean": 5827.6,
|
|
"valid_targets_min": 3072
|
|
},
|
|
{
|
|
"epoch": 5.414719626168225,
|
|
"grad_norm": 0.46209586635637834,
|
|
"learning_rate": 5.940501630756818e-06,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27909615635871887,
|
|
"step": 4635,
|
|
"valid_targets_mean": 5435.7,
|
|
"valid_targets_min": 2495
|
|
},
|
|
{
|
|
"epoch": 5.420560747663552,
|
|
"grad_norm": 0.4395501104635503,
|
|
"learning_rate": 5.899123168442955e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27366703748703003,
|
|
"step": 4640,
|
|
"valid_targets_mean": 6013.2,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 5.4264018691588785,
|
|
"grad_norm": 0.4210057893048416,
|
|
"learning_rate": 5.857864376269051e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30027782917022705,
|
|
"step": 4645,
|
|
"valid_targets_mean": 6272.8,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 5.432242990654205,
|
|
"grad_norm": 0.4260888116931904,
|
|
"learning_rate": 5.816725604386777e-06,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2813575565814972,
|
|
"step": 4650,
|
|
"valid_targets_mean": 5354.6,
|
|
"valid_targets_min": 2634
|
|
},
|
|
{
|
|
"epoch": 5.438084112149532,
|
|
"grad_norm": 0.4194469152580931,
|
|
"learning_rate": 5.775707201929221e-06,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2401328831911087,
|
|
"step": 4655,
|
|
"valid_targets_mean": 5292.0,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 5.44392523364486,
|
|
"grad_norm": 0.4788716404012861,
|
|
"learning_rate": 5.734809517007909e-06,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655356228351593,
|
|
"step": 4660,
|
|
"valid_targets_mean": 6394.0,
|
|
"valid_targets_min": 3308
|
|
},
|
|
{
|
|
"epoch": 5.449766355140187,
|
|
"grad_norm": 0.46730924931667234,
|
|
"learning_rate": 5.694032896709896e-06,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25024116039276123,
|
|
"step": 4665,
|
|
"valid_targets_mean": 4464.0,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 5.455607476635514,
|
|
"grad_norm": 0.40454631372370203,
|
|
"learning_rate": 5.65337768709479e-06,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.274148166179657,
|
|
"step": 4670,
|
|
"valid_targets_mean": 6278.8,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 5.461448598130841,
|
|
"grad_norm": 0.41843865349901566,
|
|
"learning_rate": 5.6128442331918145e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2573558986186981,
|
|
"step": 4675,
|
|
"valid_targets_mean": 5385.5,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 5.4672897196261685,
|
|
"grad_norm": 0.4202672078038736,
|
|
"learning_rate": 5.572432878996881e-06,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836419641971588,
|
|
"step": 4680,
|
|
"valid_targets_mean": 5990.2,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 5.473130841121495,
|
|
"grad_norm": 0.4347593961517237,
|
|
"learning_rate": 5.53214396746969e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31787553429603577,
|
|
"step": 4685,
|
|
"valid_targets_mean": 6043.8,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 5.478971962616822,
|
|
"grad_norm": 0.42415490565841074,
|
|
"learning_rate": 5.4919778405308e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2648090422153473,
|
|
"step": 4690,
|
|
"valid_targets_mean": 6434.7,
|
|
"valid_targets_min": 3038
|
|
},
|
|
{
|
|
"epoch": 5.484813084112149,
|
|
"grad_norm": 0.44211672929709117,
|
|
"learning_rate": 5.451934839058739e-06,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2979629337787628,
|
|
"step": 4695,
|
|
"valid_targets_mean": 5532.4,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 5.490654205607477,
|
|
"grad_norm": 0.45246404092781883,
|
|
"learning_rate": 5.412015302887086e-06,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25458788871765137,
|
|
"step": 4700,
|
|
"valid_targets_mean": 5978.2,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 5.496495327102804,
|
|
"grad_norm": 0.42806712268451436,
|
|
"learning_rate": 5.372219570801618e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26325559616088867,
|
|
"step": 4705,
|
|
"valid_targets_mean": 5729.2,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 5.502336448598131,
|
|
"grad_norm": 0.41956486507599516,
|
|
"learning_rate": 5.332547980537426e-06,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23256827890872955,
|
|
"step": 4710,
|
|
"valid_targets_mean": 5772.6,
|
|
"valid_targets_min": 3156
|
|
},
|
|
{
|
|
"epoch": 5.508177570093458,
|
|
"grad_norm": 0.40164826394390835,
|
|
"learning_rate": 5.2930008687760306e-06,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26282739639282227,
|
|
"step": 4715,
|
|
"valid_targets_mean": 5941.2,
|
|
"valid_targets_min": 2209
|
|
},
|
|
{
|
|
"epoch": 5.5140186915887845,
|
|
"grad_norm": 0.40848247460357523,
|
|
"learning_rate": 5.2535785711425525e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.247559055685997,
|
|
"step": 4720,
|
|
"valid_targets_mean": 5724.1,
|
|
"valid_targets_min": 2677
|
|
},
|
|
{
|
|
"epoch": 5.519859813084112,
|
|
"grad_norm": 0.4227736506049368,
|
|
"learning_rate": 5.2142814222028355e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26744377613067627,
|
|
"step": 4725,
|
|
"valid_targets_mean": 5752.5,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 5.525700934579439,
|
|
"grad_norm": 0.4164006892493979,
|
|
"learning_rate": 5.175109755460641e-06,
|
|
"loss": 0.2761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751937210559845,
|
|
"step": 4730,
|
|
"valid_targets_mean": 6022.4,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 5.531542056074766,
|
|
"grad_norm": 0.4411745980001515,
|
|
"learning_rate": 5.136063903354778e-06,
|
|
"loss": 0.2616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26078012585639954,
|
|
"step": 4735,
|
|
"valid_targets_mean": 5570.6,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 5.537383177570094,
|
|
"grad_norm": 0.4259849447370609,
|
|
"learning_rate": 5.097144197256316e-06,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.253182977437973,
|
|
"step": 4740,
|
|
"valid_targets_mean": 5276.5,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 5.543224299065421,
|
|
"grad_norm": 0.4238544664927004,
|
|
"learning_rate": 5.058350967465757e-06,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661297619342804,
|
|
"step": 4745,
|
|
"valid_targets_mean": 5954.2,
|
|
"valid_targets_min": 1325
|
|
},
|
|
{
|
|
"epoch": 5.549065420560748,
|
|
"grad_norm": 0.44669883735428134,
|
|
"learning_rate": 5.0196845432102394e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30163872241973877,
|
|
"step": 4750,
|
|
"valid_targets_mean": 5736.4,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 5.554906542056075,
|
|
"grad_norm": 0.41393181154711595,
|
|
"learning_rate": 4.981145252640718e-06,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28708192706108093,
|
|
"step": 4755,
|
|
"valid_targets_mean": 7033.6,
|
|
"valid_targets_min": 2186
|
|
},
|
|
{
|
|
"epoch": 5.5607476635514015,
|
|
"grad_norm": 0.4234875640672243,
|
|
"learning_rate": 4.9427334228292245e-06,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2930881083011627,
|
|
"step": 4760,
|
|
"valid_targets_mean": 6263.4,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 5.566588785046729,
|
|
"grad_norm": 0.47136730803256843,
|
|
"learning_rate": 4.9044493797660605e-06,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2794117331504822,
|
|
"step": 4765,
|
|
"valid_targets_mean": 5056.1,
|
|
"valid_targets_min": 2095
|
|
},
|
|
{
|
|
"epoch": 5.572429906542056,
|
|
"grad_norm": 0.42230779533879886,
|
|
"learning_rate": 4.8662934483570314e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908078730106354,
|
|
"step": 4770,
|
|
"valid_targets_mean": 5896.6,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 5.578271028037383,
|
|
"grad_norm": 0.4836059321912568,
|
|
"learning_rate": 4.828265952420692e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25239965319633484,
|
|
"step": 4775,
|
|
"valid_targets_mean": 4633.9,
|
|
"valid_targets_min": 2506
|
|
},
|
|
{
|
|
"epoch": 5.58411214953271,
|
|
"grad_norm": 0.439358700724636,
|
|
"learning_rate": 4.790367214685614e-06,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24691395461559296,
|
|
"step": 4780,
|
|
"valid_targets_mean": 5422.0,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 5.589953271028038,
|
|
"grad_norm": 0.42525212335356083,
|
|
"learning_rate": 4.752597556787628e-06,
|
|
"loss": 0.2756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25299638509750366,
|
|
"step": 4785,
|
|
"valid_targets_mean": 5899.2,
|
|
"valid_targets_min": 2624
|
|
},
|
|
{
|
|
"epoch": 5.595794392523365,
|
|
"grad_norm": 0.4380366597363887,
|
|
"learning_rate": 4.714957299267109e-06,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.259724885225296,
|
|
"step": 4790,
|
|
"valid_targets_mean": 5120.7,
|
|
"valid_targets_min": 2442
|
|
},
|
|
{
|
|
"epoch": 5.6016355140186915,
|
|
"grad_norm": 0.40543975636638047,
|
|
"learning_rate": 4.677446761566229e-06,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.269046813249588,
|
|
"step": 4795,
|
|
"valid_targets_mean": 5864.2,
|
|
"valid_targets_min": 2715
|
|
},
|
|
{
|
|
"epoch": 5.607476635514018,
|
|
"grad_norm": 0.41740461527535416,
|
|
"learning_rate": 4.640066262026284e-06,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2549550235271454,
|
|
"step": 4800,
|
|
"valid_targets_mean": 6245.4,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 5.613317757009346,
|
|
"grad_norm": 0.42672614122506614,
|
|
"learning_rate": 4.6028161178849674e-06,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25964632630348206,
|
|
"step": 4805,
|
|
"valid_targets_mean": 5310.7,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 5.619158878504673,
|
|
"grad_norm": 0.42983016829580734,
|
|
"learning_rate": 4.565696645273672e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747996747493744,
|
|
"step": 4810,
|
|
"valid_targets_mean": 5806.2,
|
|
"valid_targets_min": 2943
|
|
},
|
|
{
|
|
"epoch": 5.625,
|
|
"grad_norm": 0.403434116192146,
|
|
"learning_rate": 4.528708159214832e-06,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.275636225938797,
|
|
"step": 4815,
|
|
"valid_targets_mean": 6055.8,
|
|
"valid_targets_min": 2305
|
|
},
|
|
{
|
|
"epoch": 5.630841121495327,
|
|
"grad_norm": 0.4709087748119695,
|
|
"learning_rate": 4.491850973619239e-06,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27921801805496216,
|
|
"step": 4820,
|
|
"valid_targets_mean": 4983.1,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 5.636682242990654,
|
|
"grad_norm": 0.41399488471927987,
|
|
"learning_rate": 4.45512540128336e-06,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24737860262393951,
|
|
"step": 4825,
|
|
"valid_targets_mean": 5612.1,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 5.642523364485982,
|
|
"grad_norm": 0.44550327822345687,
|
|
"learning_rate": 4.418531753886701e-06,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2654818594455719,
|
|
"step": 4830,
|
|
"valid_targets_mean": 5307.3,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 5.6483644859813085,
|
|
"grad_norm": 0.40024641497957175,
|
|
"learning_rate": 4.382070341989168e-06,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29396870732307434,
|
|
"step": 4835,
|
|
"valid_targets_mean": 6806.0,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 5.654205607476635,
|
|
"grad_norm": 0.40116633214813135,
|
|
"learning_rate": 4.3457414750284175e-06,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27017083764076233,
|
|
"step": 4840,
|
|
"valid_targets_mean": 6060.1,
|
|
"valid_targets_min": 2600
|
|
},
|
|
{
|
|
"epoch": 5.660046728971962,
|
|
"grad_norm": 1.0029959030138396,
|
|
"learning_rate": 4.309545461317235e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2504059374332428,
|
|
"step": 4845,
|
|
"valid_targets_mean": 5332.8,
|
|
"valid_targets_min": 1813
|
|
},
|
|
{
|
|
"epoch": 5.66588785046729,
|
|
"grad_norm": 0.6032905267824268,
|
|
"learning_rate": 4.2734826080409086e-06,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25679001212120056,
|
|
"step": 4850,
|
|
"valid_targets_mean": 5245.0,
|
|
"valid_targets_min": 2192
|
|
},
|
|
{
|
|
"epoch": 5.671728971962617,
|
|
"grad_norm": 0.4418602102717248,
|
|
"learning_rate": 4.237553221254645e-06,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22983582317829132,
|
|
"step": 4855,
|
|
"valid_targets_mean": 5011.5,
|
|
"valid_targets_min": 2415
|
|
},
|
|
{
|
|
"epoch": 5.677570093457944,
|
|
"grad_norm": 0.47633403582267947,
|
|
"learning_rate": 4.201757605880959e-06,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.241469606757164,
|
|
"step": 4860,
|
|
"valid_targets_mean": 4981.9,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 5.683411214953271,
|
|
"grad_norm": 0.4618199245860873,
|
|
"learning_rate": 4.166096065707068e-06,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2260758876800537,
|
|
"step": 4865,
|
|
"valid_targets_mean": 4464.1,
|
|
"valid_targets_min": 2466
|
|
},
|
|
{
|
|
"epoch": 5.6892523364485985,
|
|
"grad_norm": 0.4976550263588077,
|
|
"learning_rate": 4.130568903382355e-06,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2779473066329956,
|
|
"step": 4870,
|
|
"valid_targets_mean": 5060.2,
|
|
"valid_targets_min": 2386
|
|
},
|
|
{
|
|
"epoch": 5.695093457943925,
|
|
"grad_norm": 0.4268473310428514,
|
|
"learning_rate": 4.095176420415754e-06,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21427325904369354,
|
|
"step": 4875,
|
|
"valid_targets_mean": 5347.3,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.700934579439252,
|
|
"grad_norm": 0.45307763160655745,
|
|
"learning_rate": 4.059918917173233e-06,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27570435404777527,
|
|
"step": 4880,
|
|
"valid_targets_mean": 5245.2,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 5.706775700934579,
|
|
"grad_norm": 0.3999801674917073,
|
|
"learning_rate": 4.0247966928752055e-06,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637891471385956,
|
|
"step": 4885,
|
|
"valid_targets_mean": 6047.2,
|
|
"valid_targets_min": 1314
|
|
},
|
|
{
|
|
"epoch": 5.712616822429906,
|
|
"grad_norm": 0.4330483343970451,
|
|
"learning_rate": 3.9898100455940295e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656976878643036,
|
|
"step": 4890,
|
|
"valid_targets_mean": 5881.4,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 5.718457943925234,
|
|
"grad_norm": 0.4804701847710103,
|
|
"learning_rate": 3.954959272251448e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590313255786896,
|
|
"step": 4895,
|
|
"valid_targets_mean": 5473.2,
|
|
"valid_targets_min": 3125
|
|
},
|
|
{
|
|
"epoch": 5.724299065420561,
|
|
"grad_norm": 0.4060481224584283,
|
|
"learning_rate": 3.920244668616094e-06,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2776969373226166,
|
|
"step": 4900,
|
|
"valid_targets_mean": 5845.6,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 5.730140186915888,
|
|
"grad_norm": 0.4194042969777047,
|
|
"learning_rate": 3.88566652930094e-06,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2641277611255646,
|
|
"step": 4905,
|
|
"valid_targets_mean": 5684.4,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 5.7359813084112155,
|
|
"grad_norm": 0.5129639752002928,
|
|
"learning_rate": 3.851225147760853e-06,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674592435359955,
|
|
"step": 4910,
|
|
"valid_targets_mean": 4676.9,
|
|
"valid_targets_min": 2409
|
|
},
|
|
{
|
|
"epoch": 5.741822429906542,
|
|
"grad_norm": 0.4594146975273755,
|
|
"learning_rate": 3.816920816290066e-06,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520206868648529,
|
|
"step": 4915,
|
|
"valid_targets_mean": 5383.1,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 5.747663551401869,
|
|
"grad_norm": 0.40125096155074497,
|
|
"learning_rate": 3.7827538260197007e-06,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2182815670967102,
|
|
"step": 4920,
|
|
"valid_targets_mean": 5514.6,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 5.753504672897196,
|
|
"grad_norm": 0.42089606434712473,
|
|
"learning_rate": 3.748724466915299e-06,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23861844837665558,
|
|
"step": 4925,
|
|
"valid_targets_mean": 5174.5,
|
|
"valid_targets_min": 1903
|
|
},
|
|
{
|
|
"epoch": 5.759345794392523,
|
|
"grad_norm": 0.4294694592990514,
|
|
"learning_rate": 3.714833027774385e-06,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703806459903717,
|
|
"step": 4930,
|
|
"valid_targets_mean": 5935.9,
|
|
"valid_targets_min": 2637
|
|
},
|
|
{
|
|
"epoch": 5.765186915887851,
|
|
"grad_norm": 0.41511739665193403,
|
|
"learning_rate": 3.6810797962239853e-06,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29377683997154236,
|
|
"step": 4935,
|
|
"valid_targets_mean": 6019.6,
|
|
"valid_targets_min": 2158
|
|
},
|
|
{
|
|
"epoch": 5.771028037383178,
|
|
"grad_norm": 0.4362376135201013,
|
|
"learning_rate": 3.647465058718187e-06,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2443886399269104,
|
|
"step": 4940,
|
|
"valid_targets_mean": 5474.2,
|
|
"valid_targets_min": 2199
|
|
},
|
|
{
|
|
"epoch": 5.776869158878505,
|
|
"grad_norm": 0.4248915231170928,
|
|
"learning_rate": 3.6139891005357376e-06,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535012662410736,
|
|
"step": 4945,
|
|
"valid_targets_mean": 5822.9,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 5.7827102803738315,
|
|
"grad_norm": 0.3956227323228594,
|
|
"learning_rate": 3.580652205777588e-06,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24253308773040771,
|
|
"step": 4950,
|
|
"valid_targets_mean": 5870.9,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 5.788551401869158,
|
|
"grad_norm": 0.3660559219141033,
|
|
"learning_rate": 3.547454657364513e-06,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26675376296043396,
|
|
"step": 4955,
|
|
"valid_targets_mean": 7073.1,
|
|
"valid_targets_min": 3174
|
|
},
|
|
{
|
|
"epoch": 5.794392523364486,
|
|
"grad_norm": 0.4210735613251997,
|
|
"learning_rate": 3.5143967370346734e-06,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28480228781700134,
|
|
"step": 4960,
|
|
"valid_targets_mean": 6483.1,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 5.800233644859813,
|
|
"grad_norm": 0.424941136801449,
|
|
"learning_rate": 3.481478725341261e-06,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2671777606010437,
|
|
"step": 4965,
|
|
"valid_targets_mean": 6003.1,
|
|
"valid_targets_min": 3349
|
|
},
|
|
{
|
|
"epoch": 5.80607476635514,
|
|
"grad_norm": 0.46100159338353386,
|
|
"learning_rate": 3.448700901650104e-06,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2870350480079651,
|
|
"step": 4970,
|
|
"valid_targets_mean": 6179.1,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 5.811915887850468,
|
|
"grad_norm": 0.4343925194559382,
|
|
"learning_rate": 3.4160635441372868e-06,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24074487388134003,
|
|
"step": 4975,
|
|
"valid_targets_mean": 5091.2,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 5.817757009345795,
|
|
"grad_norm": 0.416632563446703,
|
|
"learning_rate": 3.383566929786797e-06,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28673669695854187,
|
|
"step": 4980,
|
|
"valid_targets_mean": 6012.3,
|
|
"valid_targets_min": 2941
|
|
},
|
|
{
|
|
"epoch": 5.8235981308411215,
|
|
"grad_norm": 0.44248771682531135,
|
|
"learning_rate": 3.3512113343881804e-06,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531958520412445,
|
|
"step": 4985,
|
|
"valid_targets_mean": 5624.2,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 5.829439252336448,
|
|
"grad_norm": 0.4043008486776463,
|
|
"learning_rate": 3.3189970325341967e-06,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261023610830307,
|
|
"step": 4990,
|
|
"valid_targets_mean": 5460.7,
|
|
"valid_targets_min": 2146
|
|
},
|
|
{
|
|
"epoch": 5.835280373831775,
|
|
"grad_norm": 0.45790573229262044,
|
|
"learning_rate": 3.286924297618477e-06,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.285582572221756,
|
|
"step": 4995,
|
|
"valid_targets_mean": 5280.8,
|
|
"valid_targets_min": 2326
|
|
},
|
|
{
|
|
"epoch": 5.841121495327103,
|
|
"grad_norm": 0.559306376973599,
|
|
"learning_rate": 3.254993401833224e-06,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24659843742847443,
|
|
"step": 5000,
|
|
"valid_targets_mean": 4774.9,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 5.84696261682243,
|
|
"grad_norm": 0.43595152802143494,
|
|
"learning_rate": 3.2232046161668885e-06,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534125745296478,
|
|
"step": 5005,
|
|
"valid_targets_mean": 5147.5,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 5.852803738317757,
|
|
"grad_norm": 0.5655597480193508,
|
|
"learning_rate": 3.1915582104018793e-06,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3168410658836365,
|
|
"step": 5010,
|
|
"valid_targets_mean": 5970.3,
|
|
"valid_targets_min": 2645
|
|
},
|
|
{
|
|
"epoch": 5.858644859813084,
|
|
"grad_norm": 0.43902798894826256,
|
|
"learning_rate": 3.1600544531122512e-06,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25323641300201416,
|
|
"step": 5015,
|
|
"valid_targets_mean": 5464.7,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 5.864485981308412,
|
|
"grad_norm": 0.4194385130959453,
|
|
"learning_rate": 3.1286936116614574e-06,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.284280925989151,
|
|
"step": 5020,
|
|
"valid_targets_mean": 6541.0,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 5.8703271028037385,
|
|
"grad_norm": 0.42636554761373385,
|
|
"learning_rate": 3.097475952200055e-06,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24875234067440033,
|
|
"step": 5025,
|
|
"valid_targets_mean": 5169.9,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 5.876168224299065,
|
|
"grad_norm": 0.4968054881313789,
|
|
"learning_rate": 3.0664017396634695e-06,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.290635883808136,
|
|
"step": 5030,
|
|
"valid_targets_mean": 5707.2,
|
|
"valid_targets_min": 2531
|
|
},
|
|
{
|
|
"epoch": 5.882009345794392,
|
|
"grad_norm": 0.42840634767690183,
|
|
"learning_rate": 3.0354712377697117e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27921369671821594,
|
|
"step": 5035,
|
|
"valid_targets_mean": 5518.5,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 5.88785046728972,
|
|
"grad_norm": 0.44318553783503895,
|
|
"learning_rate": 3.0046847090171784e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23435504734516144,
|
|
"step": 5040,
|
|
"valid_targets_mean": 5645.2,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 5.893691588785047,
|
|
"grad_norm": 0.4089761481301967,
|
|
"learning_rate": 2.9740424146824055e-06,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2663430869579315,
|
|
"step": 5045,
|
|
"valid_targets_mean": 5931.8,
|
|
"valid_targets_min": 2240
|
|
},
|
|
{
|
|
"epoch": 5.899532710280374,
|
|
"grad_norm": 0.4546566903701525,
|
|
"learning_rate": 2.9435446148178502e-06,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27404436469078064,
|
|
"step": 5050,
|
|
"valid_targets_mean": 5395.3,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 5.905373831775701,
|
|
"grad_norm": 0.4400351999567633,
|
|
"learning_rate": 2.913191568249678e-06,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23482485115528107,
|
|
"step": 5055,
|
|
"valid_targets_mean": 5155.9,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 5.911214953271028,
|
|
"grad_norm": 0.44878922843671526,
|
|
"learning_rate": 2.882983532575585e-06,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808745205402374,
|
|
"step": 5060,
|
|
"valid_targets_mean": 5036.4,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 5.917056074766355,
|
|
"grad_norm": 0.4584108282725527,
|
|
"learning_rate": 2.8529207641626013e-06,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27109354734420776,
|
|
"step": 5065,
|
|
"valid_targets_mean": 5329.5,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 5.922897196261682,
|
|
"grad_norm": 0.39203588777262127,
|
|
"learning_rate": 2.8230035181449068e-06,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.254946231842041,
|
|
"step": 5070,
|
|
"valid_targets_mean": 6408.0,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 5.928738317757009,
|
|
"grad_norm": 0.3949060922967789,
|
|
"learning_rate": 2.7932320484216746e-06,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23990599811077118,
|
|
"step": 5075,
|
|
"valid_targets_mean": 6081.7,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 5.934579439252336,
|
|
"grad_norm": 0.3925798715877199,
|
|
"learning_rate": 2.7636066076549204e-06,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26905086636543274,
|
|
"step": 5080,
|
|
"valid_targets_mean": 6639.8,
|
|
"valid_targets_min": 2786
|
|
},
|
|
{
|
|
"epoch": 5.940420560747664,
|
|
"grad_norm": 0.48880602496852127,
|
|
"learning_rate": 2.734127447267363e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2491501420736313,
|
|
"step": 5085,
|
|
"valid_targets_mean": 5161.7,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 5.946261682242991,
|
|
"grad_norm": 0.47195638795181344,
|
|
"learning_rate": 2.7047948174402548e-06,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27451005578041077,
|
|
"step": 5090,
|
|
"valid_targets_mean": 4783.4,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 5.952102803738318,
|
|
"grad_norm": 0.40620840681443987,
|
|
"learning_rate": 2.675608967111312e-06,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2466089278459549,
|
|
"step": 5095,
|
|
"valid_targets_mean": 5807.5,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 5.957943925233645,
|
|
"grad_norm": 0.4441938299724838,
|
|
"learning_rate": 2.6465701439725577e-06,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2778709828853607,
|
|
"step": 5100,
|
|
"valid_targets_mean": 5596.7,
|
|
"valid_targets_min": 2605
|
|
},
|
|
{
|
|
"epoch": 5.963785046728972,
|
|
"grad_norm": 0.4522457180124922,
|
|
"learning_rate": 2.6176785944682513e-06,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23234021663665771,
|
|
"step": 5105,
|
|
"valid_targets_mean": 4881.4,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 5.969626168224299,
|
|
"grad_norm": 0.4824798642636127,
|
|
"learning_rate": 2.5889345637927644e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29285183548927307,
|
|
"step": 5110,
|
|
"valid_targets_mean": 5174.1,
|
|
"valid_targets_min": 1782
|
|
},
|
|
{
|
|
"epoch": 5.975467289719626,
|
|
"grad_norm": 0.4047514161587277,
|
|
"learning_rate": 2.5603382958885316e-06,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24747729301452637,
|
|
"step": 5115,
|
|
"valid_targets_mean": 6507.6,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 5.981308411214953,
|
|
"grad_norm": 0.46544278126760263,
|
|
"learning_rate": 2.531890033443967e-06,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592564523220062,
|
|
"step": 5120,
|
|
"valid_targets_mean": 6237.1,
|
|
"valid_targets_min": 2239
|
|
},
|
|
{
|
|
"epoch": 5.98714953271028,
|
|
"grad_norm": 0.609889407906906,
|
|
"learning_rate": 2.5035900178914e-06,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24389086663722992,
|
|
"step": 5125,
|
|
"valid_targets_mean": 5371.2,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 5.992990654205608,
|
|
"grad_norm": 0.40910971908828375,
|
|
"learning_rate": 2.475438489405022e-06,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2672082483768463,
|
|
"step": 5130,
|
|
"valid_targets_mean": 6124.1,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 5.998831775700935,
|
|
"grad_norm": 0.40210696325112216,
|
|
"learning_rate": 2.4474356868988757e-06,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30484625697135925,
|
|
"step": 5135,
|
|
"valid_targets_mean": 6825.3,
|
|
"valid_targets_min": 2361
|
|
},
|
|
{
|
|
"epoch": 6.0046728971962615,
|
|
"grad_norm": 0.4521110597106376,
|
|
"learning_rate": 2.419581848024799e-06,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30943915247917175,
|
|
"step": 5140,
|
|
"valid_targets_mean": 6038.5,
|
|
"valid_targets_min": 2148
|
|
},
|
|
{
|
|
"epoch": 6.010514018691588,
|
|
"grad_norm": 0.46779056179486517,
|
|
"learning_rate": 2.39187720917041e-06,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23215711116790771,
|
|
"step": 5145,
|
|
"valid_targets_mean": 5412.8,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 6.016355140186916,
|
|
"grad_norm": 0.4184933007754886,
|
|
"learning_rate": 2.3643220054571226e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25030407309532166,
|
|
"step": 5150,
|
|
"valid_targets_mean": 5629.0,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 6.022196261682243,
|
|
"grad_norm": 0.42769463427153154,
|
|
"learning_rate": 2.3369164707381285e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2737978994846344,
|
|
"step": 5155,
|
|
"valid_targets_mean": 5894.8,
|
|
"valid_targets_min": 1826
|
|
},
|
|
{
|
|
"epoch": 6.02803738317757,
|
|
"grad_norm": 0.4312368352820477,
|
|
"learning_rate": 2.3096608375964304e-06,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29464882612228394,
|
|
"step": 5160,
|
|
"valid_targets_mean": 5701.1,
|
|
"valid_targets_min": 2599
|
|
},
|
|
{
|
|
"epoch": 6.033878504672897,
|
|
"grad_norm": 0.44715222719353126,
|
|
"learning_rate": 2.28255533734284e-06,
|
|
"loss": 0.2674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22759544849395752,
|
|
"step": 5165,
|
|
"valid_targets_mean": 5880.9,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 6.039719626168225,
|
|
"grad_norm": 0.48651365757257625,
|
|
"learning_rate": 2.2556002000140564e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2393018752336502,
|
|
"step": 5170,
|
|
"valid_targets_mean": 4878.8,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 6.045560747663552,
|
|
"grad_norm": 0.4543564828830646,
|
|
"learning_rate": 2.2287956543706725e-06,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28125330805778503,
|
|
"step": 5175,
|
|
"valid_targets_mean": 5853.6,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 6.0514018691588785,
|
|
"grad_norm": 0.4019423602801627,
|
|
"learning_rate": 2.202141927895267e-06,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2439814805984497,
|
|
"step": 5180,
|
|
"valid_targets_mean": 5883.3,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 6.057242990654205,
|
|
"grad_norm": 0.41152561912789926,
|
|
"learning_rate": 2.175639246790444e-06,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3023858368396759,
|
|
"step": 5185,
|
|
"valid_targets_mean": 6530.2,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 6.063084112149533,
|
|
"grad_norm": 0.4790645753271405,
|
|
"learning_rate": 2.149287835976943e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2515408992767334,
|
|
"step": 5190,
|
|
"valid_targets_mean": 5818.6,
|
|
"valid_targets_min": 2573
|
|
},
|
|
{
|
|
"epoch": 6.06892523364486,
|
|
"grad_norm": 0.3931190684367785,
|
|
"learning_rate": 2.1230879190917108e-06,
|
|
"loss": 0.2732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2579682171344757,
|
|
"step": 5195,
|
|
"valid_targets_mean": 6116.6,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 6.074766355140187,
|
|
"grad_norm": 0.4310966826587432,
|
|
"learning_rate": 2.0970397184860003e-06,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24607686698436737,
|
|
"step": 5200,
|
|
"valid_targets_mean": 6105.3,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 6.080607476635514,
|
|
"grad_norm": 0.49073540489854567,
|
|
"learning_rate": 2.0711434552235012e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25251638889312744,
|
|
"step": 5205,
|
|
"valid_targets_mean": 5165.5,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 6.086448598130841,
|
|
"grad_norm": 0.46287238594320634,
|
|
"learning_rate": 2.0453993490784518e-06,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3165760934352875,
|
|
"step": 5210,
|
|
"valid_targets_mean": 5373.2,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 6.0922897196261685,
|
|
"grad_norm": 0.3857527984356534,
|
|
"learning_rate": 2.01980761853378e-06,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24172580242156982,
|
|
"step": 5215,
|
|
"valid_targets_mean": 6157.5,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 6.098130841121495,
|
|
"grad_norm": 0.4184489787402962,
|
|
"learning_rate": 1.9943684807792275e-06,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2733888030052185,
|
|
"step": 5220,
|
|
"valid_targets_mean": 5978.0,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 6.103971962616822,
|
|
"grad_norm": 0.4223434653694505,
|
|
"learning_rate": 1.9690821517095448e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25891411304473877,
|
|
"step": 5225,
|
|
"valid_targets_mean": 5751.5,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 6.109813084112149,
|
|
"grad_norm": 0.387812463794055,
|
|
"learning_rate": 1.9439488459226296e-06,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819809913635254,
|
|
"step": 5230,
|
|
"valid_targets_mean": 6700.1,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 6.115654205607477,
|
|
"grad_norm": 0.5078844539179485,
|
|
"learning_rate": 1.9189687767177177e-06,
|
|
"loss": 0.2642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20685899257659912,
|
|
"step": 5235,
|
|
"valid_targets_mean": 4419.7,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 6.121495327102804,
|
|
"grad_norm": 0.41878188536875044,
|
|
"learning_rate": 1.8941421560935635e-06,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704790532588959,
|
|
"step": 5240,
|
|
"valid_targets_mean": 5946.9,
|
|
"valid_targets_min": 1600
|
|
},
|
|
{
|
|
"epoch": 6.127336448598131,
|
|
"grad_norm": 0.4131987579433658,
|
|
"learning_rate": 1.8694691947466514e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23708589375019073,
|
|
"step": 5245,
|
|
"valid_targets_mean": 5441.6,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 6.133177570093458,
|
|
"grad_norm": 0.40239055990186645,
|
|
"learning_rate": 1.8449501020694071e-06,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920968532562256,
|
|
"step": 5250,
|
|
"valid_targets_mean": 6210.5,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 6.139018691588785,
|
|
"grad_norm": 0.5487853385417065,
|
|
"learning_rate": 1.820585086148403e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29845932126045227,
|
|
"step": 5255,
|
|
"valid_targets_mean": 5557.0,
|
|
"valid_targets_min": 2665
|
|
},
|
|
{
|
|
"epoch": 6.144859813084112,
|
|
"grad_norm": 0.44919752708163324,
|
|
"learning_rate": 1.7963743537626222e-06,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2963375449180603,
|
|
"step": 5260,
|
|
"valid_targets_mean": 6318.5,
|
|
"valid_targets_min": 2338
|
|
},
|
|
{
|
|
"epoch": 6.150700934579439,
|
|
"grad_norm": 0.42101024639208723,
|
|
"learning_rate": 1.772318110381681e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26524022221565247,
|
|
"step": 5265,
|
|
"valid_targets_mean": 5776.9,
|
|
"valid_targets_min": 2560
|
|
},
|
|
{
|
|
"epoch": 6.156542056074766,
|
|
"grad_norm": 0.428741040209304,
|
|
"learning_rate": 1.7484165601640813e-06,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26596641540527344,
|
|
"step": 5270,
|
|
"valid_targets_mean": 5510.0,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 6.162383177570094,
|
|
"grad_norm": 0.4383381003277258,
|
|
"learning_rate": 1.724669905955505e-06,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2953561246395111,
|
|
"step": 5275,
|
|
"valid_targets_mean": 5578.7,
|
|
"valid_targets_min": 1073
|
|
},
|
|
{
|
|
"epoch": 6.168224299065421,
|
|
"grad_norm": 0.4352589529848281,
|
|
"learning_rate": 1.701078349287062e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23372475802898407,
|
|
"step": 5280,
|
|
"valid_targets_mean": 5393.5,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 6.174065420560748,
|
|
"grad_norm": 0.372702518853426,
|
|
"learning_rate": 1.6776420903736013e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2500326931476593,
|
|
"step": 5285,
|
|
"valid_targets_mean": 6528.0,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 6.179906542056075,
|
|
"grad_norm": 0.4331926516781692,
|
|
"learning_rate": 1.654361328112002e-06,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2493422031402588,
|
|
"step": 5290,
|
|
"valid_targets_mean": 5629.5,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 6.1857476635514015,
|
|
"grad_norm": 0.4089960171494385,
|
|
"learning_rate": 1.6312362600794828e-06,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26634353399276733,
|
|
"step": 5295,
|
|
"valid_targets_mean": 5811.6,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 6.191588785046729,
|
|
"grad_norm": 0.42296365212052706,
|
|
"learning_rate": 1.6082670825319358e-06,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24992568790912628,
|
|
"step": 5300,
|
|
"valid_targets_mean": 5786.8,
|
|
"valid_targets_min": 2715
|
|
},
|
|
{
|
|
"epoch": 6.197429906542056,
|
|
"grad_norm": 0.5605606058494581,
|
|
"learning_rate": 1.585453990402257e-06,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.263784259557724,
|
|
"step": 5305,
|
|
"valid_targets_mean": 5812.1,
|
|
"valid_targets_min": 2559
|
|
},
|
|
{
|
|
"epoch": 6.203271028037383,
|
|
"grad_norm": 0.3933633671902831,
|
|
"learning_rate": 1.562797177298676e-06,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2653537392616272,
|
|
"step": 5310,
|
|
"valid_targets_mean": 6201.0,
|
|
"valid_targets_min": 1924
|
|
},
|
|
{
|
|
"epoch": 6.20911214953271,
|
|
"grad_norm": 0.4605486551240352,
|
|
"learning_rate": 1.5402968355031434e-06,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27782005071640015,
|
|
"step": 5315,
|
|
"valid_targets_mean": 5858.0,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 6.214953271028038,
|
|
"grad_norm": 0.4087166450390765,
|
|
"learning_rate": 1.5179531559696693e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26611506938934326,
|
|
"step": 5320,
|
|
"valid_targets_mean": 5434.8,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 6.220794392523365,
|
|
"grad_norm": 0.41194166491312645,
|
|
"learning_rate": 1.4957663283227231e-06,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2839483916759491,
|
|
"step": 5325,
|
|
"valid_targets_mean": 5991.7,
|
|
"valid_targets_min": 2899
|
|
},
|
|
{
|
|
"epoch": 6.2266355140186915,
|
|
"grad_norm": 0.43499211742338745,
|
|
"learning_rate": 1.4737365408556193e-06,
|
|
"loss": 0.2716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2314544916152954,
|
|
"step": 5330,
|
|
"valid_targets_mean": 5302.3,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 6.232476635514018,
|
|
"grad_norm": 0.4277040010563867,
|
|
"learning_rate": 1.4518639805289026e-06,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.334431529045105,
|
|
"step": 5335,
|
|
"valid_targets_mean": 7185.9,
|
|
"valid_targets_min": 2335
|
|
},
|
|
{
|
|
"epoch": 6.238317757009346,
|
|
"grad_norm": 0.4239667564677219,
|
|
"learning_rate": 1.4301488329687918e-06,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22421784698963165,
|
|
"step": 5340,
|
|
"valid_targets_mean": 5560.8,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 6.244158878504673,
|
|
"grad_norm": 0.38859622719257375,
|
|
"learning_rate": 1.4085912824655834e-06,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21340501308441162,
|
|
"step": 5345,
|
|
"valid_targets_mean": 5933.6,
|
|
"valid_targets_min": 2644
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"grad_norm": 0.4239949555026895,
|
|
"learning_rate": 1.3871915119720836e-06,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28783032298088074,
|
|
"step": 5350,
|
|
"valid_targets_mean": 6055.2,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 6.255841121495327,
|
|
"grad_norm": 0.43303373485847035,
|
|
"learning_rate": 1.3659497031020763e-06,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2582138478755951,
|
|
"step": 5355,
|
|
"valid_targets_mean": 5350.6,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 6.261682242990654,
|
|
"grad_norm": 0.4231912505359721,
|
|
"learning_rate": 1.3448660361287647e-06,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24362236261367798,
|
|
"step": 5360,
|
|
"valid_targets_mean": 5280.0,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 6.267523364485982,
|
|
"grad_norm": 0.42964211280894804,
|
|
"learning_rate": 1.3239406899832518e-06,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3035826086997986,
|
|
"step": 5365,
|
|
"valid_targets_mean": 6373.6,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 6.2733644859813085,
|
|
"grad_norm": 0.4399084742581887,
|
|
"learning_rate": 1.3031738422530027e-06,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25916388630867004,
|
|
"step": 5370,
|
|
"valid_targets_mean": 4969.4,
|
|
"valid_targets_min": 2797
|
|
},
|
|
{
|
|
"epoch": 6.279205607476635,
|
|
"grad_norm": 0.3913066930694842,
|
|
"learning_rate": 1.2825656691803622e-06,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2434965819120407,
|
|
"step": 5375,
|
|
"valid_targets_mean": 5805.6,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 6.285046728971962,
|
|
"grad_norm": 0.4405066662240576,
|
|
"learning_rate": 1.2621163456610508e-06,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26449039578437805,
|
|
"step": 5380,
|
|
"valid_targets_mean": 5726.7,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 6.29088785046729,
|
|
"grad_norm": 0.451605056226235,
|
|
"learning_rate": 1.2418260452426756e-06,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23515813052654266,
|
|
"step": 5385,
|
|
"valid_targets_mean": 4700.1,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 6.296728971962617,
|
|
"grad_norm": 0.4073389853937397,
|
|
"learning_rate": 1.2216949401232526e-06,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655232846736908,
|
|
"step": 5390,
|
|
"valid_targets_mean": 5771.9,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 6.302570093457944,
|
|
"grad_norm": 0.4535748701357271,
|
|
"learning_rate": 1.2017232011497604e-06,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2899724543094635,
|
|
"step": 5395,
|
|
"valid_targets_mean": 5605.3,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 6.308411214953271,
|
|
"grad_norm": 0.49682659277635977,
|
|
"learning_rate": 1.1819109978166821e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28811997175216675,
|
|
"step": 5400,
|
|
"valid_targets_mean": 4941.7,
|
|
"valid_targets_min": 2316
|
|
},
|
|
{
|
|
"epoch": 6.3142523364485985,
|
|
"grad_norm": 0.6005332464615274,
|
|
"learning_rate": 1.162258498264559e-06,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2394605278968811,
|
|
"step": 5405,
|
|
"valid_targets_mean": 5731.2,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 6.320093457943925,
|
|
"grad_norm": 0.48301401499869573,
|
|
"learning_rate": 1.1427658692785814e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3031206429004669,
|
|
"step": 5410,
|
|
"valid_targets_mean": 4822.8,
|
|
"valid_targets_min": 2288
|
|
},
|
|
{
|
|
"epoch": 6.325934579439252,
|
|
"grad_norm": 0.42143444781989364,
|
|
"learning_rate": 1.1234332762871625e-06,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24695651233196259,
|
|
"step": 5415,
|
|
"valid_targets_mean": 5938.7,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 6.331775700934579,
|
|
"grad_norm": 0.43804941278454473,
|
|
"learning_rate": 1.1042608833605284e-06,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28811928629875183,
|
|
"step": 5420,
|
|
"valid_targets_mean": 5351.2,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 6.337616822429906,
|
|
"grad_norm": 0.46181218850131955,
|
|
"learning_rate": 1.0852488532093352e-06,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2105894535779953,
|
|
"step": 5425,
|
|
"valid_targets_mean": 4789.8,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 6.343457943925234,
|
|
"grad_norm": 0.4470724851094692,
|
|
"learning_rate": 1.0663973471832923e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2820603847503662,
|
|
"step": 5430,
|
|
"valid_targets_mean": 5700.8,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 6.349299065420561,
|
|
"grad_norm": 0.47950411254007624,
|
|
"learning_rate": 1.0477065252697783e-06,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23679699003696442,
|
|
"step": 5435,
|
|
"valid_targets_mean": 5196.8,
|
|
"valid_targets_min": 2416
|
|
},
|
|
{
|
|
"epoch": 6.355140186915888,
|
|
"grad_norm": 0.48754813167767713,
|
|
"learning_rate": 1.0291765460924963e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2585705518722534,
|
|
"step": 5440,
|
|
"valid_targets_mean": 5107.1,
|
|
"valid_targets_min": 2401
|
|
},
|
|
{
|
|
"epoch": 6.360981308411215,
|
|
"grad_norm": 0.4719680013293211,
|
|
"learning_rate": 1.0108075669101192e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2556621730327606,
|
|
"step": 5445,
|
|
"valid_targets_mean": 5251.5,
|
|
"valid_targets_min": 2347
|
|
},
|
|
{
|
|
"epoch": 6.366822429906542,
|
|
"grad_norm": 0.42805706894690704,
|
|
"learning_rate": 9.925997436149614e-07,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24644438922405243,
|
|
"step": 5450,
|
|
"valid_targets_mean": 5359.0,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 6.372663551401869,
|
|
"grad_norm": 0.5025152611537526,
|
|
"learning_rate": 9.74553230731654e-07,
|
|
"loss": 0.2568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22970841825008392,
|
|
"step": 5455,
|
|
"valid_targets_mean": 4554.8,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 6.378504672897196,
|
|
"grad_norm": 0.43616285295387286,
|
|
"learning_rate": 9.566681814158252e-07,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24354954063892365,
|
|
"step": 5460,
|
|
"valid_targets_mean": 5840.5,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 6.384345794392523,
|
|
"grad_norm": 0.4548808656284345,
|
|
"learning_rate": 9.389447474528191e-07,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25270333886146545,
|
|
"step": 5465,
|
|
"valid_targets_mean": 5287.1,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 6.390186915887851,
|
|
"grad_norm": 0.44073173145001426,
|
|
"learning_rate": 9.213830792563839e-07,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532658278942108,
|
|
"step": 5470,
|
|
"valid_targets_mean": 4786.2,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 6.396028037383178,
|
|
"grad_norm": 0.4666795064104298,
|
|
"learning_rate": 9.039833258674214e-07,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31317755579948425,
|
|
"step": 5475,
|
|
"valid_targets_mean": 5711.7,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 6.401869158878505,
|
|
"grad_norm": 0.45231965169743416,
|
|
"learning_rate": 8.867456349526904e-07,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651308476924896,
|
|
"step": 5480,
|
|
"valid_targets_mean": 5349.2,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 6.4077102803738315,
|
|
"grad_norm": 0.39546259404028283,
|
|
"learning_rate": 8.696701528035901e-07,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26629671454429626,
|
|
"step": 5485,
|
|
"valid_targets_mean": 6588.8,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 6.413551401869159,
|
|
"grad_norm": 0.42666673619155354,
|
|
"learning_rate": 8.52757024334887e-07,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23164886236190796,
|
|
"step": 5490,
|
|
"valid_targets_mean": 5046.4,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 6.419392523364486,
|
|
"grad_norm": 0.4470994403175756,
|
|
"learning_rate": 8.360063930835083e-07,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22943691909313202,
|
|
"step": 5495,
|
|
"valid_targets_mean": 4237.8,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 6.425233644859813,
|
|
"grad_norm": 0.5029452666320885,
|
|
"learning_rate": 8.194184012073015e-07,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28803229331970215,
|
|
"step": 5500,
|
|
"valid_targets_mean": 6431.3,
|
|
"valid_targets_min": 2090
|
|
},
|
|
{
|
|
"epoch": 6.43107476635514,
|
|
"grad_norm": 0.4182716835583643,
|
|
"learning_rate": 8.029931894838472e-07,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24630238115787506,
|
|
"step": 5505,
|
|
"valid_targets_mean": 5782.5,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 6.436915887850468,
|
|
"grad_norm": 0.44920152502497906,
|
|
"learning_rate": 7.867308973092624e-07,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2746298015117645,
|
|
"step": 5510,
|
|
"valid_targets_mean": 5266.4,
|
|
"valid_targets_min": 3359
|
|
},
|
|
{
|
|
"epoch": 6.442757009345795,
|
|
"grad_norm": 0.4355831823095298,
|
|
"learning_rate": 7.70631662696999e-07,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26222366094589233,
|
|
"step": 5515,
|
|
"valid_targets_mean": 5232.6,
|
|
"valid_targets_min": 2945
|
|
},
|
|
{
|
|
"epoch": 6.4485981308411215,
|
|
"grad_norm": 0.4549330736885335,
|
|
"learning_rate": 7.54695622276691e-07,
|
|
"loss": 0.2444,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802484929561615,
|
|
"step": 5520,
|
|
"valid_targets_mean": 5428.1,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 6.454439252336448,
|
|
"grad_norm": 0.4539440741928064,
|
|
"learning_rate": 7.389229112929963e-07,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25847795605659485,
|
|
"step": 5525,
|
|
"valid_targets_mean": 6033.6,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 6.460280373831775,
|
|
"grad_norm": 0.4237697534531004,
|
|
"learning_rate": 7.233136636044347e-07,
|
|
"loss": 0.2618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2876898944377899,
|
|
"step": 5530,
|
|
"valid_targets_mean": 5773.2,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 6.466121495327103,
|
|
"grad_norm": 0.3838304656911597,
|
|
"learning_rate": 7.078680116822601e-07,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27687564492225647,
|
|
"step": 5535,
|
|
"valid_targets_mean": 6772.9,
|
|
"valid_targets_min": 2723
|
|
},
|
|
{
|
|
"epoch": 6.47196261682243,
|
|
"grad_norm": 0.4626396611886884,
|
|
"learning_rate": 6.925860866093393e-07,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2737261950969696,
|
|
"step": 5540,
|
|
"valid_targets_mean": 5363.3,
|
|
"valid_targets_min": 1755
|
|
},
|
|
{
|
|
"epoch": 6.477803738317757,
|
|
"grad_norm": 0.4128825639602186,
|
|
"learning_rate": 6.774680180790327e-07,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656351625919342,
|
|
"step": 5545,
|
|
"valid_targets_mean": 5901.8,
|
|
"valid_targets_min": 2935
|
|
},
|
|
{
|
|
"epoch": 6.483644859813084,
|
|
"grad_norm": 0.4022714549183059,
|
|
"learning_rate": 6.625139343940978e-07,
|
|
"loss": 0.2796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2975848913192749,
|
|
"step": 5550,
|
|
"valid_targets_mean": 6423.4,
|
|
"valid_targets_min": 2227
|
|
},
|
|
{
|
|
"epoch": 6.489485981308412,
|
|
"grad_norm": 0.45565355283632053,
|
|
"learning_rate": 6.477239624655984e-07,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2438191920518875,
|
|
"step": 5555,
|
|
"valid_targets_mean": 5089.6,
|
|
"valid_targets_min": 2525
|
|
},
|
|
{
|
|
"epoch": 6.4953271028037385,
|
|
"grad_norm": 0.4489989541486985,
|
|
"learning_rate": 6.330982278118281e-07,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2629946172237396,
|
|
"step": 5560,
|
|
"valid_targets_mean": 6270.8,
|
|
"valid_targets_min": 2945
|
|
},
|
|
{
|
|
"epoch": 6.501168224299065,
|
|
"grad_norm": 0.39209788735369944,
|
|
"learning_rate": 6.186368545572508e-07,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27894487977027893,
|
|
"step": 5565,
|
|
"valid_targets_mean": 6487.2,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 6.507009345794392,
|
|
"grad_norm": 0.43221093301231256,
|
|
"learning_rate": 6.043399654314331e-07,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2554604411125183,
|
|
"step": 5570,
|
|
"valid_targets_mean": 5159.0,
|
|
"valid_targets_min": 2008
|
|
},
|
|
{
|
|
"epoch": 6.51285046728972,
|
|
"grad_norm": 0.4482912109951502,
|
|
"learning_rate": 5.902076817680136e-07,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23211169242858887,
|
|
"step": 5575,
|
|
"valid_targets_mean": 5452.9,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 6.518691588785047,
|
|
"grad_norm": 0.5250567515919674,
|
|
"learning_rate": 5.762401235036774e-07,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26514503359794617,
|
|
"step": 5580,
|
|
"valid_targets_mean": 5888.2,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 6.524532710280374,
|
|
"grad_norm": 0.4296114465310338,
|
|
"learning_rate": 5.624374091771257e-07,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370259165763855,
|
|
"step": 5585,
|
|
"valid_targets_mean": 5326.5,
|
|
"valid_targets_min": 1815
|
|
},
|
|
{
|
|
"epoch": 6.530373831775701,
|
|
"grad_norm": 0.45396376685111695,
|
|
"learning_rate": 5.487996559280806e-07,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21717329323291779,
|
|
"step": 5590,
|
|
"valid_targets_mean": 5142.5,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 6.536214953271028,
|
|
"grad_norm": 0.4081025049343576,
|
|
"learning_rate": 5.353269794962779e-07,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23534925282001495,
|
|
"step": 5595,
|
|
"valid_targets_mean": 5368.1,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 6.542056074766355,
|
|
"grad_norm": 0.45572067940202726,
|
|
"learning_rate": 5.220194942205048e-07,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28286468982696533,
|
|
"step": 5600,
|
|
"valid_targets_mean": 5450.8,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 6.547897196261682,
|
|
"grad_norm": 0.446752550148099,
|
|
"learning_rate": 5.088773130376124e-07,
|
|
"loss": 0.2631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2989085614681244,
|
|
"step": 5605,
|
|
"valid_targets_mean": 5401.4,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 6.553738317757009,
|
|
"grad_norm": 0.3925967058768185,
|
|
"learning_rate": 4.959005474815581e-07,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2421044111251831,
|
|
"step": 5610,
|
|
"valid_targets_mean": 6000.3,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 6.559579439252336,
|
|
"grad_norm": 0.4626232069307274,
|
|
"learning_rate": 4.830893076824716e-07,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25503337383270264,
|
|
"step": 5615,
|
|
"valid_targets_mean": 4874.6,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 6.565420560747664,
|
|
"grad_norm": 0.42493930773729083,
|
|
"learning_rate": 4.7044370236570823e-07,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2773296535015106,
|
|
"step": 5620,
|
|
"valid_targets_mean": 5978.1,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 6.571261682242991,
|
|
"grad_norm": 0.4563969633131315,
|
|
"learning_rate": 4.579638388509322e-07,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3040846884250641,
|
|
"step": 5625,
|
|
"valid_targets_mean": 6209.8,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 6.577102803738318,
|
|
"grad_norm": 0.46715673323603374,
|
|
"learning_rate": 4.4564982305120406e-07,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29139015078544617,
|
|
"step": 5630,
|
|
"valid_targets_mean": 5399.0,
|
|
"valid_targets_min": 2668
|
|
},
|
|
{
|
|
"epoch": 6.582943925233645,
|
|
"grad_norm": 0.45463153277470086,
|
|
"learning_rate": 4.335017594720792e-07,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29586896300315857,
|
|
"step": 5635,
|
|
"valid_targets_mean": 5754.0,
|
|
"valid_targets_min": 3008
|
|
},
|
|
{
|
|
"epoch": 6.588785046728972,
|
|
"grad_norm": 0.4287752467727481,
|
|
"learning_rate": 4.215197512107261e-07,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2867179811000824,
|
|
"step": 5640,
|
|
"valid_targets_mean": 6199.8,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 6.594626168224299,
|
|
"grad_norm": 0.4402479317040057,
|
|
"learning_rate": 4.09703899955054e-07,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2414342164993286,
|
|
"step": 5645,
|
|
"valid_targets_mean": 5060.8,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 6.600467289719626,
|
|
"grad_norm": 0.4460193614850032,
|
|
"learning_rate": 3.980543059828312e-07,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23999297618865967,
|
|
"step": 5650,
|
|
"valid_targets_mean": 4832.9,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 6.606308411214953,
|
|
"grad_norm": 0.45136450906010933,
|
|
"learning_rate": 3.865710681608592e-07,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25398561358451843,
|
|
"step": 5655,
|
|
"valid_targets_mean": 5490.9,
|
|
"valid_targets_min": 2394
|
|
},
|
|
{
|
|
"epoch": 6.61214953271028,
|
|
"grad_norm": 0.4307388895446753,
|
|
"learning_rate": 3.7525428394411976e-07,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27364346385002136,
|
|
"step": 5660,
|
|
"valid_targets_mean": 5719.9,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 6.617990654205608,
|
|
"grad_norm": 0.44714278700601634,
|
|
"learning_rate": 3.6410404937494925e-07,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28164374828338623,
|
|
"step": 5665,
|
|
"valid_targets_mean": 6442.9,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 6.623831775700935,
|
|
"grad_norm": 0.4304306265848142,
|
|
"learning_rate": 3.531204590822213e-07,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29788148403167725,
|
|
"step": 5670,
|
|
"valid_targets_mean": 6201.2,
|
|
"valid_targets_min": 2312
|
|
},
|
|
{
|
|
"epoch": 6.6296728971962615,
|
|
"grad_norm": 1.0335775635797146,
|
|
"learning_rate": 3.423036062805496e-07,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23358510434627533,
|
|
"step": 5675,
|
|
"valid_targets_mean": 5950.8,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 6.635514018691588,
|
|
"grad_norm": 0.4390344533190195,
|
|
"learning_rate": 3.316535827694978e-07,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2808207869529724,
|
|
"step": 5680,
|
|
"valid_targets_mean": 5808.9,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 6.641355140186916,
|
|
"grad_norm": 0.4301753204685441,
|
|
"learning_rate": 3.211704789327885e-07,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22018708288669586,
|
|
"step": 5685,
|
|
"valid_targets_mean": 5264.2,
|
|
"valid_targets_min": 2373
|
|
},
|
|
{
|
|
"epoch": 6.647196261682243,
|
|
"grad_norm": 0.45078733982638636,
|
|
"learning_rate": 3.1085438373755105e-07,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686904966831207,
|
|
"step": 5690,
|
|
"valid_targets_mean": 5552.0,
|
|
"valid_targets_min": 2547
|
|
},
|
|
{
|
|
"epoch": 6.65303738317757,
|
|
"grad_norm": 0.5661387964332304,
|
|
"learning_rate": 3.0070538473355947e-07,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25857385993003845,
|
|
"step": 5695,
|
|
"valid_targets_mean": 5380.8,
|
|
"valid_targets_min": 1489
|
|
},
|
|
{
|
|
"epoch": 6.658878504672897,
|
|
"grad_norm": 0.6426555115711413,
|
|
"learning_rate": 2.907235680524867e-07,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24820299446582794,
|
|
"step": 5700,
|
|
"valid_targets_mean": 4894.4,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 6.664719626168225,
|
|
"grad_norm": 0.4150863190657568,
|
|
"learning_rate": 2.8090901840717835e-07,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28781768679618835,
|
|
"step": 5705,
|
|
"valid_targets_mean": 6660.6,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 6.670560747663552,
|
|
"grad_norm": 0.42684769686540225,
|
|
"learning_rate": 2.712618190909311e-07,
|
|
"loss": 0.2633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29110485315322876,
|
|
"step": 5710,
|
|
"valid_targets_mean": 6112.0,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 6.6764018691588785,
|
|
"grad_norm": 0.43564824072684766,
|
|
"learning_rate": 2.6178205197678885e-07,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23521395027637482,
|
|
"step": 5715,
|
|
"valid_targets_mean": 5781.2,
|
|
"valid_targets_min": 2361
|
|
},
|
|
{
|
|
"epoch": 6.682242990654205,
|
|
"grad_norm": 0.4432088670387932,
|
|
"learning_rate": 2.524697975168455e-07,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29726672172546387,
|
|
"step": 5720,
|
|
"valid_targets_mean": 6311.0,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 6.688084112149532,
|
|
"grad_norm": 0.43246737810053454,
|
|
"learning_rate": 2.4332513474155884e-07,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26422181725502014,
|
|
"step": 5725,
|
|
"valid_targets_mean": 5374.7,
|
|
"valid_targets_min": 2055
|
|
},
|
|
{
|
|
"epoch": 6.69392523364486,
|
|
"grad_norm": 0.4336700906223618,
|
|
"learning_rate": 2.3434814125908867e-07,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2661651372909546,
|
|
"step": 5730,
|
|
"valid_targets_mean": 5553.4,
|
|
"valid_targets_min": 2386
|
|
},
|
|
{
|
|
"epoch": 6.699766355140187,
|
|
"grad_norm": 0.4358422056563847,
|
|
"learning_rate": 2.255388932546332e-07,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25160327553749084,
|
|
"step": 5735,
|
|
"valid_targets_mean": 5144.3,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 6.705607476635514,
|
|
"grad_norm": 0.6380515684412333,
|
|
"learning_rate": 2.1689746548977598e-07,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3115694522857666,
|
|
"step": 5740,
|
|
"valid_targets_mean": 6545.4,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 6.711448598130842,
|
|
"grad_norm": 0.4286849907930518,
|
|
"learning_rate": 2.0842393130186433e-07,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24729816615581512,
|
|
"step": 5745,
|
|
"valid_targets_mean": 5576.5,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 6.7172897196261685,
|
|
"grad_norm": 0.5129058033384012,
|
|
"learning_rate": 2.001183626033787e-07,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2895936965942383,
|
|
"step": 5750,
|
|
"valid_targets_mean": 4621.1,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 6.723130841121495,
|
|
"grad_norm": 0.4451998189751084,
|
|
"learning_rate": 1.9198082988132417e-07,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262713760137558,
|
|
"step": 5755,
|
|
"valid_targets_mean": 5044.2,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 6.728971962616822,
|
|
"grad_norm": 0.39650464849133815,
|
|
"learning_rate": 1.8401140219662882e-07,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527901828289032,
|
|
"step": 5760,
|
|
"valid_targets_mean": 6521.8,
|
|
"valid_targets_min": 3123
|
|
},
|
|
{
|
|
"epoch": 6.734813084112149,
|
|
"grad_norm": 0.43908643926874913,
|
|
"learning_rate": 1.7621014718356643e-07,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.283011794090271,
|
|
"step": 5765,
|
|
"valid_targets_mean": 6063.5,
|
|
"valid_targets_min": 2010
|
|
},
|
|
{
|
|
"epoch": 6.740654205607477,
|
|
"grad_norm": 0.42349552303391536,
|
|
"learning_rate": 1.685771310491724e-07,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28472378849983215,
|
|
"step": 5770,
|
|
"valid_targets_mean": 5598.3,
|
|
"valid_targets_min": 2333
|
|
},
|
|
{
|
|
"epoch": 6.746495327102804,
|
|
"grad_norm": 0.42587039203675187,
|
|
"learning_rate": 1.6111241857268866e-07,
|
|
"loss": 0.2713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30324944853782654,
|
|
"step": 5775,
|
|
"valid_targets_mean": 6193.6,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 6.752336448598131,
|
|
"grad_norm": 0.4155961458741729,
|
|
"learning_rate": 1.538160731050131e-07,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28202149271965027,
|
|
"step": 5780,
|
|
"valid_targets_mean": 5818.3,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 6.758177570093458,
|
|
"grad_norm": 0.42466771014228505,
|
|
"learning_rate": 1.466881565681577e-07,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27775925397872925,
|
|
"step": 5785,
|
|
"valid_targets_mean": 5668.6,
|
|
"valid_targets_min": 2324
|
|
},
|
|
{
|
|
"epoch": 6.7640186915887845,
|
|
"grad_norm": 0.4577469010651718,
|
|
"learning_rate": 1.3972872945472894e-07,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28986647725105286,
|
|
"step": 5790,
|
|
"valid_targets_mean": 5526.5,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 6.769859813084112,
|
|
"grad_norm": 0.5242091063311692,
|
|
"learning_rate": 1.3293785082740818e-07,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29190441966056824,
|
|
"step": 5795,
|
|
"valid_targets_mean": 6062.0,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 6.775700934579439,
|
|
"grad_norm": 0.4252947521863791,
|
|
"learning_rate": 1.2631557831845443e-07,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29828718304634094,
|
|
"step": 5800,
|
|
"valid_targets_mean": 6325.9,
|
|
"valid_targets_min": 2699
|
|
},
|
|
{
|
|
"epoch": 6.781542056074766,
|
|
"grad_norm": 0.4481683354032902,
|
|
"learning_rate": 1.198619681292179e-07,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25535690784454346,
|
|
"step": 5805,
|
|
"valid_targets_mean": 5508.0,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 6.787383177570094,
|
|
"grad_norm": 0.6924376239453618,
|
|
"learning_rate": 1.1357707502965387e-07,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23754143714904785,
|
|
"step": 5810,
|
|
"valid_targets_mean": 4986.7,
|
|
"valid_targets_min": 2642
|
|
},
|
|
{
|
|
"epoch": 6.793224299065421,
|
|
"grad_norm": 0.4956060257632549,
|
|
"learning_rate": 1.0746095235786958e-07,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543995678424835,
|
|
"step": 5815,
|
|
"valid_targets_mean": 4291.1,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 6.799065420560748,
|
|
"grad_norm": 0.4203674310834747,
|
|
"learning_rate": 1.0151365201965802e-07,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865830361843109,
|
|
"step": 5820,
|
|
"valid_targets_mean": 5729.4,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 6.804906542056075,
|
|
"grad_norm": 0.4291407771921918,
|
|
"learning_rate": 9.573522448807381e-08,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26590022444725037,
|
|
"step": 5825,
|
|
"valid_targets_mean": 5229.5,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 6.8107476635514015,
|
|
"grad_norm": 0.44147213610443,
|
|
"learning_rate": 9.012571880298915e-08,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2622389495372772,
|
|
"step": 5830,
|
|
"valid_targets_mean": 5429.2,
|
|
"valid_targets_min": 2186
|
|
},
|
|
{
|
|
"epoch": 6.816588785046729,
|
|
"grad_norm": 0.428442499251548,
|
|
"learning_rate": 8.468518257068737e-08,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24649639427661896,
|
|
"step": 5835,
|
|
"valid_targets_mean": 5314.0,
|
|
"valid_targets_min": 2737
|
|
},
|
|
{
|
|
"epoch": 6.822429906542056,
|
|
"grad_norm": 0.4409387942168038,
|
|
"learning_rate": 7.941366196345446e-08,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26033255457878113,
|
|
"step": 5840,
|
|
"valid_targets_mean": 5798.8,
|
|
"valid_targets_min": 2341
|
|
},
|
|
{
|
|
"epoch": 6.828271028037383,
|
|
"grad_norm": 0.4362915268615533,
|
|
"learning_rate": 7.431120171919049e-08,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28445425629615784,
|
|
"step": 5845,
|
|
"valid_targets_mean": 5194.0,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 6.83411214953271,
|
|
"grad_norm": 0.4277746375483683,
|
|
"learning_rate": 6.937784514102986e-08,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2590380311012268,
|
|
"step": 5850,
|
|
"valid_targets_mean": 5796.9,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 6.839953271028038,
|
|
"grad_norm": 0.39065745984528505,
|
|
"learning_rate": 6.46136340969683e-08,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2734238803386688,
|
|
"step": 5855,
|
|
"valid_targets_mean": 6791.7,
|
|
"valid_targets_min": 3106
|
|
},
|
|
{
|
|
"epoch": 6.845794392523365,
|
|
"grad_norm": 0.3994789487863353,
|
|
"learning_rate": 6.001860901951207e-08,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22150321304798126,
|
|
"step": 5860,
|
|
"valid_targets_mean": 6496.9,
|
|
"valid_targets_min": 2177
|
|
},
|
|
{
|
|
"epoch": 6.8516355140186915,
|
|
"grad_norm": 0.4553487054332497,
|
|
"learning_rate": 5.5592808905333694e-08,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2668880820274353,
|
|
"step": 5865,
|
|
"valid_targets_mean": 5642.2,
|
|
"valid_targets_min": 2437
|
|
},
|
|
{
|
|
"epoch": 6.857476635514018,
|
|
"grad_norm": 0.45120380554081785,
|
|
"learning_rate": 5.133627131494567e-08,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2851678431034088,
|
|
"step": 5870,
|
|
"valid_targets_mean": 5505.8,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 6.863317757009346,
|
|
"grad_norm": 0.40311783901864123,
|
|
"learning_rate": 4.724903237237399e-08,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30013713240623474,
|
|
"step": 5875,
|
|
"valid_targets_mean": 6338.9,
|
|
"valid_targets_min": 2374
|
|
},
|
|
{
|
|
"epoch": 6.869158878504673,
|
|
"grad_norm": 0.4632569422813869,
|
|
"learning_rate": 4.3331126764853956e-08,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26290735602378845,
|
|
"step": 5880,
|
|
"valid_targets_mean": 5094.5,
|
|
"valid_targets_min": 1868
|
|
},
|
|
{
|
|
"epoch": 6.875,
|
|
"grad_norm": 0.4749602573521509,
|
|
"learning_rate": 3.958258774253931e-08,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27191290259361267,
|
|
"step": 5885,
|
|
"valid_targets_mean": 5216.9,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 6.880841121495327,
|
|
"grad_norm": 0.4487438219009109,
|
|
"learning_rate": 3.6003447118218015e-08,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25419095158576965,
|
|
"step": 5890,
|
|
"valid_targets_mean": 4991.8,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 6.886682242990654,
|
|
"grad_norm": 0.5775991443758761,
|
|
"learning_rate": 3.2593735267039126e-08,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2532874047756195,
|
|
"step": 5895,
|
|
"valid_targets_mean": 5692.5,
|
|
"valid_targets_min": 2563
|
|
},
|
|
{
|
|
"epoch": 6.892523364485982,
|
|
"grad_norm": 0.473083707285834,
|
|
"learning_rate": 2.9353481126259687e-08,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28738054633140564,
|
|
"step": 5900,
|
|
"valid_targets_mean": 5877.4,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 6.8983644859813085,
|
|
"grad_norm": 0.4604620853258466,
|
|
"learning_rate": 2.628271219500045e-08,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24936725199222565,
|
|
"step": 5905,
|
|
"valid_targets_mean": 4800.9,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 6.904205607476635,
|
|
"grad_norm": 0.5209873230405653,
|
|
"learning_rate": 2.3381454534001647e-08,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29813849925994873,
|
|
"step": 5910,
|
|
"valid_targets_mean": 6965.8,
|
|
"valid_targets_min": 2362
|
|
},
|
|
{
|
|
"epoch": 6.910046728971962,
|
|
"grad_norm": 0.4155367608426339,
|
|
"learning_rate": 2.0649732765414265e-08,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26827654242515564,
|
|
"step": 5915,
|
|
"valid_targets_mean": 5623.9,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 6.91588785046729,
|
|
"grad_norm": 0.43424759335449487,
|
|
"learning_rate": 1.808757007258688e-08,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25616493821144104,
|
|
"step": 5920,
|
|
"valid_targets_mean": 5198.1,
|
|
"valid_targets_min": 2054
|
|
},
|
|
{
|
|
"epoch": 6.921728971962617,
|
|
"grad_norm": 0.3862390153870307,
|
|
"learning_rate": 1.5694988199868034e-08,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2595643103122711,
|
|
"step": 5925,
|
|
"valid_targets_mean": 6470.2,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 6.927570093457944,
|
|
"grad_norm": 0.42118474818335216,
|
|
"learning_rate": 1.3472007452419721e-08,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2712821662425995,
|
|
"step": 5930,
|
|
"valid_targets_mean": 5643.8,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 6.933411214953271,
|
|
"grad_norm": 0.38037614791304125,
|
|
"learning_rate": 1.141864669604864e-08,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26157400012016296,
|
|
"step": 5935,
|
|
"valid_targets_mean": 7007.3,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 6.9392523364485985,
|
|
"grad_norm": 0.46151406081350077,
|
|
"learning_rate": 9.53492335704409e-09,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2869607210159302,
|
|
"step": 5940,
|
|
"valid_targets_mean": 5191.1,
|
|
"valid_targets_min": 1703
|
|
},
|
|
{
|
|
"epoch": 6.945093457943925,
|
|
"grad_norm": 0.46103200513560466,
|
|
"learning_rate": 7.820853422033647e-09,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2593061625957489,
|
|
"step": 5945,
|
|
"valid_targets_mean": 5044.5,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 6.950934579439252,
|
|
"grad_norm": 0.42990884876631735,
|
|
"learning_rate": 6.276451437841058e-09,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26017293334007263,
|
|
"step": 5950,
|
|
"valid_targets_mean": 5742.4,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 6.956775700934579,
|
|
"grad_norm": 0.40981695838883586,
|
|
"learning_rate": 4.901730511368552e-09,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24987871944904327,
|
|
"step": 5955,
|
|
"valid_targets_mean": 5541.8,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 6.962616822429906,
|
|
"grad_norm": 0.41337286298358783,
|
|
"learning_rate": 3.6967023094813813e-09,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261912077665329,
|
|
"step": 5960,
|
|
"valid_targets_mean": 6190.2,
|
|
"valid_targets_min": 3575
|
|
},
|
|
{
|
|
"epoch": 6.968457943925234,
|
|
"grad_norm": 0.4235227708351569,
|
|
"learning_rate": 2.66137705891234e-09,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25253400206565857,
|
|
"step": 5965,
|
|
"valid_targets_mean": 5581.9,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 6.974299065420561,
|
|
"grad_norm": 0.4144524753922158,
|
|
"learning_rate": 1.7957635461729462e-09,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770947217941284,
|
|
"step": 5970,
|
|
"valid_targets_mean": 6634.6,
|
|
"valid_targets_min": 2332
|
|
},
|
|
{
|
|
"epoch": 6.980140186915888,
|
|
"grad_norm": 0.5615694945767085,
|
|
"learning_rate": 1.0998691174779475e-09,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27905651926994324,
|
|
"step": 5975,
|
|
"valid_targets_mean": 5953.5,
|
|
"valid_targets_min": 2471
|
|
},
|
|
{
|
|
"epoch": 6.9859813084112155,
|
|
"grad_norm": 0.4175897035511998,
|
|
"learning_rate": 5.736996786898097e-10,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838425934314728,
|
|
"step": 5980,
|
|
"valid_targets_mean": 5918.6,
|
|
"valid_targets_min": 2312
|
|
},
|
|
{
|
|
"epoch": 6.991822429906542,
|
|
"grad_norm": 0.493604017943128,
|
|
"learning_rate": 2.1725969525432378e-10,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24788911640644073,
|
|
"step": 5985,
|
|
"valid_targets_mean": 4260.7,
|
|
"valid_targets_min": 2043
|
|
},
|
|
{
|
|
"epoch": 6.997663551401869,
|
|
"grad_norm": 0.42174741800125287,
|
|
"learning_rate": 3.0552192180621775e-11,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3033873736858368,
|
|
"step": 5990,
|
|
"valid_targets_mean": 5693.0,
|
|
"valid_targets_min": 2572
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2079296112060547,
|
|
"step": 5992,
|
|
"total_flos": 4008973724024832.0,
|
|
"train_loss": 0.3080715393729617,
|
|
"train_runtime": 45846.0647,
|
|
"train_samples_per_second": 3.134,
|
|
"train_steps_per_second": 0.131,
|
|
"valid_targets_mean": 5229.1,
|
|
"valid_targets_min": 2750
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 5992,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4008973724024832.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|