18968 lines
527 KiB
JSON
18968 lines
527 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 6.999592999592999,
|
|
"eval_steps": 500,
|
|
"global_step": 8602,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00407000407000407,
|
|
"grad_norm": 16.688738856727717,
|
|
"learning_rate": 1.8583042973286877e-07,
|
|
"loss": 0.6602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.309528112411499,
|
|
"step": 5,
|
|
"valid_targets_mean": 7389.0,
|
|
"valid_targets_min": 5627
|
|
},
|
|
{
|
|
"epoch": 0.00814000814000814,
|
|
"grad_norm": 16.284518291358502,
|
|
"learning_rate": 4.181184668989548e-07,
|
|
"loss": 0.7004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23656058311462402,
|
|
"step": 10,
|
|
"valid_targets_mean": 1294.8,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 0.01221001221001221,
|
|
"grad_norm": 14.7645423037296,
|
|
"learning_rate": 6.504065040650407e-07,
|
|
"loss": 0.6791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3238072991371155,
|
|
"step": 15,
|
|
"valid_targets_mean": 8539.9,
|
|
"valid_targets_min": 6761
|
|
},
|
|
{
|
|
"epoch": 0.01628001628001628,
|
|
"grad_norm": 13.258019811923578,
|
|
"learning_rate": 8.826945412311266e-07,
|
|
"loss": 0.6127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28863972425460815,
|
|
"step": 20,
|
|
"valid_targets_mean": 7579.0,
|
|
"valid_targets_min": 6174
|
|
},
|
|
{
|
|
"epoch": 0.02035002035002035,
|
|
"grad_norm": 10.994253756620328,
|
|
"learning_rate": 1.1149825783972125e-06,
|
|
"loss": 0.6115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29230475425720215,
|
|
"step": 25,
|
|
"valid_targets_mean": 7328.2,
|
|
"valid_targets_min": 4839
|
|
},
|
|
{
|
|
"epoch": 0.02442002442002442,
|
|
"grad_norm": 7.768784524679733,
|
|
"learning_rate": 1.3472706155632985e-06,
|
|
"loss": 0.604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3191682696342468,
|
|
"step": 30,
|
|
"valid_targets_mean": 6838.2,
|
|
"valid_targets_min": 5034
|
|
},
|
|
{
|
|
"epoch": 0.02849002849002849,
|
|
"grad_norm": 5.751080564869644,
|
|
"learning_rate": 1.5795586527293845e-06,
|
|
"loss": 0.5845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29048168659210205,
|
|
"step": 35,
|
|
"valid_targets_mean": 7248.2,
|
|
"valid_targets_min": 5567
|
|
},
|
|
{
|
|
"epoch": 0.03256003256003256,
|
|
"grad_norm": 5.28995649654892,
|
|
"learning_rate": 1.8118466898954705e-06,
|
|
"loss": 0.5404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563433349132538,
|
|
"step": 40,
|
|
"valid_targets_mean": 6234.0,
|
|
"valid_targets_min": 5603
|
|
},
|
|
{
|
|
"epoch": 0.03663003663003663,
|
|
"grad_norm": 6.617277965973996,
|
|
"learning_rate": 2.0441347270615568e-06,
|
|
"loss": 0.5329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12342505156993866,
|
|
"step": 45,
|
|
"valid_targets_mean": 602.0,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 0.0407000407000407,
|
|
"grad_norm": 3.0152890224296596,
|
|
"learning_rate": 2.2764227642276426e-06,
|
|
"loss": 0.4588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22148989140987396,
|
|
"step": 50,
|
|
"valid_targets_mean": 7299.9,
|
|
"valid_targets_min": 6158
|
|
},
|
|
{
|
|
"epoch": 0.04477004477004477,
|
|
"grad_norm": 2.061368844016502,
|
|
"learning_rate": 2.5087108013937284e-06,
|
|
"loss": 0.4436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2079901248216629,
|
|
"step": 55,
|
|
"valid_targets_mean": 6140.0,
|
|
"valid_targets_min": 4648
|
|
},
|
|
{
|
|
"epoch": 0.04884004884004884,
|
|
"grad_norm": 1.356693424104223,
|
|
"learning_rate": 2.7409988385598146e-06,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21061325073242188,
|
|
"step": 60,
|
|
"valid_targets_mean": 6603.0,
|
|
"valid_targets_min": 4882
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"grad_norm": 1.0151992206933238,
|
|
"learning_rate": 2.9732868757259004e-06,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20564204454421997,
|
|
"step": 65,
|
|
"valid_targets_mean": 8015.0,
|
|
"valid_targets_min": 6344
|
|
},
|
|
{
|
|
"epoch": 0.05698005698005698,
|
|
"grad_norm": 0.8990999330833235,
|
|
"learning_rate": 3.205574912891986e-06,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18953731656074524,
|
|
"step": 70,
|
|
"valid_targets_mean": 6305.2,
|
|
"valid_targets_min": 4622
|
|
},
|
|
{
|
|
"epoch": 0.06105006105006105,
|
|
"grad_norm": 0.7436312346393562,
|
|
"learning_rate": 3.4378629500580724e-06,
|
|
"loss": 0.392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20007365942001343,
|
|
"step": 75,
|
|
"valid_targets_mean": 7890.8,
|
|
"valid_targets_min": 5485
|
|
},
|
|
{
|
|
"epoch": 0.06512006512006512,
|
|
"grad_norm": 0.6714920662823174,
|
|
"learning_rate": 3.670150987224158e-06,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.175824835896492,
|
|
"step": 80,
|
|
"valid_targets_mean": 7016.6,
|
|
"valid_targets_min": 4960
|
|
},
|
|
{
|
|
"epoch": 0.06919006919006919,
|
|
"grad_norm": 0.6272831883685218,
|
|
"learning_rate": 3.902439024390244e-06,
|
|
"loss": 0.373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17543438076972961,
|
|
"step": 85,
|
|
"valid_targets_mean": 6527.1,
|
|
"valid_targets_min": 5399
|
|
},
|
|
{
|
|
"epoch": 0.07326007326007326,
|
|
"grad_norm": 0.7089605501278246,
|
|
"learning_rate": 4.13472706155633e-06,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18534767627716064,
|
|
"step": 90,
|
|
"valid_targets_mean": 4995.0,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.07733007733007732,
|
|
"grad_norm": 0.5692706983764877,
|
|
"learning_rate": 4.367015098722416e-06,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1900652050971985,
|
|
"step": 95,
|
|
"valid_targets_mean": 7219.4,
|
|
"valid_targets_min": 5764
|
|
},
|
|
{
|
|
"epoch": 0.0814000814000814,
|
|
"grad_norm": 0.5526365515310625,
|
|
"learning_rate": 4.599303135888502e-06,
|
|
"loss": 0.364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18986254930496216,
|
|
"step": 100,
|
|
"valid_targets_mean": 7998.5,
|
|
"valid_targets_min": 5902
|
|
},
|
|
{
|
|
"epoch": 0.08547008547008547,
|
|
"grad_norm": 0.5591512425354309,
|
|
"learning_rate": 4.831591173054588e-06,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1477121263742447,
|
|
"step": 105,
|
|
"valid_targets_mean": 7377.4,
|
|
"valid_targets_min": 5015
|
|
},
|
|
{
|
|
"epoch": 0.08954008954008955,
|
|
"grad_norm": 0.47170286143316387,
|
|
"learning_rate": 5.063879210220674e-06,
|
|
"loss": 0.3322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14665284752845764,
|
|
"step": 110,
|
|
"valid_targets_mean": 6794.0,
|
|
"valid_targets_min": 5283
|
|
},
|
|
{
|
|
"epoch": 0.0936100936100936,
|
|
"grad_norm": 0.546971917140672,
|
|
"learning_rate": 5.29616724738676e-06,
|
|
"loss": 0.3457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1953507661819458,
|
|
"step": 115,
|
|
"valid_targets_mean": 6656.4,
|
|
"valid_targets_min": 4364
|
|
},
|
|
{
|
|
"epoch": 0.09768009768009768,
|
|
"grad_norm": 0.4730703657980557,
|
|
"learning_rate": 5.528455284552846e-06,
|
|
"loss": 0.3323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1581360101699829,
|
|
"step": 120,
|
|
"valid_targets_mean": 7178.4,
|
|
"valid_targets_min": 4448
|
|
},
|
|
{
|
|
"epoch": 0.10175010175010175,
|
|
"grad_norm": 0.5438344423212753,
|
|
"learning_rate": 5.7607433217189324e-06,
|
|
"loss": 0.3264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18480417132377625,
|
|
"step": 125,
|
|
"valid_targets_mean": 7619.8,
|
|
"valid_targets_min": 5737
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"grad_norm": 0.42704045318550676,
|
|
"learning_rate": 5.993031358885018e-06,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13994517922401428,
|
|
"step": 130,
|
|
"valid_targets_mean": 8754.4,
|
|
"valid_targets_min": 5752
|
|
},
|
|
{
|
|
"epoch": 0.10989010989010989,
|
|
"grad_norm": 0.5266020359474044,
|
|
"learning_rate": 6.225319396051104e-06,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15675979852676392,
|
|
"step": 135,
|
|
"valid_targets_mean": 6341.0,
|
|
"valid_targets_min": 4543
|
|
},
|
|
{
|
|
"epoch": 0.11396011396011396,
|
|
"grad_norm": 0.5048534740315108,
|
|
"learning_rate": 6.45760743321719e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15559503436088562,
|
|
"step": 140,
|
|
"valid_targets_mean": 7181.9,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 0.11803011803011804,
|
|
"grad_norm": 1.134587905553992,
|
|
"learning_rate": 6.6898954703832765e-06,
|
|
"loss": 0.3357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12442926317453384,
|
|
"step": 145,
|
|
"valid_targets_mean": 1104.4,
|
|
"valid_targets_min": 161
|
|
},
|
|
{
|
|
"epoch": 0.1221001221001221,
|
|
"grad_norm": 0.4270206279409013,
|
|
"learning_rate": 6.922183507549362e-06,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13907991349697113,
|
|
"step": 150,
|
|
"valid_targets_mean": 7018.2,
|
|
"valid_targets_min": 5424
|
|
},
|
|
{
|
|
"epoch": 0.12617012617012616,
|
|
"grad_norm": 0.4635086442233948,
|
|
"learning_rate": 7.154471544715448e-06,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1630023717880249,
|
|
"step": 155,
|
|
"valid_targets_mean": 7738.4,
|
|
"valid_targets_min": 5329
|
|
},
|
|
{
|
|
"epoch": 0.13024013024013023,
|
|
"grad_norm": 0.5079812404242928,
|
|
"learning_rate": 7.386759581881534e-06,
|
|
"loss": 0.325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1584567427635193,
|
|
"step": 160,
|
|
"valid_targets_mean": 6426.6,
|
|
"valid_targets_min": 5560
|
|
},
|
|
{
|
|
"epoch": 0.1343101343101343,
|
|
"grad_norm": 0.5050284094328902,
|
|
"learning_rate": 7.61904761904762e-06,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16165009140968323,
|
|
"step": 165,
|
|
"valid_targets_mean": 6169.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 0.13838013838013838,
|
|
"grad_norm": 0.498091820817201,
|
|
"learning_rate": 7.851335656213705e-06,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15471090376377106,
|
|
"step": 170,
|
|
"valid_targets_mean": 7204.6,
|
|
"valid_targets_min": 5381
|
|
},
|
|
{
|
|
"epoch": 0.14245014245014245,
|
|
"grad_norm": 0.48505113847764975,
|
|
"learning_rate": 8.083623693379791e-06,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1558820754289627,
|
|
"step": 175,
|
|
"valid_targets_mean": 7600.4,
|
|
"valid_targets_min": 5241
|
|
},
|
|
{
|
|
"epoch": 0.14652014652014653,
|
|
"grad_norm": 0.4895882936126897,
|
|
"learning_rate": 8.315911730545877e-06,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16261817514896393,
|
|
"step": 180,
|
|
"valid_targets_mean": 7098.5,
|
|
"valid_targets_min": 5607
|
|
},
|
|
{
|
|
"epoch": 0.1505901505901506,
|
|
"grad_norm": 0.49703616694051983,
|
|
"learning_rate": 8.548199767711964e-06,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15736651420593262,
|
|
"step": 185,
|
|
"valid_targets_mean": 6955.1,
|
|
"valid_targets_min": 5530
|
|
},
|
|
{
|
|
"epoch": 0.15466015466015465,
|
|
"grad_norm": 0.47329084552349354,
|
|
"learning_rate": 8.78048780487805e-06,
|
|
"loss": 0.2955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15062673389911652,
|
|
"step": 190,
|
|
"valid_targets_mean": 6748.1,
|
|
"valid_targets_min": 5364
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"grad_norm": 0.5304350179179551,
|
|
"learning_rate": 9.012775842044136e-06,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14595365524291992,
|
|
"step": 195,
|
|
"valid_targets_mean": 6273.9,
|
|
"valid_targets_min": 5438
|
|
},
|
|
{
|
|
"epoch": 0.1628001628001628,
|
|
"grad_norm": 0.4804611842699093,
|
|
"learning_rate": 9.24506387921022e-06,
|
|
"loss": 0.2991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14614039659500122,
|
|
"step": 200,
|
|
"valid_targets_mean": 6419.5,
|
|
"valid_targets_min": 4879
|
|
},
|
|
{
|
|
"epoch": 0.16687016687016687,
|
|
"grad_norm": 1.0372553917536562,
|
|
"learning_rate": 9.477351916376307e-06,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1451706886291504,
|
|
"step": 205,
|
|
"valid_targets_mean": 6483.4,
|
|
"valid_targets_min": 4309
|
|
},
|
|
{
|
|
"epoch": 0.17094017094017094,
|
|
"grad_norm": 0.5083493341784381,
|
|
"learning_rate": 9.709639953542393e-06,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16406603157520294,
|
|
"step": 210,
|
|
"valid_targets_mean": 6825.0,
|
|
"valid_targets_min": 5205
|
|
},
|
|
{
|
|
"epoch": 0.17501017501017502,
|
|
"grad_norm": 0.46301643664731623,
|
|
"learning_rate": 9.94192799070848e-06,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13244540989398956,
|
|
"step": 215,
|
|
"valid_targets_mean": 6729.4,
|
|
"valid_targets_min": 5200
|
|
},
|
|
{
|
|
"epoch": 0.1790801790801791,
|
|
"grad_norm": 0.49215202614656767,
|
|
"learning_rate": 1.0174216027874565e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13969576358795166,
|
|
"step": 220,
|
|
"valid_targets_mean": 6094.0,
|
|
"valid_targets_min": 5302
|
|
},
|
|
{
|
|
"epoch": 0.18315018315018314,
|
|
"grad_norm": 0.4871918972499842,
|
|
"learning_rate": 1.0406504065040652e-05,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14923205971717834,
|
|
"step": 225,
|
|
"valid_targets_mean": 7140.0,
|
|
"valid_targets_min": 5270
|
|
},
|
|
{
|
|
"epoch": 0.1872201872201872,
|
|
"grad_norm": 0.4471280600208976,
|
|
"learning_rate": 1.0638792102206736e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.131822407245636,
|
|
"step": 230,
|
|
"valid_targets_mean": 7425.0,
|
|
"valid_targets_min": 5281
|
|
},
|
|
{
|
|
"epoch": 0.19129019129019129,
|
|
"grad_norm": 1.2605795171969003,
|
|
"learning_rate": 1.0871080139372822e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1726950705051422,
|
|
"step": 235,
|
|
"valid_targets_mean": 7063.4,
|
|
"valid_targets_min": 5386
|
|
},
|
|
{
|
|
"epoch": 0.19536019536019536,
|
|
"grad_norm": 0.4932596835670058,
|
|
"learning_rate": 1.1103368176538909e-05,
|
|
"loss": 0.2845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14937910437583923,
|
|
"step": 240,
|
|
"valid_targets_mean": 6827.0,
|
|
"valid_targets_min": 4547
|
|
},
|
|
{
|
|
"epoch": 0.19943019943019943,
|
|
"grad_norm": 0.9167985105196412,
|
|
"learning_rate": 1.1335656213704995e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09083765000104904,
|
|
"step": 245,
|
|
"valid_targets_mean": 2047.0,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 0.2035002035002035,
|
|
"grad_norm": 0.4621000775660505,
|
|
"learning_rate": 1.1567944250871081e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386224627494812,
|
|
"step": 250,
|
|
"valid_targets_mean": 6754.1,
|
|
"valid_targets_min": 5196
|
|
},
|
|
{
|
|
"epoch": 0.20757020757020758,
|
|
"grad_norm": 0.5251600558673276,
|
|
"learning_rate": 1.1800232288037167e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13912081718444824,
|
|
"step": 255,
|
|
"valid_targets_mean": 5985.6,
|
|
"valid_targets_min": 5028
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"grad_norm": 0.5423901600668142,
|
|
"learning_rate": 1.2032520325203254e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12727007269859314,
|
|
"step": 260,
|
|
"valid_targets_mean": 5963.9,
|
|
"valid_targets_min": 5237
|
|
},
|
|
{
|
|
"epoch": 0.2157102157102157,
|
|
"grad_norm": 0.5203028801997557,
|
|
"learning_rate": 1.2264808362369338e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1433340162038803,
|
|
"step": 265,
|
|
"valid_targets_mean": 6319.0,
|
|
"valid_targets_min": 5412
|
|
},
|
|
{
|
|
"epoch": 0.21978021978021978,
|
|
"grad_norm": 0.5281674096137197,
|
|
"learning_rate": 1.2497096399535424e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11158867180347443,
|
|
"step": 270,
|
|
"valid_targets_mean": 7125.8,
|
|
"valid_targets_min": 5080
|
|
},
|
|
{
|
|
"epoch": 0.22385022385022385,
|
|
"grad_norm": 0.6582757970482628,
|
|
"learning_rate": 1.272938443670151e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13590899109840393,
|
|
"step": 275,
|
|
"valid_targets_mean": 6123.5,
|
|
"valid_targets_min": 4521
|
|
},
|
|
{
|
|
"epoch": 0.22792022792022792,
|
|
"grad_norm": 2.7614047900823837,
|
|
"learning_rate": 1.2961672473867597e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24310331046581268,
|
|
"step": 280,
|
|
"valid_targets_mean": 1886.0,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 0.231990231990232,
|
|
"grad_norm": 1.5491066018218123,
|
|
"learning_rate": 1.3193960511033683e-05,
|
|
"loss": 0.4133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20701418817043304,
|
|
"step": 285,
|
|
"valid_targets_mean": 1497.6,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 0.23606023606023607,
|
|
"grad_norm": 1.1536064750715551,
|
|
"learning_rate": 1.3426248548199769e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1264546811580658,
|
|
"step": 290,
|
|
"valid_targets_mean": 1356.0,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 0.24013024013024012,
|
|
"grad_norm": 1.2647474847869884,
|
|
"learning_rate": 1.3658536585365855e-05,
|
|
"loss": 0.3452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1455959528684616,
|
|
"step": 295,
|
|
"valid_targets_mean": 1324.5,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 0.2442002442002442,
|
|
"grad_norm": 0.997749848075399,
|
|
"learning_rate": 1.389082462253194e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15397366881370544,
|
|
"step": 300,
|
|
"valid_targets_mean": 1931.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 0.24827024827024827,
|
|
"grad_norm": 1.0630278095878878,
|
|
"learning_rate": 1.4123112659698026e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1414937824010849,
|
|
"step": 305,
|
|
"valid_targets_mean": 1292.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.2523402523402523,
|
|
"grad_norm": 0.9931657056115516,
|
|
"learning_rate": 1.4355400696864112e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1270873248577118,
|
|
"step": 310,
|
|
"valid_targets_mean": 1469.6,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 0.2564102564102564,
|
|
"grad_norm": 1.1018628472649998,
|
|
"learning_rate": 1.4587688734030199e-05,
|
|
"loss": 0.2936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15461672842502594,
|
|
"step": 315,
|
|
"valid_targets_mean": 1548.4,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.26048026048026046,
|
|
"grad_norm": 1.0914493892414534,
|
|
"learning_rate": 1.4819976771196285e-05,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13322024047374725,
|
|
"step": 320,
|
|
"valid_targets_mean": 1316.5,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"grad_norm": 1.1154554492176114,
|
|
"learning_rate": 1.5052264808362371e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11124762892723083,
|
|
"step": 325,
|
|
"valid_targets_mean": 1249.6,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 0.2686202686202686,
|
|
"grad_norm": 1.056667017250088,
|
|
"learning_rate": 1.528455284552846e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18556742370128632,
|
|
"step": 330,
|
|
"valid_targets_mean": 1833.0,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 0.2726902726902727,
|
|
"grad_norm": 1.004899439380304,
|
|
"learning_rate": 1.5516840882694542e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14251361787319183,
|
|
"step": 335,
|
|
"valid_targets_mean": 1338.4,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.27676027676027676,
|
|
"grad_norm": 1.1790950500190507,
|
|
"learning_rate": 1.5749128919860628e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12715528905391693,
|
|
"step": 340,
|
|
"valid_targets_mean": 1244.6,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 0.28083028083028083,
|
|
"grad_norm": 1.3870604631519712,
|
|
"learning_rate": 1.5981416957026714e-05,
|
|
"loss": 0.2683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12742307782173157,
|
|
"step": 345,
|
|
"valid_targets_mean": 1732.5,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 0.2849002849002849,
|
|
"grad_norm": 1.0033141410182251,
|
|
"learning_rate": 1.62137049941928e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12324304133653641,
|
|
"step": 350,
|
|
"valid_targets_mean": 1389.9,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 0.288970288970289,
|
|
"grad_norm": 1.0333697812051308,
|
|
"learning_rate": 1.6445993031358887e-05,
|
|
"loss": 0.2801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15279968082904816,
|
|
"step": 355,
|
|
"valid_targets_mean": 1593.2,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 0.29304029304029305,
|
|
"grad_norm": 0.915729241739608,
|
|
"learning_rate": 1.6678281068524973e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16900552809238434,
|
|
"step": 360,
|
|
"valid_targets_mean": 1926.6,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 0.29711029711029713,
|
|
"grad_norm": 1.0882207091700582,
|
|
"learning_rate": 1.691056910569106e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1652759164571762,
|
|
"step": 365,
|
|
"valid_targets_mean": 1953.1,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 0.3011803011803012,
|
|
"grad_norm": 0.9858438574283391,
|
|
"learning_rate": 1.7142857142857142e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12480325996875763,
|
|
"step": 370,
|
|
"valid_targets_mean": 1327.2,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 0.3052503052503053,
|
|
"grad_norm": 0.9782058898631699,
|
|
"learning_rate": 1.7375145180023228e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14944294095039368,
|
|
"step": 375,
|
|
"valid_targets_mean": 1717.9,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 0.3093203093203093,
|
|
"grad_norm": 1.2128421132256473,
|
|
"learning_rate": 1.7607433217189314e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11604733765125275,
|
|
"step": 380,
|
|
"valid_targets_mean": 1139.9,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.31339031339031337,
|
|
"grad_norm": 1.2503504626315993,
|
|
"learning_rate": 1.78397212543554e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13267236948013306,
|
|
"step": 385,
|
|
"valid_targets_mean": 1444.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"grad_norm": 1.0828865674968162,
|
|
"learning_rate": 1.807200929152149e-05,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09767653793096542,
|
|
"step": 390,
|
|
"valid_targets_mean": 1242.2,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 0.3215303215303215,
|
|
"grad_norm": 1.0500436494857386,
|
|
"learning_rate": 1.8304297328687576e-05,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15620818734169006,
|
|
"step": 395,
|
|
"valid_targets_mean": 1569.2,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 0.3256003256003256,
|
|
"grad_norm": 1.2368316222797435,
|
|
"learning_rate": 1.8536585365853663e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12553861737251282,
|
|
"step": 400,
|
|
"valid_targets_mean": 1539.8,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 0.32967032967032966,
|
|
"grad_norm": 0.970316022027174,
|
|
"learning_rate": 1.8768873403019745e-05,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14505316317081451,
|
|
"step": 405,
|
|
"valid_targets_mean": 1610.6,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.33374033374033374,
|
|
"grad_norm": 1.028981175641749,
|
|
"learning_rate": 1.900116144018583e-05,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09692301601171494,
|
|
"step": 410,
|
|
"valid_targets_mean": 1190.4,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 0.3378103378103378,
|
|
"grad_norm": 1.079548761208398,
|
|
"learning_rate": 1.9233449477351918e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1386578381061554,
|
|
"step": 415,
|
|
"valid_targets_mean": 1467.1,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 0.3418803418803419,
|
|
"grad_norm": 1.0316182713272737,
|
|
"learning_rate": 1.9465737514518004e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10993292927742004,
|
|
"step": 420,
|
|
"valid_targets_mean": 1350.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 0.34595034595034596,
|
|
"grad_norm": 1.1022256452208465,
|
|
"learning_rate": 1.969802555168409e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10702995955944061,
|
|
"step": 425,
|
|
"valid_targets_mean": 1283.5,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 0.35002035002035004,
|
|
"grad_norm": 0.9493395584058351,
|
|
"learning_rate": 1.9930313588850176e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10873595625162125,
|
|
"step": 430,
|
|
"valid_targets_mean": 1303.2,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 0.3540903540903541,
|
|
"grad_norm": 1.0733764593758766,
|
|
"learning_rate": 2.016260162601626e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11143694072961807,
|
|
"step": 435,
|
|
"valid_targets_mean": 1309.1,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 0.3581603581603582,
|
|
"grad_norm": 0.9098321906535183,
|
|
"learning_rate": 2.039488966318235e-05,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10678255558013916,
|
|
"step": 440,
|
|
"valid_targets_mean": 1397.6,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 0.36223036223036226,
|
|
"grad_norm": 0.9876007650935522,
|
|
"learning_rate": 2.0627177700348432e-05,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09560855478048325,
|
|
"step": 445,
|
|
"valid_targets_mean": 1187.8,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 0.3663003663003663,
|
|
"grad_norm": 0.9221536800328648,
|
|
"learning_rate": 2.085946573751452e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10400258004665375,
|
|
"step": 450,
|
|
"valid_targets_mean": 1547.0,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"grad_norm": 1.1802888394338766,
|
|
"learning_rate": 2.1091753774680604e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13287541270256042,
|
|
"step": 455,
|
|
"valid_targets_mean": 1704.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 0.3744403744403744,
|
|
"grad_norm": 0.9714075439297265,
|
|
"learning_rate": 2.132404181184669e-05,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09262743592262268,
|
|
"step": 460,
|
|
"valid_targets_mean": 1255.0,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.3785103785103785,
|
|
"grad_norm": 0.9864351204877326,
|
|
"learning_rate": 2.1556329849012777e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12507298588752747,
|
|
"step": 465,
|
|
"valid_targets_mean": 1550.8,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 0.38258038258038257,
|
|
"grad_norm": 1.0261001318904166,
|
|
"learning_rate": 2.1788617886178863e-05,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12351306527853012,
|
|
"step": 470,
|
|
"valid_targets_mean": 1362.0,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 0.38665038665038665,
|
|
"grad_norm": 0.9746834269517334,
|
|
"learning_rate": 2.202090592334495e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14458400011062622,
|
|
"step": 475,
|
|
"valid_targets_mean": 1666.8,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 0.3907203907203907,
|
|
"grad_norm": 1.0332993949409497,
|
|
"learning_rate": 2.2253193960511035e-05,
|
|
"loss": 0.243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10632194578647614,
|
|
"step": 480,
|
|
"valid_targets_mean": 1319.6,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 0.3947903947903948,
|
|
"grad_norm": 0.9844297761113842,
|
|
"learning_rate": 2.2485481997677125e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14072856307029724,
|
|
"step": 485,
|
|
"valid_targets_mean": 1727.9,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 0.39886039886039887,
|
|
"grad_norm": 0.9685951678785188,
|
|
"learning_rate": 2.2717770034843208e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13506445288658142,
|
|
"step": 490,
|
|
"valid_targets_mean": 1643.2,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.40293040293040294,
|
|
"grad_norm": 1.0189730102784529,
|
|
"learning_rate": 2.295005807200929e-05,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11292707920074463,
|
|
"step": 495,
|
|
"valid_targets_mean": 1478.5,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.407000407000407,
|
|
"grad_norm": 0.8788626334372063,
|
|
"learning_rate": 2.318234610917538e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11425843089818954,
|
|
"step": 500,
|
|
"valid_targets_mean": 1522.6,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 0.4110704110704111,
|
|
"grad_norm": 0.966805238471609,
|
|
"learning_rate": 2.3414634146341463e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12144134938716888,
|
|
"step": 505,
|
|
"valid_targets_mean": 1589.0,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.41514041514041516,
|
|
"grad_norm": 0.9177681985342081,
|
|
"learning_rate": 2.3646922183507553e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12525641918182373,
|
|
"step": 510,
|
|
"valid_targets_mean": 1574.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 0.4192104192104192,
|
|
"grad_norm": 1.0766725696353001,
|
|
"learning_rate": 2.3879210220673635e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12393151223659515,
|
|
"step": 515,
|
|
"valid_targets_mean": 1422.0,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"grad_norm": 1.2191089483428443,
|
|
"learning_rate": 2.4111498257839725e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09133152663707733,
|
|
"step": 520,
|
|
"valid_targets_mean": 1150.6,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 0.42735042735042733,
|
|
"grad_norm": 0.9898911682614888,
|
|
"learning_rate": 2.4343786295005808e-05,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13438482582569122,
|
|
"step": 525,
|
|
"valid_targets_mean": 1684.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 0.4314204314204314,
|
|
"grad_norm": 1.260093754517711,
|
|
"learning_rate": 2.4576074332171894e-05,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14740093052387238,
|
|
"step": 530,
|
|
"valid_targets_mean": 1937.1,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.4354904354904355,
|
|
"grad_norm": 0.9354629606274133,
|
|
"learning_rate": 2.480836236933798e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13996005058288574,
|
|
"step": 535,
|
|
"valid_targets_mean": 1842.2,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 0.43956043956043955,
|
|
"grad_norm": 0.9747857839014156,
|
|
"learning_rate": 2.5040650406504066e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14165043830871582,
|
|
"step": 540,
|
|
"valid_targets_mean": 1637.6,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 0.4436304436304436,
|
|
"grad_norm": 0.9131689808991684,
|
|
"learning_rate": 2.5272938443670153e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12118691951036453,
|
|
"step": 545,
|
|
"valid_targets_mean": 1969.9,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 0.4477004477004477,
|
|
"grad_norm": 1.119476251101989,
|
|
"learning_rate": 2.550522648083624e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11075526475906372,
|
|
"step": 550,
|
|
"valid_targets_mean": 1432.1,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 0.4517704517704518,
|
|
"grad_norm": 0.9872821443353325,
|
|
"learning_rate": 2.5737514518002325e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334969699382782,
|
|
"step": 555,
|
|
"valid_targets_mean": 1695.6,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 0.45584045584045585,
|
|
"grad_norm": 1.1068786822143657,
|
|
"learning_rate": 2.596980255516841e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12975133955478668,
|
|
"step": 560,
|
|
"valid_targets_mean": 1524.5,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 0.4599104599104599,
|
|
"grad_norm": 1.006903711549231,
|
|
"learning_rate": 2.6202090592334494e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10383662581443787,
|
|
"step": 565,
|
|
"valid_targets_mean": 1289.2,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 0.463980463980464,
|
|
"grad_norm": 1.0929411309492179,
|
|
"learning_rate": 2.6434378629500584e-05,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11088858544826508,
|
|
"step": 570,
|
|
"valid_targets_mean": 1519.5,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 0.46805046805046807,
|
|
"grad_norm": 1.038541955016419,
|
|
"learning_rate": 2.6666666666666667e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12774452567100525,
|
|
"step": 575,
|
|
"valid_targets_mean": 1731.4,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 0.47212047212047215,
|
|
"grad_norm": 1.012572853810221,
|
|
"learning_rate": 2.6898954703832756e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12323933839797974,
|
|
"step": 580,
|
|
"valid_targets_mean": 1486.1,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"grad_norm": 0.9487437622651357,
|
|
"learning_rate": 2.713124274099884e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13745994865894318,
|
|
"step": 585,
|
|
"valid_targets_mean": 1549.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 0.48026048026048024,
|
|
"grad_norm": 0.9664417051166028,
|
|
"learning_rate": 2.736353077816493e-05,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11426427215337753,
|
|
"step": 590,
|
|
"valid_targets_mean": 1656.0,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 0.4843304843304843,
|
|
"grad_norm": 0.9917889070000425,
|
|
"learning_rate": 2.759581881533101e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.108737051486969,
|
|
"step": 595,
|
|
"valid_targets_mean": 1368.8,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 0.4884004884004884,
|
|
"grad_norm": 0.9927611626798195,
|
|
"learning_rate": 2.7828106852497098e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1328611820936203,
|
|
"step": 600,
|
|
"valid_targets_mean": 1765.8,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 0.49247049247049246,
|
|
"grad_norm": 0.9537846739982789,
|
|
"learning_rate": 2.8060394889663184e-05,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12659400701522827,
|
|
"step": 605,
|
|
"valid_targets_mean": 1586.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 0.49654049654049653,
|
|
"grad_norm": 1.0425594316679838,
|
|
"learning_rate": 2.829268292682927e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11810390651226044,
|
|
"step": 610,
|
|
"valid_targets_mean": 1336.9,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 0.5006105006105006,
|
|
"grad_norm": 0.9994638757290513,
|
|
"learning_rate": 2.8524970963995356e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07577566802501678,
|
|
"step": 615,
|
|
"valid_targets_mean": 1097.1,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 0.5046805046805046,
|
|
"grad_norm": 0.9987546354296077,
|
|
"learning_rate": 2.8757259001161443e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.095686174929142,
|
|
"step": 620,
|
|
"valid_targets_mean": 1484.9,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 0.5087505087505088,
|
|
"grad_norm": 0.9799026275534664,
|
|
"learning_rate": 2.898954703832753e-05,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12912192940711975,
|
|
"step": 625,
|
|
"valid_targets_mean": 1757.0,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 0.5128205128205128,
|
|
"grad_norm": 1.1131113884891992,
|
|
"learning_rate": 2.9221835075493615e-05,
|
|
"loss": 0.2488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1408751755952835,
|
|
"step": 630,
|
|
"valid_targets_mean": 1476.1,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 0.5168905168905169,
|
|
"grad_norm": 1.380784458630799,
|
|
"learning_rate": 2.9454123112659698e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10641973465681076,
|
|
"step": 635,
|
|
"valid_targets_mean": 1230.9,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 0.5209605209605209,
|
|
"grad_norm": 0.9088580356736188,
|
|
"learning_rate": 2.9686411149825787e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12748828530311584,
|
|
"step": 640,
|
|
"valid_targets_mean": 1671.0,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 0.525030525030525,
|
|
"grad_norm": 1.164121895920127,
|
|
"learning_rate": 2.991869918699187e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11815395206212997,
|
|
"step": 645,
|
|
"valid_targets_mean": 1411.0,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"grad_norm": 0.8907723008997975,
|
|
"learning_rate": 3.015098722415796e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10028189420700073,
|
|
"step": 650,
|
|
"valid_targets_mean": 1298.6,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 0.5331705331705332,
|
|
"grad_norm": 1.0360817268903895,
|
|
"learning_rate": 3.0383275261324043e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10057306289672852,
|
|
"step": 655,
|
|
"valid_targets_mean": 1118.9,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 0.5372405372405372,
|
|
"grad_norm": 1.0170055972555234,
|
|
"learning_rate": 3.061556329849013e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1395065188407898,
|
|
"step": 660,
|
|
"valid_targets_mean": 1905.9,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 0.5413105413105413,
|
|
"grad_norm": 0.9835868888525366,
|
|
"learning_rate": 3.084785133565622e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1703340709209442,
|
|
"step": 665,
|
|
"valid_targets_mean": 1636.0,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 0.5453805453805454,
|
|
"grad_norm": 1.140427074478225,
|
|
"learning_rate": 3.10801393728223e-05,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10298797488212585,
|
|
"step": 670,
|
|
"valid_targets_mean": 1457.2,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 0.5494505494505495,
|
|
"grad_norm": 0.9191202259828434,
|
|
"learning_rate": 3.131242740998839e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12798643112182617,
|
|
"step": 675,
|
|
"valid_targets_mean": 1674.4,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 0.5535205535205535,
|
|
"grad_norm": 0.7293467596139723,
|
|
"learning_rate": 3.154471544715447e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09391629695892334,
|
|
"step": 680,
|
|
"valid_targets_mean": 1697.1,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 0.5575905575905576,
|
|
"grad_norm": 0.904279770364778,
|
|
"learning_rate": 3.177700348432056e-05,
|
|
"loss": 0.2122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09674836695194244,
|
|
"step": 685,
|
|
"valid_targets_mean": 1580.9,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 0.5616605616605617,
|
|
"grad_norm": 0.914638142832871,
|
|
"learning_rate": 3.200929152148664e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12186380475759506,
|
|
"step": 690,
|
|
"valid_targets_mean": 1655.1,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 0.5657305657305657,
|
|
"grad_norm": 0.980002165464352,
|
|
"learning_rate": 3.2241579558652736e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11551667749881744,
|
|
"step": 695,
|
|
"valid_targets_mean": 1565.8,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 0.5698005698005698,
|
|
"grad_norm": 1.0160528027914253,
|
|
"learning_rate": 3.2473867595818815e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11251623183488846,
|
|
"step": 700,
|
|
"valid_targets_mean": 1564.6,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 0.5738705738705738,
|
|
"grad_norm": 0.874400875096317,
|
|
"learning_rate": 3.270615563298491e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10493709146976471,
|
|
"step": 705,
|
|
"valid_targets_mean": 1540.5,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 0.577940577940578,
|
|
"grad_norm": 1.0055632341013279,
|
|
"learning_rate": 3.293844367015099e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10630016028881073,
|
|
"step": 710,
|
|
"valid_targets_mean": 1268.6,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"grad_norm": 0.9150089746862734,
|
|
"learning_rate": 3.3170731707317074e-05,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08800828456878662,
|
|
"step": 715,
|
|
"valid_targets_mean": 1232.5,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 0.5860805860805861,
|
|
"grad_norm": 1.0300200490056817,
|
|
"learning_rate": 3.340301974448316e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12390526384115219,
|
|
"step": 720,
|
|
"valid_targets_mean": 3032.8,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 0.5901505901505901,
|
|
"grad_norm": 0.7910194368969308,
|
|
"learning_rate": 3.3635307781649246e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10102251172065735,
|
|
"step": 725,
|
|
"valid_targets_mean": 2553.1,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 0.5942205942205943,
|
|
"grad_norm": 0.6300733315613507,
|
|
"learning_rate": 3.386759581881533e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08729104697704315,
|
|
"step": 730,
|
|
"valid_targets_mean": 2212.5,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 0.5982905982905983,
|
|
"grad_norm": 0.5343519490234325,
|
|
"learning_rate": 3.409988385598142e-05,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09156970679759979,
|
|
"step": 735,
|
|
"valid_targets_mean": 3619.4,
|
|
"valid_targets_min": 2479
|
|
},
|
|
{
|
|
"epoch": 0.6023606023606024,
|
|
"grad_norm": 0.5149220005414784,
|
|
"learning_rate": 3.4332171893147505e-05,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058362703770399094,
|
|
"step": 740,
|
|
"valid_targets_mean": 3030.5,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 0.6064306064306064,
|
|
"grad_norm": 0.548518746299089,
|
|
"learning_rate": 3.456445993031359e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08780361711978912,
|
|
"step": 745,
|
|
"valid_targets_mean": 2922.1,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 0.6105006105006106,
|
|
"grad_norm": 0.9179436846133354,
|
|
"learning_rate": 3.479674796747968e-05,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17820069193840027,
|
|
"step": 750,
|
|
"valid_targets_mean": 1991.0,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.6145706145706146,
|
|
"grad_norm": 0.49726406345055274,
|
|
"learning_rate": 3.5029036004645764e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0863165408372879,
|
|
"step": 755,
|
|
"valid_targets_mean": 3251.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 0.6186406186406186,
|
|
"grad_norm": 0.5932447846887592,
|
|
"learning_rate": 3.526132404181185e-05,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08991111069917679,
|
|
"step": 760,
|
|
"valid_targets_mean": 2681.1,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 0.6227106227106227,
|
|
"grad_norm": 0.6378496859580239,
|
|
"learning_rate": 3.5493612078977936e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10263900458812714,
|
|
"step": 765,
|
|
"valid_targets_mean": 3618.6,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 0.6267806267806267,
|
|
"grad_norm": 0.48515918366222455,
|
|
"learning_rate": 3.572590011614402e-05,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07100746035575867,
|
|
"step": 770,
|
|
"valid_targets_mean": 2980.5,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 0.6308506308506309,
|
|
"grad_norm": 0.5735235317908716,
|
|
"learning_rate": 3.595818815331011e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08798329532146454,
|
|
"step": 775,
|
|
"valid_targets_mean": 2610.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"grad_norm": 0.456776675223125,
|
|
"learning_rate": 3.6190476190476195e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07948589324951172,
|
|
"step": 780,
|
|
"valid_targets_mean": 3132.1,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 0.638990638990639,
|
|
"grad_norm": 0.9011326371623981,
|
|
"learning_rate": 3.642276422764228e-05,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592279314994812,
|
|
"step": 785,
|
|
"valid_targets_mean": 2020.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.643060643060643,
|
|
"grad_norm": 0.4732929096663725,
|
|
"learning_rate": 3.665505226480837e-05,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0800432413816452,
|
|
"step": 790,
|
|
"valid_targets_mean": 3713.8,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 0.6471306471306472,
|
|
"grad_norm": 0.526624007045652,
|
|
"learning_rate": 3.688734030197445e-05,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09772545099258423,
|
|
"step": 795,
|
|
"valid_targets_mean": 3191.9,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 0.6512006512006512,
|
|
"grad_norm": 0.559506972012892,
|
|
"learning_rate": 3.711962833914054e-05,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09087392687797546,
|
|
"step": 800,
|
|
"valid_targets_mean": 3721.0,
|
|
"valid_targets_min": 3281
|
|
},
|
|
{
|
|
"epoch": 0.6552706552706553,
|
|
"grad_norm": 0.556479447905555,
|
|
"learning_rate": 3.7351916376306626e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07183747738599777,
|
|
"step": 805,
|
|
"valid_targets_mean": 3360.4,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 0.6593406593406593,
|
|
"grad_norm": 0.47771197862438014,
|
|
"learning_rate": 3.758420441347271e-05,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06540735065937042,
|
|
"step": 810,
|
|
"valid_targets_mean": 3223.9,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 0.6634106634106635,
|
|
"grad_norm": 0.6194871689055971,
|
|
"learning_rate": 3.78164924506388e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09501364827156067,
|
|
"step": 815,
|
|
"valid_targets_mean": 2352.1,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.6674806674806675,
|
|
"grad_norm": 0.5709548511328791,
|
|
"learning_rate": 3.804878048780488e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07070401310920715,
|
|
"step": 820,
|
|
"valid_targets_mean": 2612.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 0.6715506715506715,
|
|
"grad_norm": 0.6298911863782369,
|
|
"learning_rate": 3.828106852497097e-05,
|
|
"loss": 0.1973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13569210469722748,
|
|
"step": 825,
|
|
"valid_targets_mean": 2791.8,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 0.6756206756206756,
|
|
"grad_norm": 0.41285244882235234,
|
|
"learning_rate": 3.851335656213705e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06671032309532166,
|
|
"step": 830,
|
|
"valid_targets_mean": 3704.5,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 0.6796906796906796,
|
|
"grad_norm": 0.5540522932015873,
|
|
"learning_rate": 3.874564459930314e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05759810283780098,
|
|
"step": 835,
|
|
"valid_targets_mean": 3204.0,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 0.6837606837606838,
|
|
"grad_norm": 0.5067086140456947,
|
|
"learning_rate": 3.897793263646922e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07755052298307419,
|
|
"step": 840,
|
|
"valid_targets_mean": 3402.9,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"grad_norm": 0.4997196495634557,
|
|
"learning_rate": 3.9210220673635316e-05,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06302007287740707,
|
|
"step": 845,
|
|
"valid_targets_mean": 2979.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 0.6919006919006919,
|
|
"grad_norm": 0.6481848237866301,
|
|
"learning_rate": 3.9442508710801395e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08520719408988953,
|
|
"step": 850,
|
|
"valid_targets_mean": 1296.9,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 0.6959706959706959,
|
|
"grad_norm": 0.6442175252045691,
|
|
"learning_rate": 3.967479674796748e-05,
|
|
"loss": 0.3765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15752309560775757,
|
|
"step": 855,
|
|
"valid_targets_mean": 3126.9,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 0.7000407000407001,
|
|
"grad_norm": 0.6674141205524147,
|
|
"learning_rate": 3.990708478513357e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07941246777772903,
|
|
"step": 860,
|
|
"valid_targets_mean": 2456.6,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 0.7041107041107041,
|
|
"grad_norm": 0.9275941441191697,
|
|
"learning_rate": 3.999998518042097e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09268821775913239,
|
|
"step": 865,
|
|
"valid_targets_mean": 1027.8,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 0.7081807081807082,
|
|
"grad_norm": 0.5285806533784231,
|
|
"learning_rate": 3.99998946164064e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09387123584747314,
|
|
"step": 870,
|
|
"valid_targets_mean": 4479.8,
|
|
"valid_targets_min": 2772
|
|
},
|
|
{
|
|
"epoch": 0.7122507122507122,
|
|
"grad_norm": 0.37957034276853,
|
|
"learning_rate": 3.99997217218491e-05,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03582998737692833,
|
|
"step": 875,
|
|
"valid_targets_mean": 4843.0,
|
|
"valid_targets_min": 3778
|
|
},
|
|
{
|
|
"epoch": 0.7163207163207164,
|
|
"grad_norm": 0.41394530709191624,
|
|
"learning_rate": 3.999946649746077e-05,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07335696369409561,
|
|
"step": 880,
|
|
"valid_targets_mean": 4306.0,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 0.7203907203907204,
|
|
"grad_norm": 0.4646047347434619,
|
|
"learning_rate": 3.999912894429209e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09124352037906647,
|
|
"step": 885,
|
|
"valid_targets_mean": 3579.5,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 0.7244607244607245,
|
|
"grad_norm": 0.44725991333845716,
|
|
"learning_rate": 3.999870906373257e-05,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07628832757472992,
|
|
"step": 890,
|
|
"valid_targets_mean": 2909.1,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 0.7285307285307285,
|
|
"grad_norm": 0.5172719098407891,
|
|
"learning_rate": 3.999820685751071e-05,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057969748973846436,
|
|
"step": 895,
|
|
"valid_targets_mean": 2008.5,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 0.7326007326007326,
|
|
"grad_norm": 0.5824331819625881,
|
|
"learning_rate": 3.9997622327693844e-05,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06565313041210175,
|
|
"step": 900,
|
|
"valid_targets_mean": 2598.2,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 0.7366707366707367,
|
|
"grad_norm": 0.3535344374037899,
|
|
"learning_rate": 3.999695547668823e-05,
|
|
"loss": 0.1257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06244448944926262,
|
|
"step": 905,
|
|
"valid_targets_mean": 4031.8,
|
|
"valid_targets_min": 3391
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"grad_norm": 0.6239344756497878,
|
|
"learning_rate": 3.9996206307238974e-05,
|
|
"loss": 0.1553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15695582330226898,
|
|
"step": 910,
|
|
"valid_targets_mean": 1909.9,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 0.7448107448107448,
|
|
"grad_norm": 0.5990548788322103,
|
|
"learning_rate": 3.9995374822430085e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06945429742336273,
|
|
"step": 915,
|
|
"valid_targets_mean": 1908.9,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 0.7488807488807488,
|
|
"grad_norm": 0.7675363094142649,
|
|
"learning_rate": 3.999446102568441e-05,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06405394524335861,
|
|
"step": 920,
|
|
"valid_targets_mean": 824.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 0.752950752950753,
|
|
"grad_norm": 0.485700948966615,
|
|
"learning_rate": 3.9993464920763625e-05,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059826068580150604,
|
|
"step": 925,
|
|
"valid_targets_mean": 3181.1,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 0.757020757020757,
|
|
"grad_norm": 0.5240818086557136,
|
|
"learning_rate": 3.9992386511768256e-05,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0472230464220047,
|
|
"step": 930,
|
|
"valid_targets_mean": 2581.4,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.7610907610907611,
|
|
"grad_norm": 0.45360997168418743,
|
|
"learning_rate": 3.999122580313763e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06742153316736221,
|
|
"step": 935,
|
|
"valid_targets_mean": 3461.2,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 0.7651607651607651,
|
|
"grad_norm": 0.44183845519885645,
|
|
"learning_rate": 3.998998279964985e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06665977835655212,
|
|
"step": 940,
|
|
"valid_targets_mean": 3382.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 0.3888042558436287,
|
|
"learning_rate": 3.99886575064218e-05,
|
|
"loss": 0.1605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04930161312222481,
|
|
"step": 945,
|
|
"valid_targets_mean": 3966.6,
|
|
"valid_targets_min": 3279
|
|
},
|
|
{
|
|
"epoch": 0.7733007733007733,
|
|
"grad_norm": 0.781091504085825,
|
|
"learning_rate": 3.9987249928909134e-05,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06562864780426025,
|
|
"step": 950,
|
|
"valid_targets_mean": 792.1,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 0.7773707773707774,
|
|
"grad_norm": 0.5459088120181143,
|
|
"learning_rate": 3.998576007290619e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07265473157167435,
|
|
"step": 955,
|
|
"valid_targets_mean": 2227.4,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 0.7814407814407814,
|
|
"grad_norm": 0.5220107941150061,
|
|
"learning_rate": 3.998418794454604e-05,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06951494514942169,
|
|
"step": 960,
|
|
"valid_targets_mean": 3105.6,
|
|
"valid_targets_min": 2621
|
|
},
|
|
{
|
|
"epoch": 0.7855107855107855,
|
|
"grad_norm": 0.8069948423800993,
|
|
"learning_rate": 3.998253355030043e-05,
|
|
"loss": 0.1811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08019239455461502,
|
|
"step": 965,
|
|
"valid_targets_mean": 1028.0,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 0.7895807895807896,
|
|
"grad_norm": 0.7429706122743682,
|
|
"learning_rate": 3.9980796896979754e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08751674741506577,
|
|
"step": 970,
|
|
"valid_targets_mean": 1574.2,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"grad_norm": 0.39203442846152786,
|
|
"learning_rate": 3.997897799173304e-05,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08704735338687897,
|
|
"step": 975,
|
|
"valid_targets_mean": 4135.0,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 0.7977207977207977,
|
|
"grad_norm": 0.5960348844477907,
|
|
"learning_rate": 3.9977076842047875e-05,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11054828017950058,
|
|
"step": 980,
|
|
"valid_targets_mean": 3060.8,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 0.8017908017908018,
|
|
"grad_norm": 0.4111968138383219,
|
|
"learning_rate": 3.997509345575045e-05,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05692315846681595,
|
|
"step": 985,
|
|
"valid_targets_mean": 3413.8,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.8058608058608059,
|
|
"grad_norm": 0.4256293187737857,
|
|
"learning_rate": 3.997302784100548e-05,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07128673046827316,
|
|
"step": 990,
|
|
"valid_targets_mean": 4560.9,
|
|
"valid_targets_min": 2613
|
|
},
|
|
{
|
|
"epoch": 0.8099308099308099,
|
|
"grad_norm": 0.5345662568540609,
|
|
"learning_rate": 3.9970880006316154e-05,
|
|
"loss": 0.13,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08130151033401489,
|
|
"step": 995,
|
|
"valid_targets_mean": 2847.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.814000814000814,
|
|
"grad_norm": 0.4593774947539876,
|
|
"learning_rate": 3.996864996052416e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06226513907313347,
|
|
"step": 1000,
|
|
"valid_targets_mean": 2571.5,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 0.818070818070818,
|
|
"grad_norm": 0.4128348658429294,
|
|
"learning_rate": 3.996633771280956e-05,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05955663323402405,
|
|
"step": 1005,
|
|
"valid_targets_mean": 3709.6,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 0.8221408221408222,
|
|
"grad_norm": 0.4342247430230778,
|
|
"learning_rate": 3.996394327269085e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07043300569057465,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3559.0,
|
|
"valid_targets_min": 2588
|
|
},
|
|
{
|
|
"epoch": 0.8262108262108262,
|
|
"grad_norm": 0.6288265395421371,
|
|
"learning_rate": 3.996146665002486e-05,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08794619888067245,
|
|
"step": 1015,
|
|
"valid_targets_mean": 913.9,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.8302808302808303,
|
|
"grad_norm": 0.42302660971720984,
|
|
"learning_rate": 3.995890785500673e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061350464820861816,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3301.9,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 0.8343508343508343,
|
|
"grad_norm": 0.3202899133836535,
|
|
"learning_rate": 3.995626689816986e-05,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05292180925607681,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3874.9,
|
|
"valid_targets_min": 3044
|
|
},
|
|
{
|
|
"epoch": 0.8384208384208384,
|
|
"grad_norm": 0.4395087698128267,
|
|
"learning_rate": 3.9953543790385885e-05,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06124342978000641,
|
|
"step": 1030,
|
|
"valid_targets_mean": 2167.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 0.8424908424908425,
|
|
"grad_norm": 0.45572335142795467,
|
|
"learning_rate": 3.99507385428646e-05,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0583365373313427,
|
|
"step": 1035,
|
|
"valid_targets_mean": 2397.0,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"grad_norm": 0.6042755009543974,
|
|
"learning_rate": 3.994785116715395e-05,
|
|
"loss": 0.1498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08790750801563263,
|
|
"step": 1040,
|
|
"valid_targets_mean": 2340.9,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 0.8506308506308506,
|
|
"grad_norm": 0.4360163793065081,
|
|
"learning_rate": 3.9944881675139956e-05,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052534107118844986,
|
|
"step": 1045,
|
|
"valid_targets_mean": 3017.2,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 0.8547008547008547,
|
|
"grad_norm": 0.7915374414247746,
|
|
"learning_rate": 3.9941830079046686e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10059764981269836,
|
|
"step": 1050,
|
|
"valid_targets_mean": 1405.9,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 0.8587708587708588,
|
|
"grad_norm": 0.5768502808481547,
|
|
"learning_rate": 3.9938696391436165e-05,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08434294909238815,
|
|
"step": 1055,
|
|
"valid_targets_mean": 2249.2,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 0.8628408628408628,
|
|
"grad_norm": 0.44611255063891225,
|
|
"learning_rate": 3.993548062520839e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06437411904335022,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2900.5,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 0.8669108669108669,
|
|
"grad_norm": 0.40352294092211805,
|
|
"learning_rate": 3.9932182793601216e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047206662595272064,
|
|
"step": 1065,
|
|
"valid_targets_mean": 2432.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.870980870980871,
|
|
"grad_norm": 0.48655652159394597,
|
|
"learning_rate": 3.992880291019032e-05,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04721086844801903,
|
|
"step": 1070,
|
|
"valid_targets_mean": 1303.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 0.8750508750508751,
|
|
"grad_norm": 0.5368186314445392,
|
|
"learning_rate": 3.992534098888916e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06309214979410172,
|
|
"step": 1075,
|
|
"valid_targets_mean": 1762.1,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 0.8791208791208791,
|
|
"grad_norm": 0.3615301144651535,
|
|
"learning_rate": 3.992179704394891e-05,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061992619186639786,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3301.8,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 0.8831908831908832,
|
|
"grad_norm": 0.47684964360761173,
|
|
"learning_rate": 3.991817108995838e-05,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053332049399614334,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2274.2,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 0.8872608872608873,
|
|
"grad_norm": 0.3277156842140217,
|
|
"learning_rate": 3.9914463141844e-05,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058171793818473816,
|
|
"step": 1090,
|
|
"valid_targets_mean": 5103.1,
|
|
"valid_targets_min": 3653
|
|
},
|
|
{
|
|
"epoch": 0.8913308913308914,
|
|
"grad_norm": 0.37913532995319227,
|
|
"learning_rate": 3.991067321486971e-05,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04157901927828789,
|
|
"step": 1095,
|
|
"valid_targets_mean": 2366.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.8954008954008954,
|
|
"grad_norm": 0.44728273843652605,
|
|
"learning_rate": 3.990680132463694e-05,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06624357402324677,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3352.1,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"grad_norm": 0.3699276991369879,
|
|
"learning_rate": 3.99028474870845e-05,
|
|
"loss": 0.1372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06432875990867615,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4009.6,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 0.9035409035409036,
|
|
"grad_norm": 0.559438917402308,
|
|
"learning_rate": 3.989881171848857e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19042916595935822,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2811.9,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 0.9076109076109076,
|
|
"grad_norm": 0.4455049575824879,
|
|
"learning_rate": 3.989469403546258e-05,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07045942544937134,
|
|
"step": 1115,
|
|
"valid_targets_mean": 2669.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 0.9116809116809117,
|
|
"grad_norm": 0.35606075856961944,
|
|
"learning_rate": 3.989049445495718e-05,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05114404112100601,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3261.9,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 0.9157509157509157,
|
|
"grad_norm": 0.37757857329286704,
|
|
"learning_rate": 3.9886212994260146e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04709208756685257,
|
|
"step": 1125,
|
|
"valid_targets_mean": 3287.2,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 0.9198209198209198,
|
|
"grad_norm": 0.40732084354821907,
|
|
"learning_rate": 3.98818496709963e-05,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06523814797401428,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2858.9,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 0.9238909238909239,
|
|
"grad_norm": 0.7103621320340062,
|
|
"learning_rate": 3.987740450312751e-05,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08393198251724243,
|
|
"step": 1135,
|
|
"valid_targets_mean": 2531.5,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 0.927960927960928,
|
|
"grad_norm": 0.38515151498760647,
|
|
"learning_rate": 3.9872877508952506e-05,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04183115065097809,
|
|
"step": 1140,
|
|
"valid_targets_mean": 2692.0,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.932030932030932,
|
|
"grad_norm": 0.6540079458171796,
|
|
"learning_rate": 3.9868268707106884e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13763931393623352,
|
|
"step": 1145,
|
|
"valid_targets_mean": 1831.5,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 0.9361009361009361,
|
|
"grad_norm": 0.6012472482945602,
|
|
"learning_rate": 3.9863578116563e-05,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06246567517518997,
|
|
"step": 1150,
|
|
"valid_targets_mean": 1300.1,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 0.9401709401709402,
|
|
"grad_norm": 0.44241303636120555,
|
|
"learning_rate": 3.9858805756629906e-05,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057974182069301605,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3615.9,
|
|
"valid_targets_min": 3193
|
|
},
|
|
{
|
|
"epoch": 0.9442409442409443,
|
|
"grad_norm": 0.3903708665427787,
|
|
"learning_rate": 3.985395164695324e-05,
|
|
"loss": 0.1287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05488427355885506,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3944.1,
|
|
"valid_targets_min": 3433
|
|
},
|
|
{
|
|
"epoch": 0.9483109483109483,
|
|
"grad_norm": 0.39685042835856477,
|
|
"learning_rate": 3.98490158075152e-05,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05394378677010536,
|
|
"step": 1165,
|
|
"valid_targets_mean": 2874.5,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"grad_norm": 0.4929139704427634,
|
|
"learning_rate": 3.9843998258634397e-05,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0530795156955719,
|
|
"step": 1170,
|
|
"valid_targets_mean": 2262.1,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 0.9564509564509565,
|
|
"grad_norm": 0.7315882847416906,
|
|
"learning_rate": 3.983889902096582e-05,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08292382210493088,
|
|
"step": 1175,
|
|
"valid_targets_mean": 1222.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 0.9605209605209605,
|
|
"grad_norm": 0.4497078672225934,
|
|
"learning_rate": 3.9833718115500735e-05,
|
|
"loss": 0.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057786524295806885,
|
|
"step": 1180,
|
|
"valid_targets_mean": 2184.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 0.9645909645909646,
|
|
"grad_norm": 0.37472458302150724,
|
|
"learning_rate": 3.9828455563566585e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058492764830589294,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3445.0,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 0.9686609686609686,
|
|
"grad_norm": 0.49178605406882114,
|
|
"learning_rate": 3.982311138682693e-05,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07257603108882904,
|
|
"step": 1190,
|
|
"valid_targets_mean": 2760.0,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 0.9727309727309728,
|
|
"grad_norm": 0.30926831647178493,
|
|
"learning_rate": 3.981768560728132e-05,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06540581583976746,
|
|
"step": 1195,
|
|
"valid_targets_mean": 5550.0,
|
|
"valid_targets_min": 3997
|
|
},
|
|
{
|
|
"epoch": 0.9768009768009768,
|
|
"grad_norm": 0.3480919181704012,
|
|
"learning_rate": 3.981217824726525e-05,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035894401371479034,
|
|
"step": 1200,
|
|
"valid_targets_mean": 2142.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 0.9808709808709809,
|
|
"grad_norm": 0.6953856332846773,
|
|
"learning_rate": 3.9806589329450045e-05,
|
|
"loss": 0.1299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09473567456007004,
|
|
"step": 1205,
|
|
"valid_targets_mean": 1705.0,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 0.9849409849409849,
|
|
"grad_norm": 0.6447718942126139,
|
|
"learning_rate": 3.980091887684274e-05,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11416690051555634,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2792.0,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 0.989010989010989,
|
|
"grad_norm": 0.44493565718387146,
|
|
"learning_rate": 3.979516691278605e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05794357508420944,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3489.0,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 0.9930809930809931,
|
|
"grad_norm": 0.3554690075057992,
|
|
"learning_rate": 3.9789333460958195e-05,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06033288687467575,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3850.4,
|
|
"valid_targets_min": 2453
|
|
},
|
|
{
|
|
"epoch": 0.9971509971509972,
|
|
"grad_norm": 0.39798751935774657,
|
|
"learning_rate": 3.978341854537288e-05,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06392422318458557,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3932.9,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 1.0008140008140007,
|
|
"grad_norm": 0.5953058538336475,
|
|
"learning_rate": 3.977742219037914e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14234861731529236,
|
|
"step": 1230,
|
|
"valid_targets_mean": 8215.2,
|
|
"valid_targets_min": 6163
|
|
},
|
|
{
|
|
"epoch": 1.0048840048840049,
|
|
"grad_norm": 0.4684299696968115,
|
|
"learning_rate": 3.9771344420661265e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.144585520029068,
|
|
"step": 1235,
|
|
"valid_targets_mean": 8594.4,
|
|
"valid_targets_min": 6006
|
|
},
|
|
{
|
|
"epoch": 1.008954008954009,
|
|
"grad_norm": 0.6867925767436964,
|
|
"learning_rate": 3.9765185261238685e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256946623325348,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3578.4,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 1.0130240130240131,
|
|
"grad_norm": 0.42999406091039816,
|
|
"learning_rate": 3.9758944737465885e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13307125866413116,
|
|
"step": 1245,
|
|
"valid_targets_mean": 7019.9,
|
|
"valid_targets_min": 4985
|
|
},
|
|
{
|
|
"epoch": 1.017094017094017,
|
|
"grad_norm": 0.43330214352879454,
|
|
"learning_rate": 3.9752622875032275e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12875577807426453,
|
|
"step": 1250,
|
|
"valid_targets_mean": 6956.6,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 1.0211640211640212,
|
|
"grad_norm": 0.4137992292045322,
|
|
"learning_rate": 3.974621969996213e-05,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12030914425849915,
|
|
"step": 1255,
|
|
"valid_targets_mean": 7361.6,
|
|
"valid_targets_min": 4961
|
|
},
|
|
{
|
|
"epoch": 1.0252340252340253,
|
|
"grad_norm": 0.5022158686145597,
|
|
"learning_rate": 3.973973523861442e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15889611840248108,
|
|
"step": 1260,
|
|
"valid_targets_mean": 7058.5,
|
|
"valid_targets_min": 5985
|
|
},
|
|
{
|
|
"epoch": 1.0293040293040292,
|
|
"grad_norm": 0.39049657893691275,
|
|
"learning_rate": 3.973316951768275e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14278875291347504,
|
|
"step": 1265,
|
|
"valid_targets_mean": 7664.2,
|
|
"valid_targets_min": 5169
|
|
},
|
|
{
|
|
"epoch": 1.0333740333740333,
|
|
"grad_norm": 0.42504045855242684,
|
|
"learning_rate": 3.972652256419522e-05,
|
|
"loss": 0.2518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12719795107841492,
|
|
"step": 1270,
|
|
"valid_targets_mean": 7111.8,
|
|
"valid_targets_min": 5348
|
|
},
|
|
{
|
|
"epoch": 1.0374440374440375,
|
|
"grad_norm": 0.4952882040537086,
|
|
"learning_rate": 3.971979440551436e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11728663742542267,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4238.9,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 1.0415140415140416,
|
|
"grad_norm": 0.4181601879292703,
|
|
"learning_rate": 3.9712985069336955e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12817908823490143,
|
|
"step": 1280,
|
|
"valid_targets_mean": 7199.4,
|
|
"valid_targets_min": 4258
|
|
},
|
|
{
|
|
"epoch": 1.0455840455840455,
|
|
"grad_norm": 0.48722985192181184,
|
|
"learning_rate": 3.9706094583693954e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1204463317990303,
|
|
"step": 1285,
|
|
"valid_targets_mean": 7243.4,
|
|
"valid_targets_min": 5831
|
|
},
|
|
{
|
|
"epoch": 1.0496540496540496,
|
|
"grad_norm": 0.4108207703430436,
|
|
"learning_rate": 3.96991229769504e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1202164739370346,
|
|
"step": 1290,
|
|
"valid_targets_mean": 6950.6,
|
|
"valid_targets_min": 3862
|
|
},
|
|
{
|
|
"epoch": 1.0537240537240538,
|
|
"grad_norm": 0.40991192760617895,
|
|
"learning_rate": 3.969207027780524e-05,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293250173330307,
|
|
"step": 1295,
|
|
"valid_targets_mean": 6991.9,
|
|
"valid_targets_min": 5696
|
|
},
|
|
{
|
|
"epoch": 1.0577940577940579,
|
|
"grad_norm": 0.42826333297089153,
|
|
"learning_rate": 3.968493651529126e-05,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293313354253769,
|
|
"step": 1300,
|
|
"valid_targets_mean": 6963.5,
|
|
"valid_targets_min": 4947
|
|
},
|
|
{
|
|
"epoch": 1.0618640618640618,
|
|
"grad_norm": 0.39443414280580313,
|
|
"learning_rate": 3.967772171877494e-05,
|
|
"loss": 0.2361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11275345087051392,
|
|
"step": 1305,
|
|
"valid_targets_mean": 6436.9,
|
|
"valid_targets_min": 4988
|
|
},
|
|
{
|
|
"epoch": 1.065934065934066,
|
|
"grad_norm": 0.4373139454687315,
|
|
"learning_rate": 3.967042591795634e-05,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11951905488967896,
|
|
"step": 1310,
|
|
"valid_targets_mean": 7518.0,
|
|
"valid_targets_min": 5476
|
|
},
|
|
{
|
|
"epoch": 1.07000407000407,
|
|
"grad_norm": 0.38657017917234543,
|
|
"learning_rate": 3.966304914286898e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10981670022010803,
|
|
"step": 1315,
|
|
"valid_targets_mean": 7422.6,
|
|
"valid_targets_min": 5226
|
|
},
|
|
{
|
|
"epoch": 1.074074074074074,
|
|
"grad_norm": 0.46603622908971615,
|
|
"learning_rate": 3.965559142387972e-05,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15436352789402008,
|
|
"step": 1320,
|
|
"valid_targets_mean": 7080.0,
|
|
"valid_targets_min": 3753
|
|
},
|
|
{
|
|
"epoch": 1.078144078144078,
|
|
"grad_norm": 0.5089544183960755,
|
|
"learning_rate": 3.964805279168862e-05,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13544388115406036,
|
|
"step": 1325,
|
|
"valid_targets_mean": 6064.6,
|
|
"valid_targets_min": 5161
|
|
},
|
|
{
|
|
"epoch": 1.0822140822140822,
|
|
"grad_norm": 0.4213050484999416,
|
|
"learning_rate": 3.9640433277328824e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11462973058223724,
|
|
"step": 1330,
|
|
"valid_targets_mean": 7388.1,
|
|
"valid_targets_min": 5795
|
|
},
|
|
{
|
|
"epoch": 1.0862840862840863,
|
|
"grad_norm": 0.3819112265649167,
|
|
"learning_rate": 3.963273291216645e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10795051604509354,
|
|
"step": 1335,
|
|
"valid_targets_mean": 7696.0,
|
|
"valid_targets_min": 4658
|
|
},
|
|
{
|
|
"epoch": 1.0903540903540903,
|
|
"grad_norm": 0.4114432948340316,
|
|
"learning_rate": 3.9624951727900406e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12693330645561218,
|
|
"step": 1340,
|
|
"valid_targets_mean": 7799.1,
|
|
"valid_targets_min": 5508
|
|
},
|
|
{
|
|
"epoch": 1.0944240944240944,
|
|
"grad_norm": 0.4056233219924477,
|
|
"learning_rate": 3.9617089756562334e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11809612810611725,
|
|
"step": 1345,
|
|
"valid_targets_mean": 7341.6,
|
|
"valid_targets_min": 6454
|
|
},
|
|
{
|
|
"epoch": 1.0984940984940985,
|
|
"grad_norm": 0.4188231753878829,
|
|
"learning_rate": 3.960914703051642e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10444575548171997,
|
|
"step": 1350,
|
|
"valid_targets_mean": 6818.0,
|
|
"valid_targets_min": 4392
|
|
},
|
|
{
|
|
"epoch": 1.1025641025641026,
|
|
"grad_norm": 0.3886197018701831,
|
|
"learning_rate": 3.960112358245927e-05,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10350356996059418,
|
|
"step": 1355,
|
|
"valid_targets_mean": 7857.6,
|
|
"valid_targets_min": 5394
|
|
},
|
|
{
|
|
"epoch": 1.1066341066341066,
|
|
"grad_norm": 0.35882023777430566,
|
|
"learning_rate": 3.9593019445419814e-05,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09432516247034073,
|
|
"step": 1360,
|
|
"valid_targets_mean": 8314.1,
|
|
"valid_targets_min": 4837
|
|
},
|
|
{
|
|
"epoch": 1.1107041107041107,
|
|
"grad_norm": 0.3392184288683715,
|
|
"learning_rate": 3.9584834652759124e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08807634562253952,
|
|
"step": 1365,
|
|
"valid_targets_mean": 7368.2,
|
|
"valid_targets_min": 6027
|
|
},
|
|
{
|
|
"epoch": 1.1147741147741148,
|
|
"grad_norm": 0.4323967342653223,
|
|
"learning_rate": 3.95765692381703e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11778441816568375,
|
|
"step": 1370,
|
|
"valid_targets_mean": 6435.0,
|
|
"valid_targets_min": 4238
|
|
},
|
|
{
|
|
"epoch": 1.118844118844119,
|
|
"grad_norm": 1.7395054695099226,
|
|
"learning_rate": 3.956822323567832e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10033954679965973,
|
|
"step": 1375,
|
|
"valid_targets_mean": 247.8,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 1.1229141229141228,
|
|
"grad_norm": 0.42995283027411474,
|
|
"learning_rate": 3.9559796679639914e-05,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11196690052747726,
|
|
"step": 1380,
|
|
"valid_targets_mean": 6807.0,
|
|
"valid_targets_min": 5165
|
|
},
|
|
{
|
|
"epoch": 1.126984126984127,
|
|
"grad_norm": 0.4579494248210466,
|
|
"learning_rate": 3.955128960474341e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293865293264389,
|
|
"step": 1385,
|
|
"valid_targets_mean": 6989.9,
|
|
"valid_targets_min": 5104
|
|
},
|
|
{
|
|
"epoch": 1.131054131054131,
|
|
"grad_norm": 0.41699725958422584,
|
|
"learning_rate": 3.95427020460086e-05,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13233298063278198,
|
|
"step": 1390,
|
|
"valid_targets_mean": 6692.2,
|
|
"valid_targets_min": 5068
|
|
},
|
|
{
|
|
"epoch": 1.1351241351241352,
|
|
"grad_norm": 0.4309390492887969,
|
|
"learning_rate": 3.953403403878659e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428421139717102,
|
|
"step": 1395,
|
|
"valid_targets_mean": 7419.6,
|
|
"valid_targets_min": 5578
|
|
},
|
|
{
|
|
"epoch": 1.1391941391941391,
|
|
"grad_norm": 0.3957195172464458,
|
|
"learning_rate": 3.952528561875966e-05,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11624685674905777,
|
|
"step": 1400,
|
|
"valid_targets_mean": 7536.5,
|
|
"valid_targets_min": 5075
|
|
},
|
|
{
|
|
"epoch": 1.1432641432641433,
|
|
"grad_norm": 0.4039821590351794,
|
|
"learning_rate": 3.951645682194109e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1071617528796196,
|
|
"step": 1405,
|
|
"valid_targets_mean": 7221.5,
|
|
"valid_targets_min": 5841
|
|
},
|
|
{
|
|
"epoch": 1.1473341473341474,
|
|
"grad_norm": 0.41845015880407643,
|
|
"learning_rate": 3.950754768467506e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11402362585067749,
|
|
"step": 1410,
|
|
"valid_targets_mean": 7287.5,
|
|
"valid_targets_min": 5151
|
|
},
|
|
{
|
|
"epoch": 1.1514041514041513,
|
|
"grad_norm": 0.40797870681631443,
|
|
"learning_rate": 3.949855824363647e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11144113540649414,
|
|
"step": 1415,
|
|
"valid_targets_mean": 6738.4,
|
|
"valid_targets_min": 4042
|
|
},
|
|
{
|
|
"epoch": 1.1554741554741554,
|
|
"grad_norm": 0.43409193061927426,
|
|
"learning_rate": 3.948948853583081e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12913505733013153,
|
|
"step": 1420,
|
|
"valid_targets_mean": 6331.0,
|
|
"valid_targets_min": 5496
|
|
},
|
|
{
|
|
"epoch": 1.1595441595441596,
|
|
"grad_norm": 0.43403376610064975,
|
|
"learning_rate": 3.9480338598593955e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11496134102344513,
|
|
"step": 1425,
|
|
"valid_targets_mean": 6271.1,
|
|
"valid_targets_min": 4695
|
|
},
|
|
{
|
|
"epoch": 1.1636141636141637,
|
|
"grad_norm": 0.45486378077738254,
|
|
"learning_rate": 3.947110846959207e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12110753357410431,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5790.8,
|
|
"valid_targets_min": 5021
|
|
},
|
|
{
|
|
"epoch": 1.1676841676841676,
|
|
"grad_norm": 0.4865911390888994,
|
|
"learning_rate": 3.9461798186821434e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09251779317855835,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3386.8,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 1.1717541717541717,
|
|
"grad_norm": 0.45904203833658064,
|
|
"learning_rate": 3.9452407788608275e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11358363926410675,
|
|
"step": 1440,
|
|
"valid_targets_mean": 6240.5,
|
|
"valid_targets_min": 5093
|
|
},
|
|
{
|
|
"epoch": 1.1758241758241759,
|
|
"grad_norm": 0.46020651070927404,
|
|
"learning_rate": 3.9442937313608646e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11856567859649658,
|
|
"step": 1445,
|
|
"valid_targets_mean": 6329.5,
|
|
"valid_targets_min": 4589
|
|
},
|
|
{
|
|
"epoch": 1.17989417989418,
|
|
"grad_norm": 0.3983320494029986,
|
|
"learning_rate": 3.94333868008082e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12220582365989685,
|
|
"step": 1450,
|
|
"valid_targets_mean": 7589.2,
|
|
"valid_targets_min": 4963
|
|
},
|
|
{
|
|
"epoch": 1.183964183964184,
|
|
"grad_norm": 0.4581307530062702,
|
|
"learning_rate": 3.94237562895221e-05,
|
|
"loss": 0.2273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09797120094299316,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5577.9,
|
|
"valid_targets_min": 4458
|
|
},
|
|
{
|
|
"epoch": 1.188034188034188,
|
|
"grad_norm": 0.5075652321634042,
|
|
"learning_rate": 3.941404581939481e-05,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10852977633476257,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5432.5,
|
|
"valid_targets_min": 4433
|
|
},
|
|
{
|
|
"epoch": 1.1921041921041922,
|
|
"grad_norm": 0.6232838095901682,
|
|
"learning_rate": 3.940425543039996e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11723101139068604,
|
|
"step": 1465,
|
|
"valid_targets_mean": 6361.8,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 1.196174196174196,
|
|
"grad_norm": 0.42438428970084924,
|
|
"learning_rate": 3.939438516284015e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10583050549030304,
|
|
"step": 1470,
|
|
"valid_targets_mean": 5824.4,
|
|
"valid_targets_min": 5129
|
|
},
|
|
{
|
|
"epoch": 1.2002442002442002,
|
|
"grad_norm": 0.8552840375409755,
|
|
"learning_rate": 3.938443505734684e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1607503890991211,
|
|
"step": 1475,
|
|
"valid_targets_mean": 1637.6,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 1.2043142043142043,
|
|
"grad_norm": 0.4282729691315241,
|
|
"learning_rate": 3.9374405154880104e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11325763911008835,
|
|
"step": 1480,
|
|
"valid_targets_mean": 6474.6,
|
|
"valid_targets_min": 4751
|
|
},
|
|
{
|
|
"epoch": 1.2083842083842085,
|
|
"grad_norm": 0.45919849824696074,
|
|
"learning_rate": 3.9364295496728545e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1222546175122261,
|
|
"step": 1485,
|
|
"valid_targets_mean": 7186.2,
|
|
"valid_targets_min": 5358
|
|
},
|
|
{
|
|
"epoch": 1.2124542124542124,
|
|
"grad_norm": 0.41905751280920195,
|
|
"learning_rate": 3.9354106124509045e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11492247879505157,
|
|
"step": 1490,
|
|
"valid_targets_mean": 7126.8,
|
|
"valid_targets_min": 4078
|
|
},
|
|
{
|
|
"epoch": 1.2165242165242165,
|
|
"grad_norm": 0.4228687168767464,
|
|
"learning_rate": 3.934383708016667e-05,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11097435653209686,
|
|
"step": 1495,
|
|
"valid_targets_mean": 7022.1,
|
|
"valid_targets_min": 4940
|
|
},
|
|
{
|
|
"epoch": 1.2205942205942206,
|
|
"grad_norm": 0.42085225391276915,
|
|
"learning_rate": 3.9333488405974434e-05,
|
|
"loss": 0.2153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11520600318908691,
|
|
"step": 1500,
|
|
"valid_targets_mean": 5900.6,
|
|
"valid_targets_min": 4779
|
|
},
|
|
{
|
|
"epoch": 1.2246642246642248,
|
|
"grad_norm": 0.4633839046298897,
|
|
"learning_rate": 3.932306014453315e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11590239405632019,
|
|
"step": 1505,
|
|
"valid_targets_mean": 6290.1,
|
|
"valid_targets_min": 4620
|
|
},
|
|
{
|
|
"epoch": 1.2287342287342287,
|
|
"grad_norm": 0.9660012618752344,
|
|
"learning_rate": 3.9312552338771284e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10209794342517853,
|
|
"step": 1510,
|
|
"valid_targets_mean": 1162.2,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 1.2328042328042328,
|
|
"grad_norm": 0.8959866942571094,
|
|
"learning_rate": 3.9301965031944724e-05,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11823749542236328,
|
|
"step": 1515,
|
|
"valid_targets_mean": 1500.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 1.236874236874237,
|
|
"grad_norm": 0.92051115161302,
|
|
"learning_rate": 3.929129826763663e-05,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11000481992959976,
|
|
"step": 1520,
|
|
"valid_targets_mean": 1371.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 1.2409442409442408,
|
|
"grad_norm": 0.8905552213698138,
|
|
"learning_rate": 3.928055208975726e-05,
|
|
"loss": 0.2341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13280996680259705,
|
|
"step": 1525,
|
|
"valid_targets_mean": 1433.6,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.245014245014245,
|
|
"grad_norm": 0.7573822137035331,
|
|
"learning_rate": 3.926972654254379e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10896831750869751,
|
|
"step": 1530,
|
|
"valid_targets_mean": 1578.9,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.249084249084249,
|
|
"grad_norm": 0.7786863011930503,
|
|
"learning_rate": 3.92588216705601e-05,
|
|
"loss": 0.2278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11522305011749268,
|
|
"step": 1535,
|
|
"valid_targets_mean": 1746.4,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.2531542531542532,
|
|
"grad_norm": 0.7938573989349429,
|
|
"learning_rate": 3.924783751869663e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09873868525028229,
|
|
"step": 1540,
|
|
"valid_targets_mean": 1309.9,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.2572242572242573,
|
|
"grad_norm": 0.9426871570045071,
|
|
"learning_rate": 3.923677413217019e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13097123801708221,
|
|
"step": 1545,
|
|
"valid_targets_mean": 1368.6,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 1.2612942612942613,
|
|
"grad_norm": 0.8036694210964767,
|
|
"learning_rate": 3.9225631556523744e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11239443719387054,
|
|
"step": 1550,
|
|
"valid_targets_mean": 1491.8,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 1.2653642653642654,
|
|
"grad_norm": 0.8084037583756437,
|
|
"learning_rate": 3.921440983762624e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12547224760055542,
|
|
"step": 1555,
|
|
"valid_targets_mean": 1994.0,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 1.2694342694342695,
|
|
"grad_norm": 0.7784763220071113,
|
|
"learning_rate": 3.920310902167245e-05,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11351943016052246,
|
|
"step": 1560,
|
|
"valid_targets_mean": 1523.4,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 1.2735042735042734,
|
|
"grad_norm": 0.7880339591266707,
|
|
"learning_rate": 3.919172915518271e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11668528616428375,
|
|
"step": 1565,
|
|
"valid_targets_mean": 1790.1,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 1.2775742775742776,
|
|
"grad_norm": 0.7920585958427674,
|
|
"learning_rate": 3.918027028500282e-05,
|
|
"loss": 0.2223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0921139121055603,
|
|
"step": 1570,
|
|
"valid_targets_mean": 1394.0,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 1.2816442816442817,
|
|
"grad_norm": 0.925447932695446,
|
|
"learning_rate": 3.916873245830376e-05,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08572347462177277,
|
|
"step": 1575,
|
|
"valid_targets_mean": 1129.0,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 0.9814126638216883,
|
|
"learning_rate": 3.915711572258157e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10210913419723511,
|
|
"step": 1580,
|
|
"valid_targets_mean": 1466.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 1.2897842897842897,
|
|
"grad_norm": 0.7835577295777144,
|
|
"learning_rate": 3.914542012565711e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07693362236022949,
|
|
"step": 1585,
|
|
"valid_targets_mean": 1129.4,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.2938542938542938,
|
|
"grad_norm": 0.9086732320776554,
|
|
"learning_rate": 3.913364571567586e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10597139596939087,
|
|
"step": 1590,
|
|
"valid_targets_mean": 1322.8,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 1.297924297924298,
|
|
"grad_norm": 0.9559891752335423,
|
|
"learning_rate": 3.912179254110777e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1286805272102356,
|
|
"step": 1595,
|
|
"valid_targets_mean": 1612.0,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.301994301994302,
|
|
"grad_norm": 0.7298047621289229,
|
|
"learning_rate": 3.9109860650747e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08292436599731445,
|
|
"step": 1600,
|
|
"valid_targets_mean": 1402.1,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 1.306064306064306,
|
|
"grad_norm": 0.712092191185825,
|
|
"learning_rate": 3.9097850093711775e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09930809587240219,
|
|
"step": 1605,
|
|
"valid_targets_mean": 1337.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 1.3101343101343101,
|
|
"grad_norm": 0.8337686772674006,
|
|
"learning_rate": 3.908576091944412e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12775862216949463,
|
|
"step": 1610,
|
|
"valid_targets_mean": 1525.5,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.3142043142043143,
|
|
"grad_norm": 0.8033816144064492,
|
|
"learning_rate": 3.907359317770973e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12140291929244995,
|
|
"step": 1615,
|
|
"valid_targets_mean": 1441.0,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 1.3182743182743182,
|
|
"grad_norm": 0.8666725972794019,
|
|
"learning_rate": 3.90613469185977e-05,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06115573272109032,
|
|
"step": 1620,
|
|
"valid_targets_mean": 847.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 1.3223443223443223,
|
|
"grad_norm": 0.831110404158148,
|
|
"learning_rate": 3.904902219252035e-05,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11510695517063141,
|
|
"step": 1625,
|
|
"valid_targets_mean": 1524.9,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 1.3264143264143264,
|
|
"grad_norm": 0.747854141552853,
|
|
"learning_rate": 3.903661905021302e-05,
|
|
"loss": 0.2131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11657039076089859,
|
|
"step": 1630,
|
|
"valid_targets_mean": 1779.4,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 1.3304843304843303,
|
|
"grad_norm": 0.7620995195915982,
|
|
"learning_rate": 3.9024137542733846e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12204901874065399,
|
|
"step": 1635,
|
|
"valid_targets_mean": 1741.2,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 1.3345543345543345,
|
|
"grad_norm": 0.8238415295946188,
|
|
"learning_rate": 3.9011577721463574e-05,
|
|
"loss": 0.2143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12938036024570465,
|
|
"step": 1640,
|
|
"valid_targets_mean": 1875.6,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 1.3386243386243386,
|
|
"grad_norm": 0.799416998929109,
|
|
"learning_rate": 3.899893963810531e-05,
|
|
"loss": 0.2233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1031196340918541,
|
|
"step": 1645,
|
|
"valid_targets_mean": 1419.5,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 1.3426943426943427,
|
|
"grad_norm": 0.7699078977660259,
|
|
"learning_rate": 3.898622334468435e-05,
|
|
"loss": 0.2078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14640526473522186,
|
|
"step": 1650,
|
|
"valid_targets_mean": 1968.2,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 1.3467643467643469,
|
|
"grad_norm": 0.7511380642686718,
|
|
"learning_rate": 3.897342889354793e-05,
|
|
"loss": 0.2129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10663345456123352,
|
|
"step": 1655,
|
|
"valid_targets_mean": 1568.9,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 1.3508343508343508,
|
|
"grad_norm": 0.8037134681077442,
|
|
"learning_rate": 3.896055633736504e-05,
|
|
"loss": 0.2163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11559002846479416,
|
|
"step": 1660,
|
|
"valid_targets_mean": 1645.2,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 1.354904354904355,
|
|
"grad_norm": 0.7949561381518031,
|
|
"learning_rate": 3.894760572912618e-05,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1040218248963356,
|
|
"step": 1665,
|
|
"valid_targets_mean": 1574.9,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 1.358974358974359,
|
|
"grad_norm": 0.7793788473775995,
|
|
"learning_rate": 3.8934577122143156e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09460228681564331,
|
|
"step": 1670,
|
|
"valid_targets_mean": 1121.6,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.363044363044363,
|
|
"grad_norm": 0.7618292018526198,
|
|
"learning_rate": 3.892147057004888e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11588133871555328,
|
|
"step": 1675,
|
|
"valid_targets_mean": 1818.1,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 1.367114367114367,
|
|
"grad_norm": 0.9942145132530782,
|
|
"learning_rate": 3.89082861267971e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09862836450338364,
|
|
"step": 1680,
|
|
"valid_targets_mean": 1417.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 1.3711843711843712,
|
|
"grad_norm": 0.8502030826682979,
|
|
"learning_rate": 3.889502384666223e-05,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08241011202335358,
|
|
"step": 1685,
|
|
"valid_targets_mean": 1051.0,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 1.3752543752543753,
|
|
"grad_norm": 0.696356628682868,
|
|
"learning_rate": 3.8881683784239086e-05,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09263870120048523,
|
|
"step": 1690,
|
|
"valid_targets_mean": 1431.1,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 1.3793243793243795,
|
|
"grad_norm": 0.7609564628672522,
|
|
"learning_rate": 3.8868265994442694e-05,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11136119812726974,
|
|
"step": 1695,
|
|
"valid_targets_mean": 1486.9,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 1.3833943833943834,
|
|
"grad_norm": 0.779149722488893,
|
|
"learning_rate": 3.8854770532508036e-05,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12626802921295166,
|
|
"step": 1700,
|
|
"valid_targets_mean": 1734.5,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 1.3874643874643875,
|
|
"grad_norm": 0.8189444335835692,
|
|
"learning_rate": 3.884119745398984e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12554654479026794,
|
|
"step": 1705,
|
|
"valid_targets_mean": 1712.5,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 1.3915343915343916,
|
|
"grad_norm": 0.7562880779738609,
|
|
"learning_rate": 3.882754681476235e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14017203450202942,
|
|
"step": 1710,
|
|
"valid_targets_mean": 1723.8,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 1.3956043956043955,
|
|
"grad_norm": 0.789327271767066,
|
|
"learning_rate": 3.881381867101908e-05,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1333657205104828,
|
|
"step": 1715,
|
|
"valid_targets_mean": 1633.1,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 1.3996743996743997,
|
|
"grad_norm": 0.6904489176477637,
|
|
"learning_rate": 3.880001307927262e-05,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1002424880862236,
|
|
"step": 1720,
|
|
"valid_targets_mean": 1376.1,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 1.4037444037444038,
|
|
"grad_norm": 0.7331099974034027,
|
|
"learning_rate": 3.878613009635434e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11483565717935562,
|
|
"step": 1725,
|
|
"valid_targets_mean": 1576.5,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.4078144078144077,
|
|
"grad_norm": 0.7374050495229207,
|
|
"learning_rate": 3.877216977941424e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12511426210403442,
|
|
"step": 1730,
|
|
"valid_targets_mean": 1615.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.4118844118844118,
|
|
"grad_norm": 0.7263856360035532,
|
|
"learning_rate": 3.875813218592063e-05,
|
|
"loss": 0.212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11008419096469879,
|
|
"step": 1735,
|
|
"valid_targets_mean": 1881.2,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 1.415954415954416,
|
|
"grad_norm": 0.7714742596712739,
|
|
"learning_rate": 3.874401737365996e-05,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11159549653530121,
|
|
"step": 1740,
|
|
"valid_targets_mean": 1367.2,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 1.42002442002442,
|
|
"grad_norm": 0.768333986171705,
|
|
"learning_rate": 3.872982540073654e-05,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08359374105930328,
|
|
"step": 1745,
|
|
"valid_targets_mean": 1174.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 1.4240944240944242,
|
|
"grad_norm": 0.730500600250479,
|
|
"learning_rate": 3.871555632557232e-05,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09920322895050049,
|
|
"step": 1750,
|
|
"valid_targets_mean": 1556.2,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 1.4281644281644281,
|
|
"grad_norm": 0.8117781426270219,
|
|
"learning_rate": 3.870121020690663e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11729609966278076,
|
|
"step": 1755,
|
|
"valid_targets_mean": 1885.0,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 1.4322344322344323,
|
|
"grad_norm": 0.7215350454916396,
|
|
"learning_rate": 3.868678710379599e-05,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07775722444057465,
|
|
"step": 1760,
|
|
"valid_targets_mean": 1306.1,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 1.4363044363044364,
|
|
"grad_norm": 0.837829363749683,
|
|
"learning_rate": 3.86722870756138e-05,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08048004657030106,
|
|
"step": 1765,
|
|
"valid_targets_mean": 1330.1,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 1.4403744403744403,
|
|
"grad_norm": 0.6691854525025749,
|
|
"learning_rate": 3.865771018205014e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08131928741931915,
|
|
"step": 1770,
|
|
"valid_targets_mean": 1465.0,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 1.4444444444444444,
|
|
"grad_norm": 0.8158369333281974,
|
|
"learning_rate": 3.864305648311149e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10300800949335098,
|
|
"step": 1775,
|
|
"valid_targets_mean": 1428.0,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 1.4485144485144485,
|
|
"grad_norm": 0.948520302318546,
|
|
"learning_rate": 3.8628326039120524e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13352897763252258,
|
|
"step": 1780,
|
|
"valid_targets_mean": 1633.1,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 1.4525844525844525,
|
|
"grad_norm": 0.7365952174032329,
|
|
"learning_rate": 3.861351891071583e-05,
|
|
"loss": 0.2024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08271881937980652,
|
|
"step": 1785,
|
|
"valid_targets_mean": 1246.5,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 1.4566544566544566,
|
|
"grad_norm": 0.6720445528339655,
|
|
"learning_rate": 3.8598635158851694e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10842913389205933,
|
|
"step": 1790,
|
|
"valid_targets_mean": 1903.6,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 1.4607244607244607,
|
|
"grad_norm": 0.7972994996959792,
|
|
"learning_rate": 3.858367484479779e-05,
|
|
"loss": 0.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09797993302345276,
|
|
"step": 1795,
|
|
"valid_targets_mean": 1391.6,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 1.4647944647944648,
|
|
"grad_norm": 0.8151552299642496,
|
|
"learning_rate": 3.856863803013897e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11069940030574799,
|
|
"step": 1800,
|
|
"valid_targets_mean": 1295.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 1.468864468864469,
|
|
"grad_norm": 0.6855785516547465,
|
|
"learning_rate": 3.855352477677504e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06866128742694855,
|
|
"step": 1805,
|
|
"valid_targets_mean": 1414.8,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 1.4729344729344729,
|
|
"grad_norm": 0.8055255610517995,
|
|
"learning_rate": 3.853833514692044e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0873202309012413,
|
|
"step": 1810,
|
|
"valid_targets_mean": 1059.4,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 1.477004477004477,
|
|
"grad_norm": 0.7866122138481166,
|
|
"learning_rate": 3.852306920310401e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11211128532886505,
|
|
"step": 1815,
|
|
"valid_targets_mean": 1634.2,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 1.4810744810744811,
|
|
"grad_norm": 0.7173011268227322,
|
|
"learning_rate": 3.850772700816877e-05,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09077475965023041,
|
|
"step": 1820,
|
|
"valid_targets_mean": 1412.5,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 1.485144485144485,
|
|
"grad_norm": 0.6952841023611934,
|
|
"learning_rate": 3.8492308625271596e-05,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09670276939868927,
|
|
"step": 1825,
|
|
"valid_targets_mean": 1558.8,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 1.4892144892144892,
|
|
"grad_norm": 0.7685210240880278,
|
|
"learning_rate": 3.8476814117883034e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10378531366586685,
|
|
"step": 1830,
|
|
"valid_targets_mean": 1441.2,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 1.4932844932844933,
|
|
"grad_norm": 0.7239079288465091,
|
|
"learning_rate": 3.846124354978697e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09632142633199692,
|
|
"step": 1835,
|
|
"valid_targets_mean": 1449.8,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 1.4973544973544972,
|
|
"grad_norm": 0.7850402456186506,
|
|
"learning_rate": 3.8445596985080404e-05,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10480426251888275,
|
|
"step": 1840,
|
|
"valid_targets_mean": 1487.4,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 1.5014245014245016,
|
|
"grad_norm": 0.8068791427285449,
|
|
"learning_rate": 3.842987448817319e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09140418469905853,
|
|
"step": 1845,
|
|
"valid_targets_mean": 1421.4,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 1.5054945054945055,
|
|
"grad_norm": 0.7097429620733522,
|
|
"learning_rate": 3.841407612378775e-05,
|
|
"loss": 0.2126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12428690493106842,
|
|
"step": 1850,
|
|
"valid_targets_mean": 1822.6,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 1.5095645095645096,
|
|
"grad_norm": 0.7932172765143252,
|
|
"learning_rate": 3.839820195695883e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09613991528749466,
|
|
"step": 1855,
|
|
"valid_targets_mean": 1327.4,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 1.5136345136345137,
|
|
"grad_norm": 0.7320076010626049,
|
|
"learning_rate": 3.8382252053033196e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10572035610675812,
|
|
"step": 1860,
|
|
"valid_targets_mean": 1823.8,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 1.5177045177045176,
|
|
"grad_norm": 0.806858170502788,
|
|
"learning_rate": 3.836622647766943e-05,
|
|
"loss": 0.1985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06952077150344849,
|
|
"step": 1865,
|
|
"valid_targets_mean": 924.0,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 1.5217745217745218,
|
|
"grad_norm": 0.7219548062410487,
|
|
"learning_rate": 3.835012529683757e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08829911798238754,
|
|
"step": 1870,
|
|
"valid_targets_mean": 1480.2,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 1.525844525844526,
|
|
"grad_norm": 0.6994940063547743,
|
|
"learning_rate": 3.833394857681894e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08126124739646912,
|
|
"step": 1875,
|
|
"valid_targets_mean": 1415.5,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.5299145299145298,
|
|
"grad_norm": 0.7327643766970442,
|
|
"learning_rate": 3.831769638420577e-05,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1019296646118164,
|
|
"step": 1880,
|
|
"valid_targets_mean": 1655.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 1.533984533984534,
|
|
"grad_norm": 0.6800577601183724,
|
|
"learning_rate": 3.830136878590104e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09412302821874619,
|
|
"step": 1885,
|
|
"valid_targets_mean": 1553.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 1.538054538054538,
|
|
"grad_norm": 0.7426379902022742,
|
|
"learning_rate": 3.8284965849118066e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09913598001003265,
|
|
"step": 1890,
|
|
"valid_targets_mean": 1353.9,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 1.542124542124542,
|
|
"grad_norm": 0.7457989847263609,
|
|
"learning_rate": 3.826848764138036e-05,
|
|
"loss": 0.2131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08886919915676117,
|
|
"step": 1895,
|
|
"valid_targets_mean": 1420.9,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 1.5461945461945463,
|
|
"grad_norm": 0.7559545112406654,
|
|
"learning_rate": 3.825193423052127e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09691812098026276,
|
|
"step": 1900,
|
|
"valid_targets_mean": 1394.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 1.5502645502645502,
|
|
"grad_norm": 0.6854552164584192,
|
|
"learning_rate": 3.823530568468371e-05,
|
|
"loss": 0.1928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08764408528804779,
|
|
"step": 1905,
|
|
"valid_targets_mean": 1383.2,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 1.5543345543345544,
|
|
"grad_norm": 0.6804870010828482,
|
|
"learning_rate": 3.821860207231991e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11076577752828598,
|
|
"step": 1910,
|
|
"valid_targets_mean": 1682.4,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 1.5584045584045585,
|
|
"grad_norm": 0.8066726711083975,
|
|
"learning_rate": 3.82018234621911e-05,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09034565091133118,
|
|
"step": 1915,
|
|
"valid_targets_mean": 1118.4,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.5624745624745624,
|
|
"grad_norm": 0.7147210480241867,
|
|
"learning_rate": 3.818496992336725e-05,
|
|
"loss": 0.2039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10571155697107315,
|
|
"step": 1920,
|
|
"valid_targets_mean": 1538.2,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 1.5665445665445665,
|
|
"grad_norm": 0.6741394266281067,
|
|
"learning_rate": 3.816804152522678e-05,
|
|
"loss": 0.1969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10726112127304077,
|
|
"step": 1925,
|
|
"valid_targets_mean": 1570.1,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 1.5706145706145707,
|
|
"grad_norm": 0.6935025181842417,
|
|
"learning_rate": 3.815103833745626e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09748126566410065,
|
|
"step": 1930,
|
|
"valid_targets_mean": 1537.8,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 1.5746845746845746,
|
|
"grad_norm": 0.6821876374487306,
|
|
"learning_rate": 3.8133960430050135e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09882050007581711,
|
|
"step": 1935,
|
|
"valid_targets_mean": 1471.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.578754578754579,
|
|
"grad_norm": 0.6990112986801081,
|
|
"learning_rate": 3.811680787331047e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08577939867973328,
|
|
"step": 1940,
|
|
"valid_targets_mean": 1254.4,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 1.5828245828245828,
|
|
"grad_norm": 0.7102883473320752,
|
|
"learning_rate": 3.809958073784658e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0998358279466629,
|
|
"step": 1945,
|
|
"valid_targets_mean": 1482.9,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 1.5868945868945867,
|
|
"grad_norm": 0.7009814403089336,
|
|
"learning_rate": 3.8082279094574815e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11195679008960724,
|
|
"step": 1950,
|
|
"valid_targets_mean": 1583.2,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 1.590964590964591,
|
|
"grad_norm": 0.41631203954390233,
|
|
"learning_rate": 3.8064903014718245e-05,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07122399657964706,
|
|
"step": 1955,
|
|
"valid_targets_mean": 3710.1,
|
|
"valid_targets_min": 3074
|
|
},
|
|
{
|
|
"epoch": 1.595034595034595,
|
|
"grad_norm": 0.4600103764733667,
|
|
"learning_rate": 3.804745256980634e-05,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06652016937732697,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2577.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 1.5991045991045991,
|
|
"grad_norm": 0.4537614450983102,
|
|
"learning_rate": 3.80299278316747e-05,
|
|
"loss": 0.1233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05843440815806389,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3309.8,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 1.6031746031746033,
|
|
"grad_norm": 0.43458913842163255,
|
|
"learning_rate": 3.801232887246479e-05,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06071528047323227,
|
|
"step": 1970,
|
|
"valid_targets_mean": 2484.8,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 1.6072446072446072,
|
|
"grad_norm": 0.42621344020743934,
|
|
"learning_rate": 3.799465576462357e-05,
|
|
"loss": 0.1221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06493829935789108,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3095.1,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 1.6113146113146113,
|
|
"grad_norm": 0.7467123259446526,
|
|
"learning_rate": 3.7976908580903246e-05,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10322582721710205,
|
|
"step": 1980,
|
|
"valid_targets_mean": 1663.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 1.6153846153846154,
|
|
"grad_norm": 0.41841694093835413,
|
|
"learning_rate": 3.7959087394360974e-05,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0684170350432396,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3443.2,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 1.6194546194546193,
|
|
"grad_norm": 0.4482119640964086,
|
|
"learning_rate": 3.794119227835854e-05,
|
|
"loss": 0.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05158419907093048,
|
|
"step": 1990,
|
|
"valid_targets_mean": 1609.9,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 1.6235246235246237,
|
|
"grad_norm": 0.2808356049969262,
|
|
"learning_rate": 3.792322330656206e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045812997967004776,
|
|
"step": 1995,
|
|
"valid_targets_mean": 5572.1,
|
|
"valid_targets_min": 5150
|
|
},
|
|
{
|
|
"epoch": 1.6275946275946276,
|
|
"grad_norm": 0.38378216926736985,
|
|
"learning_rate": 3.790518055294168e-05,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07117560505867004,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3950.0,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 1.6316646316646317,
|
|
"grad_norm": 0.3830231524830745,
|
|
"learning_rate": 3.788706409177129e-05,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06173840910196304,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3669.2,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 1.6357346357346358,
|
|
"grad_norm": 0.35711060786805376,
|
|
"learning_rate": 3.7868873997628174e-05,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050001971423625946,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2980.4,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 1.6398046398046398,
|
|
"grad_norm": 0.3974333252134189,
|
|
"learning_rate": 3.7850610345392735e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06219826638698578,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3675.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 1.6438746438746439,
|
|
"grad_norm": 0.3281743505589197,
|
|
"learning_rate": 3.7832273210248214e-05,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045098382979631424,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4013.2,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 1.647944647944648,
|
|
"grad_norm": 0.5366337338266325,
|
|
"learning_rate": 3.7813862667680304e-05,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061837535351514816,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2000.5,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 1.652014652014652,
|
|
"grad_norm": 0.34924838955717313,
|
|
"learning_rate": 3.7795378793476904e-05,
|
|
"loss": 0.0976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03679157420992851,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2904.4,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.656084656084656,
|
|
"grad_norm": 0.48035620858335015,
|
|
"learning_rate": 3.777682166372779e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04888860881328583,
|
|
"step": 2035,
|
|
"valid_targets_mean": 1487.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 1.6601546601546602,
|
|
"grad_norm": 0.5576087684474084,
|
|
"learning_rate": 3.775819135482429e-05,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07822857797145844,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2006.0,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 1.664224664224664,
|
|
"grad_norm": 0.441534949218794,
|
|
"learning_rate": 3.773948794345899e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07287167012691498,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3359.2,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 1.6682946682946684,
|
|
"grad_norm": 0.4390733330236822,
|
|
"learning_rate": 3.7720711506625384e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08404476940631866,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2866.8,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 1.6723646723646723,
|
|
"grad_norm": 0.4736301286588309,
|
|
"learning_rate": 3.7701862121617595e-05,
|
|
"loss": 0.1765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04762580618262291,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3259.9,
|
|
"valid_targets_min": 2702
|
|
},
|
|
{
|
|
"epoch": 1.6764346764346765,
|
|
"grad_norm": 0.3397713444353576,
|
|
"learning_rate": 3.768293986603003e-05,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04629997909069061,
|
|
"step": 2060,
|
|
"valid_targets_mean": 4086.4,
|
|
"valid_targets_min": 3798
|
|
},
|
|
{
|
|
"epoch": 1.6805046805046806,
|
|
"grad_norm": 0.8340021398967473,
|
|
"learning_rate": 3.7663944817757094e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10405413806438446,
|
|
"step": 2065,
|
|
"valid_targets_mean": 1020.8,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 1.6845746845746845,
|
|
"grad_norm": 0.4169904470975252,
|
|
"learning_rate": 3.7644877054992814e-05,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047049522399902344,
|
|
"step": 2070,
|
|
"valid_targets_mean": 2028.4,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 1.6886446886446886,
|
|
"grad_norm": 0.3457500004146195,
|
|
"learning_rate": 3.7625736656230576e-05,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06108832731842995,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4046.9,
|
|
"valid_targets_min": 3390
|
|
},
|
|
{
|
|
"epoch": 1.6927146927146928,
|
|
"grad_norm": 0.6869412401670636,
|
|
"learning_rate": 3.760652370026277e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16195940971374512,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2247.4,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 1.6967846967846967,
|
|
"grad_norm": 0.40632518442102483,
|
|
"learning_rate": 3.758723826618045e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0758669525384903,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3348.9,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 1.7008547008547008,
|
|
"grad_norm": 0.4606205763061986,
|
|
"learning_rate": 3.7567880433373066e-05,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060970671474933624,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2385.1,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 1.704924704924705,
|
|
"grad_norm": 0.45654254366777014,
|
|
"learning_rate": 3.754845028152807e-05,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062153562903404236,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3126.2,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 1.7089947089947088,
|
|
"grad_norm": 0.3935119632617659,
|
|
"learning_rate": 3.7528947890630635e-05,
|
|
"loss": 0.2285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06273317337036133,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4386.0,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 1.7130647130647132,
|
|
"grad_norm": 0.30725829749200173,
|
|
"learning_rate": 3.750937334096331e-05,
|
|
"loss": 0.1001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07420308887958527,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3738.9,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 1.717134717134717,
|
|
"grad_norm": 0.3408574469787128,
|
|
"learning_rate": 3.7489726713105673e-05,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05906776338815689,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3522.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 1.7212047212047212,
|
|
"grad_norm": 0.3873420033129566,
|
|
"learning_rate": 3.747000808793404e-05,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06194659322500229,
|
|
"step": 2115,
|
|
"valid_targets_mean": 3383.1,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 1.7252747252747254,
|
|
"grad_norm": 0.35534762128764597,
|
|
"learning_rate": 3.745021754662109e-05,
|
|
"loss": 0.1073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050105899572372437,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3610.1,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 1.7293447293447293,
|
|
"grad_norm": 0.35587621922779594,
|
|
"learning_rate": 3.7430355170635536e-05,
|
|
"loss": 0.1049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040797870606184006,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3479.8,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 1.7334147334147334,
|
|
"grad_norm": 0.4101618167732939,
|
|
"learning_rate": 3.7410421041741846e-05,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05459919571876526,
|
|
"step": 2130,
|
|
"valid_targets_mean": 3346.6,
|
|
"valid_targets_min": 2409
|
|
},
|
|
{
|
|
"epoch": 1.7374847374847375,
|
|
"grad_norm": 0.3706410137638395,
|
|
"learning_rate": 3.7390415241999815e-05,
|
|
"loss": 0.1038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049584001302719116,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3552.0,
|
|
"valid_targets_min": 2309
|
|
},
|
|
{
|
|
"epoch": 1.7415547415547414,
|
|
"grad_norm": 0.6514439786770229,
|
|
"learning_rate": 3.737033785376431e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11145202815532684,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2149.8,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 1.7456247456247458,
|
|
"grad_norm": 0.3388516246206135,
|
|
"learning_rate": 3.735018895968487e-05,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04621146619319916,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3310.2,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 1.7496947496947497,
|
|
"grad_norm": 0.4563091012984944,
|
|
"learning_rate": 3.73299686427054e-05,
|
|
"loss": 0.1009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05684323236346245,
|
|
"step": 2150,
|
|
"valid_targets_mean": 2458.4,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 1.7537647537647536,
|
|
"grad_norm": 0.329941297300451,
|
|
"learning_rate": 3.730967698606383e-05,
|
|
"loss": 0.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046611517667770386,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3501.5,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 1.757834757834758,
|
|
"grad_norm": 0.39991464682747385,
|
|
"learning_rate": 3.728931407329174e-05,
|
|
"loss": 0.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07516320049762726,
|
|
"step": 2160,
|
|
"valid_targets_mean": 3220.2,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 1.7619047619047619,
|
|
"grad_norm": 0.33577937245255396,
|
|
"learning_rate": 3.7268879988214075e-05,
|
|
"loss": 0.1624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04279305413365364,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3818.5,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 1.765974765974766,
|
|
"grad_norm": 0.4325931023841163,
|
|
"learning_rate": 3.724837481494874e-05,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06765176355838776,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2390.9,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 1.7700447700447701,
|
|
"grad_norm": 0.5070623066629864,
|
|
"learning_rate": 3.722779863790626e-05,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06652006506919861,
|
|
"step": 2175,
|
|
"valid_targets_mean": 1500.6,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 1.774114774114774,
|
|
"grad_norm": 0.6537069454458774,
|
|
"learning_rate": 3.7207151541789505e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.113385409116745,
|
|
"step": 2180,
|
|
"valid_targets_mean": 1711.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 1.7781847781847782,
|
|
"grad_norm": 0.44224511896857577,
|
|
"learning_rate": 3.7186433611593225e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07473345100879669,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3036.1,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 1.7822547822547823,
|
|
"grad_norm": 0.48920469501680053,
|
|
"learning_rate": 3.716564493260381e-05,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08563308417797089,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3317.2,
|
|
"valid_targets_min": 2265
|
|
},
|
|
{
|
|
"epoch": 1.7863247863247862,
|
|
"grad_norm": 0.32615432603009253,
|
|
"learning_rate": 3.714478559039887e-05,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058499690145254135,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3875.2,
|
|
"valid_targets_min": 3051
|
|
},
|
|
{
|
|
"epoch": 1.7903947903947905,
|
|
"grad_norm": 0.5285568387969577,
|
|
"learning_rate": 3.712385567084689e-05,
|
|
"loss": 0.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08171027898788452,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2069.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.7944647944647945,
|
|
"grad_norm": 0.3459525440694431,
|
|
"learning_rate": 3.710285526010693e-05,
|
|
"loss": 0.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04602465033531189,
|
|
"step": 2205,
|
|
"valid_targets_mean": 1640.8,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 1.7985347985347986,
|
|
"grad_norm": 0.25004285177963237,
|
|
"learning_rate": 3.7081784444628185e-05,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03536680340766907,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4435.1,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 1.8026048026048027,
|
|
"grad_norm": 0.38886600769121415,
|
|
"learning_rate": 3.7060643311149706e-05,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041570305824279785,
|
|
"step": 2215,
|
|
"valid_targets_mean": 1889.2,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 1.8066748066748066,
|
|
"grad_norm": 0.32987051157703745,
|
|
"learning_rate": 3.703943194670001e-05,
|
|
"loss": 0.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059109874069690704,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4257.1,
|
|
"valid_targets_min": 2414
|
|
},
|
|
{
|
|
"epoch": 1.8107448107448108,
|
|
"grad_norm": 0.551577193902464,
|
|
"learning_rate": 3.7018150438596696e-05,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061953105032444,
|
|
"step": 2225,
|
|
"valid_targets_mean": 1502.4,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 1.8148148148148149,
|
|
"grad_norm": 0.43626117102890194,
|
|
"learning_rate": 3.6996798874446144e-05,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05732385441660881,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3466.0,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 1.8188848188848188,
|
|
"grad_norm": 0.4137126451869474,
|
|
"learning_rate": 3.6975377342143105e-05,
|
|
"loss": 0.1235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06378593295812607,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3982.1,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 1.822954822954823,
|
|
"grad_norm": 0.33696208651772863,
|
|
"learning_rate": 3.695388592987036e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05193319916725159,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2788.0,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 1.827024827024827,
|
|
"grad_norm": 0.37878300759015127,
|
|
"learning_rate": 3.693232472609837e-05,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04331493750214577,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2120.6,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 1.831094831094831,
|
|
"grad_norm": 0.35149923790855736,
|
|
"learning_rate": 3.6910693819584865e-05,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05358132719993591,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3477.1,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 1.8351648351648353,
|
|
"grad_norm": 0.31501331438328894,
|
|
"learning_rate": 3.688899329937454e-05,
|
|
"loss": 0.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05189646780490875,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3888.4,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 1.8392348392348392,
|
|
"grad_norm": 0.3667292332003592,
|
|
"learning_rate": 3.6867223254798645e-05,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04574829339981079,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3086.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 1.8433048433048433,
|
|
"grad_norm": 0.3290532742643368,
|
|
"learning_rate": 3.6845383775474626e-05,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03846554830670357,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3558.6,
|
|
"valid_targets_min": 2587
|
|
},
|
|
{
|
|
"epoch": 1.8473748473748475,
|
|
"grad_norm": 0.3938162713057422,
|
|
"learning_rate": 3.6823474951305766e-05,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05593733489513397,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2688.9,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 1.8514448514448514,
|
|
"grad_norm": 0.4221991891103622,
|
|
"learning_rate": 3.6801496872480825e-05,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06380049139261246,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3734.4,
|
|
"valid_targets_min": 3068
|
|
},
|
|
{
|
|
"epoch": 1.8555148555148555,
|
|
"grad_norm": 0.608114991462476,
|
|
"learning_rate": 3.6779449629473615e-05,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0737733393907547,
|
|
"step": 2280,
|
|
"valid_targets_mean": 1816.1,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 1.8595848595848596,
|
|
"grad_norm": 0.5129055915940605,
|
|
"learning_rate": 3.675733331304271e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05952981114387512,
|
|
"step": 2285,
|
|
"valid_targets_mean": 1444.4,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 1.8636548636548635,
|
|
"grad_norm": 0.4126348530013301,
|
|
"learning_rate": 3.6735148014230985e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04681888595223427,
|
|
"step": 2290,
|
|
"valid_targets_mean": 1768.8,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 1.8677248677248677,
|
|
"grad_norm": 0.4275179875164524,
|
|
"learning_rate": 3.671289382436532e-05,
|
|
"loss": 0.1124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05094176530838013,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2634.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.8717948717948718,
|
|
"grad_norm": 0.5049917920987161,
|
|
"learning_rate": 3.669057083505617e-05,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06733274459838867,
|
|
"step": 2300,
|
|
"valid_targets_mean": 1836.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 1.8758648758648757,
|
|
"grad_norm": 0.5528905832066613,
|
|
"learning_rate": 3.6668179138197205e-05,
|
|
"loss": 0.1311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10168108344078064,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2148.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 1.87993487993488,
|
|
"grad_norm": 0.299832096960279,
|
|
"learning_rate": 3.664571882596495e-05,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033232755959033966,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3639.2,
|
|
"valid_targets_min": 2863
|
|
},
|
|
{
|
|
"epoch": 1.884004884004884,
|
|
"grad_norm": 0.36729005263055425,
|
|
"learning_rate": 3.662318999081837e-05,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03816433623433113,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3033.4,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 1.888074888074888,
|
|
"grad_norm": 0.43795223384191606,
|
|
"learning_rate": 3.660059272549852e-05,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06973383575677872,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2346.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.8921448921448922,
|
|
"grad_norm": 0.33790000025819755,
|
|
"learning_rate": 3.657792712302814e-05,
|
|
"loss": 0.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051636915653944016,
|
|
"step": 2325,
|
|
"valid_targets_mean": 4902.1,
|
|
"valid_targets_min": 3640
|
|
},
|
|
{
|
|
"epoch": 1.8962148962148961,
|
|
"grad_norm": 0.3604771168113708,
|
|
"learning_rate": 3.655519327671129e-05,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05189387500286102,
|
|
"step": 2330,
|
|
"valid_targets_mean": 4536.6,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 1.9002849002849003,
|
|
"grad_norm": 0.33562019525922326,
|
|
"learning_rate": 3.6532391280132964e-05,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03787705674767494,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3218.6,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 1.9043549043549044,
|
|
"grad_norm": 0.3743647264168468,
|
|
"learning_rate": 3.650952122715869e-05,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060260578989982605,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3774.6,
|
|
"valid_targets_min": 2945
|
|
},
|
|
{
|
|
"epoch": 1.9084249084249083,
|
|
"grad_norm": 0.37262671797339586,
|
|
"learning_rate": 3.648658321193415e-05,
|
|
"loss": 0.1104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05858001112937927,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3045.4,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 1.9124949124949127,
|
|
"grad_norm": 0.35884300375055905,
|
|
"learning_rate": 3.646357732888482e-05,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04959436506032944,
|
|
"step": 2350,
|
|
"valid_targets_mean": 2492.8,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 1.9165649165649166,
|
|
"grad_norm": 0.3589263199724622,
|
|
"learning_rate": 3.644050367271553e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04746074602007866,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2564.6,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 1.9206349206349205,
|
|
"grad_norm": 0.406490304131688,
|
|
"learning_rate": 3.641736233841012e-05,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046355560421943665,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3002.5,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 1.9247049247049248,
|
|
"grad_norm": 0.5534145172199101,
|
|
"learning_rate": 3.639415342123101e-05,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050700727850198746,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2588.5,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 1.9287749287749287,
|
|
"grad_norm": 0.4908901714271384,
|
|
"learning_rate": 3.637087701671885e-05,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06826741248369217,
|
|
"step": 2370,
|
|
"valid_targets_mean": 1443.8,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 1.9328449328449329,
|
|
"grad_norm": 0.4557072327685435,
|
|
"learning_rate": 3.63475332206921e-05,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061702802777290344,
|
|
"step": 2375,
|
|
"valid_targets_mean": 2283.1,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.936914936914937,
|
|
"grad_norm": 0.41528320209522723,
|
|
"learning_rate": 3.6324122129246616e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052136339247226715,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3202.0,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 1.940984940984941,
|
|
"grad_norm": 0.4546869915497293,
|
|
"learning_rate": 3.630064383875533e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05638446286320686,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2586.9,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 1.945054945054945,
|
|
"grad_norm": 0.4016367333178925,
|
|
"learning_rate": 3.627709844586774e-05,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05390872061252594,
|
|
"step": 2390,
|
|
"valid_targets_mean": 3345.4,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 1.9491249491249492,
|
|
"grad_norm": 0.37713484435403527,
|
|
"learning_rate": 3.6253486047509634e-05,
|
|
"loss": 0.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048263922333717346,
|
|
"step": 2395,
|
|
"valid_targets_mean": 1937.5,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 1.953194953194953,
|
|
"grad_norm": 0.3376638144663447,
|
|
"learning_rate": 3.622980674088258e-05,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05167309567332268,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3302.2,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 1.9572649572649574,
|
|
"grad_norm": 0.3884117732632468,
|
|
"learning_rate": 3.620606062346361e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05166970193386078,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3287.2,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 1.9613349613349613,
|
|
"grad_norm": 0.31443143791227623,
|
|
"learning_rate": 3.618224779300478e-05,
|
|
"loss": 0.0968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040656059980392456,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3800.8,
|
|
"valid_targets_min": 2602
|
|
},
|
|
{
|
|
"epoch": 1.9654049654049655,
|
|
"grad_norm": 0.5020381670968926,
|
|
"learning_rate": 3.6158368347532755e-05,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06920788437128067,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2853.9,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 1.9694749694749696,
|
|
"grad_norm": 0.4185287229781912,
|
|
"learning_rate": 3.613442238534845e-05,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06565561890602112,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3633.9,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 1.9735449735449735,
|
|
"grad_norm": 0.3315436014809084,
|
|
"learning_rate": 3.611041000502659e-05,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057864412665367126,
|
|
"step": 2425,
|
|
"valid_targets_mean": 4244.8,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 1.9776149776149776,
|
|
"grad_norm": 0.3194580707254878,
|
|
"learning_rate": 3.60863313054153e-05,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06427641212940216,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4650.8,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 1.9816849816849818,
|
|
"grad_norm": 0.25904177449049903,
|
|
"learning_rate": 3.6062186385635734e-05,
|
|
"loss": 0.1062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03253565728664398,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4432.1,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 1.9857549857549857,
|
|
"grad_norm": 0.5544926636783111,
|
|
"learning_rate": 3.603797534508162e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13297711312770844,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2023.0,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 1.9898249898249898,
|
|
"grad_norm": 0.3150367876998634,
|
|
"learning_rate": 3.6013698283418896e-05,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04680216312408447,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3241.9,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 1.993894993894994,
|
|
"grad_norm": 0.37009261570296975,
|
|
"learning_rate": 3.598935530058528e-05,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07674941420555115,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3313.4,
|
|
"valid_targets_min": 2308
|
|
},
|
|
{
|
|
"epoch": 1.9979649979649978,
|
|
"grad_norm": 0.31909775388216893,
|
|
"learning_rate": 3.5964946496789836e-05,
|
|
"loss": 0.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05330783128738403,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3463.5,
|
|
"valid_targets_min": 2883
|
|
},
|
|
{
|
|
"epoch": 2.0016280016280015,
|
|
"grad_norm": 0.4880867496018015,
|
|
"learning_rate": 3.5940471972512604e-05,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10746130347251892,
|
|
"step": 2460,
|
|
"valid_targets_mean": 8514.0,
|
|
"valid_targets_min": 6788
|
|
},
|
|
{
|
|
"epoch": 2.005698005698006,
|
|
"grad_norm": 0.4659157360495663,
|
|
"learning_rate": 3.591593182850415e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11680585891008377,
|
|
"step": 2465,
|
|
"valid_targets_mean": 6668.0,
|
|
"valid_targets_min": 5933
|
|
},
|
|
{
|
|
"epoch": 2.0097680097680097,
|
|
"grad_norm": 0.43232390156358325,
|
|
"learning_rate": 3.5891326165785196e-05,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10285025835037231,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4107.0,
|
|
"valid_targets_min": 161
|
|
},
|
|
{
|
|
"epoch": 2.0138380138380136,
|
|
"grad_norm": 0.3634051714444297,
|
|
"learning_rate": 3.586665508564613e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10061359405517578,
|
|
"step": 2475,
|
|
"valid_targets_mean": 7946.0,
|
|
"valid_targets_min": 5596
|
|
},
|
|
{
|
|
"epoch": 2.017908017908018,
|
|
"grad_norm": 0.37783795713600243,
|
|
"learning_rate": 3.5841918689646666e-05,
|
|
"loss": 0.2125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1137465387582779,
|
|
"step": 2480,
|
|
"valid_targets_mean": 7392.8,
|
|
"valid_targets_min": 5629
|
|
},
|
|
{
|
|
"epoch": 2.021978021978022,
|
|
"grad_norm": 0.398790604122284,
|
|
"learning_rate": 3.581711707961539e-05,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1090719997882843,
|
|
"step": 2485,
|
|
"valid_targets_mean": 6879.6,
|
|
"valid_targets_min": 5113
|
|
},
|
|
{
|
|
"epoch": 2.0260480260480263,
|
|
"grad_norm": 0.5235045531846907,
|
|
"learning_rate": 3.579225035764934e-05,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11241519451141357,
|
|
"step": 2490,
|
|
"valid_targets_mean": 6919.5,
|
|
"valid_targets_min": 5080
|
|
},
|
|
{
|
|
"epoch": 2.03011803011803,
|
|
"grad_norm": 0.4208616593976156,
|
|
"learning_rate": 3.576731862611359e-05,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09949975460767746,
|
|
"step": 2495,
|
|
"valid_targets_mean": 6377.0,
|
|
"valid_targets_min": 3939
|
|
},
|
|
{
|
|
"epoch": 2.034188034188034,
|
|
"grad_norm": 0.3829013185412194,
|
|
"learning_rate": 3.5742321987640826e-05,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10645119845867157,
|
|
"step": 2500,
|
|
"valid_targets_mean": 6392.2,
|
|
"valid_targets_min": 4355
|
|
},
|
|
{
|
|
"epoch": 2.0382580382580384,
|
|
"grad_norm": 0.44985938212529025,
|
|
"learning_rate": 3.571726054513093e-05,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12094033509492874,
|
|
"step": 2505,
|
|
"valid_targets_mean": 5196.5,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 2.0423280423280423,
|
|
"grad_norm": 0.34320734656442425,
|
|
"learning_rate": 3.569213440175057e-05,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09449905157089233,
|
|
"step": 2510,
|
|
"valid_targets_mean": 7344.4,
|
|
"valid_targets_min": 5244
|
|
},
|
|
{
|
|
"epoch": 2.0463980463980462,
|
|
"grad_norm": 0.32668719505554467,
|
|
"learning_rate": 3.566694366093272e-05,
|
|
"loss": 0.2047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08982042968273163,
|
|
"step": 2515,
|
|
"valid_targets_mean": 7921.0,
|
|
"valid_targets_min": 5311
|
|
},
|
|
{
|
|
"epoch": 2.0504680504680506,
|
|
"grad_norm": 0.3852739489735939,
|
|
"learning_rate": 3.564168842637631e-05,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10812553018331528,
|
|
"step": 2520,
|
|
"valid_targets_mean": 7931.2,
|
|
"valid_targets_min": 6015
|
|
},
|
|
{
|
|
"epoch": 2.0545380545380545,
|
|
"grad_norm": 0.3845046248072148,
|
|
"learning_rate": 3.561636880204573e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09657958149909973,
|
|
"step": 2525,
|
|
"valid_targets_mean": 7640.8,
|
|
"valid_targets_min": 5248
|
|
},
|
|
{
|
|
"epoch": 2.0586080586080584,
|
|
"grad_norm": 0.37913655017775,
|
|
"learning_rate": 3.559098489217048e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10077941417694092,
|
|
"step": 2530,
|
|
"valid_targets_mean": 7249.6,
|
|
"valid_targets_min": 5217
|
|
},
|
|
{
|
|
"epoch": 2.0626780626780628,
|
|
"grad_norm": 0.3932696577304994,
|
|
"learning_rate": 3.556553680124463e-05,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11539335548877716,
|
|
"step": 2535,
|
|
"valid_targets_mean": 7619.9,
|
|
"valid_targets_min": 5094
|
|
},
|
|
{
|
|
"epoch": 2.0667480667480667,
|
|
"grad_norm": 0.4553738196822326,
|
|
"learning_rate": 3.554002463402651e-05,
|
|
"loss": 0.2039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09798746556043625,
|
|
"step": 2540,
|
|
"valid_targets_mean": 7732.8,
|
|
"valid_targets_min": 5842
|
|
},
|
|
{
|
|
"epoch": 2.070818070818071,
|
|
"grad_norm": 0.3421874470170016,
|
|
"learning_rate": 3.55144484955382e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10603936016559601,
|
|
"step": 2545,
|
|
"valid_targets_mean": 7952.9,
|
|
"valid_targets_min": 5494
|
|
},
|
|
{
|
|
"epoch": 2.074888074888075,
|
|
"grad_norm": 0.411703732721423,
|
|
"learning_rate": 3.5488808491065115e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11069735139608383,
|
|
"step": 2550,
|
|
"valid_targets_mean": 8001.0,
|
|
"valid_targets_min": 5454
|
|
},
|
|
{
|
|
"epoch": 2.078958078958079,
|
|
"grad_norm": 0.4858270959003259,
|
|
"learning_rate": 3.546310472615559e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07974497973918915,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2985.1,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 2.083028083028083,
|
|
"grad_norm": 0.36276165908540825,
|
|
"learning_rate": 3.5437337306620426e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0890931487083435,
|
|
"step": 2560,
|
|
"valid_targets_mean": 6714.6,
|
|
"valid_targets_min": 4917
|
|
},
|
|
{
|
|
"epoch": 2.087098087098087,
|
|
"grad_norm": 0.38710060664879364,
|
|
"learning_rate": 3.5411506338532467e-05,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11066673696041107,
|
|
"step": 2565,
|
|
"valid_targets_mean": 7595.2,
|
|
"valid_targets_min": 5488
|
|
},
|
|
{
|
|
"epoch": 2.091168091168091,
|
|
"grad_norm": 0.42287295340468783,
|
|
"learning_rate": 3.538561192822616e-05,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11034491658210754,
|
|
"step": 2570,
|
|
"valid_targets_mean": 7046.1,
|
|
"valid_targets_min": 5579
|
|
},
|
|
{
|
|
"epoch": 2.0952380952380953,
|
|
"grad_norm": 0.39997010448045195,
|
|
"learning_rate": 3.535965418229709e-05,
|
|
"loss": 0.2113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10264366120100021,
|
|
"step": 2575,
|
|
"valid_targets_mean": 7108.0,
|
|
"valid_targets_min": 4963
|
|
},
|
|
{
|
|
"epoch": 2.0993080993080993,
|
|
"grad_norm": 0.389640401517337,
|
|
"learning_rate": 3.53336332076016e-05,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10438866168260574,
|
|
"step": 2580,
|
|
"valid_targets_mean": 6810.1,
|
|
"valid_targets_min": 5666
|
|
},
|
|
{
|
|
"epoch": 2.1033781033781036,
|
|
"grad_norm": 0.36942338263571345,
|
|
"learning_rate": 3.530754911125631e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09709086269140244,
|
|
"step": 2585,
|
|
"valid_targets_mean": 8068.5,
|
|
"valid_targets_min": 5524
|
|
},
|
|
{
|
|
"epoch": 2.1074481074481075,
|
|
"grad_norm": 0.4068615598584502,
|
|
"learning_rate": 3.528140200063766e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08543336391448975,
|
|
"step": 2590,
|
|
"valid_targets_mean": 6553.8,
|
|
"valid_targets_min": 3621
|
|
},
|
|
{
|
|
"epoch": 2.1115181115181114,
|
|
"grad_norm": 0.363276338830108,
|
|
"learning_rate": 3.525519198338152e-05,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09692707657814026,
|
|
"step": 2595,
|
|
"valid_targets_mean": 6762.5,
|
|
"valid_targets_min": 4968
|
|
},
|
|
{
|
|
"epoch": 2.1155881155881158,
|
|
"grad_norm": 0.40355801888377707,
|
|
"learning_rate": 3.522891916738269e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09964744746685028,
|
|
"step": 2600,
|
|
"valid_targets_mean": 6143.9,
|
|
"valid_targets_min": 4917
|
|
},
|
|
{
|
|
"epoch": 2.1196581196581197,
|
|
"grad_norm": 0.5117847968390805,
|
|
"learning_rate": 3.520258366079451e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14205092191696167,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4583.8,
|
|
"valid_targets_min": 215
|
|
},
|
|
{
|
|
"epoch": 2.1237281237281236,
|
|
"grad_norm": 0.3623495414988248,
|
|
"learning_rate": 3.5176185572028396e-05,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10662650316953659,
|
|
"step": 2610,
|
|
"valid_targets_mean": 7317.1,
|
|
"valid_targets_min": 4948
|
|
},
|
|
{
|
|
"epoch": 2.127798127798128,
|
|
"grad_norm": 0.4038325996304427,
|
|
"learning_rate": 3.514972500975334e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10691466927528381,
|
|
"step": 2615,
|
|
"valid_targets_mean": 6437.6,
|
|
"valid_targets_min": 4510
|
|
},
|
|
{
|
|
"epoch": 2.131868131868132,
|
|
"grad_norm": 0.41453320525991616,
|
|
"learning_rate": 3.512320208289556e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11678023636341095,
|
|
"step": 2620,
|
|
"valid_targets_mean": 7707.9,
|
|
"valid_targets_min": 5796
|
|
},
|
|
{
|
|
"epoch": 2.1359381359381358,
|
|
"grad_norm": 0.3827950421697951,
|
|
"learning_rate": 3.509661690063796e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10019416362047195,
|
|
"step": 2625,
|
|
"valid_targets_mean": 7900.9,
|
|
"valid_targets_min": 5643
|
|
},
|
|
{
|
|
"epoch": 2.14000814000814,
|
|
"grad_norm": 0.4162389900061376,
|
|
"learning_rate": 3.506996957241975e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11353873461484909,
|
|
"step": 2630,
|
|
"valid_targets_mean": 7627.2,
|
|
"valid_targets_min": 5742
|
|
},
|
|
{
|
|
"epoch": 2.144078144078144,
|
|
"grad_norm": 0.37061577197439505,
|
|
"learning_rate": 3.5043260207935964e-05,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12082710862159729,
|
|
"step": 2635,
|
|
"valid_targets_mean": 8725.6,
|
|
"valid_targets_min": 5141
|
|
},
|
|
{
|
|
"epoch": 2.148148148148148,
|
|
"grad_norm": 0.3989344474846749,
|
|
"learning_rate": 3.5016488917137005e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11526183038949966,
|
|
"step": 2640,
|
|
"valid_targets_mean": 6967.8,
|
|
"valid_targets_min": 6088
|
|
},
|
|
{
|
|
"epoch": 2.1522181522181523,
|
|
"grad_norm": 0.3765813295326696,
|
|
"learning_rate": 3.4989655810228185e-05,
|
|
"loss": 0.2105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09672106802463531,
|
|
"step": 2645,
|
|
"valid_targets_mean": 7140.1,
|
|
"valid_targets_min": 4834
|
|
},
|
|
{
|
|
"epoch": 2.156288156288156,
|
|
"grad_norm": 0.4244375123230793,
|
|
"learning_rate": 3.496276099766932e-05,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09745915234088898,
|
|
"step": 2650,
|
|
"valid_targets_mean": 6706.9,
|
|
"valid_targets_min": 5108
|
|
},
|
|
{
|
|
"epoch": 2.1603581603581605,
|
|
"grad_norm": 0.41712309629614364,
|
|
"learning_rate": 3.493580459017419e-05,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1050487756729126,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5931.4,
|
|
"valid_targets_min": 4708
|
|
},
|
|
{
|
|
"epoch": 2.1644281644281644,
|
|
"grad_norm": 0.36084098571878304,
|
|
"learning_rate": 3.4908786698710196e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09638667106628418,
|
|
"step": 2660,
|
|
"valid_targets_mean": 7145.6,
|
|
"valid_targets_min": 5280
|
|
},
|
|
{
|
|
"epoch": 2.1684981684981683,
|
|
"grad_norm": 0.660596763158056,
|
|
"learning_rate": 3.488170743449779e-05,
|
|
"loss": 0.1977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06803116202354431,
|
|
"step": 2665,
|
|
"valid_targets_mean": 1554.9,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 2.1725681725681727,
|
|
"grad_norm": 0.4482747012803121,
|
|
"learning_rate": 3.4854566909010074e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11789162456989288,
|
|
"step": 2670,
|
|
"valid_targets_mean": 7069.5,
|
|
"valid_targets_min": 4765
|
|
},
|
|
{
|
|
"epoch": 2.1766381766381766,
|
|
"grad_norm": 0.3901951315778572,
|
|
"learning_rate": 3.482736523397237e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10189478099346161,
|
|
"step": 2675,
|
|
"valid_targets_mean": 6193.4,
|
|
"valid_targets_min": 5587
|
|
},
|
|
{
|
|
"epoch": 2.1807081807081805,
|
|
"grad_norm": 0.42745988843150207,
|
|
"learning_rate": 3.4800102521361686e-05,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10757580399513245,
|
|
"step": 2680,
|
|
"valid_targets_mean": 6612.2,
|
|
"valid_targets_min": 4894
|
|
},
|
|
{
|
|
"epoch": 2.184778184778185,
|
|
"grad_norm": 0.36920716366061657,
|
|
"learning_rate": 3.477277888340631e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09517497569322586,
|
|
"step": 2685,
|
|
"valid_targets_mean": 6605.9,
|
|
"valid_targets_min": 3795
|
|
},
|
|
{
|
|
"epoch": 2.1888481888481888,
|
|
"grad_norm": 0.3841241265624308,
|
|
"learning_rate": 3.474539443258534e-05,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10367844998836517,
|
|
"step": 2690,
|
|
"valid_targets_mean": 7186.8,
|
|
"valid_targets_min": 4890
|
|
},
|
|
{
|
|
"epoch": 2.192918192918193,
|
|
"grad_norm": 0.412689694061922,
|
|
"learning_rate": 3.47179492816282e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10491199791431427,
|
|
"step": 2695,
|
|
"valid_targets_mean": 6544.4,
|
|
"valid_targets_min": 4615
|
|
},
|
|
{
|
|
"epoch": 2.196988196988197,
|
|
"grad_norm": 0.4079072132791562,
|
|
"learning_rate": 3.4690443543514195e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1100861206650734,
|
|
"step": 2700,
|
|
"valid_targets_mean": 6339.2,
|
|
"valid_targets_min": 4798
|
|
},
|
|
{
|
|
"epoch": 2.201058201058201,
|
|
"grad_norm": 0.49329598417369225,
|
|
"learning_rate": 3.466287733147204e-05,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11788356304168701,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4558.8,
|
|
"valid_targets_min": 148
|
|
},
|
|
{
|
|
"epoch": 2.2051282051282053,
|
|
"grad_norm": 0.4145578788267481,
|
|
"learning_rate": 3.463525075897939e-05,
|
|
"loss": 0.205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11438199877738953,
|
|
"step": 2710,
|
|
"valid_targets_mean": 6807.9,
|
|
"valid_targets_min": 5164
|
|
},
|
|
{
|
|
"epoch": 2.209198209198209,
|
|
"grad_norm": 0.4801367983090269,
|
|
"learning_rate": 3.4607563939762376e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10205637663602829,
|
|
"step": 2715,
|
|
"valid_targets_mean": 7011.8,
|
|
"valid_targets_min": 4974
|
|
},
|
|
{
|
|
"epoch": 2.213268213268213,
|
|
"grad_norm": 0.3925286843177837,
|
|
"learning_rate": 3.4579816987795153e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09270621836185455,
|
|
"step": 2720,
|
|
"valid_targets_mean": 5794.2,
|
|
"valid_targets_min": 4809
|
|
},
|
|
{
|
|
"epoch": 2.2173382173382175,
|
|
"grad_norm": 0.407243764629195,
|
|
"learning_rate": 3.45520100172994e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09255869686603546,
|
|
"step": 2725,
|
|
"valid_targets_mean": 6401.8,
|
|
"valid_targets_min": 4842
|
|
},
|
|
{
|
|
"epoch": 2.2214082214082214,
|
|
"grad_norm": 0.42364061860080554,
|
|
"learning_rate": 3.452414314274386e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10335355252027512,
|
|
"step": 2730,
|
|
"valid_targets_mean": 6182.1,
|
|
"valid_targets_min": 5116
|
|
},
|
|
{
|
|
"epoch": 2.2254782254782253,
|
|
"grad_norm": 0.3622471152141575,
|
|
"learning_rate": 3.449621647884389e-05,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09193432331085205,
|
|
"step": 2735,
|
|
"valid_targets_mean": 6165.1,
|
|
"valid_targets_min": 5764
|
|
},
|
|
{
|
|
"epoch": 2.2295482295482296,
|
|
"grad_norm": 0.7288165001567957,
|
|
"learning_rate": 3.446823014056096e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08235251158475876,
|
|
"step": 2740,
|
|
"valid_targets_mean": 1428.2,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 2.2336182336182335,
|
|
"grad_norm": 0.7611801465102082,
|
|
"learning_rate": 3.444018424310221e-05,
|
|
"loss": 0.2024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0987827330827713,
|
|
"step": 2745,
|
|
"valid_targets_mean": 1663.0,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 2.237688237688238,
|
|
"grad_norm": 0.7650679543370236,
|
|
"learning_rate": 3.441207890191993e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09653256833553314,
|
|
"step": 2750,
|
|
"valid_targets_mean": 1476.5,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.241758241758242,
|
|
"grad_norm": 0.83957302169369,
|
|
"learning_rate": 3.438391423271115e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0952417328953743,
|
|
"step": 2755,
|
|
"valid_targets_mean": 1478.1,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 2.2458282458282457,
|
|
"grad_norm": 0.8264299057033202,
|
|
"learning_rate": 3.435569035141708e-05,
|
|
"loss": 0.1963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11602550745010376,
|
|
"step": 2760,
|
|
"valid_targets_mean": 1640.6,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 2.24989824989825,
|
|
"grad_norm": 0.8302853801654866,
|
|
"learning_rate": 3.4327407374222726e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10423608124256134,
|
|
"step": 2765,
|
|
"valid_targets_mean": 1709.0,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 2.253968253968254,
|
|
"grad_norm": 0.7943422274719701,
|
|
"learning_rate": 3.429906541755633e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07585322856903076,
|
|
"step": 2770,
|
|
"valid_targets_mean": 1157.9,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 2.258038258038258,
|
|
"grad_norm": 0.7565321550178822,
|
|
"learning_rate": 3.427066459808896e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09391198307275772,
|
|
"step": 2775,
|
|
"valid_targets_mean": 1553.6,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 2.262108262108262,
|
|
"grad_norm": 0.7572888191925212,
|
|
"learning_rate": 3.4242205032733964e-05,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.089296355843544,
|
|
"step": 2780,
|
|
"valid_targets_mean": 1538.1,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 2.266178266178266,
|
|
"grad_norm": 0.7064148997241197,
|
|
"learning_rate": 3.421368683864653e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05855751782655716,
|
|
"step": 2785,
|
|
"valid_targets_mean": 1250.6,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 2.2702482702482705,
|
|
"grad_norm": 0.8701439644199427,
|
|
"learning_rate": 3.41851101332232e-05,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1104547530412674,
|
|
"step": 2790,
|
|
"valid_targets_mean": 1596.2,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 2.2743182743182744,
|
|
"grad_norm": 0.7800184045885545,
|
|
"learning_rate": 3.4156475034101366e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09086226671934128,
|
|
"step": 2795,
|
|
"valid_targets_mean": 1646.5,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 2.2783882783882783,
|
|
"grad_norm": 0.7543167696467623,
|
|
"learning_rate": 3.4127781659158834e-05,
|
|
"loss": 0.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09387262165546417,
|
|
"step": 2800,
|
|
"valid_targets_mean": 1448.9,
|
|
"valid_targets_min": 570
|
|
},
|
|
{
|
|
"epoch": 2.2824582824582826,
|
|
"grad_norm": 0.7308404855311503,
|
|
"learning_rate": 3.409903012651327e-05,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06849798560142517,
|
|
"step": 2805,
|
|
"valid_targets_mean": 1164.1,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 2.2865282865282865,
|
|
"grad_norm": 0.8596378098769865,
|
|
"learning_rate": 3.407022055452176e-05,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09337285161018372,
|
|
"step": 2810,
|
|
"valid_targets_mean": 1338.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 2.2905982905982905,
|
|
"grad_norm": 0.7766417971092897,
|
|
"learning_rate": 3.404135306178032e-05,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09825626015663147,
|
|
"step": 2815,
|
|
"valid_targets_mean": 1632.8,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 2.294668294668295,
|
|
"grad_norm": 0.7789615600939394,
|
|
"learning_rate": 3.401242776712339e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12328001856803894,
|
|
"step": 2820,
|
|
"valid_targets_mean": 1806.5,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 2.2987382987382987,
|
|
"grad_norm": 0.7920477301948577,
|
|
"learning_rate": 3.3983444789623356e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08308230340480804,
|
|
"step": 2825,
|
|
"valid_targets_mean": 1289.9,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.3028083028083026,
|
|
"grad_norm": 0.8975491925387518,
|
|
"learning_rate": 3.395440424859007e-05,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06022972613573074,
|
|
"step": 2830,
|
|
"valid_targets_mean": 1354.1,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 2.306878306878307,
|
|
"grad_norm": 0.7130180704385317,
|
|
"learning_rate": 3.3925306263570316e-05,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08599118888378143,
|
|
"step": 2835,
|
|
"valid_targets_mean": 1461.9,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 2.310948310948311,
|
|
"grad_norm": 0.7039318125811921,
|
|
"learning_rate": 3.389615095434739e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07211598753929138,
|
|
"step": 2840,
|
|
"valid_targets_mean": 1225.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 2.315018315018315,
|
|
"grad_norm": 0.9104668810680625,
|
|
"learning_rate": 3.386693844094055e-05,
|
|
"loss": 0.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09691354632377625,
|
|
"step": 2845,
|
|
"valid_targets_mean": 1282.2,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 2.319088319088319,
|
|
"grad_norm": 0.7159321952742587,
|
|
"learning_rate": 3.3837668843604506e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09997346997261047,
|
|
"step": 2850,
|
|
"valid_targets_mean": 1634.8,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 2.323158323158323,
|
|
"grad_norm": 0.7393893216811349,
|
|
"learning_rate": 3.380834228282901e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09477157890796661,
|
|
"step": 2855,
|
|
"valid_targets_mean": 1615.9,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 2.3272283272283274,
|
|
"grad_norm": 0.6968353858472797,
|
|
"learning_rate": 3.377895887933828e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09568726271390915,
|
|
"step": 2860,
|
|
"valid_targets_mean": 1848.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 2.3312983312983313,
|
|
"grad_norm": 0.792449089844968,
|
|
"learning_rate": 3.374951875409052e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09505010396242142,
|
|
"step": 2865,
|
|
"valid_targets_mean": 1570.6,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 2.335368335368335,
|
|
"grad_norm": 0.8817699677604942,
|
|
"learning_rate": 3.372002202827744e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10640811175107956,
|
|
"step": 2870,
|
|
"valid_targets_mean": 1697.0,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 2.3394383394383396,
|
|
"grad_norm": 0.8000585484054449,
|
|
"learning_rate": 3.369046882332376e-05,
|
|
"loss": 0.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0811472162604332,
|
|
"step": 2875,
|
|
"valid_targets_mean": 1307.1,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 2.3435083435083435,
|
|
"grad_norm": 0.7232841587277343,
|
|
"learning_rate": 3.36608592608867e-05,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09704796224832535,
|
|
"step": 2880,
|
|
"valid_targets_mean": 1596.6,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 2.347578347578348,
|
|
"grad_norm": 0.75091103368943,
|
|
"learning_rate": 3.363119346285546e-05,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1288718581199646,
|
|
"step": 2885,
|
|
"valid_targets_mean": 1817.5,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 2.3516483516483517,
|
|
"grad_norm": 0.7628714260858689,
|
|
"learning_rate": 3.360147155135074e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10485200583934784,
|
|
"step": 2890,
|
|
"valid_targets_mean": 1822.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 2.3557183557183556,
|
|
"grad_norm": 0.7313328666663071,
|
|
"learning_rate": 3.3571693648724255e-05,
|
|
"loss": 0.178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09335388988256454,
|
|
"step": 2895,
|
|
"valid_targets_mean": 1542.4,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 2.35978835978836,
|
|
"grad_norm": 0.7976060703436042,
|
|
"learning_rate": 3.354185987755818e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10187899321317673,
|
|
"step": 2900,
|
|
"valid_targets_mean": 1459.5,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 2.363858363858364,
|
|
"grad_norm": 0.7155507200782179,
|
|
"learning_rate": 3.35119703606647e-05,
|
|
"loss": 0.1774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09475696831941605,
|
|
"step": 2905,
|
|
"valid_targets_mean": 1581.1,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 2.367928367928368,
|
|
"grad_norm": 0.7284814264168356,
|
|
"learning_rate": 3.3482025221085476e-05,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07198642194271088,
|
|
"step": 2910,
|
|
"valid_targets_mean": 1342.1,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 2.371998371998372,
|
|
"grad_norm": 0.7533633677117174,
|
|
"learning_rate": 3.345202458209112e-05,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08188924193382263,
|
|
"step": 2915,
|
|
"valid_targets_mean": 1269.1,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 2.376068376068376,
|
|
"grad_norm": 0.7522234550738248,
|
|
"learning_rate": 3.342196856718074e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0987510234117508,
|
|
"step": 2920,
|
|
"valid_targets_mean": 1510.1,
|
|
"valid_targets_min": 966
|
|
},
|
|
{
|
|
"epoch": 2.38013838013838,
|
|
"grad_norm": 0.7796084834753764,
|
|
"learning_rate": 3.339185730008138e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07946356385946274,
|
|
"step": 2925,
|
|
"valid_targets_mean": 1327.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 2.3842083842083843,
|
|
"grad_norm": 1.1234420570664228,
|
|
"learning_rate": 3.336169090474756e-05,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07645580172538757,
|
|
"step": 2930,
|
|
"valid_targets_mean": 1353.2,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 2.3882783882783882,
|
|
"grad_norm": 0.8228399599394453,
|
|
"learning_rate": 3.333146950536069e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06878844648599625,
|
|
"step": 2935,
|
|
"valid_targets_mean": 1142.5,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 2.392348392348392,
|
|
"grad_norm": 0.761793565454502,
|
|
"learning_rate": 3.330119322632866e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10029375553131104,
|
|
"step": 2940,
|
|
"valid_targets_mean": 1849.9,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 2.3964183964183965,
|
|
"grad_norm": 0.7748556030785365,
|
|
"learning_rate": 3.327086219228525e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09164921939373016,
|
|
"step": 2945,
|
|
"valid_targets_mean": 1599.6,
|
|
"valid_targets_min": 573
|
|
},
|
|
{
|
|
"epoch": 2.4004884004884004,
|
|
"grad_norm": 0.6823141885908128,
|
|
"learning_rate": 3.324047652808963e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10002493113279343,
|
|
"step": 2950,
|
|
"valid_targets_mean": 1994.9,
|
|
"valid_targets_min": 1102
|
|
},
|
|
{
|
|
"epoch": 2.4045584045584047,
|
|
"grad_norm": 0.7203959180089063,
|
|
"learning_rate": 3.321003635882588e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06791776418685913,
|
|
"step": 2955,
|
|
"valid_targets_mean": 1239.2,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 2.4086284086284087,
|
|
"grad_norm": 0.8522458805730017,
|
|
"learning_rate": 3.3179541809802436e-05,
|
|
"loss": 0.178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0885857418179512,
|
|
"step": 2960,
|
|
"valid_targets_mean": 1467.5,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 2.4126984126984126,
|
|
"grad_norm": 0.8485280697833679,
|
|
"learning_rate": 3.31489930065516e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09528080374002457,
|
|
"step": 2965,
|
|
"valid_targets_mean": 1300.9,
|
|
"valid_targets_min": 857
|
|
},
|
|
{
|
|
"epoch": 2.416768416768417,
|
|
"grad_norm": 0.702328495769866,
|
|
"learning_rate": 3.311839007482902e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09654178470373154,
|
|
"step": 2970,
|
|
"valid_targets_mean": 1389.8,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 2.420838420838421,
|
|
"grad_norm": 0.7498418061623674,
|
|
"learning_rate": 3.308773314061315e-05,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0779038816690445,
|
|
"step": 2975,
|
|
"valid_targets_mean": 1303.8,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 2.4249084249084247,
|
|
"grad_norm": 0.7339750048913246,
|
|
"learning_rate": 3.3057022330104764e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0759783387184143,
|
|
"step": 2980,
|
|
"valid_targets_mean": 1220.1,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.428978428978429,
|
|
"grad_norm": 0.7678673826329332,
|
|
"learning_rate": 3.30262577697264e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07477933913469315,
|
|
"step": 2985,
|
|
"valid_targets_mean": 1262.0,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 2.433048433048433,
|
|
"grad_norm": 0.7882443666752398,
|
|
"learning_rate": 3.299543958612188e-05,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10648937523365021,
|
|
"step": 2990,
|
|
"valid_targets_mean": 1615.0,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 2.4371184371184373,
|
|
"grad_norm": 0.7375257054384906,
|
|
"learning_rate": 3.2964567906155775e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10215851664543152,
|
|
"step": 2995,
|
|
"valid_targets_mean": 1806.5,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 2.4411884411884412,
|
|
"grad_norm": 0.7644653496961722,
|
|
"learning_rate": 3.293364285691284e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09152808785438538,
|
|
"step": 3000,
|
|
"valid_targets_mean": 1627.6,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 2.445258445258445,
|
|
"grad_norm": 0.836057897371144,
|
|
"learning_rate": 3.290266456569756e-05,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12586501240730286,
|
|
"step": 3005,
|
|
"valid_targets_mean": 1822.9,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 2.4493284493284495,
|
|
"grad_norm": 0.7684573774704392,
|
|
"learning_rate": 3.2871633160033596e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11305080354213715,
|
|
"step": 3010,
|
|
"valid_targets_mean": 1848.9,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 2.4533984533984534,
|
|
"grad_norm": 0.7722426718287829,
|
|
"learning_rate": 3.2840548767663226e-05,
|
|
"loss": 0.1766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11125504225492477,
|
|
"step": 3015,
|
|
"valid_targets_mean": 1852.4,
|
|
"valid_targets_min": 809
|
|
},
|
|
{
|
|
"epoch": 2.4574684574684573,
|
|
"grad_norm": 0.8693277147754886,
|
|
"learning_rate": 3.2809411516546876e-05,
|
|
"loss": 0.1758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0825982391834259,
|
|
"step": 3020,
|
|
"valid_targets_mean": 1375.5,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 2.4615384615384617,
|
|
"grad_norm": 0.8569411244780402,
|
|
"learning_rate": 3.2778221534862554e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08256229013204575,
|
|
"step": 3025,
|
|
"valid_targets_mean": 1338.8,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 2.4656084656084656,
|
|
"grad_norm": 0.8496633562930437,
|
|
"learning_rate": 3.274697895100536e-05,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11016149818897247,
|
|
"step": 3030,
|
|
"valid_targets_mean": 1778.8,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 2.4696784696784695,
|
|
"grad_norm": 0.7550200846250567,
|
|
"learning_rate": 3.2715683893586904e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08121539652347565,
|
|
"step": 3035,
|
|
"valid_targets_mean": 1600.8,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 2.473748473748474,
|
|
"grad_norm": 0.7407812428035063,
|
|
"learning_rate": 3.2684336491434814e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09471592307090759,
|
|
"step": 3040,
|
|
"valid_targets_mean": 1474.5,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 2.4778184778184777,
|
|
"grad_norm": 1.167858594983234,
|
|
"learning_rate": 3.2652936873592206e-05,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09139646589756012,
|
|
"step": 3045,
|
|
"valid_targets_mean": 1577.2,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 2.4818884818884817,
|
|
"grad_norm": 0.7675583595322624,
|
|
"learning_rate": 3.262148516931714e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10131756961345673,
|
|
"step": 3050,
|
|
"valid_targets_mean": 1512.1,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 2.485958485958486,
|
|
"grad_norm": 0.7516413523459232,
|
|
"learning_rate": 3.25899815080821e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07350552827119827,
|
|
"step": 3055,
|
|
"valid_targets_mean": 1288.2,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 2.49002849002849,
|
|
"grad_norm": 0.9663636034775042,
|
|
"learning_rate": 3.2558426019573435e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0857287123799324,
|
|
"step": 3060,
|
|
"valid_targets_mean": 1328.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.4940984940984943,
|
|
"grad_norm": 0.7525509366960841,
|
|
"learning_rate": 3.2526818833690855e-05,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10586247593164444,
|
|
"step": 3065,
|
|
"valid_targets_mean": 1542.5,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 2.498168498168498,
|
|
"grad_norm": 0.8552319935455616,
|
|
"learning_rate": 3.2495160080546895e-05,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09937754273414612,
|
|
"step": 3070,
|
|
"valid_targets_mean": 1215.6,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 2.502238502238502,
|
|
"grad_norm": 0.7825010890917209,
|
|
"learning_rate": 3.246344989046635e-05,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07913076877593994,
|
|
"step": 3075,
|
|
"valid_targets_mean": 1222.6,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 2.5063085063085064,
|
|
"grad_norm": 0.7499645512602379,
|
|
"learning_rate": 3.243168839398576e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09814945608377457,
|
|
"step": 3080,
|
|
"valid_targets_mean": 1850.6,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 2.5103785103785103,
|
|
"grad_norm": 0.7240473408886264,
|
|
"learning_rate": 3.239987572185288e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06967031210660934,
|
|
"step": 3085,
|
|
"valid_targets_mean": 1265.5,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 2.5144485144485147,
|
|
"grad_norm": 0.7723805357606571,
|
|
"learning_rate": 3.2368012005026136e-05,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08349703252315521,
|
|
"step": 3090,
|
|
"valid_targets_mean": 1388.0,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 2.5185185185185186,
|
|
"grad_norm": 0.740778910182572,
|
|
"learning_rate": 3.233609737467407e-05,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10719085484743118,
|
|
"step": 3095,
|
|
"valid_targets_mean": 1813.6,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 2.5225885225885225,
|
|
"grad_norm": 0.7925251844992214,
|
|
"learning_rate": 3.2304131962174804e-05,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0703994482755661,
|
|
"step": 3100,
|
|
"valid_targets_mean": 1114.6,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 2.526658526658527,
|
|
"grad_norm": 0.7739162778424586,
|
|
"learning_rate": 3.227211589911554e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06456971168518066,
|
|
"step": 3105,
|
|
"valid_targets_mean": 1194.5,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 2.5307285307285308,
|
|
"grad_norm": 0.6811693756020172,
|
|
"learning_rate": 3.224004931729195e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0884883850812912,
|
|
"step": 3110,
|
|
"valid_targets_mean": 1756.5,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 2.5347985347985347,
|
|
"grad_norm": 1.2605982496459476,
|
|
"learning_rate": 3.220793234870769e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10120239853858948,
|
|
"step": 3115,
|
|
"valid_targets_mean": 1554.9,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 2.538868538868539,
|
|
"grad_norm": 0.7456047283707952,
|
|
"learning_rate": 3.217576512557383e-05,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07132766395807266,
|
|
"step": 3120,
|
|
"valid_targets_mean": 1214.6,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 2.542938542938543,
|
|
"grad_norm": 0.7668765276856079,
|
|
"learning_rate": 3.214354778030831e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10113206505775452,
|
|
"step": 3125,
|
|
"valid_targets_mean": 1955.1,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 2.547008547008547,
|
|
"grad_norm": 0.6688254932482489,
|
|
"learning_rate": 3.211128044553542e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08862347900867462,
|
|
"step": 3130,
|
|
"valid_targets_mean": 1758.6,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 2.551078551078551,
|
|
"grad_norm": 0.7105824934530232,
|
|
"learning_rate": 3.2078963254085186e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09190790355205536,
|
|
"step": 3135,
|
|
"valid_targets_mean": 1711.5,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 2.555148555148555,
|
|
"grad_norm": 1.574075103571208,
|
|
"learning_rate": 3.2046596338992934e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08651512861251831,
|
|
"step": 3140,
|
|
"valid_targets_mean": 1489.6,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 2.559218559218559,
|
|
"grad_norm": 0.7171578485312335,
|
|
"learning_rate": 3.201417983349865e-05,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07850717753171921,
|
|
"step": 3145,
|
|
"valid_targets_mean": 1572.4,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 2.5632885632885634,
|
|
"grad_norm": 0.784020145965083,
|
|
"learning_rate": 3.198171387104645e-05,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09676879644393921,
|
|
"step": 3150,
|
|
"valid_targets_mean": 1500.0,
|
|
"valid_targets_min": 1043
|
|
},
|
|
{
|
|
"epoch": 2.5673585673585673,
|
|
"grad_norm": 0.7023031187317561,
|
|
"learning_rate": 3.194919858528405e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10829615592956543,
|
|
"step": 3155,
|
|
"valid_targets_mean": 1774.9,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 0.7266495375892049,
|
|
"learning_rate": 3.191663411006222e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09090869128704071,
|
|
"step": 3160,
|
|
"valid_targets_mean": 1547.1,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 2.5754985754985755,
|
|
"grad_norm": 0.7082754020615566,
|
|
"learning_rate": 3.1884020579434216e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09622791409492493,
|
|
"step": 3165,
|
|
"valid_targets_mean": 1769.4,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 2.5795685795685794,
|
|
"grad_norm": 0.7671758979055356,
|
|
"learning_rate": 3.1851358127655214e-05,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09749025106430054,
|
|
"step": 3170,
|
|
"valid_targets_mean": 1540.1,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 2.583638583638584,
|
|
"grad_norm": 0.646884294048327,
|
|
"learning_rate": 3.1818646889181815e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09275157749652863,
|
|
"step": 3175,
|
|
"valid_targets_mean": 2228.6,
|
|
"valid_targets_min": 1393
|
|
},
|
|
{
|
|
"epoch": 2.5877085877085877,
|
|
"grad_norm": 0.43091544980003527,
|
|
"learning_rate": 3.1785886998671406e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07351458072662354,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3981.9,
|
|
"valid_targets_min": 3545
|
|
},
|
|
{
|
|
"epoch": 2.591778591778592,
|
|
"grad_norm": 0.40608611395729444,
|
|
"learning_rate": 3.1753078590981697e-05,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053135018795728683,
|
|
"step": 3185,
|
|
"valid_targets_mean": 2837.8,
|
|
"valid_targets_min": 1024
|
|
},
|
|
{
|
|
"epoch": 2.595848595848596,
|
|
"grad_norm": 0.2970617855486435,
|
|
"learning_rate": 3.1720221801170076e-05,
|
|
"loss": 0.0971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029508689418435097,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3453.5,
|
|
"valid_targets_min": 3026
|
|
},
|
|
{
|
|
"epoch": 2.5999185999186,
|
|
"grad_norm": 0.4040679062651357,
|
|
"learning_rate": 3.1687316764493145e-05,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06386222690343857,
|
|
"step": 3195,
|
|
"valid_targets_mean": 3272.8,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 2.603988603988604,
|
|
"grad_norm": 0.42140297889686756,
|
|
"learning_rate": 3.165436361640608e-05,
|
|
"loss": 0.0958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06314370036125183,
|
|
"step": 3200,
|
|
"valid_targets_mean": 3111.5,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 2.608058608058608,
|
|
"grad_norm": 0.4318510588456392,
|
|
"learning_rate": 3.162136249256214e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06179654225707054,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2179.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 2.612128612128612,
|
|
"grad_norm": 0.6151866793044011,
|
|
"learning_rate": 3.158831352881204e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06750689446926117,
|
|
"step": 3210,
|
|
"valid_targets_mean": 2128.4,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.6161986161986164,
|
|
"grad_norm": 0.4023184284236594,
|
|
"learning_rate": 3.1555216861203466e-05,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04353209584951401,
|
|
"step": 3215,
|
|
"valid_targets_mean": 1964.0,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 2.6202686202686203,
|
|
"grad_norm": 0.4367154826598129,
|
|
"learning_rate": 3.1522072625980466e-05,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07076150923967361,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2673.8,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 2.624338624338624,
|
|
"grad_norm": 0.24902381526805464,
|
|
"learning_rate": 3.1488880959582905e-05,
|
|
"loss": 0.1221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03554770350456238,
|
|
"step": 3225,
|
|
"valid_targets_mean": 4407.2,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 2.6284086284086285,
|
|
"grad_norm": 0.2979605188474624,
|
|
"learning_rate": 3.14556419986459e-05,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05047299340367317,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3491.2,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 2.6324786324786325,
|
|
"grad_norm": 0.3571486698231609,
|
|
"learning_rate": 3.142235587999924e-05,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05532723292708397,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2983.9,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 2.6365486365486364,
|
|
"grad_norm": 0.4898779952522662,
|
|
"learning_rate": 3.138902274066688e-05,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058787234127521515,
|
|
"step": 3240,
|
|
"valid_targets_mean": 2018.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 2.6406186406186407,
|
|
"grad_norm": 0.3257155158788977,
|
|
"learning_rate": 3.13556427178663e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05110851302742958,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3854.4,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 2.6446886446886446,
|
|
"grad_norm": 0.34560277752942015,
|
|
"learning_rate": 3.1322215949008e-05,
|
|
"loss": 0.093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04530660808086395,
|
|
"step": 3250,
|
|
"valid_targets_mean": 2544.9,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 2.6487586487586485,
|
|
"grad_norm": 0.39091465479047305,
|
|
"learning_rate": 3.1288742571694905e-05,
|
|
"loss": 0.1043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036396391689777374,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3094.4,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 2.652828652828653,
|
|
"grad_norm": 0.34705120235384473,
|
|
"learning_rate": 3.1255222723721815e-05,
|
|
"loss": 0.0876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045067206025123596,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3285.0,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 2.656898656898657,
|
|
"grad_norm": 0.537067848663477,
|
|
"learning_rate": 3.12216565430748e-05,
|
|
"loss": 0.1162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0710548609495163,
|
|
"step": 3265,
|
|
"valid_targets_mean": 1575.8,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 2.6609686609686607,
|
|
"grad_norm": 0.39399172193475046,
|
|
"learning_rate": 3.118804416793069e-05,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04866083711385727,
|
|
"step": 3270,
|
|
"valid_targets_mean": 2609.9,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 2.665038665038665,
|
|
"grad_norm": 0.4293292007143029,
|
|
"learning_rate": 3.115438573665649e-05,
|
|
"loss": 0.12,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055250879377126694,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3045.1,
|
|
"valid_targets_min": 2224
|
|
},
|
|
{
|
|
"epoch": 2.669108669108669,
|
|
"grad_norm": 0.39199172279612116,
|
|
"learning_rate": 3.112068138780876e-05,
|
|
"loss": 0.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041924867779016495,
|
|
"step": 3280,
|
|
"valid_targets_mean": 2729.9,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 2.6731786731786733,
|
|
"grad_norm": 0.36149115315511404,
|
|
"learning_rate": 3.108693126013308e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056219682097435,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3708.0,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 2.677248677248677,
|
|
"grad_norm": 0.39693264903199954,
|
|
"learning_rate": 3.105313549256352e-05,
|
|
"loss": 0.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0479307621717453,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3069.4,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 2.6813186813186816,
|
|
"grad_norm": 0.6370107315415748,
|
|
"learning_rate": 3.1019294224222015e-05,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06845805794000626,
|
|
"step": 3295,
|
|
"valid_targets_mean": 1417.6,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.6853886853886855,
|
|
"grad_norm": 0.6791334033335826,
|
|
"learning_rate": 3.098540759441778e-05,
|
|
"loss": 0.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0740579143166542,
|
|
"step": 3300,
|
|
"valid_targets_mean": 1554.2,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.6894586894586894,
|
|
"grad_norm": 0.40157879779417277,
|
|
"learning_rate": 3.0951475742646784e-05,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05114832520484924,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3759.4,
|
|
"valid_targets_min": 2808
|
|
},
|
|
{
|
|
"epoch": 2.6935286935286937,
|
|
"grad_norm": 0.48697071062687214,
|
|
"learning_rate": 3.0917498808591154e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15444746613502502,
|
|
"step": 3310,
|
|
"valid_targets_mean": 2962.8,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 2.6975986975986976,
|
|
"grad_norm": 0.35131488324463145,
|
|
"learning_rate": 3.088347693211861e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05616980791091919,
|
|
"step": 3315,
|
|
"valid_targets_mean": 4014.0,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 2.7016687016687015,
|
|
"grad_norm": 0.9395914594142927,
|
|
"learning_rate": 3.084941025328185e-05,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06727755814790726,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2568.0,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 2.705738705738706,
|
|
"grad_norm": 0.6735134741940402,
|
|
"learning_rate": 3.081529891231802e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18474054336547852,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2774.0,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 2.70980870980871,
|
|
"grad_norm": 0.3312285374873109,
|
|
"learning_rate": 3.078114304964814e-05,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045162081718444824,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4021.2,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 2.7138787138787137,
|
|
"grad_norm": 0.37157549346068147,
|
|
"learning_rate": 3.0746942805876474e-05,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07260533422231674,
|
|
"step": 3335,
|
|
"valid_targets_mean": 4983.9,
|
|
"valid_targets_min": 2733
|
|
},
|
|
{
|
|
"epoch": 2.717948717948718,
|
|
"grad_norm": 0.3681236408727857,
|
|
"learning_rate": 3.071269832178999e-05,
|
|
"loss": 0.1073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06341656297445297,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4406.2,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 2.722018722018722,
|
|
"grad_norm": 0.30039279390137386,
|
|
"learning_rate": 3.0678409738357785e-05,
|
|
"loss": 0.1049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0368649959564209,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3177.5,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 2.726088726088726,
|
|
"grad_norm": 0.36018710976640456,
|
|
"learning_rate": 3.0644077196730494e-05,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035121314227581024,
|
|
"step": 3350,
|
|
"valid_targets_mean": 2699.2,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 2.7301587301587302,
|
|
"grad_norm": 0.4353275473221667,
|
|
"learning_rate": 3.060970083823969e-05,
|
|
"loss": 0.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06210782751441002,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3400.1,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 2.734228734228734,
|
|
"grad_norm": 0.482149801452687,
|
|
"learning_rate": 3.057528080439734e-05,
|
|
"loss": 0.1012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05291946232318878,
|
|
"step": 3360,
|
|
"valid_targets_mean": 2561.8,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 2.738298738298738,
|
|
"grad_norm": 0.41734213557928257,
|
|
"learning_rate": 3.054081723689518e-05,
|
|
"loss": 0.0863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060540057718753815,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3014.8,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 2.7423687423687424,
|
|
"grad_norm": 0.5192547449781431,
|
|
"learning_rate": 3.050631027760418e-05,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06714431941509247,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3569.5,
|
|
"valid_targets_min": 3170
|
|
},
|
|
{
|
|
"epoch": 2.7464387464387463,
|
|
"grad_norm": 0.35017994375089784,
|
|
"learning_rate": 3.0471760068573926e-05,
|
|
"loss": 0.0779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04404710233211517,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3777.1,
|
|
"valid_targets_min": 3122
|
|
},
|
|
{
|
|
"epoch": 2.7505087505087507,
|
|
"grad_norm": 0.3637498254659091,
|
|
"learning_rate": 3.0437166752032027e-05,
|
|
"loss": 0.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05123971402645111,
|
|
"step": 3380,
|
|
"valid_targets_mean": 4054.6,
|
|
"valid_targets_min": 3511
|
|
},
|
|
{
|
|
"epoch": 2.7545787545787546,
|
|
"grad_norm": 0.3404345345130886,
|
|
"learning_rate": 3.0402530470383573e-05,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051291994750499725,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3485.4,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 2.758648758648759,
|
|
"grad_norm": 0.46398708542556194,
|
|
"learning_rate": 3.0367851366210507e-05,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05058937892317772,
|
|
"step": 3390,
|
|
"valid_targets_mean": 2089.2,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 2.762718762718763,
|
|
"grad_norm": 0.34745051162795376,
|
|
"learning_rate": 3.0333129582271043e-05,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04806285351514816,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3405.9,
|
|
"valid_targets_min": 1196
|
|
},
|
|
{
|
|
"epoch": 2.7667887667887667,
|
|
"grad_norm": 0.4972495007269904,
|
|
"learning_rate": 3.029836526149911e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058603860437870026,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2210.1,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 2.770858770858771,
|
|
"grad_norm": 0.6469804675168275,
|
|
"learning_rate": 3.0263558547003734e-05,
|
|
"loss": 0.1165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08365291357040405,
|
|
"step": 3405,
|
|
"valid_targets_mean": 1888.6,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 2.774928774928775,
|
|
"grad_norm": 0.5555623184854452,
|
|
"learning_rate": 3.022870958206845e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04522637277841568,
|
|
"step": 3410,
|
|
"valid_targets_mean": 1463.1,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 2.778998778998779,
|
|
"grad_norm": 0.3659096617640059,
|
|
"learning_rate": 3.019381851015072e-05,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043918170034885406,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3743.8,
|
|
"valid_targets_min": 2776
|
|
},
|
|
{
|
|
"epoch": 2.7830687830687832,
|
|
"grad_norm": 0.4195904880972671,
|
|
"learning_rate": 3.0158885474881354e-05,
|
|
"loss": 0.1064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039590660482645035,
|
|
"step": 3420,
|
|
"valid_targets_mean": 1335.4,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 2.787138787138787,
|
|
"grad_norm": 0.5872682399941394,
|
|
"learning_rate": 3.0123910620063888e-05,
|
|
"loss": 0.1287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05974281579256058,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3961.1,
|
|
"valid_targets_min": 3701
|
|
},
|
|
{
|
|
"epoch": 2.791208791208791,
|
|
"grad_norm": 0.42579563457253944,
|
|
"learning_rate": 3.008889408967403e-05,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05832969397306442,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2015.1,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 2.7952787952787954,
|
|
"grad_norm": 0.49595139616926265,
|
|
"learning_rate": 3.0053836027859024e-05,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08861617743968964,
|
|
"step": 3435,
|
|
"valid_targets_mean": 2458.6,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.7993487993487993,
|
|
"grad_norm": 0.31433751021873957,
|
|
"learning_rate": 3.0018736578937112e-05,
|
|
"loss": 0.0902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03106316737830639,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2187.8,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 2.8034188034188032,
|
|
"grad_norm": 0.30827047949470593,
|
|
"learning_rate": 2.9983595887396864e-05,
|
|
"loss": 0.0911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04991922527551651,
|
|
"step": 3445,
|
|
"valid_targets_mean": 5241.1,
|
|
"valid_targets_min": 4456
|
|
},
|
|
{
|
|
"epoch": 2.8074888074888076,
|
|
"grad_norm": 0.37077247640612376,
|
|
"learning_rate": 2.9948414097896678e-05,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057815488427877426,
|
|
"step": 3450,
|
|
"valid_targets_mean": 4078.9,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 2.8115588115588115,
|
|
"grad_norm": 0.3475158518838076,
|
|
"learning_rate": 2.9913191355264092e-05,
|
|
"loss": 0.1095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04626894369721413,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3414.1,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 2.8156288156288154,
|
|
"grad_norm": 0.3999990815443734,
|
|
"learning_rate": 2.9877927804495255e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0528426468372345,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3541.4,
|
|
"valid_targets_min": 1814
|
|
},
|
|
{
|
|
"epoch": 2.8196988196988197,
|
|
"grad_norm": 0.5010947429743733,
|
|
"learning_rate": 2.9842623590754294e-05,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04774967581033707,
|
|
"step": 3465,
|
|
"valid_targets_mean": 1152.0,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 2.8237688237688237,
|
|
"grad_norm": 0.33526324008830605,
|
|
"learning_rate": 2.980727885937272e-05,
|
|
"loss": 0.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04189581423997879,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3898.9,
|
|
"valid_targets_min": 2719
|
|
},
|
|
{
|
|
"epoch": 2.8278388278388276,
|
|
"grad_norm": 0.5262205319214922,
|
|
"learning_rate": 2.9771893755848857e-05,
|
|
"loss": 0.1081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039720356464385986,
|
|
"step": 3475,
|
|
"valid_targets_mean": 1102.4,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 2.831908831908832,
|
|
"grad_norm": 0.3448120952299084,
|
|
"learning_rate": 2.97364684258472e-05,
|
|
"loss": 0.1029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03923565521836281,
|
|
"step": 3480,
|
|
"valid_targets_mean": 2702.8,
|
|
"valid_targets_min": 1584
|
|
},
|
|
{
|
|
"epoch": 2.835978835978836,
|
|
"grad_norm": 0.3424380111555321,
|
|
"learning_rate": 2.9701003015197862e-05,
|
|
"loss": 0.0824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037075385451316833,
|
|
"step": 3485,
|
|
"valid_targets_mean": 2633.6,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 2.84004884004884,
|
|
"grad_norm": 0.37114327921089785,
|
|
"learning_rate": 2.9665497669895926e-05,
|
|
"loss": 0.1087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04176440089941025,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2853.4,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.844118844118844,
|
|
"grad_norm": 0.35943776778483205,
|
|
"learning_rate": 2.962995253610089e-05,
|
|
"loss": 0.1022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05177730694413185,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3300.2,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 2.8481888481888484,
|
|
"grad_norm": 0.46323078254655087,
|
|
"learning_rate": 2.9594367760136026e-05,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07231918722391129,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2829.9,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 2.8522588522588523,
|
|
"grad_norm": 0.4260912091223462,
|
|
"learning_rate": 2.955874348848781e-05,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05701189488172531,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3187.1,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 2.8563288563288562,
|
|
"grad_norm": 0.49909290086865893,
|
|
"learning_rate": 2.952307986780528e-05,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06764853000640869,
|
|
"step": 3510,
|
|
"valid_targets_mean": 2835.8,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 2.8603988603988606,
|
|
"grad_norm": 0.45907551992299245,
|
|
"learning_rate": 2.9487377044899487e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050722379237413406,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2051.9,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 2.8644688644688645,
|
|
"grad_norm": 0.4353504024468164,
|
|
"learning_rate": 2.945163516674284e-05,
|
|
"loss": 0.1086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05996803194284439,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2904.0,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 2.8685388685388684,
|
|
"grad_norm": 0.30376720990195905,
|
|
"learning_rate": 2.9415854380468523e-05,
|
|
"loss": 0.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03928172215819359,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3957.1,
|
|
"valid_targets_min": 3408
|
|
},
|
|
{
|
|
"epoch": 2.8726088726088728,
|
|
"grad_norm": 0.3136319878421944,
|
|
"learning_rate": 2.9380034833369892e-05,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03363402932882309,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3422.8,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 2.8766788766788767,
|
|
"grad_norm": 0.43540877616440243,
|
|
"learning_rate": 2.934417667289986e-05,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061333850026130676,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2753.1,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 2.8807488807488806,
|
|
"grad_norm": 0.3782032866681542,
|
|
"learning_rate": 2.9308280046670306e-05,
|
|
"loss": 0.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046056248247623444,
|
|
"step": 3540,
|
|
"valid_targets_mean": 2810.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 2.884818884818885,
|
|
"grad_norm": 0.38282746322202976,
|
|
"learning_rate": 2.9272345102451424e-05,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06318642199039459,
|
|
"step": 3545,
|
|
"valid_targets_mean": 5088.1,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 2.888888888888889,
|
|
"grad_norm": 0.34609048416468996,
|
|
"learning_rate": 2.923637198817118e-05,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043254029005765915,
|
|
"step": 3550,
|
|
"valid_targets_mean": 4085.0,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 2.8929588929588927,
|
|
"grad_norm": 0.2674998878729295,
|
|
"learning_rate": 2.920036085191466e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03606154024600983,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4375.1,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 2.897028897028897,
|
|
"grad_norm": 0.4242295936507459,
|
|
"learning_rate": 2.9164311841923453e-05,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07503281533718109,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4233.1,
|
|
"valid_targets_min": 3268
|
|
},
|
|
{
|
|
"epoch": 2.901098901098901,
|
|
"grad_norm": 0.3646136002979487,
|
|
"learning_rate": 2.9128225106595073e-05,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050537366420030594,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3133.2,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 2.905168905168905,
|
|
"grad_norm": 0.3850447940122486,
|
|
"learning_rate": 2.909210079448233e-05,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04207482933998108,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2683.9,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 2.9092389092389093,
|
|
"grad_norm": 0.4532382464548252,
|
|
"learning_rate": 2.905593905429272e-05,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06049978733062744,
|
|
"step": 3575,
|
|
"valid_targets_mean": 1883.4,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 2.913308913308913,
|
|
"grad_norm": 0.4613149439936778,
|
|
"learning_rate": 2.9019740034887812e-05,
|
|
"loss": 0.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053340524435043335,
|
|
"step": 3580,
|
|
"valid_targets_mean": 2458.2,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 2.9173789173789175,
|
|
"grad_norm": 0.3550345526134136,
|
|
"learning_rate": 2.898350388528263e-05,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05539257824420929,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3971.8,
|
|
"valid_targets_min": 3551
|
|
},
|
|
{
|
|
"epoch": 2.9214489214489214,
|
|
"grad_norm": 0.4104168375230188,
|
|
"learning_rate": 2.8947230754645056e-05,
|
|
"loss": 0.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03185092657804489,
|
|
"step": 3590,
|
|
"valid_targets_mean": 1747.2,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 2.925518925518926,
|
|
"grad_norm": 0.335588797815782,
|
|
"learning_rate": 2.891092079229521e-05,
|
|
"loss": 0.1003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04076407849788666,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3301.9,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 2.9295889295889297,
|
|
"grad_norm": 0.43098042545889287,
|
|
"learning_rate": 2.887457414770482e-05,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058630071580410004,
|
|
"step": 3600,
|
|
"valid_targets_mean": 3122.6,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 2.9336589336589336,
|
|
"grad_norm": 0.44968298408503327,
|
|
"learning_rate": 2.883819097049662e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05955217033624649,
|
|
"step": 3605,
|
|
"valid_targets_mean": 1916.1,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 2.937728937728938,
|
|
"grad_norm": 0.3761169572763773,
|
|
"learning_rate": 2.880177141044374e-05,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042976461350917816,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3087.8,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 2.941798941798942,
|
|
"grad_norm": 0.49089665811727856,
|
|
"learning_rate": 2.8765315617469083e-05,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04839683324098587,
|
|
"step": 3615,
|
|
"valid_targets_mean": 1806.6,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 2.9458689458689458,
|
|
"grad_norm": 0.3819345463527236,
|
|
"learning_rate": 2.8728823741644693e-05,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055881187319755554,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3530.9,
|
|
"valid_targets_min": 2920
|
|
},
|
|
{
|
|
"epoch": 2.94993894993895,
|
|
"grad_norm": 0.4248398215986827,
|
|
"learning_rate": 2.869229593319115e-05,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05369921028614044,
|
|
"step": 3625,
|
|
"valid_targets_mean": 2021.4,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 2.954008954008954,
|
|
"grad_norm": 0.5366502780379148,
|
|
"learning_rate": 2.8655732342476974e-05,
|
|
"loss": 0.0871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04955916851758957,
|
|
"step": 3630,
|
|
"valid_targets_mean": 1252.5,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 2.958078958078958,
|
|
"grad_norm": 0.3725411467334531,
|
|
"learning_rate": 2.8619133120017977e-05,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04662502557039261,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3125.5,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 2.9621489621489623,
|
|
"grad_norm": 0.4735438299144219,
|
|
"learning_rate": 2.858249841647663e-05,
|
|
"loss": 0.0898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06342704594135284,
|
|
"step": 3640,
|
|
"valid_targets_mean": 2589.4,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 2.966218966218966,
|
|
"grad_norm": 0.3229059404155852,
|
|
"learning_rate": 2.8545828382661483e-05,
|
|
"loss": 0.0944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03534681349992752,
|
|
"step": 3645,
|
|
"valid_targets_mean": 2489.9,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 2.97028897028897,
|
|
"grad_norm": 0.4160099670775479,
|
|
"learning_rate": 2.850912316952653e-05,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06118070334196091,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3904.1,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 2.9743589743589745,
|
|
"grad_norm": 0.27448891928689173,
|
|
"learning_rate": 2.847238292817057e-05,
|
|
"loss": 0.0903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03835466504096985,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4848.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 2.9784289784289784,
|
|
"grad_norm": 0.3412081869287908,
|
|
"learning_rate": 2.8435607809836585e-05,
|
|
"loss": 0.0875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03919822350144386,
|
|
"step": 3660,
|
|
"valid_targets_mean": 2295.4,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 2.9824989824989823,
|
|
"grad_norm": 0.369354587305238,
|
|
"learning_rate": 2.8398797965911164e-05,
|
|
"loss": 0.0966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05215475335717201,
|
|
"step": 3665,
|
|
"valid_targets_mean": 4149.0,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 2.9865689865689866,
|
|
"grad_norm": 0.4781896046920586,
|
|
"learning_rate": 2.836195354792382e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11530614644289017,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3047.1,
|
|
"valid_targets_min": 2759
|
|
},
|
|
{
|
|
"epoch": 2.9906389906389905,
|
|
"grad_norm": 0.32100235515429487,
|
|
"learning_rate": 2.8325074707546397e-05,
|
|
"loss": 0.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03835485503077507,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3380.2,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 2.9947089947089944,
|
|
"grad_norm": 0.3685878918749171,
|
|
"learning_rate": 2.8288161596592445e-05,
|
|
"loss": 0.1049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029963500797748566,
|
|
"step": 3680,
|
|
"valid_targets_mean": 2062.9,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 2.998778998778999,
|
|
"grad_norm": 0.5570354007672066,
|
|
"learning_rate": 2.8251214367016584e-05,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11446891725063324,
|
|
"step": 3685,
|
|
"valid_targets_mean": 2155.2,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 3.0024420024420024,
|
|
"grad_norm": 0.5300936680238741,
|
|
"learning_rate": 2.8214233170913897e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10420374572277069,
|
|
"step": 3690,
|
|
"valid_targets_mean": 5435.1,
|
|
"valid_targets_min": 3516
|
|
},
|
|
{
|
|
"epoch": 3.0065120065120063,
|
|
"grad_norm": 0.38181099491046794,
|
|
"learning_rate": 2.8177218160519274e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10185196250677109,
|
|
"step": 3695,
|
|
"valid_targets_mean": 7654.2,
|
|
"valid_targets_min": 6354
|
|
},
|
|
{
|
|
"epoch": 3.0105820105820107,
|
|
"grad_norm": 0.3636523334412447,
|
|
"learning_rate": 2.8140169488206813e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11651530861854553,
|
|
"step": 3700,
|
|
"valid_targets_mean": 7341.6,
|
|
"valid_targets_min": 5754
|
|
},
|
|
{
|
|
"epoch": 3.0146520146520146,
|
|
"grad_norm": 0.35386050254952656,
|
|
"learning_rate": 2.810308730648919e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0877489298582077,
|
|
"step": 3705,
|
|
"valid_targets_mean": 7666.2,
|
|
"valid_targets_min": 5666
|
|
},
|
|
{
|
|
"epoch": 3.0187220187220185,
|
|
"grad_norm": 0.39000718209727786,
|
|
"learning_rate": 2.8065971768017014e-05,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1017606109380722,
|
|
"step": 3710,
|
|
"valid_targets_mean": 7469.0,
|
|
"valid_targets_min": 5646
|
|
},
|
|
{
|
|
"epoch": 3.022792022792023,
|
|
"grad_norm": 0.3982430381116518,
|
|
"learning_rate": 2.802882302557821e-05,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11622816324234009,
|
|
"step": 3715,
|
|
"valid_targets_mean": 7046.9,
|
|
"valid_targets_min": 5571
|
|
},
|
|
{
|
|
"epoch": 3.0268620268620268,
|
|
"grad_norm": 0.3840578048570892,
|
|
"learning_rate": 2.7991641232097385e-05,
|
|
"loss": 0.2136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11238230019807816,
|
|
"step": 3720,
|
|
"valid_targets_mean": 7827.6,
|
|
"valid_targets_min": 5405
|
|
},
|
|
{
|
|
"epoch": 3.030932030932031,
|
|
"grad_norm": 0.37593713287199076,
|
|
"learning_rate": 2.7954426540635213e-05,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10586348176002502,
|
|
"step": 3725,
|
|
"valid_targets_mean": 7023.5,
|
|
"valid_targets_min": 5664
|
|
},
|
|
{
|
|
"epoch": 3.035002035002035,
|
|
"grad_norm": 0.3617967688248237,
|
|
"learning_rate": 2.7917179104387792e-05,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09483138471841812,
|
|
"step": 3730,
|
|
"valid_targets_mean": 7069.8,
|
|
"valid_targets_min": 5103
|
|
},
|
|
{
|
|
"epoch": 3.039072039072039,
|
|
"grad_norm": 0.37727049937650137,
|
|
"learning_rate": 2.787989907668601e-05,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09244755655527115,
|
|
"step": 3735,
|
|
"valid_targets_mean": 6712.4,
|
|
"valid_targets_min": 5469
|
|
},
|
|
{
|
|
"epoch": 3.0431420431420433,
|
|
"grad_norm": 0.3766512807014509,
|
|
"learning_rate": 2.7842586610994913e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09342852234840393,
|
|
"step": 3740,
|
|
"valid_targets_mean": 7289.2,
|
|
"valid_targets_min": 5513
|
|
},
|
|
{
|
|
"epoch": 3.047212047212047,
|
|
"grad_norm": 0.3695612492414105,
|
|
"learning_rate": 2.7805241860913095e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09307433664798737,
|
|
"step": 3745,
|
|
"valid_targets_mean": 7022.1,
|
|
"valid_targets_min": 5691
|
|
},
|
|
{
|
|
"epoch": 3.051282051282051,
|
|
"grad_norm": 0.39612551211364405,
|
|
"learning_rate": 2.7767864980172046e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10077991336584091,
|
|
"step": 3750,
|
|
"valid_targets_mean": 6736.0,
|
|
"valid_targets_min": 5550
|
|
},
|
|
{
|
|
"epoch": 3.0553520553520555,
|
|
"grad_norm": 0.4921659164636033,
|
|
"learning_rate": 2.773045612263552e-05,
|
|
"loss": 0.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09157957881689072,
|
|
"step": 3755,
|
|
"valid_targets_mean": 6346.9,
|
|
"valid_targets_min": 5067
|
|
},
|
|
{
|
|
"epoch": 3.0594220594220594,
|
|
"grad_norm": 0.4073510419270923,
|
|
"learning_rate": 2.7693015442298896e-05,
|
|
"loss": 0.1868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.095358707010746,
|
|
"step": 3760,
|
|
"valid_targets_mean": 6784.0,
|
|
"valid_targets_min": 5618
|
|
},
|
|
{
|
|
"epoch": 3.0634920634920633,
|
|
"grad_norm": 0.4015377800668027,
|
|
"learning_rate": 2.7655543093288567e-05,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10923737287521362,
|
|
"step": 3765,
|
|
"valid_targets_mean": 7568.4,
|
|
"valid_targets_min": 5856
|
|
},
|
|
{
|
|
"epoch": 3.0675620675620676,
|
|
"grad_norm": 0.4034210421570319,
|
|
"learning_rate": 2.7618039229861298e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09021735936403275,
|
|
"step": 3770,
|
|
"valid_targets_mean": 7198.5,
|
|
"valid_targets_min": 4793
|
|
},
|
|
{
|
|
"epoch": 3.0716320716320715,
|
|
"grad_norm": 0.3908082661531363,
|
|
"learning_rate": 2.7580504006403565e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09716609120368958,
|
|
"step": 3775,
|
|
"valid_targets_mean": 6680.5,
|
|
"valid_targets_min": 5350
|
|
},
|
|
{
|
|
"epoch": 3.075702075702076,
|
|
"grad_norm": 0.38406455833735326,
|
|
"learning_rate": 2.7542937577430947e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1004624292254448,
|
|
"step": 3780,
|
|
"valid_targets_mean": 6770.6,
|
|
"valid_targets_min": 5753
|
|
},
|
|
{
|
|
"epoch": 3.07977207977208,
|
|
"grad_norm": 0.7240136077873857,
|
|
"learning_rate": 2.7505340097587488e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07324851304292679,
|
|
"step": 3785,
|
|
"valid_targets_mean": 1419.8,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 3.0838420838420837,
|
|
"grad_norm": 0.4204933410198007,
|
|
"learning_rate": 2.7467711721645045e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09934137761592865,
|
|
"step": 3790,
|
|
"valid_targets_mean": 6784.2,
|
|
"valid_targets_min": 5042
|
|
},
|
|
{
|
|
"epoch": 3.087912087912088,
|
|
"grad_norm": 0.3742729131572597,
|
|
"learning_rate": 2.7430052604502663e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09341548383235931,
|
|
"step": 3795,
|
|
"valid_targets_mean": 6774.9,
|
|
"valid_targets_min": 3875
|
|
},
|
|
{
|
|
"epoch": 3.091982091982092,
|
|
"grad_norm": 0.41169203877187577,
|
|
"learning_rate": 2.7392362901185944e-05,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09741862118244171,
|
|
"step": 3800,
|
|
"valid_targets_mean": 7047.6,
|
|
"valid_targets_min": 6134
|
|
},
|
|
{
|
|
"epoch": 3.096052096052096,
|
|
"grad_norm": 0.3908141039193812,
|
|
"learning_rate": 2.7354642766846383e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10031354427337646,
|
|
"step": 3805,
|
|
"valid_targets_mean": 6886.6,
|
|
"valid_targets_min": 5869
|
|
},
|
|
{
|
|
"epoch": 3.1001221001221,
|
|
"grad_norm": 0.3737173994845085,
|
|
"learning_rate": 2.7316892356760768e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0877181887626648,
|
|
"step": 3810,
|
|
"valid_targets_mean": 7559.8,
|
|
"valid_targets_min": 6145
|
|
},
|
|
{
|
|
"epoch": 3.104192104192104,
|
|
"grad_norm": 0.345233755743295,
|
|
"learning_rate": 2.727911182633049e-05,
|
|
"loss": 0.1766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08521901816129684,
|
|
"step": 3815,
|
|
"valid_targets_mean": 8920.5,
|
|
"valid_targets_min": 4780
|
|
},
|
|
{
|
|
"epoch": 3.1082621082621085,
|
|
"grad_norm": 0.3279700741389342,
|
|
"learning_rate": 2.724130133108096e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07514619082212448,
|
|
"step": 3820,
|
|
"valid_targets_mean": 7393.4,
|
|
"valid_targets_min": 5116
|
|
},
|
|
{
|
|
"epoch": 3.1123321123321124,
|
|
"grad_norm": 0.31728564944220194,
|
|
"learning_rate": 2.720346102666092e-05,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08252862095832825,
|
|
"step": 3825,
|
|
"valid_targets_mean": 8477.5,
|
|
"valid_targets_min": 4033
|
|
},
|
|
{
|
|
"epoch": 3.1164021164021163,
|
|
"grad_norm": 0.3897732561895216,
|
|
"learning_rate": 2.7165591068841835e-05,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09388996660709381,
|
|
"step": 3830,
|
|
"valid_targets_mean": 6736.6,
|
|
"valid_targets_min": 4646
|
|
},
|
|
{
|
|
"epoch": 3.1204721204721206,
|
|
"grad_norm": 0.4003693550688361,
|
|
"learning_rate": 2.7127691613517236e-05,
|
|
"loss": 0.1769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09637527167797089,
|
|
"step": 3835,
|
|
"valid_targets_mean": 7551.5,
|
|
"valid_targets_min": 5461
|
|
},
|
|
{
|
|
"epoch": 3.1245421245421245,
|
|
"grad_norm": 0.42189953518448664,
|
|
"learning_rate": 2.7089762816702072e-05,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11182335764169693,
|
|
"step": 3840,
|
|
"valid_targets_mean": 7091.0,
|
|
"valid_targets_min": 4879
|
|
},
|
|
{
|
|
"epoch": 3.1286121286121285,
|
|
"grad_norm": 0.3964626223828562,
|
|
"learning_rate": 2.70518048345321e-05,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08981172740459442,
|
|
"step": 3845,
|
|
"valid_targets_mean": 6269.8,
|
|
"valid_targets_min": 4606
|
|
},
|
|
{
|
|
"epoch": 3.132682132682133,
|
|
"grad_norm": 0.4236778964061071,
|
|
"learning_rate": 2.7013817823263206e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08929223567247391,
|
|
"step": 3850,
|
|
"valid_targets_mean": 6818.4,
|
|
"valid_targets_min": 5525
|
|
},
|
|
{
|
|
"epoch": 3.1367521367521367,
|
|
"grad_norm": 0.4184282493040094,
|
|
"learning_rate": 2.6975801939270762e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09297116100788116,
|
|
"step": 3855,
|
|
"valid_targets_mean": 7365.4,
|
|
"valid_targets_min": 5000
|
|
},
|
|
{
|
|
"epoch": 3.1408221408221406,
|
|
"grad_norm": 0.3991658399742734,
|
|
"learning_rate": 2.6937757339049027e-05,
|
|
"loss": 0.1928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08477732539176941,
|
|
"step": 3860,
|
|
"valid_targets_mean": 6212.1,
|
|
"valid_targets_min": 3988
|
|
},
|
|
{
|
|
"epoch": 3.144892144892145,
|
|
"grad_norm": 0.3522450277037621,
|
|
"learning_rate": 2.6899684179210446e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09426316618919373,
|
|
"step": 3865,
|
|
"valid_targets_mean": 8396.9,
|
|
"valid_targets_min": 6131
|
|
},
|
|
{
|
|
"epoch": 3.148962148962149,
|
|
"grad_norm": 0.40217083920878643,
|
|
"learning_rate": 2.6861582616485048e-05,
|
|
"loss": 0.1903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09596344083547592,
|
|
"step": 3870,
|
|
"valid_targets_mean": 7651.0,
|
|
"valid_targets_min": 4061
|
|
},
|
|
{
|
|
"epoch": 3.1530321530321532,
|
|
"grad_norm": 0.38649324870646884,
|
|
"learning_rate": 2.6823452807719763e-05,
|
|
"loss": 0.1934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09395226836204529,
|
|
"step": 3875,
|
|
"valid_targets_mean": 7090.1,
|
|
"valid_targets_min": 5203
|
|
},
|
|
{
|
|
"epoch": 3.157102157102157,
|
|
"grad_norm": 0.37118239164661415,
|
|
"learning_rate": 2.678529490987783e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09067308902740479,
|
|
"step": 3880,
|
|
"valid_targets_mean": 7001.0,
|
|
"valid_targets_min": 5362
|
|
},
|
|
{
|
|
"epoch": 3.161172161172161,
|
|
"grad_norm": 0.3848861837027517,
|
|
"learning_rate": 2.674710908003808e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09417563676834106,
|
|
"step": 3885,
|
|
"valid_targets_mean": 6359.2,
|
|
"valid_targets_min": 5236
|
|
},
|
|
{
|
|
"epoch": 3.1652421652421654,
|
|
"grad_norm": 0.407400812686477,
|
|
"learning_rate": 2.6708895475394362e-05,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10153548419475555,
|
|
"step": 3890,
|
|
"valid_targets_mean": 5939.8,
|
|
"valid_targets_min": 4693
|
|
},
|
|
{
|
|
"epoch": 3.1693121693121693,
|
|
"grad_norm": 1.1139169285060038,
|
|
"learning_rate": 2.6670654253254834e-05,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03590000420808792,
|
|
"step": 3895,
|
|
"valid_targets_mean": 208.6,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 3.173382173382173,
|
|
"grad_norm": 0.4169772802304493,
|
|
"learning_rate": 2.663238557104136e-05,
|
|
"loss": 0.1918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09831123054027557,
|
|
"step": 3900,
|
|
"valid_targets_mean": 6008.5,
|
|
"valid_targets_min": 4956
|
|
},
|
|
{
|
|
"epoch": 3.1774521774521776,
|
|
"grad_norm": 0.37807853509176925,
|
|
"learning_rate": 2.659408958628883e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08299198746681213,
|
|
"step": 3905,
|
|
"valid_targets_mean": 6042.0,
|
|
"valid_targets_min": 4817
|
|
},
|
|
{
|
|
"epoch": 3.1815221815221815,
|
|
"grad_norm": 0.3981260221085175,
|
|
"learning_rate": 2.6555766456644553e-05,
|
|
"loss": 0.1912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09234784543514252,
|
|
"step": 3910,
|
|
"valid_targets_mean": 6608.6,
|
|
"valid_targets_min": 5102
|
|
},
|
|
{
|
|
"epoch": 3.185592185592186,
|
|
"grad_norm": 0.3880642673862131,
|
|
"learning_rate": 2.6517416339867544e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09438446909189224,
|
|
"step": 3915,
|
|
"valid_targets_mean": 6670.1,
|
|
"valid_targets_min": 4869
|
|
},
|
|
{
|
|
"epoch": 3.1896621896621897,
|
|
"grad_norm": 0.38644112067025166,
|
|
"learning_rate": 2.6479039393827944e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08336789160966873,
|
|
"step": 3920,
|
|
"valid_targets_mean": 6088.4,
|
|
"valid_targets_min": 4946
|
|
},
|
|
{
|
|
"epoch": 3.1937321937321936,
|
|
"grad_norm": 0.4014072145721109,
|
|
"learning_rate": 2.6440635776506316e-05,
|
|
"loss": 0.1939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08969050645828247,
|
|
"step": 3925,
|
|
"valid_targets_mean": 6228.4,
|
|
"valid_targets_min": 4856
|
|
},
|
|
{
|
|
"epoch": 3.197802197802198,
|
|
"grad_norm": 0.4562972639302328,
|
|
"learning_rate": 2.6402205645993038e-05,
|
|
"loss": 0.1926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09550561010837555,
|
|
"step": 3930,
|
|
"valid_targets_mean": 5376.6,
|
|
"valid_targets_min": 4145
|
|
},
|
|
{
|
|
"epoch": 3.201872201872202,
|
|
"grad_norm": 0.48273208307655285,
|
|
"learning_rate": 2.636374916048761e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09403058886528015,
|
|
"step": 3935,
|
|
"valid_targets_mean": 6309.2,
|
|
"valid_targets_min": 5305
|
|
},
|
|
{
|
|
"epoch": 3.205942205942206,
|
|
"grad_norm": 0.40883426520856037,
|
|
"learning_rate": 2.6325266478298032e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08947273343801498,
|
|
"step": 3940,
|
|
"valid_targets_mean": 6221.5,
|
|
"valid_targets_min": 4636
|
|
},
|
|
{
|
|
"epoch": 3.21001221001221,
|
|
"grad_norm": 0.39532763897190637,
|
|
"learning_rate": 2.6286757757840144e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09781331568956375,
|
|
"step": 3945,
|
|
"valid_targets_mean": 6831.5,
|
|
"valid_targets_min": 5668
|
|
},
|
|
{
|
|
"epoch": 3.214082214082214,
|
|
"grad_norm": 0.4023824820856924,
|
|
"learning_rate": 2.6248223157636982e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09898592531681061,
|
|
"step": 3950,
|
|
"valid_targets_mean": 6437.1,
|
|
"valid_targets_min": 5004
|
|
},
|
|
{
|
|
"epoch": 3.218152218152218,
|
|
"grad_norm": 0.4037103091391165,
|
|
"learning_rate": 2.62096628363181e-05,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0871603712439537,
|
|
"step": 3955,
|
|
"valid_targets_mean": 6205.8,
|
|
"valid_targets_min": 5341
|
|
},
|
|
{
|
|
"epoch": 3.2222222222222223,
|
|
"grad_norm": 0.4037836380504369,
|
|
"learning_rate": 2.6171076952618943e-05,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09813414514064789,
|
|
"step": 3960,
|
|
"valid_targets_mean": 6748.0,
|
|
"valid_targets_min": 5363
|
|
},
|
|
{
|
|
"epoch": 3.2262922262922262,
|
|
"grad_norm": 0.407885387401038,
|
|
"learning_rate": 2.61324656653802e-05,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0859188586473465,
|
|
"step": 3965,
|
|
"valid_targets_mean": 5210.5,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 3.23036223036223,
|
|
"grad_norm": 0.6923397809278381,
|
|
"learning_rate": 2.60938291335471e-05,
|
|
"loss": 0.1989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07540939748287201,
|
|
"step": 3970,
|
|
"valid_targets_mean": 1279.5,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 3.2344322344322345,
|
|
"grad_norm": 0.7921805175588379,
|
|
"learning_rate": 2.6055167516168828e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09494996815919876,
|
|
"step": 3975,
|
|
"valid_targets_mean": 1635.8,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.2385022385022384,
|
|
"grad_norm": 0.7552177145973241,
|
|
"learning_rate": 2.6016480972397807e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08284762501716614,
|
|
"step": 3980,
|
|
"valid_targets_mean": 1420.2,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 3.2425722425722427,
|
|
"grad_norm": 0.768222331032816,
|
|
"learning_rate": 2.5977769661489102e-05,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07532330602407455,
|
|
"step": 3985,
|
|
"valid_targets_mean": 1399.9,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 3.2466422466422467,
|
|
"grad_norm": 0.7896098768251143,
|
|
"learning_rate": 2.5939033742799692e-05,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06821978092193604,
|
|
"step": 3990,
|
|
"valid_targets_mean": 1309.9,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.2507122507122506,
|
|
"grad_norm": 0.7418241829283232,
|
|
"learning_rate": 2.5900273375787898e-05,
|
|
"loss": 0.1737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0990295559167862,
|
|
"step": 3995,
|
|
"valid_targets_mean": 1729.4,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 3.254782254782255,
|
|
"grad_norm": 0.6949460534292148,
|
|
"learning_rate": 2.586148872001265e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08115358650684357,
|
|
"step": 4000,
|
|
"valid_targets_mean": 1651.4,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 3.258852258852259,
|
|
"grad_norm": 0.8287359657436814,
|
|
"learning_rate": 2.5822679935132876e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06513510644435883,
|
|
"step": 4005,
|
|
"valid_targets_mean": 1192.0,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 3.2629222629222627,
|
|
"grad_norm": 0.7416322068860572,
|
|
"learning_rate": 2.578384718090685e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09329591691493988,
|
|
"step": 4010,
|
|
"valid_targets_mean": 1879.0,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 3.266992266992267,
|
|
"grad_norm": 0.6590970920655556,
|
|
"learning_rate": 2.5744990617191486e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.075216144323349,
|
|
"step": 4015,
|
|
"valid_targets_mean": 1500.4,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 3.271062271062271,
|
|
"grad_norm": 0.7902390874577276,
|
|
"learning_rate": 2.5706110403941724e-05,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11390472203493118,
|
|
"step": 4020,
|
|
"valid_targets_mean": 1661.8,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.2751322751322753,
|
|
"grad_norm": 0.6950850372161688,
|
|
"learning_rate": 2.5667206701209876e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08275754749774933,
|
|
"step": 4025,
|
|
"valid_targets_mean": 1537.8,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 3.2792022792022792,
|
|
"grad_norm": 0.7394050894613096,
|
|
"learning_rate": 2.562827966914492e-05,
|
|
"loss": 0.1611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06054488569498062,
|
|
"step": 4030,
|
|
"valid_targets_mean": 1227.0,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 3.283272283272283,
|
|
"grad_norm": 0.6772775941130794,
|
|
"learning_rate": 2.5589329467991885e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07103030383586884,
|
|
"step": 4035,
|
|
"valid_targets_mean": 1364.5,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 3.2873422873422875,
|
|
"grad_norm": 0.7940146053459805,
|
|
"learning_rate": 2.555035625809118e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07808074355125427,
|
|
"step": 4040,
|
|
"valid_targets_mean": 1358.5,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 3.2914122914122914,
|
|
"grad_norm": 0.7802294896653876,
|
|
"learning_rate": 2.5511360199877934e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10007496178150177,
|
|
"step": 4045,
|
|
"valid_targets_mean": 1590.6,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 3.2954822954822953,
|
|
"grad_norm": 0.7087462700416979,
|
|
"learning_rate": 2.5472341453881316e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1003953143954277,
|
|
"step": 4050,
|
|
"valid_targets_mean": 1791.4,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 3.2995522995522997,
|
|
"grad_norm": 0.7315742027165163,
|
|
"learning_rate": 2.543330018072389e-05,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09038805961608887,
|
|
"step": 4055,
|
|
"valid_targets_mean": 1470.8,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 3.3036223036223036,
|
|
"grad_norm": 0.7849182759993877,
|
|
"learning_rate": 2.5394236541120978e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06879038363695145,
|
|
"step": 4060,
|
|
"valid_targets_mean": 1278.6,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 3.3076923076923075,
|
|
"grad_norm": 0.6965689168981399,
|
|
"learning_rate": 2.5355150695879952e-05,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1073441207408905,
|
|
"step": 4065,
|
|
"valid_targets_mean": 2313.5,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 3.311762311762312,
|
|
"grad_norm": 0.7410314939380847,
|
|
"learning_rate": 2.5316042805899616e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11727465689182281,
|
|
"step": 4070,
|
|
"valid_targets_mean": 2231.9,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 3.3158323158323157,
|
|
"grad_norm": 0.6764884077292488,
|
|
"learning_rate": 2.5276913032169485e-05,
|
|
"loss": 0.1645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09181191027164459,
|
|
"step": 4075,
|
|
"valid_targets_mean": 1729.5,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 3.3199023199023197,
|
|
"grad_norm": 0.7670282548657762,
|
|
"learning_rate": 2.52377615357692e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06349903345108032,
|
|
"step": 4080,
|
|
"valid_targets_mean": 1222.9,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.323972323972324,
|
|
"grad_norm": 0.7717936984864601,
|
|
"learning_rate": 2.5198588477867806e-05,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09327976405620575,
|
|
"step": 4085,
|
|
"valid_targets_mean": 1521.1,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 3.328042328042328,
|
|
"grad_norm": 0.7083051922595436,
|
|
"learning_rate": 2.515939401972311e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08291327953338623,
|
|
"step": 4090,
|
|
"valid_targets_mean": 1693.9,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 3.3321123321123323,
|
|
"grad_norm": 0.7259684658670004,
|
|
"learning_rate": 2.5120178322681003e-05,
|
|
"loss": 0.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0862584039568901,
|
|
"step": 4095,
|
|
"valid_targets_mean": 1630.2,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 3.336182336182336,
|
|
"grad_norm": 0.6785358648236004,
|
|
"learning_rate": 2.5080941548174825e-05,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07994429767131805,
|
|
"step": 4100,
|
|
"valid_targets_mean": 1843.0,
|
|
"valid_targets_min": 1075
|
|
},
|
|
{
|
|
"epoch": 3.34025234025234,
|
|
"grad_norm": 0.7835115275000952,
|
|
"learning_rate": 2.5041683857724676e-05,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09122762084007263,
|
|
"step": 4105,
|
|
"valid_targets_mean": 1848.2,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 3.3443223443223444,
|
|
"grad_norm": 0.7678656781444083,
|
|
"learning_rate": 2.5002405412936748e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09253916144371033,
|
|
"step": 4110,
|
|
"valid_targets_mean": 1727.4,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 3.3483923483923483,
|
|
"grad_norm": 0.7652220167507279,
|
|
"learning_rate": 2.4963106375502673e-05,
|
|
"loss": 0.1606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0644087865948677,
|
|
"step": 4115,
|
|
"valid_targets_mean": 1115.4,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 3.3524623524623527,
|
|
"grad_norm": 0.749831847944443,
|
|
"learning_rate": 2.492378690719887e-05,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08243843168020248,
|
|
"step": 4120,
|
|
"valid_targets_mean": 1547.2,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 3.3565323565323566,
|
|
"grad_norm": 0.7150068797955984,
|
|
"learning_rate": 2.4884447169885855e-05,
|
|
"loss": 0.1519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07063218951225281,
|
|
"step": 4125,
|
|
"valid_targets_mean": 1450.0,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 3.3606023606023605,
|
|
"grad_norm": 0.8508456830574319,
|
|
"learning_rate": 2.4845087325507557e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08344951272010803,
|
|
"step": 4130,
|
|
"valid_targets_mean": 1539.0,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 3.364672364672365,
|
|
"grad_norm": 0.75421710601425,
|
|
"learning_rate": 2.4805707536090708e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0741521492600441,
|
|
"step": 4135,
|
|
"valid_targets_mean": 1321.2,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 3.3687423687423688,
|
|
"grad_norm": 0.737658592772203,
|
|
"learning_rate": 2.476630796374413e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07522574812173843,
|
|
"step": 4140,
|
|
"valid_targets_mean": 1480.0,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 3.3728123728123727,
|
|
"grad_norm": 0.7186314188969304,
|
|
"learning_rate": 2.4726888770658103e-05,
|
|
"loss": 0.1536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06374834477901459,
|
|
"step": 4145,
|
|
"valid_targets_mean": 1293.9,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 3.376882376882377,
|
|
"grad_norm": 0.7633726706709022,
|
|
"learning_rate": 2.4687450119103637e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08639413118362427,
|
|
"step": 4150,
|
|
"valid_targets_mean": 1529.2,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 3.380952380952381,
|
|
"grad_norm": 1.1964337610025615,
|
|
"learning_rate": 2.464799217143188e-05,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0945994108915329,
|
|
"step": 4155,
|
|
"valid_targets_mean": 1678.0,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 3.385022385022385,
|
|
"grad_norm": 0.7985356155534645,
|
|
"learning_rate": 2.46085150900734e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09271052479743958,
|
|
"step": 4160,
|
|
"valid_targets_mean": 1609.4,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 3.389092389092389,
|
|
"grad_norm": 1.0437056142019698,
|
|
"learning_rate": 2.4569019037537525e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07413659244775772,
|
|
"step": 4165,
|
|
"valid_targets_mean": 1449.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.393162393162393,
|
|
"grad_norm": 0.7535290857067112,
|
|
"learning_rate": 2.4529504176411685e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07152436673641205,
|
|
"step": 4170,
|
|
"valid_targets_mean": 1344.0,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 3.397232397232397,
|
|
"grad_norm": 0.805609957537731,
|
|
"learning_rate": 2.448997066936073e-05,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07804272323846817,
|
|
"step": 4175,
|
|
"valid_targets_mean": 1469.4,
|
|
"valid_targets_min": 934
|
|
},
|
|
{
|
|
"epoch": 3.4013024013024014,
|
|
"grad_norm": 0.9417672703147607,
|
|
"learning_rate": 2.445041867912629e-05,
|
|
"loss": 0.1694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11873181909322739,
|
|
"step": 4180,
|
|
"valid_targets_mean": 1829.1,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 3.4053724053724053,
|
|
"grad_norm": 1.044289943148221,
|
|
"learning_rate": 2.4410848368526053e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10154280066490173,
|
|
"step": 4185,
|
|
"valid_targets_mean": 2010.8,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 3.4094424094424096,
|
|
"grad_norm": 0.7104274369527875,
|
|
"learning_rate": 2.4371259900453126e-05,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060282643884420395,
|
|
"step": 4190,
|
|
"valid_targets_mean": 1405.2,
|
|
"valid_targets_min": 551
|
|
},
|
|
{
|
|
"epoch": 3.4135124135124135,
|
|
"grad_norm": 0.8437670408273207,
|
|
"learning_rate": 2.433165343787538e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10241234302520752,
|
|
"step": 4195,
|
|
"valid_targets_mean": 1817.6,
|
|
"valid_targets_min": 931
|
|
},
|
|
{
|
|
"epoch": 3.4175824175824174,
|
|
"grad_norm": 0.726473705947651,
|
|
"learning_rate": 2.4292029143834756e-05,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07595465332269669,
|
|
"step": 4200,
|
|
"valid_targets_mean": 1599.1,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 3.421652421652422,
|
|
"grad_norm": 0.7469567163494483,
|
|
"learning_rate": 2.425238718144659e-05,
|
|
"loss": 0.1507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08603636920452118,
|
|
"step": 4205,
|
|
"valid_targets_mean": 1774.8,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.4257224257224257,
|
|
"grad_norm": 0.8033287596020903,
|
|
"learning_rate": 2.4212727713898948e-05,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07098281383514404,
|
|
"step": 4210,
|
|
"valid_targets_mean": 1229.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 3.42979242979243,
|
|
"grad_norm": 0.8172817454896187,
|
|
"learning_rate": 2.417305090445198e-05,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08913031220436096,
|
|
"step": 4215,
|
|
"valid_targets_mean": 1536.9,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 3.433862433862434,
|
|
"grad_norm": 0.7239421409227166,
|
|
"learning_rate": 2.4133356916437202e-05,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0854625403881073,
|
|
"step": 4220,
|
|
"valid_targets_mean": 1739.5,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 3.437932437932438,
|
|
"grad_norm": 0.7197452670352181,
|
|
"learning_rate": 2.4093645913256855e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06945577263832092,
|
|
"step": 4225,
|
|
"valid_targets_mean": 1475.9,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 3.442002442002442,
|
|
"grad_norm": 0.7090142042404015,
|
|
"learning_rate": 2.405391805838322e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07137700915336609,
|
|
"step": 4230,
|
|
"valid_targets_mean": 1482.6,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 3.446072446072446,
|
|
"grad_norm": 0.7513863866998308,
|
|
"learning_rate": 2.401417351535795e-05,
|
|
"loss": 0.1573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06505010277032852,
|
|
"step": 4235,
|
|
"valid_targets_mean": 1276.5,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 3.45014245014245,
|
|
"grad_norm": 0.7487725404958699,
|
|
"learning_rate": 2.3974412447791403e-05,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09261137992143631,
|
|
"step": 4240,
|
|
"valid_targets_mean": 2024.8,
|
|
"valid_targets_min": 881
|
|
},
|
|
{
|
|
"epoch": 3.4542124542124544,
|
|
"grad_norm": 0.8187666821446936,
|
|
"learning_rate": 2.3934635019361955e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08315032720565796,
|
|
"step": 4245,
|
|
"valid_targets_mean": 1288.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 3.4582824582824583,
|
|
"grad_norm": 0.753634047230146,
|
|
"learning_rate": 2.3894841393815323e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055352650582790375,
|
|
"step": 4250,
|
|
"valid_targets_mean": 1103.9,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 3.462352462352462,
|
|
"grad_norm": 0.8079463943992531,
|
|
"learning_rate": 2.3855031734963925e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07735898345708847,
|
|
"step": 4255,
|
|
"valid_targets_mean": 1392.9,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 3.4664224664224665,
|
|
"grad_norm": 0.7707769218886613,
|
|
"learning_rate": 2.3815206206686144e-05,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06932565569877625,
|
|
"step": 4260,
|
|
"valid_targets_mean": 1392.4,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 3.4704924704924704,
|
|
"grad_norm": 0.7958744117773525,
|
|
"learning_rate": 2.3775364972925737e-05,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07428084313869476,
|
|
"step": 4265,
|
|
"valid_targets_mean": 1441.8,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 3.4745624745624744,
|
|
"grad_norm": 0.7647293162181219,
|
|
"learning_rate": 2.3735508197691068e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07184837758541107,
|
|
"step": 4270,
|
|
"valid_targets_mean": 1413.5,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.4786324786324787,
|
|
"grad_norm": 0.760684869364684,
|
|
"learning_rate": 2.369563604505451e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07639266550540924,
|
|
"step": 4275,
|
|
"valid_targets_mean": 1323.5,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 3.4827024827024826,
|
|
"grad_norm": 0.785131473550303,
|
|
"learning_rate": 2.365574867915172e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07727733254432678,
|
|
"step": 4280,
|
|
"valid_targets_mean": 1488.4,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 3.4867724867724865,
|
|
"grad_norm": 0.712055390091951,
|
|
"learning_rate": 2.361584626418099e-05,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08490264415740967,
|
|
"step": 4285,
|
|
"valid_targets_mean": 1935.9,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 3.490842490842491,
|
|
"grad_norm": 0.8293965286613701,
|
|
"learning_rate": 2.3575928964402558e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08114030957221985,
|
|
"step": 4290,
|
|
"valid_targets_mean": 1502.1,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 3.494912494912495,
|
|
"grad_norm": 0.7983467061857048,
|
|
"learning_rate": 2.353599694413794e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08377361297607422,
|
|
"step": 4295,
|
|
"valid_targets_mean": 1535.1,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 3.498982498982499,
|
|
"grad_norm": 0.7795845542928671,
|
|
"learning_rate": 2.349605036776925e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06817035377025604,
|
|
"step": 4300,
|
|
"valid_targets_mean": 1346.4,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 3.503052503052503,
|
|
"grad_norm": 0.9206942614110478,
|
|
"learning_rate": 2.3456089399738514e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.079506516456604,
|
|
"step": 4305,
|
|
"valid_targets_mean": 1543.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 3.5071225071225074,
|
|
"grad_norm": 0.7302785723268027,
|
|
"learning_rate": 2.3416114204547e-05,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07028716802597046,
|
|
"step": 4310,
|
|
"valid_targets_mean": 1541.9,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 3.5111925111925113,
|
|
"grad_norm": 0.7112489536142887,
|
|
"learning_rate": 2.3376124946754574e-05,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07776269316673279,
|
|
"step": 4315,
|
|
"valid_targets_mean": 1632.8,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 3.515262515262515,
|
|
"grad_norm": 0.7261049103300148,
|
|
"learning_rate": 2.3336121790978955e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06957513093948364,
|
|
"step": 4320,
|
|
"valid_targets_mean": 1349.9,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 3.5193325193325196,
|
|
"grad_norm": 0.7903987711410705,
|
|
"learning_rate": 2.3296104901895074e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08000437915325165,
|
|
"step": 4325,
|
|
"valid_targets_mean": 1383.0,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 3.5234025234025235,
|
|
"grad_norm": 0.7622357569514845,
|
|
"learning_rate": 2.3256074444234437e-05,
|
|
"loss": 0.1479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07310856133699417,
|
|
"step": 4330,
|
|
"valid_targets_mean": 1577.5,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 3.5274725274725274,
|
|
"grad_norm": 0.8743060398490866,
|
|
"learning_rate": 2.3216030582784358e-05,
|
|
"loss": 0.1597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07247571647167206,
|
|
"step": 4335,
|
|
"valid_targets_mean": 1183.5,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.5315425315425317,
|
|
"grad_norm": 0.7953087241662045,
|
|
"learning_rate": 2.317597348238735e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06898859143257141,
|
|
"step": 4340,
|
|
"valid_targets_mean": 1373.9,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 3.5356125356125356,
|
|
"grad_norm": 0.7438998227523352,
|
|
"learning_rate": 2.3135903307940425e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06783884018659592,
|
|
"step": 4345,
|
|
"valid_targets_mean": 1428.1,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 3.5396825396825395,
|
|
"grad_norm": 0.7403466593709432,
|
|
"learning_rate": 2.3095820224394418e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06895196437835693,
|
|
"step": 4350,
|
|
"valid_targets_mean": 1420.9,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 3.543752543752544,
|
|
"grad_norm": 0.7776344239197351,
|
|
"learning_rate": 2.305572439675329e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08171670138835907,
|
|
"step": 4355,
|
|
"valid_targets_mean": 1255.1,
|
|
"valid_targets_min": 548
|
|
},
|
|
{
|
|
"epoch": 3.547822547822548,
|
|
"grad_norm": 0.7334294368492669,
|
|
"learning_rate": 2.3015615990073483e-05,
|
|
"loss": 0.1475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0776100903749466,
|
|
"step": 4360,
|
|
"valid_targets_mean": 1411.0,
|
|
"valid_targets_min": 694
|
|
},
|
|
{
|
|
"epoch": 3.5518925518925517,
|
|
"grad_norm": 0.7080206142881729,
|
|
"learning_rate": 2.29754951694632e-05,
|
|
"loss": 0.156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0899009108543396,
|
|
"step": 4365,
|
|
"valid_targets_mean": 1973.8,
|
|
"valid_targets_min": 1030
|
|
},
|
|
{
|
|
"epoch": 3.555962555962556,
|
|
"grad_norm": 0.6615510612620584,
|
|
"learning_rate": 2.2935362100081767e-05,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061547666788101196,
|
|
"step": 4370,
|
|
"valid_targets_mean": 1580.2,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.56003256003256,
|
|
"grad_norm": 0.81839532426026,
|
|
"learning_rate": 2.2895216947138924e-05,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07387113571166992,
|
|
"step": 4375,
|
|
"valid_targets_mean": 1425.4,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 3.564102564102564,
|
|
"grad_norm": 0.7054362018903443,
|
|
"learning_rate": 2.285505987589415e-05,
|
|
"loss": 0.1498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055615417659282684,
|
|
"step": 4380,
|
|
"valid_targets_mean": 1299.6,
|
|
"valid_targets_min": 639
|
|
},
|
|
{
|
|
"epoch": 3.5681725681725682,
|
|
"grad_norm": 0.8007028427332834,
|
|
"learning_rate": 2.281489105165599e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0648592859506607,
|
|
"step": 4385,
|
|
"valid_targets_mean": 1458.8,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 3.572242572242572,
|
|
"grad_norm": 0.8445299441357773,
|
|
"learning_rate": 2.277471063978137e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07231980562210083,
|
|
"step": 4390,
|
|
"valid_targets_mean": 1329.8,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 3.576312576312576,
|
|
"grad_norm": 0.6807816095455992,
|
|
"learning_rate": 2.2734518805674913e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07544272392988205,
|
|
"step": 4395,
|
|
"valid_targets_mean": 1559.9,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 3.5803825803825804,
|
|
"grad_norm": 0.8452163681063782,
|
|
"learning_rate": 2.269431571478828e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08027100563049316,
|
|
"step": 4400,
|
|
"valid_targets_mean": 1392.5,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 3.5844525844525843,
|
|
"grad_norm": 0.7806355035255146,
|
|
"learning_rate": 2.2654101532619443e-05,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06163328140974045,
|
|
"step": 4405,
|
|
"valid_targets_mean": 1209.0,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 3.5885225885225887,
|
|
"grad_norm": 0.48984256846528007,
|
|
"learning_rate": 2.2613876424712052e-05,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04831802099943161,
|
|
"step": 4410,
|
|
"valid_targets_mean": 2470.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 3.5925925925925926,
|
|
"grad_norm": 0.39618952222130516,
|
|
"learning_rate": 2.257364055665473e-05,
|
|
"loss": 0.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04768257588148117,
|
|
"step": 4415,
|
|
"valid_targets_mean": 3278.9,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 3.596662596662597,
|
|
"grad_norm": 0.39078618847965113,
|
|
"learning_rate": 2.2533394094080397e-05,
|
|
"loss": 0.0877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048612505197525024,
|
|
"step": 4420,
|
|
"valid_targets_mean": 3783.6,
|
|
"valid_targets_min": 2571
|
|
},
|
|
{
|
|
"epoch": 3.600732600732601,
|
|
"grad_norm": 0.3479538454031919,
|
|
"learning_rate": 2.2493137202665566e-05,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046523869037628174,
|
|
"step": 4425,
|
|
"valid_targets_mean": 3905.0,
|
|
"valid_targets_min": 3179
|
|
},
|
|
{
|
|
"epoch": 3.6048026048026047,
|
|
"grad_norm": 0.40917149997042873,
|
|
"learning_rate": 2.2452870048129707e-05,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038769036531448364,
|
|
"step": 4430,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 3.608872608872609,
|
|
"grad_norm": 0.7387439093745595,
|
|
"learning_rate": 2.241259279623453e-05,
|
|
"loss": 0.1063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06168878823518753,
|
|
"step": 4435,
|
|
"valid_targets_mean": 974.8,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 3.612942612942613,
|
|
"grad_norm": 0.43905368150585783,
|
|
"learning_rate": 2.2372305612783305e-05,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04938036948442459,
|
|
"step": 4440,
|
|
"valid_targets_mean": 2743.8,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 3.617012617012617,
|
|
"grad_norm": 0.4689396969033086,
|
|
"learning_rate": 2.233200866362019e-05,
|
|
"loss": 0.094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060613177716732025,
|
|
"step": 4445,
|
|
"valid_targets_mean": 2744.4,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.6210826210826212,
|
|
"grad_norm": 0.3460122217852074,
|
|
"learning_rate": 2.2291702114629542e-05,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0620415098965168,
|
|
"step": 4450,
|
|
"valid_targets_mean": 3279.1,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 3.625152625152625,
|
|
"grad_norm": 0.2918413867985773,
|
|
"learning_rate": 2.225138613173524e-05,
|
|
"loss": 0.1076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033721111714839935,
|
|
"step": 4455,
|
|
"valid_targets_mean": 4515.5,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 3.629222629222629,
|
|
"grad_norm": 0.3411037126787543,
|
|
"learning_rate": 2.2211060880899996e-05,
|
|
"loss": 0.0876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04788324981927872,
|
|
"step": 4460,
|
|
"valid_targets_mean": 3701.6,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 3.6332926332926334,
|
|
"grad_norm": 0.29653848428471286,
|
|
"learning_rate": 2.217072652812468e-05,
|
|
"loss": 0.0905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036281127482652664,
|
|
"step": 4465,
|
|
"valid_targets_mean": 3858.5,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 3.6373626373626373,
|
|
"grad_norm": 0.5325825065869296,
|
|
"learning_rate": 2.213038323944761e-05,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0998295247554779,
|
|
"step": 4470,
|
|
"valid_targets_mean": 2629.8,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 3.6414326414326412,
|
|
"grad_norm": 0.37544688292236766,
|
|
"learning_rate": 2.209003118094392e-05,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03794893994927406,
|
|
"step": 4475,
|
|
"valid_targets_mean": 3368.0,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 3.6455026455026456,
|
|
"grad_norm": 0.5425961999646625,
|
|
"learning_rate": 2.2049670518724818e-05,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04944489151239395,
|
|
"step": 4480,
|
|
"valid_targets_mean": 1644.2,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 3.6495726495726495,
|
|
"grad_norm": 0.31434891274177323,
|
|
"learning_rate": 2.2009301418936945e-05,
|
|
"loss": 0.0828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038889069110155106,
|
|
"step": 4485,
|
|
"valid_targets_mean": 3700.4,
|
|
"valid_targets_min": 989
|
|
},
|
|
{
|
|
"epoch": 3.6536426536426534,
|
|
"grad_norm": 0.3810739437224756,
|
|
"learning_rate": 2.1968924047761665e-05,
|
|
"loss": 0.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03368036821484566,
|
|
"step": 4490,
|
|
"valid_targets_mean": 3537.8,
|
|
"valid_targets_min": 2616
|
|
},
|
|
{
|
|
"epoch": 3.6577126577126577,
|
|
"grad_norm": 0.43609678797054763,
|
|
"learning_rate": 2.1928538571414416e-05,
|
|
"loss": 0.1085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05374626815319061,
|
|
"step": 4495,
|
|
"valid_targets_mean": 2184.5,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 3.6617826617826617,
|
|
"grad_norm": 0.4437529891094936,
|
|
"learning_rate": 2.1888145156143966e-05,
|
|
"loss": 0.0984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05205199122428894,
|
|
"step": 4500,
|
|
"valid_targets_mean": 2054.5,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 3.665852665852666,
|
|
"grad_norm": 0.43380985224197094,
|
|
"learning_rate": 2.184774396823178e-05,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0552193820476532,
|
|
"step": 4505,
|
|
"valid_targets_mean": 3681.5,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 3.66992266992267,
|
|
"grad_norm": 0.3799243340192024,
|
|
"learning_rate": 2.180733517399133e-05,
|
|
"loss": 0.0995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049977511167526245,
|
|
"step": 4510,
|
|
"valid_targets_mean": 3436.9,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 3.6739926739926743,
|
|
"grad_norm": 0.32571580848557247,
|
|
"learning_rate": 2.1766918939767394e-05,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036109715700149536,
|
|
"step": 4515,
|
|
"valid_targets_mean": 3740.8,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 3.678062678062678,
|
|
"grad_norm": 0.39033800490114573,
|
|
"learning_rate": 2.1726495431935364e-05,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04185417294502258,
|
|
"step": 4520,
|
|
"valid_targets_mean": 3092.0,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 3.682132682132682,
|
|
"grad_norm": 0.6397111709341369,
|
|
"learning_rate": 2.1686064816900587e-05,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07134007662534714,
|
|
"step": 4525,
|
|
"valid_targets_mean": 1687.1,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 3.6862026862026864,
|
|
"grad_norm": 0.5232501844782532,
|
|
"learning_rate": 2.164562726109766e-05,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04346204549074173,
|
|
"step": 4530,
|
|
"valid_targets_mean": 2173.0,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.6902726902726903,
|
|
"grad_norm": 0.39995472780169194,
|
|
"learning_rate": 2.1605182930989764e-05,
|
|
"loss": 0.0916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03484119474887848,
|
|
"step": 4535,
|
|
"valid_targets_mean": 1954.0,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 3.6943426943426942,
|
|
"grad_norm": 0.5523582831008328,
|
|
"learning_rate": 2.1564731993067958e-05,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10305885225534439,
|
|
"step": 4540,
|
|
"valid_targets_mean": 2692.1,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 3.6984126984126986,
|
|
"grad_norm": 0.43560416380796335,
|
|
"learning_rate": 2.1524274613850495e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039621271193027496,
|
|
"step": 4545,
|
|
"valid_targets_mean": 2071.0,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 3.7024827024827025,
|
|
"grad_norm": 0.35770234003575513,
|
|
"learning_rate": 2.1483810959882154e-05,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04059296101331711,
|
|
"step": 4550,
|
|
"valid_targets_mean": 3086.0,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 3.7065527065527064,
|
|
"grad_norm": 0.6970042913323712,
|
|
"learning_rate": 2.144334119773355e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12542730569839478,
|
|
"step": 4555,
|
|
"valid_targets_mean": 2081.2,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 3.7106227106227108,
|
|
"grad_norm": 0.31025486801351915,
|
|
"learning_rate": 2.1402865494000435e-05,
|
|
"loss": 0.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0449528843164444,
|
|
"step": 4560,
|
|
"valid_targets_mean": 4426.4,
|
|
"valid_targets_min": 3507
|
|
},
|
|
{
|
|
"epoch": 3.7146927146927147,
|
|
"grad_norm": 0.3347224349640861,
|
|
"learning_rate": 2.1362384015303002e-05,
|
|
"loss": 0.0879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04645727574825287,
|
|
"step": 4565,
|
|
"valid_targets_mean": 4391.1,
|
|
"valid_targets_min": 3483
|
|
},
|
|
{
|
|
"epoch": 3.7187627187627186,
|
|
"grad_norm": 0.37334581619913065,
|
|
"learning_rate": 2.1321896928285256e-05,
|
|
"loss": 0.1023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048772167414426804,
|
|
"step": 4570,
|
|
"valid_targets_mean": 3939.6,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.722832722832723,
|
|
"grad_norm": 0.4447642934119185,
|
|
"learning_rate": 2.128140439961426e-05,
|
|
"loss": 0.0878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033333856612443924,
|
|
"step": 4575,
|
|
"valid_targets_mean": 1469.4,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 3.726902726902727,
|
|
"grad_norm": 0.5372359551407455,
|
|
"learning_rate": 2.1240906595979488e-05,
|
|
"loss": 0.0905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039604753255844116,
|
|
"step": 4580,
|
|
"valid_targets_mean": 1845.1,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 3.7309727309727307,
|
|
"grad_norm": 0.4637069162414348,
|
|
"learning_rate": 2.1200403684092112e-05,
|
|
"loss": 0.0839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04348008334636688,
|
|
"step": 4585,
|
|
"valid_targets_mean": 2154.5,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 3.735042735042735,
|
|
"grad_norm": 0.49301713165155914,
|
|
"learning_rate": 2.115989583068436e-05,
|
|
"loss": 0.0904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04291559010744095,
|
|
"step": 4590,
|
|
"valid_targets_mean": 3409.4,
|
|
"valid_targets_min": 2422
|
|
},
|
|
{
|
|
"epoch": 3.739112739112739,
|
|
"grad_norm": 0.34908985580550017,
|
|
"learning_rate": 2.1119383202508793e-05,
|
|
"loss": 0.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026736728847026825,
|
|
"step": 4595,
|
|
"valid_targets_mean": 2888.1,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 3.743182743182743,
|
|
"grad_norm": 0.3051490004834012,
|
|
"learning_rate": 2.107886596633761e-05,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022518891841173172,
|
|
"step": 4600,
|
|
"valid_targets_mean": 3605.9,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 3.7472527472527473,
|
|
"grad_norm": 0.39215842357161595,
|
|
"learning_rate": 2.1038344288961995e-05,
|
|
"loss": 0.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03927973657846451,
|
|
"step": 4605,
|
|
"valid_targets_mean": 2694.1,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 3.751322751322751,
|
|
"grad_norm": 0.3416371707837828,
|
|
"learning_rate": 2.099781833719142e-05,
|
|
"loss": 0.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04170195013284683,
|
|
"step": 4610,
|
|
"valid_targets_mean": 3837.2,
|
|
"valid_targets_min": 2788
|
|
},
|
|
{
|
|
"epoch": 3.7553927553927555,
|
|
"grad_norm": 0.3686746265338282,
|
|
"learning_rate": 2.095728827785294e-05,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03984172269701958,
|
|
"step": 4615,
|
|
"valid_targets_mean": 2829.4,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 3.7594627594627594,
|
|
"grad_norm": 0.6031062567308154,
|
|
"learning_rate": 2.091675427779052e-05,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.125014990568161,
|
|
"step": 4620,
|
|
"valid_targets_mean": 1989.6,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 3.763532763532764,
|
|
"grad_norm": 0.34464040068494,
|
|
"learning_rate": 2.0876216503864348e-05,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03857950493693352,
|
|
"step": 4625,
|
|
"valid_targets_mean": 3851.4,
|
|
"valid_targets_min": 3150
|
|
},
|
|
{
|
|
"epoch": 3.7676027676027677,
|
|
"grad_norm": 0.521731284937972,
|
|
"learning_rate": 2.0835675122950155e-05,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0498424731194973,
|
|
"step": 4630,
|
|
"valid_targets_mean": 2037.9,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.7716727716727716,
|
|
"grad_norm": 0.6148462798244299,
|
|
"learning_rate": 2.079513030193852e-05,
|
|
"loss": 0.0937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04556414484977722,
|
|
"step": 4635,
|
|
"valid_targets_mean": 3679.2,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 3.775742775742776,
|
|
"grad_norm": 0.4489274468020415,
|
|
"learning_rate": 2.0754582207734163e-05,
|
|
"loss": 0.1115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0359991192817688,
|
|
"step": 4640,
|
|
"valid_targets_mean": 1756.9,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 3.77981277981278,
|
|
"grad_norm": 0.4685040453171337,
|
|
"learning_rate": 2.0714031007255308e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039639245718717575,
|
|
"step": 4645,
|
|
"valid_targets_mean": 1352.6,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 3.7838827838827838,
|
|
"grad_norm": 0.38901622311150574,
|
|
"learning_rate": 2.0673476867432945e-05,
|
|
"loss": 0.0985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07358896732330322,
|
|
"step": 4650,
|
|
"valid_targets_mean": 2522.5,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 3.787952787952788,
|
|
"grad_norm": 0.45401232368813943,
|
|
"learning_rate": 2.0632919955210177e-05,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044406771659851074,
|
|
"step": 4655,
|
|
"valid_targets_mean": 3511.5,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 3.792022792022792,
|
|
"grad_norm": 0.33635740471339176,
|
|
"learning_rate": 2.0592360437541506e-05,
|
|
"loss": 0.0954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03269575536251068,
|
|
"step": 4660,
|
|
"valid_targets_mean": 3454.1,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 3.796092796092796,
|
|
"grad_norm": 0.3694812668687151,
|
|
"learning_rate": 2.055179848139217e-05,
|
|
"loss": 0.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04042567312717438,
|
|
"step": 4665,
|
|
"valid_targets_mean": 5271.5,
|
|
"valid_targets_min": 4147
|
|
},
|
|
{
|
|
"epoch": 3.8001628001628003,
|
|
"grad_norm": 0.3719991625867249,
|
|
"learning_rate": 2.0511234253737445e-05,
|
|
"loss": 0.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04839537665247917,
|
|
"step": 4670,
|
|
"valid_targets_mean": 4163.8,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 3.804232804232804,
|
|
"grad_norm": 0.38045962863863597,
|
|
"learning_rate": 2.047066792156195e-05,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027333011850714684,
|
|
"step": 4675,
|
|
"valid_targets_mean": 4198.2,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 3.808302808302808,
|
|
"grad_norm": 0.34894815665055257,
|
|
"learning_rate": 2.0430099651858972e-05,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046752214431762695,
|
|
"step": 4680,
|
|
"valid_targets_mean": 3781.4,
|
|
"valid_targets_min": 2862
|
|
},
|
|
{
|
|
"epoch": 3.8123728123728124,
|
|
"grad_norm": 0.6275665055735403,
|
|
"learning_rate": 2.038952961162978e-05,
|
|
"loss": 0.109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06374616920948029,
|
|
"step": 4685,
|
|
"valid_targets_mean": 1718.8,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 3.8164428164428164,
|
|
"grad_norm": 0.40826142469048216,
|
|
"learning_rate": 2.034895796788292e-05,
|
|
"loss": 0.0907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05431363359093666,
|
|
"step": 4690,
|
|
"valid_targets_mean": 4016.9,
|
|
"valid_targets_min": 3217
|
|
},
|
|
{
|
|
"epoch": 3.8205128205128203,
|
|
"grad_norm": 0.4250331729338565,
|
|
"learning_rate": 2.030838488763355e-05,
|
|
"loss": 0.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03880765289068222,
|
|
"step": 4695,
|
|
"valid_targets_mean": 2624.6,
|
|
"valid_targets_min": 901
|
|
},
|
|
{
|
|
"epoch": 3.8245828245828246,
|
|
"grad_norm": 0.39072073946366787,
|
|
"learning_rate": 2.0267810537902727e-05,
|
|
"loss": 0.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04679608345031738,
|
|
"step": 4700,
|
|
"valid_targets_mean": 3859.0,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 3.8286528286528285,
|
|
"grad_norm": 0.3928920752567127,
|
|
"learning_rate": 2.0227235085716754e-05,
|
|
"loss": 0.0994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06546978652477264,
|
|
"step": 4705,
|
|
"valid_targets_mean": 3524.6,
|
|
"valid_targets_min": 3017
|
|
},
|
|
{
|
|
"epoch": 3.832722832722833,
|
|
"grad_norm": 0.37248107631418687,
|
|
"learning_rate": 2.0186658698106445e-05,
|
|
"loss": 0.0862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036713846027851105,
|
|
"step": 4710,
|
|
"valid_targets_mean": 3727.2,
|
|
"valid_targets_min": 3184
|
|
},
|
|
{
|
|
"epoch": 3.836792836792837,
|
|
"grad_norm": 0.6173637983811905,
|
|
"learning_rate": 2.0146081542106502e-05,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0795275866985321,
|
|
"step": 4715,
|
|
"valid_targets_mean": 2059.5,
|
|
"valid_targets_min": 478
|
|
},
|
|
{
|
|
"epoch": 3.840862840862841,
|
|
"grad_norm": 0.506439817469206,
|
|
"learning_rate": 2.010550378475475e-05,
|
|
"loss": 0.0888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050123006105422974,
|
|
"step": 4720,
|
|
"valid_targets_mean": 1881.8,
|
|
"valid_targets_min": 955
|
|
},
|
|
{
|
|
"epoch": 3.844932844932845,
|
|
"grad_norm": 0.5407218171773867,
|
|
"learning_rate": 2.006492559309152e-05,
|
|
"loss": 0.095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0604960098862648,
|
|
"step": 4725,
|
|
"valid_targets_mean": 1654.2,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 3.849002849002849,
|
|
"grad_norm": 0.3651902043497858,
|
|
"learning_rate": 2.0024347134158915e-05,
|
|
"loss": 0.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03338472917675972,
|
|
"step": 4730,
|
|
"valid_targets_mean": 2815.5,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 3.8530728530728533,
|
|
"grad_norm": 0.4855059874008844,
|
|
"learning_rate": 1.9983768575000147e-05,
|
|
"loss": 0.0909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03731710463762283,
|
|
"step": 4735,
|
|
"valid_targets_mean": 1247.0,
|
|
"valid_targets_min": 495
|
|
},
|
|
{
|
|
"epoch": 3.857142857142857,
|
|
"grad_norm": 0.47569854814679674,
|
|
"learning_rate": 1.9943190082658827e-05,
|
|
"loss": 0.1019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031180473044514656,
|
|
"step": 4740,
|
|
"valid_targets_mean": 2142.4,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 3.861212861212861,
|
|
"grad_norm": 0.6078700792608549,
|
|
"learning_rate": 1.9902611824178306e-05,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.088534876704216,
|
|
"step": 4745,
|
|
"valid_targets_mean": 2176.2,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 3.8652828652828655,
|
|
"grad_norm": 0.3966917197512265,
|
|
"learning_rate": 1.986203396660097e-05,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04920294135808945,
|
|
"step": 4750,
|
|
"valid_targets_mean": 3740.4,
|
|
"valid_targets_min": 2788
|
|
},
|
|
{
|
|
"epoch": 3.8693528693528694,
|
|
"grad_norm": 0.3078969308089153,
|
|
"learning_rate": 1.9821456676967552e-05,
|
|
"loss": 0.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03821399807929993,
|
|
"step": 4755,
|
|
"valid_targets_mean": 3481.5,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 3.8734228734228733,
|
|
"grad_norm": 0.41592065293330216,
|
|
"learning_rate": 1.9780880122316434e-05,
|
|
"loss": 0.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04670462757349014,
|
|
"step": 4760,
|
|
"valid_targets_mean": 2820.0,
|
|
"valid_targets_min": 2144
|
|
},
|
|
{
|
|
"epoch": 3.8774928774928776,
|
|
"grad_norm": 0.42102684627161313,
|
|
"learning_rate": 1.9740304469682987e-05,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05189353972673416,
|
|
"step": 4765,
|
|
"valid_targets_mean": 3554.1,
|
|
"valid_targets_min": 2619
|
|
},
|
|
{
|
|
"epoch": 3.8815628815628815,
|
|
"grad_norm": 0.3289199750652924,
|
|
"learning_rate": 1.9699729886098876e-05,
|
|
"loss": 0.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03820990025997162,
|
|
"step": 4770,
|
|
"valid_targets_mean": 3334.5,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 3.8856328856328854,
|
|
"grad_norm": 0.3257554126268986,
|
|
"learning_rate": 1.965915653859135e-05,
|
|
"loss": 0.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038173578679561615,
|
|
"step": 4775,
|
|
"valid_targets_mean": 4034.9,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 3.88970288970289,
|
|
"grad_norm": 0.31496384696830865,
|
|
"learning_rate": 1.9618584594182576e-05,
|
|
"loss": 0.0871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040112294256687164,
|
|
"step": 4780,
|
|
"valid_targets_mean": 3456.8,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 3.8937728937728937,
|
|
"grad_norm": 0.3078455051176223,
|
|
"learning_rate": 1.957801421988894e-05,
|
|
"loss": 0.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047052159905433655,
|
|
"step": 4785,
|
|
"valid_targets_mean": 4773.8,
|
|
"valid_targets_min": 3539
|
|
},
|
|
{
|
|
"epoch": 3.8978428978428976,
|
|
"grad_norm": 0.3723134741147702,
|
|
"learning_rate": 1.9537445582720385e-05,
|
|
"loss": 0.0946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05127771943807602,
|
|
"step": 4790,
|
|
"valid_targets_mean": 4328.5,
|
|
"valid_targets_min": 3172
|
|
},
|
|
{
|
|
"epoch": 3.901912901912902,
|
|
"grad_norm": 0.306985867833895,
|
|
"learning_rate": 1.9496878849679667e-05,
|
|
"loss": 0.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02703310176730156,
|
|
"step": 4795,
|
|
"valid_targets_mean": 2120.1,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 3.905982905982906,
|
|
"grad_norm": 0.3994634886270254,
|
|
"learning_rate": 1.9456314187761726e-05,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053539011627435684,
|
|
"step": 4800,
|
|
"valid_targets_mean": 2216.2,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 3.91005291005291,
|
|
"grad_norm": 0.3309405585861779,
|
|
"learning_rate": 1.941575176395298e-05,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029777104035019875,
|
|
"step": 4805,
|
|
"valid_targets_mean": 3338.9,
|
|
"valid_targets_min": 1938
|
|
},
|
|
{
|
|
"epoch": 3.914122914122914,
|
|
"grad_norm": 0.4467641072962703,
|
|
"learning_rate": 1.937519174523063e-05,
|
|
"loss": 0.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12722966074943542,
|
|
"step": 4810,
|
|
"valid_targets_mean": 2226.6,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 3.918192918192918,
|
|
"grad_norm": 0.390118295374521,
|
|
"learning_rate": 1.9334634298561962e-05,
|
|
"loss": 0.0919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043785303831100464,
|
|
"step": 4815,
|
|
"valid_targets_mean": 3561.4,
|
|
"valid_targets_min": 2919
|
|
},
|
|
{
|
|
"epoch": 3.9222629222629224,
|
|
"grad_norm": 0.40458656901794954,
|
|
"learning_rate": 1.9294079590903697e-05,
|
|
"loss": 0.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03705420717597008,
|
|
"step": 4820,
|
|
"valid_targets_mean": 1509.9,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 3.9263329263329263,
|
|
"grad_norm": 0.525559021088802,
|
|
"learning_rate": 1.925352778920126e-05,
|
|
"loss": 0.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06773480027914047,
|
|
"step": 4825,
|
|
"valid_targets_mean": 3123.1,
|
|
"valid_targets_min": 883
|
|
},
|
|
{
|
|
"epoch": 3.9304029304029307,
|
|
"grad_norm": 0.7659028123265849,
|
|
"learning_rate": 1.9212979060388136e-05,
|
|
"loss": 0.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07741884142160416,
|
|
"step": 4830,
|
|
"valid_targets_mean": 1419.0,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 3.9344729344729346,
|
|
"grad_norm": 0.37012249374521133,
|
|
"learning_rate": 1.917243357138514e-05,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03321508690714836,
|
|
"step": 4835,
|
|
"valid_targets_mean": 2953.9,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.9385429385429385,
|
|
"grad_norm": 0.38243163663291685,
|
|
"learning_rate": 1.9131891489099745e-05,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04113879054784775,
|
|
"step": 4840,
|
|
"valid_targets_mean": 3091.9,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 3.942612942612943,
|
|
"grad_norm": 0.4160846401676222,
|
|
"learning_rate": 1.9091352980425427e-05,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05502070114016533,
|
|
"step": 4845,
|
|
"valid_targets_mean": 3593.4,
|
|
"valid_targets_min": 2556
|
|
},
|
|
{
|
|
"epoch": 3.9466829466829467,
|
|
"grad_norm": 0.44382246740561504,
|
|
"learning_rate": 1.9050818212240937e-05,
|
|
"loss": 0.0843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05704183131456375,
|
|
"step": 4850,
|
|
"valid_targets_mean": 3753.2,
|
|
"valid_targets_min": 3385
|
|
},
|
|
{
|
|
"epoch": 3.9507529507529506,
|
|
"grad_norm": 0.3788947152767503,
|
|
"learning_rate": 1.9010287351409617e-05,
|
|
"loss": 0.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03963714465498924,
|
|
"step": 4855,
|
|
"valid_targets_mean": 3442.1,
|
|
"valid_targets_min": 932
|
|
},
|
|
{
|
|
"epoch": 3.954822954822955,
|
|
"grad_norm": 0.6893550041691463,
|
|
"learning_rate": 1.896976056477874e-05,
|
|
"loss": 0.0852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05745682120323181,
|
|
"step": 4860,
|
|
"valid_targets_mean": 1202.9,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 3.958892958892959,
|
|
"grad_norm": 0.48501252893743335,
|
|
"learning_rate": 1.892923801917881e-05,
|
|
"loss": 0.0978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04939830303192139,
|
|
"step": 4865,
|
|
"valid_targets_mean": 3333.5,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 3.962962962962963,
|
|
"grad_norm": 1.5705058397396339,
|
|
"learning_rate": 1.888871988142285e-05,
|
|
"loss": 0.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036230284720659256,
|
|
"step": 4870,
|
|
"valid_targets_mean": 1234.5,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.967032967032967,
|
|
"grad_norm": 0.38273222211564595,
|
|
"learning_rate": 1.8848206318305762e-05,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038350578397512436,
|
|
"step": 4875,
|
|
"valid_targets_mean": 2762.4,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 3.971102971102971,
|
|
"grad_norm": 0.3525402431797138,
|
|
"learning_rate": 1.8807697496603604e-05,
|
|
"loss": 0.0942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04092249274253845,
|
|
"step": 4880,
|
|
"valid_targets_mean": 3842.5,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 3.975172975172975,
|
|
"grad_norm": 0.23643952371340574,
|
|
"learning_rate": 1.8767193583072917e-05,
|
|
"loss": 0.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03318733721971512,
|
|
"step": 4885,
|
|
"valid_targets_mean": 5590.6,
|
|
"valid_targets_min": 4657
|
|
},
|
|
{
|
|
"epoch": 3.9792429792429793,
|
|
"grad_norm": 0.34030143973987964,
|
|
"learning_rate": 1.8726694744450046e-05,
|
|
"loss": 0.081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04707394912838936,
|
|
"step": 4890,
|
|
"valid_targets_mean": 4327.4,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 3.9833129833129832,
|
|
"grad_norm": 0.3472447381234327,
|
|
"learning_rate": 1.868620114745043e-05,
|
|
"loss": 0.0863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04898809269070625,
|
|
"step": 4895,
|
|
"valid_targets_mean": 3547.8,
|
|
"valid_targets_min": 2463
|
|
},
|
|
{
|
|
"epoch": 3.987382987382987,
|
|
"grad_norm": 0.416132959419297,
|
|
"learning_rate": 1.8645712958767936e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031941697001457214,
|
|
"step": 4900,
|
|
"valid_targets_mean": 2903.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.9914529914529915,
|
|
"grad_norm": 0.33752542112182116,
|
|
"learning_rate": 1.8605230345074187e-05,
|
|
"loss": 0.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035623304545879364,
|
|
"step": 4905,
|
|
"valid_targets_mean": 3095.8,
|
|
"valid_targets_min": 1014
|
|
},
|
|
{
|
|
"epoch": 3.9955229955229954,
|
|
"grad_norm": 0.35702567633782445,
|
|
"learning_rate": 1.8564753473017815e-05,
|
|
"loss": 0.0926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04029485583305359,
|
|
"step": 4910,
|
|
"valid_targets_mean": 3526.4,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 3.9995929995929997,
|
|
"grad_norm": 0.4652040750909582,
|
|
"learning_rate": 1.8524282509223857e-05,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09600470215082169,
|
|
"step": 4915,
|
|
"valid_targets_mean": 3006.5,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 4.003256003256003,
|
|
"grad_norm": 0.6419873740137033,
|
|
"learning_rate": 1.8483817620293002e-05,
|
|
"loss": 0.1742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15491032600402832,
|
|
"step": 4920,
|
|
"valid_targets_mean": 8029.9,
|
|
"valid_targets_min": 6142
|
|
},
|
|
{
|
|
"epoch": 4.007326007326007,
|
|
"grad_norm": 0.4261293015569112,
|
|
"learning_rate": 1.8443358972800943e-05,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09373286366462708,
|
|
"step": 4925,
|
|
"valid_targets_mean": 6664.0,
|
|
"valid_targets_min": 5470
|
|
},
|
|
{
|
|
"epoch": 4.011396011396012,
|
|
"grad_norm": 0.43569183179763055,
|
|
"learning_rate": 1.8402906733297686e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09796077013015747,
|
|
"step": 4930,
|
|
"valid_targets_mean": 7126.9,
|
|
"valid_targets_min": 5368
|
|
},
|
|
{
|
|
"epoch": 4.015466015466015,
|
|
"grad_norm": 0.3628222940332491,
|
|
"learning_rate": 1.836246106830684e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08767811954021454,
|
|
"step": 4935,
|
|
"valid_targets_mean": 7634.9,
|
|
"valid_targets_min": 5614
|
|
},
|
|
{
|
|
"epoch": 4.0195360195360195,
|
|
"grad_norm": 0.41186264281524637,
|
|
"learning_rate": 1.832202214432497e-05,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08911505341529846,
|
|
"step": 4940,
|
|
"valid_targets_mean": 8630.8,
|
|
"valid_targets_min": 6362
|
|
},
|
|
{
|
|
"epoch": 4.023606023606024,
|
|
"grad_norm": 0.39527021624678743,
|
|
"learning_rate": 1.828159012782087e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09319797903299332,
|
|
"step": 4945,
|
|
"valid_targets_mean": 7383.6,
|
|
"valid_targets_min": 5844
|
|
},
|
|
{
|
|
"epoch": 4.027676027676027,
|
|
"grad_norm": 0.3825769189458071,
|
|
"learning_rate": 1.824116518523492e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1047782227396965,
|
|
"step": 4950,
|
|
"valid_targets_mean": 8017.0,
|
|
"valid_targets_min": 5283
|
|
},
|
|
{
|
|
"epoch": 4.031746031746032,
|
|
"grad_norm": 0.37851649415828814,
|
|
"learning_rate": 1.8200747482978358e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07832890748977661,
|
|
"step": 4955,
|
|
"valid_targets_mean": 7051.1,
|
|
"valid_targets_min": 4135
|
|
},
|
|
{
|
|
"epoch": 4.035816035816036,
|
|
"grad_norm": 0.3899190146807035,
|
|
"learning_rate": 1.8160337187432637e-05,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07594281435012817,
|
|
"step": 4960,
|
|
"valid_targets_mean": 5370.4,
|
|
"valid_targets_min": 3919
|
|
},
|
|
{
|
|
"epoch": 4.0398860398860394,
|
|
"grad_norm": 0.3710326532028934,
|
|
"learning_rate": 1.8119934464948713e-05,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07691587507724762,
|
|
"step": 4965,
|
|
"valid_targets_mean": 6063.6,
|
|
"valid_targets_min": 4863
|
|
},
|
|
{
|
|
"epoch": 4.043956043956044,
|
|
"grad_norm": 0.425559143325838,
|
|
"learning_rate": 1.8079539481846366e-05,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08631320297718048,
|
|
"step": 4970,
|
|
"valid_targets_mean": 7001.2,
|
|
"valid_targets_min": 6020
|
|
},
|
|
{
|
|
"epoch": 4.048026048026048,
|
|
"grad_norm": 0.3800530976695048,
|
|
"learning_rate": 1.8039152404413513e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08500252664089203,
|
|
"step": 4975,
|
|
"valid_targets_mean": 6181.9,
|
|
"valid_targets_min": 4669
|
|
},
|
|
{
|
|
"epoch": 4.0520960520960525,
|
|
"grad_norm": 0.37536923845620823,
|
|
"learning_rate": 1.7998773398905536e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07884109765291214,
|
|
"step": 4980,
|
|
"valid_targets_mean": 6250.2,
|
|
"valid_targets_min": 4900
|
|
},
|
|
{
|
|
"epoch": 4.056166056166056,
|
|
"grad_norm": 0.369134672032628,
|
|
"learning_rate": 1.795840263154457e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0790032371878624,
|
|
"step": 4985,
|
|
"valid_targets_mean": 7156.4,
|
|
"valid_targets_min": 5323
|
|
},
|
|
{
|
|
"epoch": 4.06023606023606,
|
|
"grad_norm": 0.3823541536796184,
|
|
"learning_rate": 1.7918040268518863e-05,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08844245970249176,
|
|
"step": 4990,
|
|
"valid_targets_mean": 7215.8,
|
|
"valid_targets_min": 4720
|
|
},
|
|
{
|
|
"epoch": 4.064306064306065,
|
|
"grad_norm": 0.3864166564460856,
|
|
"learning_rate": 1.7877686475982045e-05,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08641092479228973,
|
|
"step": 4995,
|
|
"valid_targets_mean": 6550.8,
|
|
"valid_targets_min": 5739
|
|
},
|
|
{
|
|
"epoch": 4.068376068376068,
|
|
"grad_norm": 0.3793296622743899,
|
|
"learning_rate": 1.783734142005248e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0891452431678772,
|
|
"step": 5000,
|
|
"valid_targets_mean": 6726.8,
|
|
"valid_targets_min": 4540
|
|
},
|
|
{
|
|
"epoch": 4.0724460724460725,
|
|
"grad_norm": 0.4723995482127612,
|
|
"learning_rate": 1.7797005266812556e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0973764955997467,
|
|
"step": 5005,
|
|
"valid_targets_mean": 4573.4,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 4.076516076516077,
|
|
"grad_norm": 0.41477705618237604,
|
|
"learning_rate": 1.7756678182308018e-05,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08957439661026001,
|
|
"step": 5010,
|
|
"valid_targets_mean": 6430.0,
|
|
"valid_targets_min": 5412
|
|
},
|
|
{
|
|
"epoch": 4.08058608058608,
|
|
"grad_norm": 0.6352736728460759,
|
|
"learning_rate": 1.7716360332547286e-05,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025655508041381836,
|
|
"step": 5015,
|
|
"valid_targets_mean": 583.1,
|
|
"valid_targets_min": 158
|
|
},
|
|
{
|
|
"epoch": 4.084656084656085,
|
|
"grad_norm": 0.3748310581528748,
|
|
"learning_rate": 1.7676051883500746e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09119533747434616,
|
|
"step": 5020,
|
|
"valid_targets_mean": 8230.9,
|
|
"valid_targets_min": 5061
|
|
},
|
|
{
|
|
"epoch": 4.088726088726089,
|
|
"grad_norm": 0.4146647677224428,
|
|
"learning_rate": 1.76357530011001e-05,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09562458842992783,
|
|
"step": 5025,
|
|
"valid_targets_mean": 7238.9,
|
|
"valid_targets_min": 5112
|
|
},
|
|
{
|
|
"epoch": 4.0927960927960925,
|
|
"grad_norm": 0.3895491315857082,
|
|
"learning_rate": 1.7595463851237666e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07687856256961823,
|
|
"step": 5030,
|
|
"valid_targets_mean": 6322.8,
|
|
"valid_targets_min": 4867
|
|
},
|
|
{
|
|
"epoch": 4.096866096866097,
|
|
"grad_norm": 0.3535398598607784,
|
|
"learning_rate": 1.7555184599765697e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08618101477622986,
|
|
"step": 5035,
|
|
"valid_targets_mean": 7887.0,
|
|
"valid_targets_min": 6004
|
|
},
|
|
{
|
|
"epoch": 4.100936100936101,
|
|
"grad_norm": 0.3398185270105302,
|
|
"learning_rate": 1.7514915412495696e-05,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07757728546857834,
|
|
"step": 5040,
|
|
"valid_targets_mean": 7090.2,
|
|
"valid_targets_min": 4917
|
|
},
|
|
{
|
|
"epoch": 4.105006105006105,
|
|
"grad_norm": 0.3735006074847723,
|
|
"learning_rate": 1.7474656455197746e-05,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08108969032764435,
|
|
"step": 5045,
|
|
"valid_targets_mean": 7441.9,
|
|
"valid_targets_min": 4202
|
|
},
|
|
{
|
|
"epoch": 4.109076109076109,
|
|
"grad_norm": 0.3963767640674053,
|
|
"learning_rate": 1.7434407893599803e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08019435405731201,
|
|
"step": 5050,
|
|
"valid_targets_mean": 7901.8,
|
|
"valid_targets_min": 5060
|
|
},
|
|
{
|
|
"epoch": 4.113146113146113,
|
|
"grad_norm": 0.3279720037805319,
|
|
"learning_rate": 1.7394169893387044e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07386630773544312,
|
|
"step": 5055,
|
|
"valid_targets_mean": 8557.6,
|
|
"valid_targets_min": 6331
|
|
},
|
|
{
|
|
"epoch": 4.117216117216117,
|
|
"grad_norm": 0.43210024185743623,
|
|
"learning_rate": 1.735394262020115e-05,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08315213024616241,
|
|
"step": 5060,
|
|
"valid_targets_mean": 4976.4,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 4.121286121286121,
|
|
"grad_norm": 0.47219602019627277,
|
|
"learning_rate": 1.7313726239639662e-05,
|
|
"loss": 0.1603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09470256417989731,
|
|
"step": 5065,
|
|
"valid_targets_mean": 7129.1,
|
|
"valid_targets_min": 5331
|
|
},
|
|
{
|
|
"epoch": 4.1253561253561255,
|
|
"grad_norm": 0.43079423430747293,
|
|
"learning_rate": 1.7273520917255277e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09296290576457977,
|
|
"step": 5070,
|
|
"valid_targets_mean": 6864.0,
|
|
"valid_targets_min": 4697
|
|
},
|
|
{
|
|
"epoch": 4.12942612942613,
|
|
"grad_norm": 0.43490675675482465,
|
|
"learning_rate": 1.7233326818555162e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0889597162604332,
|
|
"step": 5075,
|
|
"valid_targets_mean": 6631.9,
|
|
"valid_targets_min": 4623
|
|
},
|
|
{
|
|
"epoch": 4.133496133496133,
|
|
"grad_norm": 0.3756096966590715,
|
|
"learning_rate": 1.7193144109000286e-05,
|
|
"loss": 0.1814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08131799101829529,
|
|
"step": 5080,
|
|
"valid_targets_mean": 6330.4,
|
|
"valid_targets_min": 5424
|
|
},
|
|
{
|
|
"epoch": 4.137566137566138,
|
|
"grad_norm": 0.3746393665646649,
|
|
"learning_rate": 1.7152972954004745e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08426746726036072,
|
|
"step": 5085,
|
|
"valid_targets_mean": 6814.8,
|
|
"valid_targets_min": 4964
|
|
},
|
|
{
|
|
"epoch": 4.141636141636142,
|
|
"grad_norm": 0.4195480953550515,
|
|
"learning_rate": 1.711281351893505e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0972888171672821,
|
|
"step": 5090,
|
|
"valid_targets_mean": 6915.8,
|
|
"valid_targets_min": 5321
|
|
},
|
|
{
|
|
"epoch": 4.1457061457061455,
|
|
"grad_norm": 0.4316053213116156,
|
|
"learning_rate": 1.7072665969109485e-05,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08947832882404327,
|
|
"step": 5095,
|
|
"valid_targets_mean": 6464.5,
|
|
"valid_targets_min": 5185
|
|
},
|
|
{
|
|
"epoch": 4.14977614977615,
|
|
"grad_norm": 0.39114306149720146,
|
|
"learning_rate": 1.703253046979739e-05,
|
|
"loss": 0.1774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09024079144001007,
|
|
"step": 5100,
|
|
"valid_targets_mean": 7048.6,
|
|
"valid_targets_min": 4913
|
|
},
|
|
{
|
|
"epoch": 4.153846153846154,
|
|
"grad_norm": 0.4185179236015934,
|
|
"learning_rate": 1.6992407186218512e-05,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09964226186275482,
|
|
"step": 5105,
|
|
"valid_targets_mean": 7469.2,
|
|
"valid_targets_min": 5426
|
|
},
|
|
{
|
|
"epoch": 4.157916157916158,
|
|
"grad_norm": 0.40064531771229606,
|
|
"learning_rate": 1.6952296283542303e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09262304753065109,
|
|
"step": 5110,
|
|
"valid_targets_mean": 6819.4,
|
|
"valid_targets_min": 5286
|
|
},
|
|
{
|
|
"epoch": 4.161986161986162,
|
|
"grad_norm": 0.38311400379103405,
|
|
"learning_rate": 1.691219792688726e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09319957345724106,
|
|
"step": 5115,
|
|
"valid_targets_mean": 7107.5,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 4.166056166056166,
|
|
"grad_norm": 0.3933047787302955,
|
|
"learning_rate": 1.6872112281320218e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08465088903903961,
|
|
"step": 5120,
|
|
"valid_targets_mean": 5642.6,
|
|
"valid_targets_min": 4838
|
|
},
|
|
{
|
|
"epoch": 4.17012617012617,
|
|
"grad_norm": 0.8525985607334213,
|
|
"learning_rate": 1.6832039511855702e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13957032561302185,
|
|
"step": 5125,
|
|
"valid_targets_mean": 2054.1,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 4.174196174196174,
|
|
"grad_norm": 0.4350065324995063,
|
|
"learning_rate": 1.6791979783455227e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08149752765893936,
|
|
"step": 5130,
|
|
"valid_targets_mean": 6686.4,
|
|
"valid_targets_min": 5256
|
|
},
|
|
{
|
|
"epoch": 4.1782661782661785,
|
|
"grad_norm": 0.4132065668153816,
|
|
"learning_rate": 1.6751933261026604e-05,
|
|
"loss": 0.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09279554337263107,
|
|
"step": 5135,
|
|
"valid_targets_mean": 6216.6,
|
|
"valid_targets_min": 5542
|
|
},
|
|
{
|
|
"epoch": 4.182336182336182,
|
|
"grad_norm": 0.4704024966834831,
|
|
"learning_rate": 1.671190010942331e-05,
|
|
"loss": 0.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08651970326900482,
|
|
"step": 5140,
|
|
"valid_targets_mean": 6486.1,
|
|
"valid_targets_min": 5531
|
|
},
|
|
{
|
|
"epoch": 4.186406186406186,
|
|
"grad_norm": 0.41768609875122026,
|
|
"learning_rate": 1.667188049344377e-05,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08536796271800995,
|
|
"step": 5145,
|
|
"valid_targets_mean": 5797.4,
|
|
"valid_targets_min": 3911
|
|
},
|
|
{
|
|
"epoch": 4.190476190476191,
|
|
"grad_norm": 0.44999542060640974,
|
|
"learning_rate": 1.663187457783068e-05,
|
|
"loss": 0.1711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08768117427825928,
|
|
"step": 5150,
|
|
"valid_targets_mean": 6357.5,
|
|
"valid_targets_min": 5195
|
|
},
|
|
{
|
|
"epoch": 4.194546194546194,
|
|
"grad_norm": 0.4473459694163714,
|
|
"learning_rate": 1.659188252727035e-05,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08737886697053909,
|
|
"step": 5155,
|
|
"valid_targets_mean": 5808.9,
|
|
"valid_targets_min": 4959
|
|
},
|
|
{
|
|
"epoch": 4.1986161986161985,
|
|
"grad_norm": 0.5313940009593219,
|
|
"learning_rate": 1.6551904506392008e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05604414641857147,
|
|
"step": 5160,
|
|
"valid_targets_mean": 2591.5,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 4.202686202686203,
|
|
"grad_norm": 0.4458022538668301,
|
|
"learning_rate": 1.651194067976713e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08318484574556351,
|
|
"step": 5165,
|
|
"valid_targets_mean": 6006.9,
|
|
"valid_targets_min": 4769
|
|
},
|
|
{
|
|
"epoch": 4.206756206756207,
|
|
"grad_norm": 0.4421797804474541,
|
|
"learning_rate": 1.6471991211908768e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08438228070735931,
|
|
"step": 5170,
|
|
"valid_targets_mean": 6764.2,
|
|
"valid_targets_min": 5046
|
|
},
|
|
{
|
|
"epoch": 4.210826210826211,
|
|
"grad_norm": 0.44259170394302194,
|
|
"learning_rate": 1.6432056267270836e-05,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09295166283845901,
|
|
"step": 5175,
|
|
"valid_targets_mean": 6055.8,
|
|
"valid_targets_min": 5182
|
|
},
|
|
{
|
|
"epoch": 4.214896214896215,
|
|
"grad_norm": 0.4174437145697535,
|
|
"learning_rate": 1.6392136010247496e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08445718884468079,
|
|
"step": 5180,
|
|
"valid_targets_mean": 7101.6,
|
|
"valid_targets_min": 5366
|
|
},
|
|
{
|
|
"epoch": 4.218966218966219,
|
|
"grad_norm": 0.39047654580039387,
|
|
"learning_rate": 1.6352230605172438e-05,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08764854073524475,
|
|
"step": 5185,
|
|
"valid_targets_mean": 7111.9,
|
|
"valid_targets_min": 5192
|
|
},
|
|
{
|
|
"epoch": 4.223036223036223,
|
|
"grad_norm": 0.44083057032394674,
|
|
"learning_rate": 1.63123402163182e-05,
|
|
"loss": 0.1726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08842865377664566,
|
|
"step": 5190,
|
|
"valid_targets_mean": 5509.9,
|
|
"valid_targets_min": 4790
|
|
},
|
|
{
|
|
"epoch": 4.227106227106227,
|
|
"grad_norm": 0.9039249010855949,
|
|
"learning_rate": 1.6272465007895528e-05,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09698623418807983,
|
|
"step": 5195,
|
|
"valid_targets_mean": 1581.9,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 4.2311762311762315,
|
|
"grad_norm": 0.6984318659356797,
|
|
"learning_rate": 1.623260514405266e-05,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07721932977437973,
|
|
"step": 5200,
|
|
"valid_targets_mean": 1333.2,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 4.235246235246235,
|
|
"grad_norm": 0.7988165421330412,
|
|
"learning_rate": 1.619276078887467e-05,
|
|
"loss": 0.1566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0798022449016571,
|
|
"step": 5205,
|
|
"valid_targets_mean": 1602.9,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 4.239316239316239,
|
|
"grad_norm": 0.8839676972961594,
|
|
"learning_rate": 1.6152932106382795e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07965122908353806,
|
|
"step": 5210,
|
|
"valid_targets_mean": 1340.4,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.243386243386244,
|
|
"grad_norm": 0.7568566322974944,
|
|
"learning_rate": 1.6113119260533743e-05,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08617518842220306,
|
|
"step": 5215,
|
|
"valid_targets_mean": 1792.9,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 4.247456247456247,
|
|
"grad_norm": 0.7853461839643316,
|
|
"learning_rate": 1.6073322415219045e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07748760282993317,
|
|
"step": 5220,
|
|
"valid_targets_mean": 1433.1,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 4.2515262515262515,
|
|
"grad_norm": 0.775021891537993,
|
|
"learning_rate": 1.6033541734264358e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07678981870412827,
|
|
"step": 5225,
|
|
"valid_targets_mean": 1589.5,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 4.255596255596256,
|
|
"grad_norm": 0.7698989145856481,
|
|
"learning_rate": 1.5993777381428792e-05,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06523817032575607,
|
|
"step": 5230,
|
|
"valid_targets_mean": 1270.5,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.259666259666259,
|
|
"grad_norm": 0.7897405384596183,
|
|
"learning_rate": 1.5954029520404252e-05,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08299176394939423,
|
|
"step": 5235,
|
|
"valid_targets_mean": 1674.5,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 4.263736263736264,
|
|
"grad_norm": 0.7201703295282144,
|
|
"learning_rate": 1.5914298314814752e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05681309849023819,
|
|
"step": 5240,
|
|
"valid_targets_mean": 1406.0,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 4.267806267806268,
|
|
"grad_norm": 0.8007703569426596,
|
|
"learning_rate": 1.587458392821574e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0649387463927269,
|
|
"step": 5245,
|
|
"valid_targets_mean": 1381.2,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 4.2718762718762715,
|
|
"grad_norm": 0.770739474815444,
|
|
"learning_rate": 1.5834886524093415e-05,
|
|
"loss": 0.1488,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08127094060182571,
|
|
"step": 5250,
|
|
"valid_targets_mean": 1580.4,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.275946275946276,
|
|
"grad_norm": 0.8368026866328528,
|
|
"learning_rate": 1.5795206265864086e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0783461183309555,
|
|
"step": 5255,
|
|
"valid_targets_mean": 1478.8,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 4.28001628001628,
|
|
"grad_norm": 0.7152654874841173,
|
|
"learning_rate": 1.575554331687348e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06594514846801758,
|
|
"step": 5260,
|
|
"valid_targets_mean": 1401.0,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 4.284086284086284,
|
|
"grad_norm": 0.7469453449896892,
|
|
"learning_rate": 1.5715897840396056e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06428131461143494,
|
|
"step": 5265,
|
|
"valid_targets_mean": 1588.9,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 4.288156288156288,
|
|
"grad_norm": 0.7630766511387223,
|
|
"learning_rate": 1.5676269999634355e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10081503540277481,
|
|
"step": 5270,
|
|
"valid_targets_mean": 2008.4,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 4.292226292226292,
|
|
"grad_norm": 0.7719765796935957,
|
|
"learning_rate": 1.5636659957718317e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058920711278915405,
|
|
"step": 5275,
|
|
"valid_targets_mean": 1209.0,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 4.296296296296296,
|
|
"grad_norm": 0.7392001894065844,
|
|
"learning_rate": 1.5597067877704627e-05,
|
|
"loss": 0.1467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07120724767446518,
|
|
"step": 5280,
|
|
"valid_targets_mean": 1535.5,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 4.3003663003663,
|
|
"grad_norm": 0.6940002890801722,
|
|
"learning_rate": 1.5557493922576e-05,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07191811501979828,
|
|
"step": 5285,
|
|
"valid_targets_mean": 1642.9,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 4.3044363044363045,
|
|
"grad_norm": 0.7996020542171901,
|
|
"learning_rate": 1.5517938255240558e-05,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07438832521438599,
|
|
"step": 5290,
|
|
"valid_targets_mean": 1570.5,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 4.308506308506309,
|
|
"grad_norm": 0.7539368794630141,
|
|
"learning_rate": 1.5478401038531132e-05,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06699814647436142,
|
|
"step": 5295,
|
|
"valid_targets_mean": 1722.1,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.312576312576312,
|
|
"grad_norm": 0.6917100664184215,
|
|
"learning_rate": 1.543888243520462e-05,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05670752376317978,
|
|
"step": 5300,
|
|
"valid_targets_mean": 1453.5,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 4.316646316646317,
|
|
"grad_norm": 0.9493477730431549,
|
|
"learning_rate": 1.5399382607941267e-05,
|
|
"loss": 0.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08421558141708374,
|
|
"step": 5305,
|
|
"valid_targets_mean": 1584.4,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 4.320716320716321,
|
|
"grad_norm": 0.7583084640701927,
|
|
"learning_rate": 1.535990171934405e-05,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06385685503482819,
|
|
"step": 5310,
|
|
"valid_targets_mean": 1495.2,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 4.3247863247863245,
|
|
"grad_norm": 0.8067972324986055,
|
|
"learning_rate": 1.5320439931937968e-05,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07771594822406769,
|
|
"step": 5315,
|
|
"valid_targets_mean": 1737.0,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 4.328856328856329,
|
|
"grad_norm": 0.791867781812196,
|
|
"learning_rate": 1.5280997408169412e-05,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07138702273368835,
|
|
"step": 5320,
|
|
"valid_targets_mean": 1431.6,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 4.332926332926333,
|
|
"grad_norm": 0.7550711735218681,
|
|
"learning_rate": 1.5241574310405437e-05,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08015383780002594,
|
|
"step": 5325,
|
|
"valid_targets_mean": 2060.2,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 4.336996336996337,
|
|
"grad_norm": 0.8677822625949797,
|
|
"learning_rate": 1.5202170800933157e-05,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0818401500582695,
|
|
"step": 5330,
|
|
"valid_targets_mean": 2063.9,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 4.341066341066341,
|
|
"grad_norm": 0.7474043536365914,
|
|
"learning_rate": 1.516278704195904e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05221828818321228,
|
|
"step": 5335,
|
|
"valid_targets_mean": 1036.5,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 4.345136345136345,
|
|
"grad_norm": 0.7889949523343204,
|
|
"learning_rate": 1.512342319560826e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06708300858736038,
|
|
"step": 5340,
|
|
"valid_targets_mean": 1674.4,
|
|
"valid_targets_min": 768
|
|
},
|
|
{
|
|
"epoch": 4.349206349206349,
|
|
"grad_norm": 0.7873060459244604,
|
|
"learning_rate": 1.5084079423924008e-05,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0583968311548233,
|
|
"step": 5345,
|
|
"valid_targets_mean": 1249.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 4.353276353276353,
|
|
"grad_norm": 0.7925060855022024,
|
|
"learning_rate": 1.5044755888866838e-05,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05081354081630707,
|
|
"step": 5350,
|
|
"valid_targets_mean": 1114.8,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 4.357346357346358,
|
|
"grad_norm": 0.780052779669191,
|
|
"learning_rate": 1.5005452752314016e-05,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0766829401254654,
|
|
"step": 5355,
|
|
"valid_targets_mean": 1780.9,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 4.361416361416361,
|
|
"grad_norm": 0.8731555755183956,
|
|
"learning_rate": 1.4966170176058804e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06203790381550789,
|
|
"step": 5360,
|
|
"valid_targets_mean": 1113.6,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 4.365486365486365,
|
|
"grad_norm": 0.8105488638885902,
|
|
"learning_rate": 1.4926908321809856e-05,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06966659426689148,
|
|
"step": 5365,
|
|
"valid_targets_mean": 1265.0,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 4.36955636955637,
|
|
"grad_norm": 0.7570799683681111,
|
|
"learning_rate": 1.4887667351190508e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07910779118537903,
|
|
"step": 5370,
|
|
"valid_targets_mean": 1652.1,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 4.373626373626374,
|
|
"grad_norm": 0.8373628622647403,
|
|
"learning_rate": 1.4848447425738135e-05,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05490382760763168,
|
|
"step": 5375,
|
|
"valid_targets_mean": 1231.2,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 4.3776963776963775,
|
|
"grad_norm": 0.7459004049358479,
|
|
"learning_rate": 1.4809248706903476e-05,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0814879834651947,
|
|
"step": 5380,
|
|
"valid_targets_mean": 1682.4,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 4.381766381766382,
|
|
"grad_norm": 0.7901027972617194,
|
|
"learning_rate": 1.4770071356049966e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06546150892972946,
|
|
"step": 5385,
|
|
"valid_targets_mean": 1571.6,
|
|
"valid_targets_min": 1015
|
|
},
|
|
{
|
|
"epoch": 4.385836385836386,
|
|
"grad_norm": 0.7459849965828421,
|
|
"learning_rate": 1.4730915534453084e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059574246406555176,
|
|
"step": 5390,
|
|
"valid_targets_mean": 1276.8,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 4.38990638990639,
|
|
"grad_norm": 0.7250412854507754,
|
|
"learning_rate": 1.4691781403299695e-05,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04473428428173065,
|
|
"step": 5395,
|
|
"valid_targets_mean": 1147.5,
|
|
"valid_targets_min": 733
|
|
},
|
|
{
|
|
"epoch": 4.393976393976394,
|
|
"grad_norm": 0.9341622348888069,
|
|
"learning_rate": 1.4652669123687335e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08530112355947495,
|
|
"step": 5400,
|
|
"valid_targets_mean": 1604.0,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 4.398046398046398,
|
|
"grad_norm": 0.7506012218410362,
|
|
"learning_rate": 1.4613578856623634e-05,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0721062421798706,
|
|
"step": 5405,
|
|
"valid_targets_mean": 1784.0,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 4.402116402116402,
|
|
"grad_norm": 0.9539670072016909,
|
|
"learning_rate": 1.4574510763025571e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09979036450386047,
|
|
"step": 5410,
|
|
"valid_targets_mean": 1729.0,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 4.406186406186406,
|
|
"grad_norm": 0.8152959339758997,
|
|
"learning_rate": 1.4535465003718872e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06546957790851593,
|
|
"step": 5415,
|
|
"valid_targets_mean": 1483.9,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 4.410256410256411,
|
|
"grad_norm": 1.0303278083543088,
|
|
"learning_rate": 1.4496441739437308e-05,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059898581355810165,
|
|
"step": 5420,
|
|
"valid_targets_mean": 1198.4,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 4.414326414326414,
|
|
"grad_norm": 0.8198228481186385,
|
|
"learning_rate": 1.445744113082205e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06732296943664551,
|
|
"step": 5425,
|
|
"valid_targets_mean": 1365.2,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 4.418396418396418,
|
|
"grad_norm": 0.9913925030956331,
|
|
"learning_rate": 1.4418463338421014e-05,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05075017362833023,
|
|
"step": 5430,
|
|
"valid_targets_mean": 1142.9,
|
|
"valid_targets_min": 835
|
|
},
|
|
{
|
|
"epoch": 4.422466422466423,
|
|
"grad_norm": 0.8461959968216606,
|
|
"learning_rate": 1.4379508522688172e-05,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06075669825077057,
|
|
"step": 5435,
|
|
"valid_targets_mean": 1316.9,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 4.426536426536426,
|
|
"grad_norm": 0.7557942053954224,
|
|
"learning_rate": 1.4340576843982941e-05,
|
|
"loss": 0.1279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07234987616539001,
|
|
"step": 5440,
|
|
"valid_targets_mean": 1739.2,
|
|
"valid_targets_min": 1138
|
|
},
|
|
{
|
|
"epoch": 4.430606430606431,
|
|
"grad_norm": 0.7526966602303533,
|
|
"learning_rate": 1.4301668462569463e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06122884154319763,
|
|
"step": 5445,
|
|
"valid_targets_mean": 1592.2,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 4.434676434676435,
|
|
"grad_norm": 0.7785215289541277,
|
|
"learning_rate": 1.4262783538615997e-05,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06840664893388748,
|
|
"step": 5450,
|
|
"valid_targets_mean": 1543.1,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 4.438746438746438,
|
|
"grad_norm": 0.8035141393477653,
|
|
"learning_rate": 1.4223922232194231e-05,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06752649694681168,
|
|
"step": 5455,
|
|
"valid_targets_mean": 1430.0,
|
|
"valid_targets_min": 843
|
|
},
|
|
{
|
|
"epoch": 4.442816442816443,
|
|
"grad_norm": 0.680868471747835,
|
|
"learning_rate": 1.4185084703278636e-05,
|
|
"loss": 0.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06250318884849548,
|
|
"step": 5460,
|
|
"valid_targets_mean": 1622.9,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 4.446886446886447,
|
|
"grad_norm": 0.8578202447129205,
|
|
"learning_rate": 1.4146271111745785e-05,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06331510841846466,
|
|
"step": 5465,
|
|
"valid_targets_mean": 1429.0,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 4.4509564509564505,
|
|
"grad_norm": 0.8243591653669494,
|
|
"learning_rate": 1.4107481617373738e-05,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05521140247583389,
|
|
"step": 5470,
|
|
"valid_targets_mean": 1283.0,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.455026455026455,
|
|
"grad_norm": 0.8492636328085363,
|
|
"learning_rate": 1.406871637984132e-05,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07561201602220535,
|
|
"step": 5475,
|
|
"valid_targets_mean": 1457.0,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 4.459096459096459,
|
|
"grad_norm": 0.7560434633983276,
|
|
"learning_rate": 1.4029975558727546e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05740554630756378,
|
|
"step": 5480,
|
|
"valid_targets_mean": 1399.6,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 4.463166463166463,
|
|
"grad_norm": 0.7108917719684089,
|
|
"learning_rate": 1.399125931351088e-05,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060493603348731995,
|
|
"step": 5485,
|
|
"valid_targets_mean": 1464.6,
|
|
"valid_targets_min": 670
|
|
},
|
|
{
|
|
"epoch": 4.467236467236467,
|
|
"grad_norm": 0.8373770508282027,
|
|
"learning_rate": 1.3952567803568648e-05,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06813108921051025,
|
|
"step": 5490,
|
|
"valid_targets_mean": 1504.8,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 4.471306471306471,
|
|
"grad_norm": 0.7967991746334874,
|
|
"learning_rate": 1.391390118817634e-05,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051831118762493134,
|
|
"step": 5495,
|
|
"valid_targets_mean": 1276.9,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 4.475376475376476,
|
|
"grad_norm": 0.8724038689238638,
|
|
"learning_rate": 1.3875259626506958e-05,
|
|
"loss": 0.1294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058950334787368774,
|
|
"step": 5500,
|
|
"valid_targets_mean": 1291.4,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 4.479446479446479,
|
|
"grad_norm": 0.8243173279519246,
|
|
"learning_rate": 1.383664327763039e-05,
|
|
"loss": 0.1348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05535842478275299,
|
|
"step": 5505,
|
|
"valid_targets_mean": 1409.1,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 4.483516483516484,
|
|
"grad_norm": 0.8045208272162921,
|
|
"learning_rate": 1.3798052300512707e-05,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07621202617883682,
|
|
"step": 5510,
|
|
"valid_targets_mean": 1581.9,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 4.487586487586488,
|
|
"grad_norm": 0.8000566735592727,
|
|
"learning_rate": 1.3759486854015558e-05,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06206750124692917,
|
|
"step": 5515,
|
|
"valid_targets_mean": 1647.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 4.491656491656491,
|
|
"grad_norm": 0.841307842616834,
|
|
"learning_rate": 1.3720947096895487e-05,
|
|
"loss": 0.132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06576070189476013,
|
|
"step": 5520,
|
|
"valid_targets_mean": 1523.5,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 4.495726495726496,
|
|
"grad_norm": 0.8127961832136743,
|
|
"learning_rate": 1.3682433187803283e-05,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07405531406402588,
|
|
"step": 5525,
|
|
"valid_targets_mean": 1629.4,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 4.4997964997965,
|
|
"grad_norm": 0.8576090738580665,
|
|
"learning_rate": 1.3643945285283336e-05,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05857028067111969,
|
|
"step": 5530,
|
|
"valid_targets_mean": 1163.4,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 4.503866503866504,
|
|
"grad_norm": 0.8768940435922552,
|
|
"learning_rate": 1.3605483547772977e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08095380663871765,
|
|
"step": 5535,
|
|
"valid_targets_mean": 1613.2,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 4.507936507936508,
|
|
"grad_norm": 0.8051072533291032,
|
|
"learning_rate": 1.3567048133601821e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06716311722993851,
|
|
"step": 5540,
|
|
"valid_targets_mean": 1567.0,
|
|
"valid_targets_min": 1175
|
|
},
|
|
{
|
|
"epoch": 4.512006512006512,
|
|
"grad_norm": 0.8552572932480055,
|
|
"learning_rate": 1.352863920099114e-05,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08142690360546112,
|
|
"step": 5545,
|
|
"valid_targets_mean": 1609.0,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 4.516076516076516,
|
|
"grad_norm": 0.9469817635167835,
|
|
"learning_rate": 1.3490256908053165e-05,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06301309913396835,
|
|
"step": 5550,
|
|
"valid_targets_mean": 1425.2,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 4.52014652014652,
|
|
"grad_norm": 0.7579582875823737,
|
|
"learning_rate": 1.3451901412790485e-05,
|
|
"loss": 0.1237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06315524131059647,
|
|
"step": 5555,
|
|
"valid_targets_mean": 1462.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 4.524216524216524,
|
|
"grad_norm": 0.7776282955223374,
|
|
"learning_rate": 1.341357287309537e-05,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06340664625167847,
|
|
"step": 5560,
|
|
"valid_targets_mean": 1749.1,
|
|
"valid_targets_min": 651
|
|
},
|
|
{
|
|
"epoch": 4.528286528286529,
|
|
"grad_norm": 0.8269577317944914,
|
|
"learning_rate": 1.3375271446749125e-05,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053509172052145004,
|
|
"step": 5565,
|
|
"valid_targets_mean": 1161.0,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.532356532356532,
|
|
"grad_norm": 0.7967004348591504,
|
|
"learning_rate": 1.3336997291421441e-05,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06124432757496834,
|
|
"step": 5570,
|
|
"valid_targets_mean": 1425.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 4.536426536426537,
|
|
"grad_norm": 0.7435104069530646,
|
|
"learning_rate": 1.3298750564669751e-05,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05810406804084778,
|
|
"step": 5575,
|
|
"valid_targets_mean": 1391.1,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 4.540496540496541,
|
|
"grad_norm": 0.7999784062535612,
|
|
"learning_rate": 1.3260531423938571e-05,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07500585913658142,
|
|
"step": 5580,
|
|
"valid_targets_mean": 1724.5,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 4.544566544566544,
|
|
"grad_norm": 0.8510244277487944,
|
|
"learning_rate": 1.3222340026558855e-05,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06173360347747803,
|
|
"step": 5585,
|
|
"valid_targets_mean": 1366.8,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 4.548636548636549,
|
|
"grad_norm": 0.8073160848727309,
|
|
"learning_rate": 1.3184176529747357e-05,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07489972561597824,
|
|
"step": 5590,
|
|
"valid_targets_mean": 1772.1,
|
|
"valid_targets_min": 979
|
|
},
|
|
{
|
|
"epoch": 4.552706552706553,
|
|
"grad_norm": 0.7253390941686678,
|
|
"learning_rate": 1.3146041090605977e-05,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07174719870090485,
|
|
"step": 5595,
|
|
"valid_targets_mean": 1778.9,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 4.556776556776557,
|
|
"grad_norm": 0.7424924022896014,
|
|
"learning_rate": 1.3107933866121117e-05,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0609469898045063,
|
|
"step": 5600,
|
|
"valid_targets_mean": 1510.8,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.560846560846561,
|
|
"grad_norm": 0.8618358393453901,
|
|
"learning_rate": 1.306985501316302e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06146273389458656,
|
|
"step": 5605,
|
|
"valid_targets_mean": 1330.4,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 4.564916564916565,
|
|
"grad_norm": 0.8268278139473043,
|
|
"learning_rate": 1.3031804688485143e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07098115980625153,
|
|
"step": 5610,
|
|
"valid_targets_mean": 1504.2,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 4.568986568986569,
|
|
"grad_norm": 0.7720719541618248,
|
|
"learning_rate": 1.2993783048723515e-05,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07257020473480225,
|
|
"step": 5615,
|
|
"valid_targets_mean": 1972.9,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 4.573056573056573,
|
|
"grad_norm": 0.7939819756885284,
|
|
"learning_rate": 1.295579025039607e-05,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06880711764097214,
|
|
"step": 5620,
|
|
"valid_targets_mean": 1519.6,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 4.5771265771265774,
|
|
"grad_norm": 0.7859404051654513,
|
|
"learning_rate": 1.2917826449902005e-05,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057696133852005005,
|
|
"step": 5625,
|
|
"valid_targets_mean": 1362.0,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 4.581196581196581,
|
|
"grad_norm": 0.828035069084943,
|
|
"learning_rate": 1.2879891803521167e-05,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07344723492860794,
|
|
"step": 5630,
|
|
"valid_targets_mean": 1623.0,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 4.585266585266585,
|
|
"grad_norm": 0.7735417380419619,
|
|
"learning_rate": 1.2841986467413384e-05,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05607450753450394,
|
|
"step": 5635,
|
|
"valid_targets_mean": 2889.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 4.58933658933659,
|
|
"grad_norm": 0.460850977797522,
|
|
"learning_rate": 1.2804110597617817e-05,
|
|
"loss": 0.111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05251425504684448,
|
|
"step": 5640,
|
|
"valid_targets_mean": 3423.9,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 4.593406593406593,
|
|
"grad_norm": 0.3657662471298085,
|
|
"learning_rate": 1.2766264350052334e-05,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041075143963098526,
|
|
"step": 5645,
|
|
"valid_targets_mean": 3592.0,
|
|
"valid_targets_min": 3064
|
|
},
|
|
{
|
|
"epoch": 4.597476597476597,
|
|
"grad_norm": 0.3851006948518706,
|
|
"learning_rate": 1.2728447880512862e-05,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039379969239234924,
|
|
"step": 5650,
|
|
"valid_targets_mean": 3425.4,
|
|
"valid_targets_min": 2901
|
|
},
|
|
{
|
|
"epoch": 4.601546601546602,
|
|
"grad_norm": 0.3982103509768099,
|
|
"learning_rate": 1.2690661344672755e-05,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0414591059088707,
|
|
"step": 5655,
|
|
"valid_targets_mean": 3393.9,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 4.605616605616605,
|
|
"grad_norm": 0.46756243462854763,
|
|
"learning_rate": 1.2652904898082117e-05,
|
|
"loss": 0.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034624554216861725,
|
|
"step": 5660,
|
|
"valid_targets_mean": 2066.5,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 4.60968660968661,
|
|
"grad_norm": 0.8771981868563371,
|
|
"learning_rate": 1.2615178696167205e-05,
|
|
"loss": 0.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06684105098247528,
|
|
"step": 5665,
|
|
"valid_targets_mean": 1519.1,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 4.613756613756614,
|
|
"grad_norm": 0.3782756354180073,
|
|
"learning_rate": 1.2577482894229777e-05,
|
|
"loss": 0.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04186592623591423,
|
|
"step": 5670,
|
|
"valid_targets_mean": 4022.1,
|
|
"valid_targets_min": 3668
|
|
},
|
|
{
|
|
"epoch": 4.617826617826617,
|
|
"grad_norm": 0.3867031798803203,
|
|
"learning_rate": 1.2539817647446446e-05,
|
|
"loss": 0.0872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04109447821974754,
|
|
"step": 5675,
|
|
"valid_targets_mean": 3483.0,
|
|
"valid_targets_min": 2891
|
|
},
|
|
{
|
|
"epoch": 4.621896621896622,
|
|
"grad_norm": 0.5812366237791688,
|
|
"learning_rate": 1.2502183110868031e-05,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09486156702041626,
|
|
"step": 5680,
|
|
"valid_targets_mean": 2689.9,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.625966625966626,
|
|
"grad_norm": 0.28966277428771003,
|
|
"learning_rate": 1.2464579439418943e-05,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029673418030142784,
|
|
"step": 5685,
|
|
"valid_targets_mean": 5332.0,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 4.63003663003663,
|
|
"grad_norm": 0.4205463767229215,
|
|
"learning_rate": 1.2427006787896537e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03594677895307541,
|
|
"step": 5690,
|
|
"valid_targets_mean": 2137.1,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 4.634106634106634,
|
|
"grad_norm": 0.31841598595521275,
|
|
"learning_rate": 1.2389465310970459e-05,
|
|
"loss": 0.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036279890686273575,
|
|
"step": 5695,
|
|
"valid_targets_mean": 3335.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.638176638176638,
|
|
"grad_norm": 0.3807391992506858,
|
|
"learning_rate": 1.2351955163182039e-05,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04160192608833313,
|
|
"step": 5700,
|
|
"valid_targets_mean": 2811.1,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 4.642246642246643,
|
|
"grad_norm": 0.3742032229362971,
|
|
"learning_rate": 1.2314476498943622e-05,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04018232598900795,
|
|
"step": 5705,
|
|
"valid_targets_mean": 2773.0,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 4.646316646316646,
|
|
"grad_norm": 0.3939537525171128,
|
|
"learning_rate": 1.2277029472537967e-05,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038491107523441315,
|
|
"step": 5710,
|
|
"valid_targets_mean": 2632.2,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 4.6503866503866504,
|
|
"grad_norm": 0.4057474887227267,
|
|
"learning_rate": 1.2239614238117588e-05,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04323495551943779,
|
|
"step": 5715,
|
|
"valid_targets_mean": 2741.0,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 4.654456654456655,
|
|
"grad_norm": 0.5815162907436859,
|
|
"learning_rate": 1.2202230949704117e-05,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06871599704027176,
|
|
"step": 5720,
|
|
"valid_targets_mean": 2122.4,
|
|
"valid_targets_min": 812
|
|
},
|
|
{
|
|
"epoch": 4.658526658526658,
|
|
"grad_norm": 0.4521817666906546,
|
|
"learning_rate": 1.2164879761187691e-05,
|
|
"loss": 0.0871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03911435604095459,
|
|
"step": 5725,
|
|
"valid_targets_mean": 2335.0,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 4.662596662596663,
|
|
"grad_norm": 0.47178406108628634,
|
|
"learning_rate": 1.212756082632631e-05,
|
|
"loss": 0.0865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03535778075456619,
|
|
"step": 5730,
|
|
"valid_targets_mean": 4099.9,
|
|
"valid_targets_min": 2949
|
|
},
|
|
{
|
|
"epoch": 4.666666666666667,
|
|
"grad_norm": 0.43766318103399476,
|
|
"learning_rate": 1.2090274298745172e-05,
|
|
"loss": 0.0942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04855756461620331,
|
|
"step": 5735,
|
|
"valid_targets_mean": 2667.8,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 4.67073667073667,
|
|
"grad_norm": 0.5685037462804967,
|
|
"learning_rate": 1.2053020331936108e-05,
|
|
"loss": 0.1052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09078393876552582,
|
|
"step": 5740,
|
|
"valid_targets_mean": 1997.2,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.674806674806675,
|
|
"grad_norm": 0.4814600386863112,
|
|
"learning_rate": 1.2015799079256876e-05,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053439922630786896,
|
|
"step": 5745,
|
|
"valid_targets_mean": 2656.1,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 4.678876678876679,
|
|
"grad_norm": 0.4807779629525887,
|
|
"learning_rate": 1.1978610693930587e-05,
|
|
"loss": 0.0756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03917412832379341,
|
|
"step": 5750,
|
|
"valid_targets_mean": 1499.9,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 4.682946682946683,
|
|
"grad_norm": 0.44551985651283915,
|
|
"learning_rate": 1.1941455329045047e-05,
|
|
"loss": 0.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04586545005440712,
|
|
"step": 5755,
|
|
"valid_targets_mean": 2963.0,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 4.687016687016687,
|
|
"grad_norm": 0.4671700313248316,
|
|
"learning_rate": 1.1904333137552124e-05,
|
|
"loss": 0.0998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06650416553020477,
|
|
"step": 5760,
|
|
"valid_targets_mean": 3018.8,
|
|
"valid_targets_min": 1247
|
|
},
|
|
{
|
|
"epoch": 4.691086691086691,
|
|
"grad_norm": 0.54473726834285,
|
|
"learning_rate": 1.1867244272267136e-05,
|
|
"loss": 0.0842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05167562887072563,
|
|
"step": 5765,
|
|
"valid_targets_mean": 1737.4,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 4.695156695156696,
|
|
"grad_norm": 0.6730426959023267,
|
|
"learning_rate": 1.1830188885868213e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13521423935890198,
|
|
"step": 5770,
|
|
"valid_targets_mean": 2176.9,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 4.699226699226699,
|
|
"grad_norm": 0.4728336672436709,
|
|
"learning_rate": 1.1793167130895656e-05,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032692648470401764,
|
|
"step": 5775,
|
|
"valid_targets_mean": 1465.0,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 4.7032967032967035,
|
|
"grad_norm": 0.3873581322887633,
|
|
"learning_rate": 1.1756179159751322e-05,
|
|
"loss": 0.0958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03570934757590294,
|
|
"step": 5780,
|
|
"valid_targets_mean": 3186.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 4.707366707366708,
|
|
"grad_norm": 0.4264449290678136,
|
|
"learning_rate": 1.1719225124698003e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04953089728951454,
|
|
"step": 5785,
|
|
"valid_targets_mean": 3058.2,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 4.711436711436711,
|
|
"grad_norm": 0.3737908843266182,
|
|
"learning_rate": 1.16823051778588e-05,
|
|
"loss": 0.0812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052201047539711,
|
|
"step": 5790,
|
|
"valid_targets_mean": 4502.5,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 4.715506715506716,
|
|
"grad_norm": 0.4570728348397021,
|
|
"learning_rate": 1.1645419471216462e-05,
|
|
"loss": 0.0854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056565217673778534,
|
|
"step": 5795,
|
|
"valid_targets_mean": 3669.0,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 4.71957671957672,
|
|
"grad_norm": 0.3818909080202796,
|
|
"learning_rate": 1.1608568156612807e-05,
|
|
"loss": 0.0854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03912724554538727,
|
|
"step": 5800,
|
|
"valid_targets_mean": 2759.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 4.7236467236467234,
|
|
"grad_norm": 0.36574902640964757,
|
|
"learning_rate": 1.1571751385748082e-05,
|
|
"loss": 0.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04248278588056564,
|
|
"step": 5805,
|
|
"valid_targets_mean": 3420.0,
|
|
"valid_targets_min": 825
|
|
},
|
|
{
|
|
"epoch": 4.727716727716728,
|
|
"grad_norm": 0.36160705101402424,
|
|
"learning_rate": 1.1534969310180303e-05,
|
|
"loss": 0.079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03156093508005142,
|
|
"step": 5810,
|
|
"valid_targets_mean": 3323.0,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 4.731786731786732,
|
|
"grad_norm": 0.36054700912548693,
|
|
"learning_rate": 1.149822208132469e-05,
|
|
"loss": 0.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041020315140485764,
|
|
"step": 5815,
|
|
"valid_targets_mean": 3504.0,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 4.735856735856736,
|
|
"grad_norm": 0.4108903603282782,
|
|
"learning_rate": 1.1461509850453e-05,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033342353999614716,
|
|
"step": 5820,
|
|
"valid_targets_mean": 3553.0,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 4.73992673992674,
|
|
"grad_norm": 0.43529092649816564,
|
|
"learning_rate": 1.1424832768692942e-05,
|
|
"loss": 0.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045596443116664886,
|
|
"step": 5825,
|
|
"valid_targets_mean": 3672.6,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 4.743996743996744,
|
|
"grad_norm": 0.31603285050379853,
|
|
"learning_rate": 1.1388190987027485e-05,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021315453574061394,
|
|
"step": 5830,
|
|
"valid_targets_mean": 2072.2,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 4.748066748066748,
|
|
"grad_norm": 0.30398769091490463,
|
|
"learning_rate": 1.135158465629433e-05,
|
|
"loss": 0.0655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.01872686669230461,
|
|
"step": 5835,
|
|
"valid_targets_mean": 2852.2,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 4.752136752136752,
|
|
"grad_norm": 0.41779314691880687,
|
|
"learning_rate": 1.1315013927185224e-05,
|
|
"loss": 0.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03134084492921829,
|
|
"step": 5840,
|
|
"valid_targets_mean": 2778.2,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 4.7562067562067565,
|
|
"grad_norm": 0.43596477198959716,
|
|
"learning_rate": 1.1278478950245364e-05,
|
|
"loss": 0.0685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040790289640426636,
|
|
"step": 5845,
|
|
"valid_targets_mean": 3474.1,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 4.76027676027676,
|
|
"grad_norm": 0.6262422674089861,
|
|
"learning_rate": 1.1241979875872748e-05,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0588751845061779,
|
|
"step": 5850,
|
|
"valid_targets_mean": 2075.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 4.764346764346764,
|
|
"grad_norm": 0.4373065366822497,
|
|
"learning_rate": 1.120551685431761e-05,
|
|
"loss": 0.0854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07749606668949127,
|
|
"step": 5855,
|
|
"valid_targets_mean": 2382.5,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 4.768416768416769,
|
|
"grad_norm": 0.44549286271516586,
|
|
"learning_rate": 1.1169090035681772e-05,
|
|
"loss": 0.1014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043722283095121384,
|
|
"step": 5860,
|
|
"valid_targets_mean": 2478.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 4.772486772486772,
|
|
"grad_norm": 0.3836262029735173,
|
|
"learning_rate": 1.1132699569917982e-05,
|
|
"loss": 0.0804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033280737698078156,
|
|
"step": 5865,
|
|
"valid_targets_mean": 3505.0,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 4.7765567765567765,
|
|
"grad_norm": 0.6061904603600585,
|
|
"learning_rate": 1.1096345606829388e-05,
|
|
"loss": 0.1076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04993060976266861,
|
|
"step": 5870,
|
|
"valid_targets_mean": 1690.6,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 4.780626780626781,
|
|
"grad_norm": 0.45611765176673036,
|
|
"learning_rate": 1.1060028296068853e-05,
|
|
"loss": 0.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04060452803969383,
|
|
"step": 5875,
|
|
"valid_targets_mean": 3574.0,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 4.784696784696784,
|
|
"grad_norm": 0.714044002886353,
|
|
"learning_rate": 1.1023747787138361e-05,
|
|
"loss": 0.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04921393096446991,
|
|
"step": 5880,
|
|
"valid_targets_mean": 1174.5,
|
|
"valid_targets_min": 649
|
|
},
|
|
{
|
|
"epoch": 4.788766788766789,
|
|
"grad_norm": 0.44604008395762645,
|
|
"learning_rate": 1.0987504229388391e-05,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04379687458276749,
|
|
"step": 5885,
|
|
"valid_targets_mean": 3146.5,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.792836792836793,
|
|
"grad_norm": 0.43017541020951683,
|
|
"learning_rate": 1.0951297772017319e-05,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045811258256435394,
|
|
"step": 5890,
|
|
"valid_targets_mean": 3508.5,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 4.7969067969067964,
|
|
"grad_norm": 0.48390962547473454,
|
|
"learning_rate": 1.0915128564070803e-05,
|
|
"loss": 0.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038865868002176285,
|
|
"step": 5895,
|
|
"valid_targets_mean": 2029.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 4.800976800976801,
|
|
"grad_norm": 0.36825203623662306,
|
|
"learning_rate": 1.0878996754441151e-05,
|
|
"loss": 0.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04049721732735634,
|
|
"step": 5900,
|
|
"valid_targets_mean": 3818.9,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 4.805046805046805,
|
|
"grad_norm": 0.43168309660573984,
|
|
"learning_rate": 1.0842902491866716e-05,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039305686950683594,
|
|
"step": 5905,
|
|
"valid_targets_mean": 4056.5,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 4.8091168091168095,
|
|
"grad_norm": 0.35933047571594207,
|
|
"learning_rate": 1.0806845924931296e-05,
|
|
"loss": 0.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03556220978498459,
|
|
"step": 5910,
|
|
"valid_targets_mean": 3477.1,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 4.813186813186813,
|
|
"grad_norm": 0.36322550413900717,
|
|
"learning_rate": 1.0770827202063505e-05,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02814421057701111,
|
|
"step": 5915,
|
|
"valid_targets_mean": 2432.0,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 4.817256817256817,
|
|
"grad_norm": 0.540577699819694,
|
|
"learning_rate": 1.073484647153619e-05,
|
|
"loss": 0.0918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04813909903168678,
|
|
"step": 5920,
|
|
"valid_targets_mean": 1705.1,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 4.821326821326822,
|
|
"grad_norm": 0.2873896338117805,
|
|
"learning_rate": 1.0698903881465763e-05,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021741347387433052,
|
|
"step": 5925,
|
|
"valid_targets_mean": 4197.8,
|
|
"valid_targets_min": 4050
|
|
},
|
|
{
|
|
"epoch": 4.825396825396825,
|
|
"grad_norm": 0.34456005761736075,
|
|
"learning_rate": 1.0662999579811664e-05,
|
|
"loss": 0.0746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02447408251464367,
|
|
"step": 5930,
|
|
"valid_targets_mean": 3064.9,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 4.8294668294668295,
|
|
"grad_norm": 0.49034557307772714,
|
|
"learning_rate": 1.06271337143757e-05,
|
|
"loss": 0.0931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05810309574007988,
|
|
"step": 5935,
|
|
"valid_targets_mean": 3935.9,
|
|
"valid_targets_min": 3235
|
|
},
|
|
{
|
|
"epoch": 4.833536833536834,
|
|
"grad_norm": 0.31610766883370367,
|
|
"learning_rate": 1.0591306432801467e-05,
|
|
"loss": 0.0697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032884612679481506,
|
|
"step": 5940,
|
|
"valid_targets_mean": 3656.0,
|
|
"valid_targets_min": 2848
|
|
},
|
|
{
|
|
"epoch": 4.837606837606837,
|
|
"grad_norm": 0.7350255340291643,
|
|
"learning_rate": 1.05555178825737e-05,
|
|
"loss": 0.0839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05409277603030205,
|
|
"step": 5945,
|
|
"valid_targets_mean": 1732.0,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 4.841676841676842,
|
|
"grad_norm": 0.5190904613777559,
|
|
"learning_rate": 1.0519768211017726e-05,
|
|
"loss": 0.0792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054192811250686646,
|
|
"step": 5950,
|
|
"valid_targets_mean": 2964.1,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 4.845746845746846,
|
|
"grad_norm": 0.48868311864434977,
|
|
"learning_rate": 1.0484057565298822e-05,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04929513484239578,
|
|
"step": 5955,
|
|
"valid_targets_mean": 1643.6,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 4.8498168498168495,
|
|
"grad_norm": 0.5380227640246409,
|
|
"learning_rate": 1.0448386092421586e-05,
|
|
"loss": 0.0945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04821448400616646,
|
|
"step": 5960,
|
|
"valid_targets_mean": 1432.5,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 4.853886853886854,
|
|
"grad_norm": 0.5675102154058619,
|
|
"learning_rate": 1.0412753939229385e-05,
|
|
"loss": 0.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04497600346803665,
|
|
"step": 5965,
|
|
"valid_targets_mean": 1551.0,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 4.857956857956858,
|
|
"grad_norm": 0.37368458137912997,
|
|
"learning_rate": 1.037716125240372e-05,
|
|
"loss": 0.0841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0343891903758049,
|
|
"step": 5970,
|
|
"valid_targets_mean": 3490.9,
|
|
"valid_targets_min": 2839
|
|
},
|
|
{
|
|
"epoch": 4.8620268620268625,
|
|
"grad_norm": 0.465690197105879,
|
|
"learning_rate": 1.0341608178463623e-05,
|
|
"loss": 0.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0349278599023819,
|
|
"step": 5975,
|
|
"valid_targets_mean": 3522.9,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 4.866096866096866,
|
|
"grad_norm": 0.4539769609059549,
|
|
"learning_rate": 1.0306094863765066e-05,
|
|
"loss": 0.0791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04086630046367645,
|
|
"step": 5980,
|
|
"valid_targets_mean": 2675.1,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 4.87016687016687,
|
|
"grad_norm": 0.5194040935548523,
|
|
"learning_rate": 1.027062145450033e-05,
|
|
"loss": 0.0703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03959763050079346,
|
|
"step": 5985,
|
|
"valid_targets_mean": 2448.1,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 4.874236874236875,
|
|
"grad_norm": 0.5286302251911614,
|
|
"learning_rate": 1.023518809669744e-05,
|
|
"loss": 0.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07098161429166794,
|
|
"step": 5990,
|
|
"valid_targets_mean": 3094.1,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 4.878306878306878,
|
|
"grad_norm": 0.4012211699517951,
|
|
"learning_rate": 1.0199794936219554e-05,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04231107980012894,
|
|
"step": 5995,
|
|
"valid_targets_mean": 3015.2,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 4.8823768823768825,
|
|
"grad_norm": 0.48558169241283544,
|
|
"learning_rate": 1.0164442118764328e-05,
|
|
"loss": 0.065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04597931355237961,
|
|
"step": 6000,
|
|
"valid_targets_mean": 1446.4,
|
|
"valid_targets_min": 832
|
|
},
|
|
{
|
|
"epoch": 4.886446886446887,
|
|
"grad_norm": 0.46675560750287587,
|
|
"learning_rate": 1.0129129789863375e-05,
|
|
"loss": 0.0799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04974772036075592,
|
|
"step": 6005,
|
|
"valid_targets_mean": 3758.4,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 4.89051689051689,
|
|
"grad_norm": 0.3718508133031444,
|
|
"learning_rate": 1.0093858094881612e-05,
|
|
"loss": 0.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029652055352926254,
|
|
"step": 6010,
|
|
"valid_targets_mean": 3927.1,
|
|
"valid_targets_min": 1057
|
|
},
|
|
{
|
|
"epoch": 4.894586894586895,
|
|
"grad_norm": 0.3548767930814201,
|
|
"learning_rate": 1.00586271790167e-05,
|
|
"loss": 0.0633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029149774461984634,
|
|
"step": 6015,
|
|
"valid_targets_mean": 3467.9,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 4.898656898656899,
|
|
"grad_norm": 0.36526262775572454,
|
|
"learning_rate": 1.002343718729843e-05,
|
|
"loss": 0.0873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03588550537824631,
|
|
"step": 6020,
|
|
"valid_targets_mean": 4106.0,
|
|
"valid_targets_min": 3239
|
|
},
|
|
{
|
|
"epoch": 4.9027269027269025,
|
|
"grad_norm": 0.39347213994342317,
|
|
"learning_rate": 9.988288264588106e-06,
|
|
"loss": 0.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041096627712249756,
|
|
"step": 6025,
|
|
"valid_targets_mean": 3558.8,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 4.906796906796907,
|
|
"grad_norm": 0.3567865777207391,
|
|
"learning_rate": 9.953180555578e-06,
|
|
"loss": 0.1034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03674373775720596,
|
|
"step": 6030,
|
|
"valid_targets_mean": 4000.0,
|
|
"valid_targets_min": 2959
|
|
},
|
|
{
|
|
"epoch": 4.910866910866911,
|
|
"grad_norm": 0.2939515031544418,
|
|
"learning_rate": 9.918114204790697e-06,
|
|
"loss": 0.0905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024303892627358437,
|
|
"step": 6035,
|
|
"valid_targets_mean": 3030.4,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 4.914936914936915,
|
|
"grad_norm": 0.5314390212449058,
|
|
"learning_rate": 9.883089356578545e-06,
|
|
"loss": 0.0958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045132189989089966,
|
|
"step": 6040,
|
|
"valid_targets_mean": 3038.5,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 4.919006919006919,
|
|
"grad_norm": 0.33340762029888144,
|
|
"learning_rate": 9.848106155123045e-06,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02736440859735012,
|
|
"step": 6045,
|
|
"valid_targets_mean": 3813.6,
|
|
"valid_targets_min": 3003
|
|
},
|
|
{
|
|
"epoch": 4.923076923076923,
|
|
"grad_norm": 0.538168843602704,
|
|
"learning_rate": 9.813164744434256e-06,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0524737685918808,
|
|
"step": 6050,
|
|
"valid_targets_mean": 2625.8,
|
|
"valid_targets_min": 1025
|
|
},
|
|
{
|
|
"epoch": 4.927146927146927,
|
|
"grad_norm": 0.39397698811634346,
|
|
"learning_rate": 9.778265268350204e-06,
|
|
"loss": 0.0767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04419640451669693,
|
|
"step": 6055,
|
|
"valid_targets_mean": 3914.9,
|
|
"valid_targets_min": 3129
|
|
},
|
|
{
|
|
"epoch": 4.931216931216931,
|
|
"grad_norm": 0.788807885519458,
|
|
"learning_rate": 9.743407870536277e-06,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055179398506879807,
|
|
"step": 6060,
|
|
"valid_targets_mean": 1266.1,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 4.9352869352869355,
|
|
"grad_norm": 0.4712531044323888,
|
|
"learning_rate": 9.708592694484655e-06,
|
|
"loss": 0.1036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03245309740304947,
|
|
"step": 6065,
|
|
"valid_targets_mean": 2087.1,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 4.939356939356939,
|
|
"grad_norm": 0.5077235785209703,
|
|
"learning_rate": 9.673819883513727e-06,
|
|
"loss": 0.0858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06637755781412125,
|
|
"step": 6070,
|
|
"valid_targets_mean": 3098.8,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 4.943426943426943,
|
|
"grad_norm": 0.36026473643652757,
|
|
"learning_rate": 9.639089580767445e-06,
|
|
"loss": 0.0778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03487565368413925,
|
|
"step": 6075,
|
|
"valid_targets_mean": 4018.6,
|
|
"valid_targets_min": 3151
|
|
},
|
|
{
|
|
"epoch": 4.947496947496948,
|
|
"grad_norm": 0.42480201057736183,
|
|
"learning_rate": 9.604401929214805e-06,
|
|
"loss": 0.0778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033923834562301636,
|
|
"step": 6080,
|
|
"valid_targets_mean": 3190.0,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 4.951566951566951,
|
|
"grad_norm": 0.5467294848551612,
|
|
"learning_rate": 9.56975707164922e-06,
|
|
"loss": 0.0708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029525276273489,
|
|
"step": 6085,
|
|
"valid_targets_mean": 2252.9,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 4.9556369556369555,
|
|
"grad_norm": 0.9869857674866476,
|
|
"learning_rate": 9.535155150687939e-06,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07059233635663986,
|
|
"step": 6090,
|
|
"valid_targets_mean": 975.9,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 4.95970695970696,
|
|
"grad_norm": 0.3873514877863149,
|
|
"learning_rate": 9.500596308771462e-06,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033523522317409515,
|
|
"step": 6095,
|
|
"valid_targets_mean": 3326.5,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.963776963776963,
|
|
"grad_norm": 0.4142970786518696,
|
|
"learning_rate": 9.466080688162937e-06,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03642815351486206,
|
|
"step": 6100,
|
|
"valid_targets_mean": 3462.9,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 4.967846967846968,
|
|
"grad_norm": 0.4388097100302869,
|
|
"learning_rate": 9.431608430947619e-06,
|
|
"loss": 0.0724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03296447917819023,
|
|
"step": 6105,
|
|
"valid_targets_mean": 3280.9,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 4.971916971916972,
|
|
"grad_norm": 0.3737989484289901,
|
|
"learning_rate": 9.397179679032219e-06,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03461182862520218,
|
|
"step": 6110,
|
|
"valid_targets_mean": 3928.1,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 4.975986975986976,
|
|
"grad_norm": 0.31608767099934815,
|
|
"learning_rate": 9.362794574144383e-06,
|
|
"loss": 0.0694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03851504623889923,
|
|
"step": 6115,
|
|
"valid_targets_mean": 4310.0,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 4.98005698005698,
|
|
"grad_norm": 0.3283616246045454,
|
|
"learning_rate": 9.328453257832078e-06,
|
|
"loss": 0.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03032471239566803,
|
|
"step": 6120,
|
|
"valid_targets_mean": 4135.0,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 4.984126984126984,
|
|
"grad_norm": 0.46368165314117155,
|
|
"learning_rate": 9.294155871463007e-06,
|
|
"loss": 0.0869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06997954845428467,
|
|
"step": 6125,
|
|
"valid_targets_mean": 3285.1,
|
|
"valid_targets_min": 1957
|
|
},
|
|
{
|
|
"epoch": 4.9881969881969885,
|
|
"grad_norm": 0.4185794895076128,
|
|
"learning_rate": 9.259902556224034e-06,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04614701494574547,
|
|
"step": 6130,
|
|
"valid_targets_mean": 3392.6,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 4.992266992266992,
|
|
"grad_norm": 0.5563291333666768,
|
|
"learning_rate": 9.225693453120614e-06,
|
|
"loss": 0.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06407204270362854,
|
|
"step": 6135,
|
|
"valid_targets_mean": 1517.8,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.996336996336996,
|
|
"grad_norm": 0.38670562354030985,
|
|
"learning_rate": 9.191528702976173e-06,
|
|
"loss": 0.072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03636377677321434,
|
|
"step": 6140,
|
|
"valid_targets_mean": 3538.4,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.4464669787231326,
|
|
"learning_rate": 9.15740844643159e-06,
|
|
"loss": 0.0996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051706526428461075,
|
|
"step": 6145,
|
|
"valid_targets_mean": 4028.4,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 5.004070004070004,
|
|
"grad_norm": 0.6854109342653534,
|
|
"learning_rate": 9.123332823944552e-06,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09130831062793732,
|
|
"step": 6150,
|
|
"valid_targets_mean": 7389.0,
|
|
"valid_targets_min": 5627
|
|
},
|
|
{
|
|
"epoch": 5.008140008140008,
|
|
"grad_norm": 0.6566317430199434,
|
|
"learning_rate": 9.089301975789029e-06,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049035608768463135,
|
|
"step": 6155,
|
|
"valid_targets_mean": 1294.8,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 5.012210012210012,
|
|
"grad_norm": 0.414825686168276,
|
|
"learning_rate": 9.05531604205467e-06,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09194067865610123,
|
|
"step": 6160,
|
|
"valid_targets_mean": 8539.9,
|
|
"valid_targets_min": 6761
|
|
},
|
|
{
|
|
"epoch": 5.0162800162800165,
|
|
"grad_norm": 0.40549825227625574,
|
|
"learning_rate": 9.021375162646233e-06,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08057986944913864,
|
|
"step": 6165,
|
|
"valid_targets_mean": 7579.0,
|
|
"valid_targets_min": 6174
|
|
},
|
|
{
|
|
"epoch": 5.02035002035002,
|
|
"grad_norm": 0.41714578888717524,
|
|
"learning_rate": 8.987479477282999e-06,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07731068134307861,
|
|
"step": 6170,
|
|
"valid_targets_mean": 7328.2,
|
|
"valid_targets_min": 4839
|
|
},
|
|
{
|
|
"epoch": 5.024420024420024,
|
|
"grad_norm": 0.40700870452137905,
|
|
"learning_rate": 8.953629125498227e-06,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09170807898044586,
|
|
"step": 6175,
|
|
"valid_targets_mean": 6838.2,
|
|
"valid_targets_min": 5034
|
|
},
|
|
{
|
|
"epoch": 5.028490028490029,
|
|
"grad_norm": 0.3922582234171617,
|
|
"learning_rate": 8.919824246638528e-06,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09518324583768845,
|
|
"step": 6180,
|
|
"valid_targets_mean": 7248.2,
|
|
"valid_targets_min": 5567
|
|
},
|
|
{
|
|
"epoch": 5.032560032560032,
|
|
"grad_norm": 0.3805986393619526,
|
|
"learning_rate": 8.886064979863334e-06,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07861495763063431,
|
|
"step": 6185,
|
|
"valid_targets_mean": 6234.0,
|
|
"valid_targets_min": 5603
|
|
},
|
|
{
|
|
"epoch": 5.0366300366300365,
|
|
"grad_norm": 0.6374511935157152,
|
|
"learning_rate": 8.852351464144322e-06,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024300407618284225,
|
|
"step": 6190,
|
|
"valid_targets_mean": 602.0,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 5.040700040700041,
|
|
"grad_norm": 0.40242759534056755,
|
|
"learning_rate": 8.818683838264826e-06,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08006134629249573,
|
|
"step": 6195,
|
|
"valid_targets_mean": 7299.9,
|
|
"valid_targets_min": 6158
|
|
},
|
|
{
|
|
"epoch": 5.044770044770045,
|
|
"grad_norm": 0.3913058263835821,
|
|
"learning_rate": 8.785062240819266e-06,
|
|
"loss": 0.1725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08000493794679642,
|
|
"step": 6200,
|
|
"valid_targets_mean": 6140.0,
|
|
"valid_targets_min": 4648
|
|
},
|
|
{
|
|
"epoch": 5.048840048840049,
|
|
"grad_norm": 0.4080986618966172,
|
|
"learning_rate": 8.751486810212599e-06,
|
|
"loss": 0.1654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08597160875797272,
|
|
"step": 6205,
|
|
"valid_targets_mean": 6603.0,
|
|
"valid_targets_min": 4882
|
|
},
|
|
{
|
|
"epoch": 5.052910052910053,
|
|
"grad_norm": 0.3727143747867664,
|
|
"learning_rate": 8.717957684659717e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08394499868154526,
|
|
"step": 6210,
|
|
"valid_targets_mean": 8015.0,
|
|
"valid_targets_min": 6344
|
|
},
|
|
{
|
|
"epoch": 5.056980056980057,
|
|
"grad_norm": 0.36991121771348234,
|
|
"learning_rate": 8.684475002184916e-06,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07384654879570007,
|
|
"step": 6215,
|
|
"valid_targets_mean": 6305.2,
|
|
"valid_targets_min": 4622
|
|
},
|
|
{
|
|
"epoch": 5.061050061050061,
|
|
"grad_norm": 0.3865770710996986,
|
|
"learning_rate": 8.651038900621277e-06,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0872340202331543,
|
|
"step": 6220,
|
|
"valid_targets_mean": 7890.8,
|
|
"valid_targets_min": 5485
|
|
},
|
|
{
|
|
"epoch": 5.065120065120065,
|
|
"grad_norm": 0.3847284521430894,
|
|
"learning_rate": 8.617649517610148e-06,
|
|
"loss": 0.1667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07563507556915283,
|
|
"step": 6225,
|
|
"valid_targets_mean": 7016.6,
|
|
"valid_targets_min": 4960
|
|
},
|
|
{
|
|
"epoch": 5.0691900691900695,
|
|
"grad_norm": 0.4213687533775985,
|
|
"learning_rate": 8.584306990600554e-06,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07866060733795166,
|
|
"step": 6230,
|
|
"valid_targets_mean": 6527.1,
|
|
"valid_targets_min": 5399
|
|
},
|
|
{
|
|
"epoch": 5.073260073260073,
|
|
"grad_norm": 0.46746065430470146,
|
|
"learning_rate": 8.55101145684864e-06,
|
|
"loss": 0.1683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07950198650360107,
|
|
"step": 6235,
|
|
"valid_targets_mean": 4995.0,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 5.077330077330077,
|
|
"grad_norm": 0.42212928764720686,
|
|
"learning_rate": 8.517763053417095e-06,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09315831959247589,
|
|
"step": 6240,
|
|
"valid_targets_mean": 7219.4,
|
|
"valid_targets_min": 5764
|
|
},
|
|
{
|
|
"epoch": 5.081400081400082,
|
|
"grad_norm": 0.4077166480983304,
|
|
"learning_rate": 8.484561917174592e-06,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08582858741283417,
|
|
"step": 6245,
|
|
"valid_targets_mean": 7998.5,
|
|
"valid_targets_min": 5902
|
|
},
|
|
{
|
|
"epoch": 5.085470085470085,
|
|
"grad_norm": 0.3787575234434601,
|
|
"learning_rate": 8.451408184795242e-06,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06539824604988098,
|
|
"step": 6250,
|
|
"valid_targets_mean": 7377.4,
|
|
"valid_targets_min": 5015
|
|
},
|
|
{
|
|
"epoch": 5.0895400895400895,
|
|
"grad_norm": 0.3870195711380443,
|
|
"learning_rate": 8.418301992757984e-06,
|
|
"loss": 0.164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07054895162582397,
|
|
"step": 6255,
|
|
"valid_targets_mean": 6794.0,
|
|
"valid_targets_min": 5283
|
|
},
|
|
{
|
|
"epoch": 5.093610093610094,
|
|
"grad_norm": 0.41166952386174893,
|
|
"learning_rate": 8.385243477346095e-06,
|
|
"loss": 0.1682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09239070862531662,
|
|
"step": 6260,
|
|
"valid_targets_mean": 6656.4,
|
|
"valid_targets_min": 4364
|
|
},
|
|
{
|
|
"epoch": 5.097680097680097,
|
|
"grad_norm": 0.38241917148742083,
|
|
"learning_rate": 8.352232774646545e-06,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07691332697868347,
|
|
"step": 6265,
|
|
"valid_targets_mean": 7178.4,
|
|
"valid_targets_min": 4448
|
|
},
|
|
{
|
|
"epoch": 5.101750101750102,
|
|
"grad_norm": 0.4136989734963614,
|
|
"learning_rate": 8.319270020549517e-06,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0996970385313034,
|
|
"step": 6270,
|
|
"valid_targets_mean": 7619.8,
|
|
"valid_targets_min": 5737
|
|
},
|
|
{
|
|
"epoch": 5.105820105820106,
|
|
"grad_norm": 0.37122078417443183,
|
|
"learning_rate": 8.286355350747795e-06,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07079966366291046,
|
|
"step": 6275,
|
|
"valid_targets_mean": 8754.4,
|
|
"valid_targets_min": 5752
|
|
},
|
|
{
|
|
"epoch": 5.1098901098901095,
|
|
"grad_norm": 0.41739380473502985,
|
|
"learning_rate": 8.253488900736226e-06,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.077818363904953,
|
|
"step": 6280,
|
|
"valid_targets_mean": 6341.0,
|
|
"valid_targets_min": 4543
|
|
},
|
|
{
|
|
"epoch": 5.113960113960114,
|
|
"grad_norm": 0.41626778838918543,
|
|
"learning_rate": 8.220670805811156e-06,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0813712328672409,
|
|
"step": 6285,
|
|
"valid_targets_mean": 7181.9,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 5.118030118030118,
|
|
"grad_norm": 0.7414749413994536,
|
|
"learning_rate": 8.187901201069878e-06,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051455169916152954,
|
|
"step": 6290,
|
|
"valid_targets_mean": 1104.4,
|
|
"valid_targets_min": 161
|
|
},
|
|
{
|
|
"epoch": 5.122100122100122,
|
|
"grad_norm": 0.42212437842472816,
|
|
"learning_rate": 8.155180221410062e-06,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07822007685899734,
|
|
"step": 6295,
|
|
"valid_targets_mean": 7018.2,
|
|
"valid_targets_min": 5424
|
|
},
|
|
{
|
|
"epoch": 5.126170126170126,
|
|
"grad_norm": 0.43199230917096043,
|
|
"learning_rate": 8.12250800152923e-06,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08775204420089722,
|
|
"step": 6300,
|
|
"valid_targets_mean": 7738.4,
|
|
"valid_targets_min": 5329
|
|
},
|
|
{
|
|
"epoch": 5.13024013024013,
|
|
"grad_norm": 0.4607805236152848,
|
|
"learning_rate": 8.089884675924155e-06,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08426693081855774,
|
|
"step": 6305,
|
|
"valid_targets_mean": 6426.6,
|
|
"valid_targets_min": 5560
|
|
},
|
|
{
|
|
"epoch": 5.134310134310135,
|
|
"grad_norm": 0.42455220317990994,
|
|
"learning_rate": 8.057310378890362e-06,
|
|
"loss": 0.171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0837627574801445,
|
|
"step": 6310,
|
|
"valid_targets_mean": 6169.9,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 5.138380138380138,
|
|
"grad_norm": 0.39791893535614575,
|
|
"learning_rate": 8.024785244521528e-06,
|
|
"loss": 0.1676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08946311473846436,
|
|
"step": 6315,
|
|
"valid_targets_mean": 7204.6,
|
|
"valid_targets_min": 5381
|
|
},
|
|
{
|
|
"epoch": 5.1424501424501425,
|
|
"grad_norm": 0.3868007797054446,
|
|
"learning_rate": 7.99230940670896e-06,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08641095459461212,
|
|
"step": 6320,
|
|
"valid_targets_mean": 7600.4,
|
|
"valid_targets_min": 5241
|
|
},
|
|
{
|
|
"epoch": 5.146520146520147,
|
|
"grad_norm": 0.39428677674139484,
|
|
"learning_rate": 7.959882999141032e-06,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09060313552618027,
|
|
"step": 6325,
|
|
"valid_targets_mean": 7098.5,
|
|
"valid_targets_min": 5607
|
|
},
|
|
{
|
|
"epoch": 5.15059015059015,
|
|
"grad_norm": 0.4263247652696508,
|
|
"learning_rate": 7.92750615530264e-06,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09046140313148499,
|
|
"step": 6330,
|
|
"valid_targets_mean": 6955.1,
|
|
"valid_targets_min": 5530
|
|
},
|
|
{
|
|
"epoch": 5.154660154660155,
|
|
"grad_norm": 0.4159005125904762,
|
|
"learning_rate": 7.895179008474634e-06,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08619844913482666,
|
|
"step": 6335,
|
|
"valid_targets_mean": 6748.1,
|
|
"valid_targets_min": 5364
|
|
},
|
|
{
|
|
"epoch": 5.158730158730159,
|
|
"grad_norm": 0.45512866927412166,
|
|
"learning_rate": 7.862901691733287e-06,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0810968279838562,
|
|
"step": 6340,
|
|
"valid_targets_mean": 6273.9,
|
|
"valid_targets_min": 5438
|
|
},
|
|
{
|
|
"epoch": 5.1628001628001625,
|
|
"grad_norm": 0.4004779475142846,
|
|
"learning_rate": 7.830674337949756e-06,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07858355343341827,
|
|
"step": 6345,
|
|
"valid_targets_mean": 6419.5,
|
|
"valid_targets_min": 4879
|
|
},
|
|
{
|
|
"epoch": 5.166870166870167,
|
|
"grad_norm": 0.4558820682158631,
|
|
"learning_rate": 7.798497079789513e-06,
|
|
"loss": 0.1637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07956701517105103,
|
|
"step": 6350,
|
|
"valid_targets_mean": 6483.4,
|
|
"valid_targets_min": 4309
|
|
},
|
|
{
|
|
"epoch": 5.170940170940171,
|
|
"grad_norm": 0.4440243865004471,
|
|
"learning_rate": 7.76637004971182e-06,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09749727696180344,
|
|
"step": 6355,
|
|
"valid_targets_mean": 6825.0,
|
|
"valid_targets_min": 5205
|
|
},
|
|
{
|
|
"epoch": 5.175010175010175,
|
|
"grad_norm": 0.4204178711309292,
|
|
"learning_rate": 7.734293379969157e-06,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07635550945997238,
|
|
"step": 6360,
|
|
"valid_targets_mean": 6729.4,
|
|
"valid_targets_min": 5200
|
|
},
|
|
{
|
|
"epoch": 5.179080179080179,
|
|
"grad_norm": 0.42587388736681253,
|
|
"learning_rate": 7.702267202606709e-06,
|
|
"loss": 0.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07823633402585983,
|
|
"step": 6365,
|
|
"valid_targets_mean": 6094.0,
|
|
"valid_targets_min": 5302
|
|
},
|
|
{
|
|
"epoch": 5.183150183150183,
|
|
"grad_norm": 0.40638485768353844,
|
|
"learning_rate": 7.670291649461798e-06,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08494766801595688,
|
|
"step": 6370,
|
|
"valid_targets_mean": 7140.0,
|
|
"valid_targets_min": 5270
|
|
},
|
|
{
|
|
"epoch": 5.187220187220187,
|
|
"grad_norm": 0.3845786657820816,
|
|
"learning_rate": 7.638366852163348e-06,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07755722105503082,
|
|
"step": 6375,
|
|
"valid_targets_mean": 7425.0,
|
|
"valid_targets_min": 5281
|
|
},
|
|
{
|
|
"epoch": 5.191290191290191,
|
|
"grad_norm": 0.41277396807257505,
|
|
"learning_rate": 7.606492942131336e-06,
|
|
"loss": 0.1669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1049172580242157,
|
|
"step": 6380,
|
|
"valid_targets_mean": 7063.4,
|
|
"valid_targets_min": 5386
|
|
},
|
|
{
|
|
"epoch": 5.1953601953601956,
|
|
"grad_norm": 0.40861047918605575,
|
|
"learning_rate": 7.574670050576281e-06,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08464010059833527,
|
|
"step": 6385,
|
|
"valid_targets_mean": 6827.0,
|
|
"valid_targets_min": 4547
|
|
},
|
|
{
|
|
"epoch": 5.199430199430199,
|
|
"grad_norm": 0.5620066997157668,
|
|
"learning_rate": 7.542898308498663e-06,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038255929946899414,
|
|
"step": 6390,
|
|
"valid_targets_mean": 2047.0,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 5.203500203500203,
|
|
"grad_norm": 0.44632121984559203,
|
|
"learning_rate": 7.511177846688413e-06,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07810096442699432,
|
|
"step": 6395,
|
|
"valid_targets_mean": 6754.1,
|
|
"valid_targets_min": 5196
|
|
},
|
|
{
|
|
"epoch": 5.207570207570208,
|
|
"grad_norm": 0.4574803118753031,
|
|
"learning_rate": 7.479508795724361e-06,
|
|
"loss": 0.1603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07977963984012604,
|
|
"step": 6400,
|
|
"valid_targets_mean": 5985.6,
|
|
"valid_targets_min": 5028
|
|
},
|
|
{
|
|
"epoch": 5.211640211640212,
|
|
"grad_norm": 0.42827249479040463,
|
|
"learning_rate": 7.447891285973705e-06,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07478997111320496,
|
|
"step": 6405,
|
|
"valid_targets_mean": 5963.9,
|
|
"valid_targets_min": 5237
|
|
},
|
|
{
|
|
"epoch": 5.2157102157102155,
|
|
"grad_norm": 0.4401945100045424,
|
|
"learning_rate": 7.416325447591468e-06,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08579155802726746,
|
|
"step": 6410,
|
|
"valid_targets_mean": 6319.0,
|
|
"valid_targets_min": 5412
|
|
},
|
|
{
|
|
"epoch": 5.21978021978022,
|
|
"grad_norm": 0.3872584192038548,
|
|
"learning_rate": 7.384811410519961e-06,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06653904169797897,
|
|
"step": 6415,
|
|
"valid_targets_mean": 7125.8,
|
|
"valid_targets_min": 5080
|
|
},
|
|
{
|
|
"epoch": 5.223850223850224,
|
|
"grad_norm": 0.4118913981470542,
|
|
"learning_rate": 7.353349304488251e-06,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07929787784814835,
|
|
"step": 6420,
|
|
"valid_targets_mean": 6123.5,
|
|
"valid_targets_min": 4521
|
|
},
|
|
{
|
|
"epoch": 5.227920227920228,
|
|
"grad_norm": 0.8933632330027761,
|
|
"learning_rate": 7.321939259011639e-06,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09677982330322266,
|
|
"step": 6425,
|
|
"valid_targets_mean": 1886.0,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 5.231990231990232,
|
|
"grad_norm": 0.7717354209997601,
|
|
"learning_rate": 7.29058140339111e-06,
|
|
"loss": 0.1527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07828053832054138,
|
|
"step": 6430,
|
|
"valid_targets_mean": 1497.6,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 5.236060236060236,
|
|
"grad_norm": 0.7156196156160548,
|
|
"learning_rate": 7.259275866712812e-06,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05080351233482361,
|
|
"step": 6435,
|
|
"valid_targets_mean": 1356.0,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 5.24013024013024,
|
|
"grad_norm": 0.7775879541195082,
|
|
"learning_rate": 7.22802277784751e-06,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05790482088923454,
|
|
"step": 6440,
|
|
"valid_targets_mean": 1324.5,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 5.244200244200244,
|
|
"grad_norm": 0.7428014404555288,
|
|
"learning_rate": 7.196822265450079e-06,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06787339597940445,
|
|
"step": 6445,
|
|
"valid_targets_mean": 1931.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 5.248270248270249,
|
|
"grad_norm": 0.8246845467297721,
|
|
"learning_rate": 7.165674457958938e-06,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05890588089823723,
|
|
"step": 6450,
|
|
"valid_targets_mean": 1292.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 5.252340252340252,
|
|
"grad_norm": 0.7747801015256733,
|
|
"learning_rate": 7.134579483595574e-06,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05581791326403618,
|
|
"step": 6455,
|
|
"valid_targets_mean": 1469.6,
|
|
"valid_targets_min": 655
|
|
},
|
|
{
|
|
"epoch": 5.256410256410256,
|
|
"grad_norm": 0.7889248714375038,
|
|
"learning_rate": 7.10353747036395e-06,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07032492756843567,
|
|
"step": 6460,
|
|
"valid_targets_mean": 1548.4,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 5.260480260480261,
|
|
"grad_norm": 0.7576401372568481,
|
|
"learning_rate": 7.072548546050038e-06,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056093454360961914,
|
|
"step": 6465,
|
|
"valid_targets_mean": 1316.5,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 5.264550264550264,
|
|
"grad_norm": 0.8148494000497493,
|
|
"learning_rate": 7.041612838221257e-06,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0493401475250721,
|
|
"step": 6470,
|
|
"valid_targets_mean": 1249.6,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 5.2686202686202686,
|
|
"grad_norm": 0.8821896425064455,
|
|
"learning_rate": 7.010730474225958e-06,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0843823179602623,
|
|
"step": 6475,
|
|
"valid_targets_mean": 1833.0,
|
|
"valid_targets_min": 927
|
|
},
|
|
{
|
|
"epoch": 5.272690272690273,
|
|
"grad_norm": 0.8128938462953682,
|
|
"learning_rate": 6.979901581192903e-06,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06127440556883812,
|
|
"step": 6480,
|
|
"valid_targets_mean": 1338.4,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.276760276760276,
|
|
"grad_norm": 0.8516276560604461,
|
|
"learning_rate": 6.949126286030739e-06,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06296617537736893,
|
|
"step": 6485,
|
|
"valid_targets_mean": 1244.6,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 5.280830280830281,
|
|
"grad_norm": 0.7641818533358684,
|
|
"learning_rate": 6.91840471542746e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05841711908578873,
|
|
"step": 6490,
|
|
"valid_targets_mean": 1732.5,
|
|
"valid_targets_min": 986
|
|
},
|
|
{
|
|
"epoch": 5.284900284900285,
|
|
"grad_norm": 0.7751581720642792,
|
|
"learning_rate": 6.887736995849925e-06,
|
|
"loss": 0.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054849620908498764,
|
|
"step": 6495,
|
|
"valid_targets_mean": 1389.9,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 5.2889702889702885,
|
|
"grad_norm": 0.8647625768666302,
|
|
"learning_rate": 6.857123253543286e-06,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06798947602510452,
|
|
"step": 6500,
|
|
"valid_targets_mean": 1593.2,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 5.293040293040293,
|
|
"grad_norm": 0.8101312346504578,
|
|
"learning_rate": 6.826563614530511e-06,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07953788340091705,
|
|
"step": 6505,
|
|
"valid_targets_mean": 1926.6,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 5.297110297110297,
|
|
"grad_norm": 0.8479150840387949,
|
|
"learning_rate": 6.7960582046118505e-06,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07549211382865906,
|
|
"step": 6510,
|
|
"valid_targets_mean": 1953.1,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 5.301180301180302,
|
|
"grad_norm": 0.7598838779769721,
|
|
"learning_rate": 6.765607149364313e-06,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053766507655382156,
|
|
"step": 6515,
|
|
"valid_targets_mean": 1327.2,
|
|
"valid_targets_min": 1023
|
|
},
|
|
{
|
|
"epoch": 5.305250305250305,
|
|
"grad_norm": 1.6475991371156928,
|
|
"learning_rate": 6.735210574141158e-06,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06451068818569183,
|
|
"step": 6520,
|
|
"valid_targets_mean": 1717.9,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 5.309320309320309,
|
|
"grad_norm": 0.8012862936876622,
|
|
"learning_rate": 6.704868604071362e-06,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05108807235956192,
|
|
"step": 6525,
|
|
"valid_targets_mean": 1139.9,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 5.313390313390314,
|
|
"grad_norm": 1.10291965479662,
|
|
"learning_rate": 6.674581364059138e-06,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06254150718450546,
|
|
"step": 6530,
|
|
"valid_targets_mean": 1444.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 5.317460317460317,
|
|
"grad_norm": 0.7975798780270608,
|
|
"learning_rate": 6.644348978783375e-06,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045557815581560135,
|
|
"step": 6535,
|
|
"valid_targets_mean": 1242.2,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 5.321530321530322,
|
|
"grad_norm": 0.808432619410068,
|
|
"learning_rate": 6.614171572697172e-06,
|
|
"loss": 0.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06855520606040955,
|
|
"step": 6540,
|
|
"valid_targets_mean": 1569.2,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 5.325600325600326,
|
|
"grad_norm": 0.8165752644870131,
|
|
"learning_rate": 6.584049270027291e-06,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058723099529743195,
|
|
"step": 6545,
|
|
"valid_targets_mean": 1539.8,
|
|
"valid_targets_min": 686
|
|
},
|
|
{
|
|
"epoch": 5.329670329670329,
|
|
"grad_norm": 0.8800699749983475,
|
|
"learning_rate": 6.553982194773663e-06,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07176946103572845,
|
|
"step": 6550,
|
|
"valid_targets_mean": 1610.6,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.333740333740334,
|
|
"grad_norm": 0.7914141839534132,
|
|
"learning_rate": 6.523970470708874e-06,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04290122538805008,
|
|
"step": 6555,
|
|
"valid_targets_mean": 1190.4,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 5.337810337810338,
|
|
"grad_norm": 0.8735942419573629,
|
|
"learning_rate": 6.494014221377654e-06,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06523449718952179,
|
|
"step": 6560,
|
|
"valid_targets_mean": 1467.1,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 5.3418803418803416,
|
|
"grad_norm": 0.7904142925153533,
|
|
"learning_rate": 6.4641135700963555e-06,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0542932003736496,
|
|
"step": 6565,
|
|
"valid_targets_mean": 1350.5,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 5.345950345950346,
|
|
"grad_norm": 0.8188017954142414,
|
|
"learning_rate": 6.434268639952482e-06,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052746959030628204,
|
|
"step": 6570,
|
|
"valid_targets_mean": 1283.5,
|
|
"valid_targets_min": 781
|
|
},
|
|
{
|
|
"epoch": 5.35002035002035,
|
|
"grad_norm": 0.8883502243806306,
|
|
"learning_rate": 6.4044795538041325e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052638888359069824,
|
|
"step": 6575,
|
|
"valid_targets_mean": 1303.2,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 5.354090354090354,
|
|
"grad_norm": 0.7346427480548663,
|
|
"learning_rate": 6.374746434279542e-06,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0480545237660408,
|
|
"step": 6580,
|
|
"valid_targets_mean": 1309.1,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 5.358160358160358,
|
|
"grad_norm": 0.7749262088030695,
|
|
"learning_rate": 6.345069403776547e-06,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05063071846961975,
|
|
"step": 6585,
|
|
"valid_targets_mean": 1397.6,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 5.362230362230362,
|
|
"grad_norm": 0.8904924291717923,
|
|
"learning_rate": 6.3154485844620935e-06,
|
|
"loss": 0.1202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045979805290699005,
|
|
"step": 6590,
|
|
"valid_targets_mean": 1187.8,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 5.366300366300366,
|
|
"grad_norm": 0.8134025550512829,
|
|
"learning_rate": 6.285884098271739e-06,
|
|
"loss": 0.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05374240130186081,
|
|
"step": 6595,
|
|
"valid_targets_mean": 1547.0,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 5.37037037037037,
|
|
"grad_norm": 0.8806055042632812,
|
|
"learning_rate": 6.25637606690912e-06,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06583143770694733,
|
|
"step": 6600,
|
|
"valid_targets_mean": 1704.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 5.374440374440375,
|
|
"grad_norm": 0.9022742278146144,
|
|
"learning_rate": 6.226924611845495e-06,
|
|
"loss": 0.1209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04527607560157776,
|
|
"step": 6605,
|
|
"valid_targets_mean": 1255.0,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 5.378510378510379,
|
|
"grad_norm": 0.8451608246424978,
|
|
"learning_rate": 6.197529854319222e-06,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06376711279153824,
|
|
"step": 6610,
|
|
"valid_targets_mean": 1550.8,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 5.382580382580382,
|
|
"grad_norm": 0.8477923753137665,
|
|
"learning_rate": 6.168191915335242e-06,
|
|
"loss": 0.1162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06021396815776825,
|
|
"step": 6615,
|
|
"valid_targets_mean": 1362.0,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 5.386650386650387,
|
|
"grad_norm": 0.8811854153496774,
|
|
"learning_rate": 6.138910915664624e-06,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.069005087018013,
|
|
"step": 6620,
|
|
"valid_targets_mean": 1666.8,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 5.390720390720391,
|
|
"grad_norm": 0.8205504832118758,
|
|
"learning_rate": 6.109686975844029e-06,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05043880641460419,
|
|
"step": 6625,
|
|
"valid_targets_mean": 1319.6,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 5.394790394790395,
|
|
"grad_norm": 1.1123497884965143,
|
|
"learning_rate": 6.080520216175236e-06,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06800902634859085,
|
|
"step": 6630,
|
|
"valid_targets_mean": 1727.9,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 5.398860398860399,
|
|
"grad_norm": 0.8955095197538415,
|
|
"learning_rate": 6.051410756724638e-06,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06400962918996811,
|
|
"step": 6635,
|
|
"valid_targets_mean": 1643.2,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 5.402930402930403,
|
|
"grad_norm": 0.867837643579163,
|
|
"learning_rate": 6.022358717322734e-06,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05850240960717201,
|
|
"step": 6640,
|
|
"valid_targets_mean": 1478.5,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 5.407000407000407,
|
|
"grad_norm": 0.7621902905404546,
|
|
"learning_rate": 5.993364217563671e-06,
|
|
"loss": 0.1176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05375190079212189,
|
|
"step": 6645,
|
|
"valid_targets_mean": 1522.6,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 5.411070411070411,
|
|
"grad_norm": 0.8033822439060038,
|
|
"learning_rate": 5.964427376804726e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06127481535077095,
|
|
"step": 6650,
|
|
"valid_targets_mean": 1589.0,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 5.415140415140415,
|
|
"grad_norm": 0.7789522955304922,
|
|
"learning_rate": 5.935548314165809e-06,
|
|
"loss": 0.1247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05730774253606796,
|
|
"step": 6655,
|
|
"valid_targets_mean": 1574.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 5.419210419210419,
|
|
"grad_norm": 0.7825880008149688,
|
|
"learning_rate": 5.9067271485289945e-06,
|
|
"loss": 0.1141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05469139292836189,
|
|
"step": 6660,
|
|
"valid_targets_mean": 1422.0,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 5.423280423280423,
|
|
"grad_norm": 0.8034253037962921,
|
|
"learning_rate": 5.877963998538019e-06,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04614481329917908,
|
|
"step": 6665,
|
|
"valid_targets_mean": 1150.6,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 5.427350427350428,
|
|
"grad_norm": 0.8550910565715886,
|
|
"learning_rate": 5.849258982597801e-06,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06886758655309677,
|
|
"step": 6670,
|
|
"valid_targets_mean": 1684.2,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 5.431420431420431,
|
|
"grad_norm": 0.9381725661390383,
|
|
"learning_rate": 5.820612218873927e-06,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07469278573989868,
|
|
"step": 6675,
|
|
"valid_targets_mean": 1937.1,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.435490435490435,
|
|
"grad_norm": 0.8330098856323024,
|
|
"learning_rate": 5.792023825292201e-06,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06777373701334,
|
|
"step": 6680,
|
|
"valid_targets_mean": 1842.2,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 5.43956043956044,
|
|
"grad_norm": 0.8473418521641808,
|
|
"learning_rate": 5.763493919538154e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06759806722402573,
|
|
"step": 6685,
|
|
"valid_targets_mean": 1637.6,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 5.443630443630443,
|
|
"grad_norm": 0.8280879497111717,
|
|
"learning_rate": 5.735022619056521e-06,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06591871380805969,
|
|
"step": 6690,
|
|
"valid_targets_mean": 1969.9,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 5.447700447700448,
|
|
"grad_norm": 0.8139854030624378,
|
|
"learning_rate": 5.706610041050806e-06,
|
|
"loss": 0.1152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05533464252948761,
|
|
"step": 6695,
|
|
"valid_targets_mean": 1432.1,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 5.451770451770452,
|
|
"grad_norm": 0.8118262581872678,
|
|
"learning_rate": 5.678256302482772e-06,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06479828804731369,
|
|
"step": 6700,
|
|
"valid_targets_mean": 1695.6,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 5.455840455840455,
|
|
"grad_norm": 0.7908183689246445,
|
|
"learning_rate": 5.6499615200719735e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06296779215335846,
|
|
"step": 6705,
|
|
"valid_targets_mean": 1524.5,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 5.45991045991046,
|
|
"grad_norm": 0.8157309366372941,
|
|
"learning_rate": 5.621725810295264e-06,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05387778580188751,
|
|
"step": 6710,
|
|
"valid_targets_mean": 1289.2,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 5.463980463980464,
|
|
"grad_norm": 0.7987860552011007,
|
|
"learning_rate": 5.593549289386315e-06,
|
|
"loss": 0.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055739592760801315,
|
|
"step": 6715,
|
|
"valid_targets_mean": 1519.5,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 5.4680504680504685,
|
|
"grad_norm": 0.8385843690819885,
|
|
"learning_rate": 5.565432073335153e-06,
|
|
"loss": 0.1237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0668364018201828,
|
|
"step": 6720,
|
|
"valid_targets_mean": 1731.4,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 5.472120472120472,
|
|
"grad_norm": 0.7910981905181974,
|
|
"learning_rate": 5.537374277887677e-06,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05977032706141472,
|
|
"step": 6725,
|
|
"valid_targets_mean": 1486.1,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 5.476190476190476,
|
|
"grad_norm": 0.9696747865228715,
|
|
"learning_rate": 5.509376018545161e-06,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057927779853343964,
|
|
"step": 6730,
|
|
"valid_targets_mean": 1549.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 5.480260480260481,
|
|
"grad_norm": 0.8421335406537752,
|
|
"learning_rate": 5.481437410563813e-06,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060152310878038406,
|
|
"step": 6735,
|
|
"valid_targets_mean": 1656.0,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 5.484330484330484,
|
|
"grad_norm": 0.7856036336209856,
|
|
"learning_rate": 5.4535585689542735e-06,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054788943380117416,
|
|
"step": 6740,
|
|
"valid_targets_mean": 1368.8,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 5.488400488400488,
|
|
"grad_norm": 0.827864917037226,
|
|
"learning_rate": 5.4257396084811665e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06671278178691864,
|
|
"step": 6745,
|
|
"valid_targets_mean": 1765.8,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 5.492470492470493,
|
|
"grad_norm": 0.8303973263376846,
|
|
"learning_rate": 5.397980643662586e-06,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06511053442955017,
|
|
"step": 6750,
|
|
"valid_targets_mean": 1586.8,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 5.496540496540496,
|
|
"grad_norm": 0.8780460301609091,
|
|
"learning_rate": 5.370281788769673e-06,
|
|
"loss": 0.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05283388867974281,
|
|
"step": 6755,
|
|
"valid_targets_mean": 1336.9,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 5.500610500610501,
|
|
"grad_norm": 0.8240875330044186,
|
|
"learning_rate": 5.342643157826117e-06,
|
|
"loss": 0.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03851066157221794,
|
|
"step": 6760,
|
|
"valid_targets_mean": 1097.1,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 5.504680504680505,
|
|
"grad_norm": 0.8739039631522074,
|
|
"learning_rate": 5.315064864607695e-06,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05119139701128006,
|
|
"step": 6765,
|
|
"valid_targets_mean": 1484.9,
|
|
"valid_targets_min": 888
|
|
},
|
|
{
|
|
"epoch": 5.508750508750508,
|
|
"grad_norm": 0.8003068896738135,
|
|
"learning_rate": 5.287547022641788e-06,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06188352033495903,
|
|
"step": 6770,
|
|
"valid_targets_mean": 1757.0,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 5.512820512820513,
|
|
"grad_norm": 0.927599280836426,
|
|
"learning_rate": 5.260089745206942e-06,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06444002687931061,
|
|
"step": 6775,
|
|
"valid_targets_mean": 1476.1,
|
|
"valid_targets_min": 522
|
|
},
|
|
{
|
|
"epoch": 5.516890516890517,
|
|
"grad_norm": 0.824485847569823,
|
|
"learning_rate": 5.232693145332379e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04994414374232292,
|
|
"step": 6780,
|
|
"valid_targets_mean": 1230.9,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 5.520960520960521,
|
|
"grad_norm": 0.8214091658820951,
|
|
"learning_rate": 5.205357335797545e-06,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06151646375656128,
|
|
"step": 6785,
|
|
"valid_targets_mean": 1671.0,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 5.525030525030525,
|
|
"grad_norm": 0.880824287835339,
|
|
"learning_rate": 5.178082429131628e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055703192949295044,
|
|
"step": 6790,
|
|
"valid_targets_mean": 1411.0,
|
|
"valid_targets_min": 769
|
|
},
|
|
{
|
|
"epoch": 5.529100529100529,
|
|
"grad_norm": 0.8856311775898162,
|
|
"learning_rate": 5.150868537613114e-06,
|
|
"loss": 0.1171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04835759103298187,
|
|
"step": 6795,
|
|
"valid_targets_mean": 1298.6,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 5.533170533170534,
|
|
"grad_norm": 0.8530843893172676,
|
|
"learning_rate": 5.123715773269318e-06,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043554842472076416,
|
|
"step": 6800,
|
|
"valid_targets_mean": 1118.9,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 5.537240537240537,
|
|
"grad_norm": 0.8234954694495478,
|
|
"learning_rate": 5.096624247875925e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07579924166202545,
|
|
"step": 6805,
|
|
"valid_targets_mean": 1905.9,
|
|
"valid_targets_min": 1347
|
|
},
|
|
{
|
|
"epoch": 5.5413105413105415,
|
|
"grad_norm": 0.9238596830777893,
|
|
"learning_rate": 5.069594072956512e-06,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08299441635608673,
|
|
"step": 6810,
|
|
"valid_targets_mean": 1636.0,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 5.545380545380546,
|
|
"grad_norm": 0.8280105418368178,
|
|
"learning_rate": 5.042625359782118e-06,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04978195205330849,
|
|
"step": 6815,
|
|
"valid_targets_mean": 1457.2,
|
|
"valid_targets_min": 903
|
|
},
|
|
{
|
|
"epoch": 5.549450549450549,
|
|
"grad_norm": 0.8327669041192984,
|
|
"learning_rate": 5.015718219370775e-06,
|
|
"loss": 0.1146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06120377779006958,
|
|
"step": 6820,
|
|
"valid_targets_mean": 1674.4,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 5.553520553520554,
|
|
"grad_norm": 0.6091529761830878,
|
|
"learning_rate": 4.988872762487029e-06,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047283854335546494,
|
|
"step": 6825,
|
|
"valid_targets_mean": 1697.1,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 5.557590557590558,
|
|
"grad_norm": 0.7831969192018885,
|
|
"learning_rate": 4.962089099641518e-06,
|
|
"loss": 0.1051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05221349745988846,
|
|
"step": 6830,
|
|
"valid_targets_mean": 1580.9,
|
|
"valid_targets_min": 930
|
|
},
|
|
{
|
|
"epoch": 5.561660561660561,
|
|
"grad_norm": 0.784999322544449,
|
|
"learning_rate": 4.935367341090498e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059659071266651154,
|
|
"step": 6835,
|
|
"valid_targets_mean": 1655.1,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 5.565730565730566,
|
|
"grad_norm": 0.8561304848098958,
|
|
"learning_rate": 4.908707596835396e-06,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05701415240764618,
|
|
"step": 6840,
|
|
"valid_targets_mean": 1565.8,
|
|
"valid_targets_min": 870
|
|
},
|
|
{
|
|
"epoch": 5.56980056980057,
|
|
"grad_norm": 0.8237448435276692,
|
|
"learning_rate": 4.882109976622353e-06,
|
|
"loss": 0.114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05764477699995041,
|
|
"step": 6845,
|
|
"valid_targets_mean": 1564.6,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 5.573870573870574,
|
|
"grad_norm": 0.9077557622146745,
|
|
"learning_rate": 4.855574589941763e-06,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05498945713043213,
|
|
"step": 6850,
|
|
"valid_targets_mean": 1540.5,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 5.577940577940578,
|
|
"grad_norm": 0.8208304804405712,
|
|
"learning_rate": 4.829101546027843e-06,
|
|
"loss": 0.1097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05034732073545456,
|
|
"step": 6855,
|
|
"valid_targets_mean": 1268.6,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.582010582010582,
|
|
"grad_norm": 0.8341724291260191,
|
|
"learning_rate": 4.80269095385818e-06,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045787129551172256,
|
|
"step": 6860,
|
|
"valid_targets_mean": 1232.5,
|
|
"valid_targets_min": 646
|
|
},
|
|
{
|
|
"epoch": 5.586080586080586,
|
|
"grad_norm": 0.6220953592726438,
|
|
"learning_rate": 4.776342922153252e-06,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04945782572031021,
|
|
"step": 6865,
|
|
"valid_targets_mean": 3032.8,
|
|
"valid_targets_min": 2115
|
|
},
|
|
{
|
|
"epoch": 5.59015059015059,
|
|
"grad_norm": 0.514602989750909,
|
|
"learning_rate": 4.750057559376027e-06,
|
|
"loss": 0.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03673814609646797,
|
|
"step": 6870,
|
|
"valid_targets_mean": 2553.1,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 5.5942205942205945,
|
|
"grad_norm": 0.4451969667171798,
|
|
"learning_rate": 4.72383497373148e-06,
|
|
"loss": 0.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04091775789856911,
|
|
"step": 6875,
|
|
"valid_targets_mean": 2212.5,
|
|
"valid_targets_min": 982
|
|
},
|
|
{
|
|
"epoch": 5.598290598290598,
|
|
"grad_norm": 0.6233305674965636,
|
|
"learning_rate": 4.6976752731661755e-06,
|
|
"loss": 0.0736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042984090745449066,
|
|
"step": 6880,
|
|
"valid_targets_mean": 3619.4,
|
|
"valid_targets_min": 2479
|
|
},
|
|
{
|
|
"epoch": 5.602360602360602,
|
|
"grad_norm": 0.3301906624858536,
|
|
"learning_rate": 4.671578565367783e-06,
|
|
"loss": 0.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027935273945331573,
|
|
"step": 6885,
|
|
"valid_targets_mean": 3030.5,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 5.606430606430607,
|
|
"grad_norm": 0.3998270887471306,
|
|
"learning_rate": 4.645544957764683e-06,
|
|
"loss": 0.0766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041048966348171234,
|
|
"step": 6890,
|
|
"valid_targets_mean": 2922.1,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 5.61050061050061,
|
|
"grad_norm": 0.7362755403982966,
|
|
"learning_rate": 4.619574557525497e-06,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09079274535179138,
|
|
"step": 6895,
|
|
"valid_targets_mean": 1991.0,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 5.6145706145706145,
|
|
"grad_norm": 0.3819808054209917,
|
|
"learning_rate": 4.5936674715586335e-06,
|
|
"loss": 0.0923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045745231211185455,
|
|
"step": 6900,
|
|
"valid_targets_mean": 3251.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.618640618640619,
|
|
"grad_norm": 0.4913669321147579,
|
|
"learning_rate": 4.567823806511882e-06,
|
|
"loss": 0.0844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0513700470328331,
|
|
"step": 6905,
|
|
"valid_targets_mean": 2681.1,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 5.622710622710622,
|
|
"grad_norm": 0.46623010414455635,
|
|
"learning_rate": 4.542043668771956e-06,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05827459692955017,
|
|
"step": 6910,
|
|
"valid_targets_mean": 3618.6,
|
|
"valid_targets_min": 1369
|
|
},
|
|
{
|
|
"epoch": 5.626780626780627,
|
|
"grad_norm": 0.40350792589628737,
|
|
"learning_rate": 4.516327164464045e-06,
|
|
"loss": 0.0608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03749353438615799,
|
|
"step": 6915,
|
|
"valid_targets_mean": 2980.5,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 5.630850630850631,
|
|
"grad_norm": 0.44644536414753183,
|
|
"learning_rate": 4.490674399451404e-06,
|
|
"loss": 0.0748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04275457561016083,
|
|
"step": 6920,
|
|
"valid_targets_mean": 2610.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 5.634920634920634,
|
|
"grad_norm": 0.3716682275188854,
|
|
"learning_rate": 4.465085479334881e-06,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04192643612623215,
|
|
"step": 6925,
|
|
"valid_targets_mean": 3132.1,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 5.638990638990639,
|
|
"grad_norm": 0.6737602005566654,
|
|
"learning_rate": 4.439560509452521e-06,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1757407784461975,
|
|
"step": 6930,
|
|
"valid_targets_mean": 2020.8,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 5.643060643060643,
|
|
"grad_norm": 0.38494643743663376,
|
|
"learning_rate": 4.414099594879116e-06,
|
|
"loss": 0.0683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04006730392575264,
|
|
"step": 6935,
|
|
"valid_targets_mean": 3713.8,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 5.6471306471306475,
|
|
"grad_norm": 0.42182454234126665,
|
|
"learning_rate": 4.388702840425747e-06,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050172992050647736,
|
|
"step": 6940,
|
|
"valid_targets_mean": 3191.9,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 5.651200651200651,
|
|
"grad_norm": 0.42723433064009736,
|
|
"learning_rate": 4.363370350639405e-06,
|
|
"loss": 0.0672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045071978121995926,
|
|
"step": 6945,
|
|
"valid_targets_mean": 3721.0,
|
|
"valid_targets_min": 3281
|
|
},
|
|
{
|
|
"epoch": 5.655270655270655,
|
|
"grad_norm": 0.4697371582365796,
|
|
"learning_rate": 4.338102229802519e-06,
|
|
"loss": 0.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037965573370456696,
|
|
"step": 6950,
|
|
"valid_targets_mean": 3360.4,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 5.65934065934066,
|
|
"grad_norm": 0.3866243720482525,
|
|
"learning_rate": 4.312898581932543e-06,
|
|
"loss": 0.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033566609025001526,
|
|
"step": 6955,
|
|
"valid_targets_mean": 3223.9,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 5.663410663410663,
|
|
"grad_norm": 0.5221435785464758,
|
|
"learning_rate": 4.287759510781531e-06,
|
|
"loss": 0.0876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05182216316461563,
|
|
"step": 6960,
|
|
"valid_targets_mean": 2352.1,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 5.6674806674806675,
|
|
"grad_norm": 0.4315745464727039,
|
|
"learning_rate": 4.262685119835681e-06,
|
|
"loss": 0.0824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04116567224264145,
|
|
"step": 6965,
|
|
"valid_targets_mean": 2612.9,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 5.671550671550672,
|
|
"grad_norm": 0.5037919896206252,
|
|
"learning_rate": 4.237675512314963e-06,
|
|
"loss": 0.112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08254027366638184,
|
|
"step": 6970,
|
|
"valid_targets_mean": 2791.8,
|
|
"valid_targets_min": 1652
|
|
},
|
|
{
|
|
"epoch": 5.675620675620675,
|
|
"grad_norm": 0.37428492562737725,
|
|
"learning_rate": 4.212730791172637e-06,
|
|
"loss": 0.0828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030793912708759308,
|
|
"step": 6975,
|
|
"valid_targets_mean": 3704.5,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 5.67969067969068,
|
|
"grad_norm": 0.734812670763522,
|
|
"learning_rate": 4.1878510590948675e-06,
|
|
"loss": 0.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030109543353319168,
|
|
"step": 6980,
|
|
"valid_targets_mean": 3204.0,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 5.683760683760684,
|
|
"grad_norm": 0.4553294615748584,
|
|
"learning_rate": 4.163036418500288e-06,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0399131178855896,
|
|
"step": 6985,
|
|
"valid_targets_mean": 3402.9,
|
|
"valid_targets_min": 2552
|
|
},
|
|
{
|
|
"epoch": 5.6878306878306875,
|
|
"grad_norm": 0.3690385553206738,
|
|
"learning_rate": 4.138286971539578e-06,
|
|
"loss": 0.0883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030925989151000977,
|
|
"step": 6990,
|
|
"valid_targets_mean": 2979.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 5.691900691900692,
|
|
"grad_norm": 0.5807379632235751,
|
|
"learning_rate": 4.113602820095046e-06,
|
|
"loss": 0.0862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05012385547161102,
|
|
"step": 6995,
|
|
"valid_targets_mean": 1296.9,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 5.695970695970696,
|
|
"grad_norm": 0.5547047028197435,
|
|
"learning_rate": 4.088984065780211e-06,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10247977077960968,
|
|
"step": 7000,
|
|
"valid_targets_mean": 3126.9,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 5.7000407000407005,
|
|
"grad_norm": 0.5537368442018135,
|
|
"learning_rate": 4.064430809939366e-06,
|
|
"loss": 0.0833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041642434895038605,
|
|
"step": 7005,
|
|
"valid_targets_mean": 2456.6,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 5.704110704110704,
|
|
"grad_norm": 0.6592126673581086,
|
|
"learning_rate": 4.039943153647199e-06,
|
|
"loss": 0.0868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04998764768242836,
|
|
"step": 7010,
|
|
"valid_targets_mean": 1027.8,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 5.708180708180708,
|
|
"grad_norm": 0.460024733008231,
|
|
"learning_rate": 4.015521197708332e-06,
|
|
"loss": 0.1726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048872776329517365,
|
|
"step": 7015,
|
|
"valid_targets_mean": 4479.8,
|
|
"valid_targets_min": 2772
|
|
},
|
|
{
|
|
"epoch": 5.712250712250713,
|
|
"grad_norm": 0.27240175134306893,
|
|
"learning_rate": 3.9911650426569435e-06,
|
|
"loss": 0.0681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022211387753486633,
|
|
"step": 7020,
|
|
"valid_targets_mean": 4843.0,
|
|
"valid_targets_min": 3778
|
|
},
|
|
{
|
|
"epoch": 5.716320716320716,
|
|
"grad_norm": 0.679333960574683,
|
|
"learning_rate": 3.966874788756334e-06,
|
|
"loss": 0.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03754306584596634,
|
|
"step": 7025,
|
|
"valid_targets_mean": 4306.0,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 5.7203907203907205,
|
|
"grad_norm": 0.4459001490448593,
|
|
"learning_rate": 3.942650535998524e-06,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04726475849747658,
|
|
"step": 7030,
|
|
"valid_targets_mean": 3579.5,
|
|
"valid_targets_min": 846
|
|
},
|
|
{
|
|
"epoch": 5.724460724460725,
|
|
"grad_norm": 0.44157380368224075,
|
|
"learning_rate": 3.9184923841038295e-06,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04266761243343353,
|
|
"step": 7035,
|
|
"valid_targets_mean": 2909.1,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 5.728530728530728,
|
|
"grad_norm": 0.4449017607427598,
|
|
"learning_rate": 3.894400432520469e-06,
|
|
"loss": 0.0689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02835097163915634,
|
|
"step": 7040,
|
|
"valid_targets_mean": 2008.5,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 5.732600732600733,
|
|
"grad_norm": 0.4703351541184157,
|
|
"learning_rate": 3.870374780424131e-06,
|
|
"loss": 0.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03980240598320961,
|
|
"step": 7045,
|
|
"valid_targets_mean": 2598.2,
|
|
"valid_targets_min": 833
|
|
},
|
|
{
|
|
"epoch": 5.736670736670737,
|
|
"grad_norm": 0.3551380809469693,
|
|
"learning_rate": 3.846415526717582e-06,
|
|
"loss": 0.0685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0330246202647686,
|
|
"step": 7050,
|
|
"valid_targets_mean": 4031.8,
|
|
"valid_targets_min": 3391
|
|
},
|
|
{
|
|
"epoch": 5.7407407407407405,
|
|
"grad_norm": 0.5831858604258909,
|
|
"learning_rate": 3.8225227700302616e-06,
|
|
"loss": 0.0848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10323013365268707,
|
|
"step": 7055,
|
|
"valid_targets_mean": 1909.9,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 5.744810744810745,
|
|
"grad_norm": 0.5066452107972234,
|
|
"learning_rate": 3.7986966087178733e-06,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03759615123271942,
|
|
"step": 7060,
|
|
"valid_targets_mean": 1908.9,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 5.748880748880749,
|
|
"grad_norm": 0.528867423320594,
|
|
"learning_rate": 3.7749371408619718e-06,
|
|
"loss": 0.0595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032433267682790756,
|
|
"step": 7065,
|
|
"valid_targets_mean": 824.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 5.752950752950753,
|
|
"grad_norm": 0.41003167533389645,
|
|
"learning_rate": 3.751244464269568e-06,
|
|
"loss": 0.0647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0331251323223114,
|
|
"step": 7070,
|
|
"valid_targets_mean": 3181.1,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 5.757020757020757,
|
|
"grad_norm": 0.48249588908464164,
|
|
"learning_rate": 3.727618676472724e-06,
|
|
"loss": 0.0628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028061306104063988,
|
|
"step": 7075,
|
|
"valid_targets_mean": 2581.4,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.761090761090761,
|
|
"grad_norm": 0.444459931043437,
|
|
"learning_rate": 3.704059874728141e-06,
|
|
"loss": 0.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04039149731397629,
|
|
"step": 7080,
|
|
"valid_targets_mean": 3461.2,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 5.765160765160765,
|
|
"grad_norm": 0.43861111289426047,
|
|
"learning_rate": 3.680568156016786e-06,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0352468304336071,
|
|
"step": 7085,
|
|
"valid_targets_mean": 3382.6,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.3633878194360417,
|
|
"learning_rate": 3.6571436170434547e-06,
|
|
"loss": 0.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02400396391749382,
|
|
"step": 7090,
|
|
"valid_targets_mean": 3966.6,
|
|
"valid_targets_min": 3279
|
|
},
|
|
{
|
|
"epoch": 5.7733007733007735,
|
|
"grad_norm": 0.6690219140055973,
|
|
"learning_rate": 3.633786354236415e-06,
|
|
"loss": 0.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035930804908275604,
|
|
"step": 7095,
|
|
"valid_targets_mean": 792.1,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 5.777370777370777,
|
|
"grad_norm": 0.6422113838762212,
|
|
"learning_rate": 3.6104964637469755e-06,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04105771332979202,
|
|
"step": 7100,
|
|
"valid_targets_mean": 2227.4,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 5.781440781440781,
|
|
"grad_norm": 0.4559712074457705,
|
|
"learning_rate": 3.5872740414491093e-06,
|
|
"loss": 0.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04004089534282684,
|
|
"step": 7105,
|
|
"valid_targets_mean": 3105.6,
|
|
"valid_targets_min": 2621
|
|
},
|
|
{
|
|
"epoch": 5.785510785510786,
|
|
"grad_norm": 0.6237273326303014,
|
|
"learning_rate": 3.564119182939052e-06,
|
|
"loss": 0.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04093822091817856,
|
|
"step": 7110,
|
|
"valid_targets_mean": 1028.0,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 5.789580789580789,
|
|
"grad_norm": 0.5635603145419109,
|
|
"learning_rate": 3.541031983534915e-06,
|
|
"loss": 0.0706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04176001995801926,
|
|
"step": 7115,
|
|
"valid_targets_mean": 1574.2,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 5.7936507936507935,
|
|
"grad_norm": 0.36319222074079344,
|
|
"learning_rate": 3.5180125382762674e-06,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042091649025678635,
|
|
"step": 7120,
|
|
"valid_targets_mean": 4135.0,
|
|
"valid_targets_min": 907
|
|
},
|
|
{
|
|
"epoch": 5.797720797720798,
|
|
"grad_norm": 0.6553513990147538,
|
|
"learning_rate": 3.4950609419237956e-06,
|
|
"loss": 0.0831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0537349097430706,
|
|
"step": 7125,
|
|
"valid_targets_mean": 3060.8,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.801790801790801,
|
|
"grad_norm": 0.3897873956243497,
|
|
"learning_rate": 3.4721772889588533e-06,
|
|
"loss": 0.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031878359615802765,
|
|
"step": 7130,
|
|
"valid_targets_mean": 3413.8,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.805860805860806,
|
|
"grad_norm": 0.7307214688735437,
|
|
"learning_rate": 3.4493616735831205e-06,
|
|
"loss": 0.0627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0403984859585762,
|
|
"step": 7135,
|
|
"valid_targets_mean": 4560.9,
|
|
"valid_targets_min": 2613
|
|
},
|
|
{
|
|
"epoch": 5.80993080993081,
|
|
"grad_norm": 0.4526517648913433,
|
|
"learning_rate": 3.4266141897181917e-06,
|
|
"loss": 0.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04063914343714714,
|
|
"step": 7140,
|
|
"valid_targets_mean": 2847.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 5.814000814000814,
|
|
"grad_norm": 0.48774825169971975,
|
|
"learning_rate": 3.4039349310051973e-06,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035396378487348557,
|
|
"step": 7145,
|
|
"valid_targets_mean": 2571.5,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 5.818070818070818,
|
|
"grad_norm": 0.4021017225060565,
|
|
"learning_rate": 3.3813239908044104e-06,
|
|
"loss": 0.085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034908998757600784,
|
|
"step": 7150,
|
|
"valid_targets_mean": 3709.6,
|
|
"valid_targets_min": 3024
|
|
},
|
|
{
|
|
"epoch": 5.822140822140822,
|
|
"grad_norm": 0.3908988537537165,
|
|
"learning_rate": 3.358781462194878e-06,
|
|
"loss": 0.0733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0368090383708477,
|
|
"step": 7155,
|
|
"valid_targets_mean": 3559.0,
|
|
"valid_targets_min": 2588
|
|
},
|
|
{
|
|
"epoch": 5.8262108262108265,
|
|
"grad_norm": 0.5606060832836967,
|
|
"learning_rate": 3.336307437974011e-06,
|
|
"loss": 0.0756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05241452157497406,
|
|
"step": 7160,
|
|
"valid_targets_mean": 913.9,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 5.83028083028083,
|
|
"grad_norm": 0.44223267919961007,
|
|
"learning_rate": 3.313902010657226e-06,
|
|
"loss": 0.08,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032404445111751556,
|
|
"step": 7165,
|
|
"valid_targets_mean": 3301.9,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 5.834350834350834,
|
|
"grad_norm": 0.3154823634128118,
|
|
"learning_rate": 3.2915652724775616e-06,
|
|
"loss": 0.0588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027957221493124962,
|
|
"step": 7170,
|
|
"valid_targets_mean": 3874.9,
|
|
"valid_targets_min": 3044
|
|
},
|
|
{
|
|
"epoch": 5.838420838420839,
|
|
"grad_norm": 0.4259827920303877,
|
|
"learning_rate": 3.2692973153852936e-06,
|
|
"loss": 0.0792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03220093250274658,
|
|
"step": 7175,
|
|
"valid_targets_mean": 2167.8,
|
|
"valid_targets_min": 539
|
|
},
|
|
{
|
|
"epoch": 5.842490842490842,
|
|
"grad_norm": 0.42833294388406135,
|
|
"learning_rate": 3.247098231047552e-06,
|
|
"loss": 0.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03128783777356148,
|
|
"step": 7180,
|
|
"valid_targets_mean": 2397.0,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 5.8465608465608465,
|
|
"grad_norm": 0.5739666740538348,
|
|
"learning_rate": 3.22496811084795e-06,
|
|
"loss": 0.0823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04366585612297058,
|
|
"step": 7185,
|
|
"valid_targets_mean": 2340.9,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 5.850630850630851,
|
|
"grad_norm": 0.41911962257836954,
|
|
"learning_rate": 3.2029070458862145e-06,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0234956294298172,
|
|
"step": 7190,
|
|
"valid_targets_mean": 3017.2,
|
|
"valid_targets_min": 822
|
|
},
|
|
{
|
|
"epoch": 5.854700854700854,
|
|
"grad_norm": 0.7353086790754305,
|
|
"learning_rate": 3.180915126977795e-06,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05614851415157318,
|
|
"step": 7195,
|
|
"valid_targets_mean": 1405.9,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.858770858770859,
|
|
"grad_norm": 0.4594492349495658,
|
|
"learning_rate": 3.158992444653497e-06,
|
|
"loss": 0.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043180473148822784,
|
|
"step": 7200,
|
|
"valid_targets_mean": 2249.2,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 5.862840862840863,
|
|
"grad_norm": 0.41266900678804913,
|
|
"learning_rate": 3.137139089159109e-06,
|
|
"loss": 0.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035402562469244,
|
|
"step": 7205,
|
|
"valid_targets_mean": 2900.5,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 5.866910866910867,
|
|
"grad_norm": 0.41094688449906835,
|
|
"learning_rate": 3.1153551504550397e-06,
|
|
"loss": 0.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02497289329767227,
|
|
"step": 7210,
|
|
"valid_targets_mean": 2432.6,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 5.870980870980871,
|
|
"grad_norm": 0.47665813139114804,
|
|
"learning_rate": 3.0936407182159333e-06,
|
|
"loss": 0.0659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02819974161684513,
|
|
"step": 7215,
|
|
"valid_targets_mean": 1303.5,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 5.875050875050875,
|
|
"grad_norm": 0.5054406457987558,
|
|
"learning_rate": 3.0719958818303165e-06,
|
|
"loss": 0.0841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032017797231674194,
|
|
"step": 7220,
|
|
"valid_targets_mean": 1762.1,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 5.8791208791208796,
|
|
"grad_norm": 0.39611619386986135,
|
|
"learning_rate": 3.050420730400212e-06,
|
|
"loss": 0.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03306104242801666,
|
|
"step": 7225,
|
|
"valid_targets_mean": 3301.8,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 5.883190883190883,
|
|
"grad_norm": 0.44746690647319465,
|
|
"learning_rate": 3.0289153527407842e-06,
|
|
"loss": 0.0613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031408607959747314,
|
|
"step": 7230,
|
|
"valid_targets_mean": 2274.2,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 5.887260887260887,
|
|
"grad_norm": 0.3541822938829694,
|
|
"learning_rate": 3.007479837379974e-06,
|
|
"loss": 0.0713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030303731560707092,
|
|
"step": 7235,
|
|
"valid_targets_mean": 5103.1,
|
|
"valid_targets_min": 3653
|
|
},
|
|
{
|
|
"epoch": 5.891330891330892,
|
|
"grad_norm": 0.35916098515876993,
|
|
"learning_rate": 2.9861142725581225e-06,
|
|
"loss": 0.068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02420351281762123,
|
|
"step": 7240,
|
|
"valid_targets_mean": 2366.5,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 5.895400895400895,
|
|
"grad_norm": 0.3952741270339185,
|
|
"learning_rate": 2.96481874622762e-06,
|
|
"loss": 0.0586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034595150500535965,
|
|
"step": 7245,
|
|
"valid_targets_mean": 3352.1,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.8994708994708995,
|
|
"grad_norm": 0.33735128445439605,
|
|
"learning_rate": 2.94359334605254e-06,
|
|
"loss": 0.082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03757286071777344,
|
|
"step": 7250,
|
|
"valid_targets_mean": 4009.6,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 5.903540903540904,
|
|
"grad_norm": 0.5708917806995089,
|
|
"learning_rate": 2.9224381594082807e-06,
|
|
"loss": 0.0917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14005272090435028,
|
|
"step": 7255,
|
|
"valid_targets_mean": 2811.9,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 5.907610907610907,
|
|
"grad_norm": 0.39275014659707425,
|
|
"learning_rate": 2.9013532733812e-06,
|
|
"loss": 0.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03683479502797127,
|
|
"step": 7260,
|
|
"valid_targets_mean": 2669.4,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.911680911680912,
|
|
"grad_norm": 0.34548880597408943,
|
|
"learning_rate": 2.880338774768263e-06,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025920607149600983,
|
|
"step": 7265,
|
|
"valid_targets_mean": 3261.9,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 5.915750915750916,
|
|
"grad_norm": 0.37519369539071246,
|
|
"learning_rate": 2.8593947500766805e-06,
|
|
"loss": 0.0921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02501959726214409,
|
|
"step": 7270,
|
|
"valid_targets_mean": 3287.2,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 5.9198209198209195,
|
|
"grad_norm": 0.5057331987244915,
|
|
"learning_rate": 2.8385212855235477e-06,
|
|
"loss": 0.0668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03711901605129242,
|
|
"step": 7275,
|
|
"valid_targets_mean": 2858.9,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 5.923890923890924,
|
|
"grad_norm": 0.5218986162294897,
|
|
"learning_rate": 2.8177184670355063e-06,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04017069190740585,
|
|
"step": 7280,
|
|
"valid_targets_mean": 2531.5,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 5.927960927960928,
|
|
"grad_norm": 0.40603198794295736,
|
|
"learning_rate": 2.7969863802483676e-06,
|
|
"loss": 0.0666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023707497864961624,
|
|
"step": 7285,
|
|
"valid_targets_mean": 2692.0,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 5.932030932030932,
|
|
"grad_norm": 0.61945163833617,
|
|
"learning_rate": 2.7763251105067813e-06,
|
|
"loss": 0.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09058065712451935,
|
|
"step": 7290,
|
|
"valid_targets_mean": 1831.5,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 5.936100936100936,
|
|
"grad_norm": 0.5987985070511739,
|
|
"learning_rate": 2.755734742863876e-06,
|
|
"loss": 0.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036716535687446594,
|
|
"step": 7295,
|
|
"valid_targets_mean": 1300.1,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 5.94017094017094,
|
|
"grad_norm": 0.4345655736128064,
|
|
"learning_rate": 2.7352153620809053e-06,
|
|
"loss": 0.0704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03189973905682564,
|
|
"step": 7300,
|
|
"valid_targets_mean": 3615.9,
|
|
"valid_targets_min": 3193
|
|
},
|
|
{
|
|
"epoch": 5.944240944240944,
|
|
"grad_norm": 0.3798658377694568,
|
|
"learning_rate": 2.7147670526268986e-06,
|
|
"loss": 0.0699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02867596037685871,
|
|
"step": 7305,
|
|
"valid_targets_mean": 3944.1,
|
|
"valid_targets_min": 3433
|
|
},
|
|
{
|
|
"epoch": 5.948310948310948,
|
|
"grad_norm": 0.4038694045993181,
|
|
"learning_rate": 2.694389898678327e-06,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027545157819986343,
|
|
"step": 7310,
|
|
"valid_targets_mean": 2874.5,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 5.9523809523809526,
|
|
"grad_norm": 0.4462098257681364,
|
|
"learning_rate": 2.674083984118736e-06,
|
|
"loss": 0.0641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02901633456349373,
|
|
"step": 7315,
|
|
"valid_targets_mean": 2262.1,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 5.956450956450956,
|
|
"grad_norm": 0.7941447901126528,
|
|
"learning_rate": 2.65384939253841e-06,
|
|
"loss": 0.0865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04680174961686134,
|
|
"step": 7320,
|
|
"valid_targets_mean": 1222.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 5.96052096052096,
|
|
"grad_norm": 0.6008702314886109,
|
|
"learning_rate": 2.6336862072340343e-06,
|
|
"loss": 0.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03380628675222397,
|
|
"step": 7325,
|
|
"valid_targets_mean": 2184.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 5.964590964590965,
|
|
"grad_norm": 0.40310929667447126,
|
|
"learning_rate": 2.6135945112083506e-06,
|
|
"loss": 0.0677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031311824917793274,
|
|
"step": 7330,
|
|
"valid_targets_mean": 3445.0,
|
|
"valid_targets_min": 1606
|
|
},
|
|
{
|
|
"epoch": 5.968660968660968,
|
|
"grad_norm": 0.5761031593235989,
|
|
"learning_rate": 2.593574387169804e-06,
|
|
"loss": 0.0682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039855875074863434,
|
|
"step": 7335,
|
|
"valid_targets_mean": 2760.0,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 5.9727309727309725,
|
|
"grad_norm": 0.3635293145604907,
|
|
"learning_rate": 2.573625917532212e-06,
|
|
"loss": 0.0731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035828523337841034,
|
|
"step": 7340,
|
|
"valid_targets_mean": 5550.0,
|
|
"valid_targets_min": 3997
|
|
},
|
|
{
|
|
"epoch": 5.976800976800977,
|
|
"grad_norm": 0.39688144504385575,
|
|
"learning_rate": 2.553749184414429e-06,
|
|
"loss": 0.0649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02166413888335228,
|
|
"step": 7345,
|
|
"valid_targets_mean": 2142.1,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 5.980870980870981,
|
|
"grad_norm": 0.6218812931078816,
|
|
"learning_rate": 2.5339442696399897e-06,
|
|
"loss": 0.0713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04698009043931961,
|
|
"step": 7350,
|
|
"valid_targets_mean": 1705.0,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 5.984940984940985,
|
|
"grad_norm": 0.6040764860465824,
|
|
"learning_rate": 2.5142112547368005e-06,
|
|
"loss": 0.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0763426423072815,
|
|
"step": 7355,
|
|
"valid_targets_mean": 2792.0,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 5.989010989010989,
|
|
"grad_norm": 0.37158486023146986,
|
|
"learning_rate": 2.494550220936773e-06,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033909812569618225,
|
|
"step": 7360,
|
|
"valid_targets_mean": 3489.0,
|
|
"valid_targets_min": 2861
|
|
},
|
|
{
|
|
"epoch": 5.993080993080993,
|
|
"grad_norm": 0.39704045184452585,
|
|
"learning_rate": 2.4749612491755158e-06,
|
|
"loss": 0.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03327007219195366,
|
|
"step": 7365,
|
|
"valid_targets_mean": 3850.4,
|
|
"valid_targets_min": 2453
|
|
},
|
|
{
|
|
"epoch": 5.997150997150997,
|
|
"grad_norm": 0.3981496985616391,
|
|
"learning_rate": 2.4554444200919882e-06,
|
|
"loss": 0.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03435596078634262,
|
|
"step": 7370,
|
|
"valid_targets_mean": 3932.9,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 6.0008140008140005,
|
|
"grad_norm": 0.9799010644276512,
|
|
"learning_rate": 2.4359998140281715e-06,
|
|
"loss": 0.1184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09912261366844177,
|
|
"step": 7375,
|
|
"valid_targets_mean": 8215.2,
|
|
"valid_targets_min": 6163
|
|
},
|
|
{
|
|
"epoch": 6.004884004884005,
|
|
"grad_norm": 0.958774868683029,
|
|
"learning_rate": 2.416627511028733e-06,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10130266845226288,
|
|
"step": 7380,
|
|
"valid_targets_mean": 8594.4,
|
|
"valid_targets_min": 6006
|
|
},
|
|
{
|
|
"epoch": 6.008954008954009,
|
|
"grad_norm": 0.7944310104614162,
|
|
"learning_rate": 2.39732759084071e-06,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07904568314552307,
|
|
"step": 7385,
|
|
"valid_targets_mean": 3578.4,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 6.013024013024013,
|
|
"grad_norm": 0.6679727135866469,
|
|
"learning_rate": 2.3781001329131593e-06,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08894851803779602,
|
|
"step": 7390,
|
|
"valid_targets_mean": 7019.9,
|
|
"valid_targets_min": 4985
|
|
},
|
|
{
|
|
"epoch": 6.017094017094017,
|
|
"grad_norm": 0.5487601824765074,
|
|
"learning_rate": 2.358945216396855e-06,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08587461709976196,
|
|
"step": 7395,
|
|
"valid_targets_mean": 6956.6,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 6.021164021164021,
|
|
"grad_norm": 0.4456534450702924,
|
|
"learning_rate": 2.3398629201439427e-06,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08091467618942261,
|
|
"step": 7400,
|
|
"valid_targets_mean": 7361.6,
|
|
"valid_targets_min": 4961
|
|
},
|
|
{
|
|
"epoch": 6.025234025234025,
|
|
"grad_norm": 0.49514854465976493,
|
|
"learning_rate": 2.3208533227076257e-06,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10835494101047516,
|
|
"step": 7405,
|
|
"valid_targets_mean": 7058.5,
|
|
"valid_targets_min": 5985
|
|
},
|
|
{
|
|
"epoch": 6.029304029304029,
|
|
"grad_norm": 0.43486218825382905,
|
|
"learning_rate": 2.3019165023418433e-06,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09653938561677933,
|
|
"step": 7410,
|
|
"valid_targets_mean": 7664.2,
|
|
"valid_targets_min": 5169
|
|
},
|
|
{
|
|
"epoch": 6.0333740333740336,
|
|
"grad_norm": 0.4375911545615398,
|
|
"learning_rate": 2.2830525370009405e-06,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08376707136631012,
|
|
"step": 7415,
|
|
"valid_targets_mean": 7111.8,
|
|
"valid_targets_min": 5348
|
|
},
|
|
{
|
|
"epoch": 6.037444037444037,
|
|
"grad_norm": 0.48136548478281455,
|
|
"learning_rate": 2.2642615043393512e-06,
|
|
"loss": 0.1516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07133744657039642,
|
|
"step": 7420,
|
|
"valid_targets_mean": 4238.9,
|
|
"valid_targets_min": 152
|
|
},
|
|
{
|
|
"epoch": 6.041514041514041,
|
|
"grad_norm": 0.4381144318946382,
|
|
"learning_rate": 2.2455434817112853e-06,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08667995780706406,
|
|
"step": 7425,
|
|
"valid_targets_mean": 7199.4,
|
|
"valid_targets_min": 4258
|
|
},
|
|
{
|
|
"epoch": 6.045584045584046,
|
|
"grad_norm": 0.4042301802869176,
|
|
"learning_rate": 2.226898546170384e-06,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08247347176074982,
|
|
"step": 7430,
|
|
"valid_targets_mean": 7243.4,
|
|
"valid_targets_min": 5831
|
|
},
|
|
{
|
|
"epoch": 6.04965404965405,
|
|
"grad_norm": 0.401406369618756,
|
|
"learning_rate": 2.2083267744694494e-06,
|
|
"loss": 0.1616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08167104423046112,
|
|
"step": 7435,
|
|
"valid_targets_mean": 6950.6,
|
|
"valid_targets_min": 3862
|
|
},
|
|
{
|
|
"epoch": 6.0537240537240535,
|
|
"grad_norm": 0.3945925382036933,
|
|
"learning_rate": 2.1898282430600727e-06,
|
|
"loss": 0.1608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08647415041923523,
|
|
"step": 7440,
|
|
"valid_targets_mean": 6991.9,
|
|
"valid_targets_min": 5696
|
|
},
|
|
{
|
|
"epoch": 6.057794057794058,
|
|
"grad_norm": 0.42315972384850226,
|
|
"learning_rate": 2.171403028092367e-06,
|
|
"loss": 0.1573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0863867774605751,
|
|
"step": 7445,
|
|
"valid_targets_mean": 6963.5,
|
|
"valid_targets_min": 4947
|
|
},
|
|
{
|
|
"epoch": 6.061864061864062,
|
|
"grad_norm": 0.39104627702301664,
|
|
"learning_rate": 2.153051205414631e-06,
|
|
"loss": 0.1591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07545962929725647,
|
|
"step": 7450,
|
|
"valid_targets_mean": 6436.9,
|
|
"valid_targets_min": 4988
|
|
},
|
|
{
|
|
"epoch": 6.065934065934066,
|
|
"grad_norm": 0.4009252559269369,
|
|
"learning_rate": 2.1347728505730392e-06,
|
|
"loss": 0.1616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07987205684185028,
|
|
"step": 7455,
|
|
"valid_targets_mean": 7518.0,
|
|
"valid_targets_min": 5476
|
|
},
|
|
{
|
|
"epoch": 6.07000407000407,
|
|
"grad_norm": 0.380485131548382,
|
|
"learning_rate": 2.116568038811333e-06,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07481364905834198,
|
|
"step": 7460,
|
|
"valid_targets_mean": 7422.6,
|
|
"valid_targets_min": 5226
|
|
},
|
|
{
|
|
"epoch": 6.074074074074074,
|
|
"grad_norm": 0.4378435227613099,
|
|
"learning_rate": 2.098436845070504e-06,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10386194288730621,
|
|
"step": 7465,
|
|
"valid_targets_mean": 7080.0,
|
|
"valid_targets_min": 3753
|
|
},
|
|
{
|
|
"epoch": 6.078144078144078,
|
|
"grad_norm": 0.4804949566456428,
|
|
"learning_rate": 2.080379343988497e-06,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.093023382127285,
|
|
"step": 7470,
|
|
"valid_targets_mean": 6064.6,
|
|
"valid_targets_min": 5161
|
|
},
|
|
{
|
|
"epoch": 6.082214082214082,
|
|
"grad_norm": 0.40760982947259294,
|
|
"learning_rate": 2.0623956098999056e-06,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07654588669538498,
|
|
"step": 7475,
|
|
"valid_targets_mean": 7388.1,
|
|
"valid_targets_min": 5795
|
|
},
|
|
{
|
|
"epoch": 6.086284086284087,
|
|
"grad_norm": 0.3959750946787557,
|
|
"learning_rate": 2.044485716835638e-06,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07424226403236389,
|
|
"step": 7480,
|
|
"valid_targets_mean": 7696.0,
|
|
"valid_targets_min": 4658
|
|
},
|
|
{
|
|
"epoch": 6.09035409035409,
|
|
"grad_norm": 0.40969800106923865,
|
|
"learning_rate": 2.026649738522648e-06,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08557029068470001,
|
|
"step": 7485,
|
|
"valid_targets_mean": 7799.1,
|
|
"valid_targets_min": 5508
|
|
},
|
|
{
|
|
"epoch": 6.094424094424094,
|
|
"grad_norm": 0.4322454122605302,
|
|
"learning_rate": 2.00888774838361e-06,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08102475851774216,
|
|
"step": 7490,
|
|
"valid_targets_mean": 7341.6,
|
|
"valid_targets_min": 6454
|
|
},
|
|
{
|
|
"epoch": 6.098494098494099,
|
|
"grad_norm": 0.4243940059827788,
|
|
"learning_rate": 1.9911998195366267e-06,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07009638100862503,
|
|
"step": 7495,
|
|
"valid_targets_mean": 6818.0,
|
|
"valid_targets_min": 4392
|
|
},
|
|
{
|
|
"epoch": 6.102564102564102,
|
|
"grad_norm": 0.3901538287752902,
|
|
"learning_rate": 1.9735860247949245e-06,
|
|
"loss": 0.1597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07060252130031586,
|
|
"step": 7500,
|
|
"valid_targets_mean": 7857.6,
|
|
"valid_targets_min": 5394
|
|
},
|
|
{
|
|
"epoch": 6.1066341066341066,
|
|
"grad_norm": 0.3579631569965471,
|
|
"learning_rate": 1.956046436666539e-06,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06699617207050323,
|
|
"step": 7505,
|
|
"valid_targets_mean": 8314.1,
|
|
"valid_targets_min": 4837
|
|
},
|
|
{
|
|
"epoch": 6.110704110704111,
|
|
"grad_norm": 0.3428888972719672,
|
|
"learning_rate": 1.938581127354049e-06,
|
|
"loss": 0.1402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05949755012989044,
|
|
"step": 7510,
|
|
"valid_targets_mean": 7368.2,
|
|
"valid_targets_min": 6027
|
|
},
|
|
{
|
|
"epoch": 6.114774114774114,
|
|
"grad_norm": 0.435155549282921,
|
|
"learning_rate": 1.92119016875425e-06,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07999107986688614,
|
|
"step": 7515,
|
|
"valid_targets_mean": 6435.0,
|
|
"valid_targets_min": 4238
|
|
},
|
|
{
|
|
"epoch": 6.118844118844119,
|
|
"grad_norm": 1.1395430138108782,
|
|
"learning_rate": 1.903873632457871e-06,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03771523758769035,
|
|
"step": 7520,
|
|
"valid_targets_mean": 247.8,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 6.122914122914123,
|
|
"grad_norm": 0.4163271285571281,
|
|
"learning_rate": 1.8866315897492792e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07711055874824524,
|
|
"step": 7525,
|
|
"valid_targets_mean": 6807.0,
|
|
"valid_targets_min": 5165
|
|
},
|
|
{
|
|
"epoch": 6.1269841269841265,
|
|
"grad_norm": 0.4805973336300679,
|
|
"learning_rate": 1.8694641116061895e-06,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08931201696395874,
|
|
"step": 7530,
|
|
"valid_targets_mean": 6989.9,
|
|
"valid_targets_min": 5104
|
|
},
|
|
{
|
|
"epoch": 6.131054131054131,
|
|
"grad_norm": 0.48380408719281154,
|
|
"learning_rate": 1.8523712686993644e-06,
|
|
"loss": 0.1694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09077896177768707,
|
|
"step": 7535,
|
|
"valid_targets_mean": 6692.2,
|
|
"valid_targets_min": 5068
|
|
},
|
|
{
|
|
"epoch": 6.135124135124135,
|
|
"grad_norm": 0.4478468576523544,
|
|
"learning_rate": 1.8353531313923213e-06,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09867697954177856,
|
|
"step": 7540,
|
|
"valid_targets_mean": 7419.6,
|
|
"valid_targets_min": 5578
|
|
},
|
|
{
|
|
"epoch": 6.13919413919414,
|
|
"grad_norm": 0.41186611739704415,
|
|
"learning_rate": 1.818409769741054e-06,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08206415176391602,
|
|
"step": 7545,
|
|
"valid_targets_mean": 7536.5,
|
|
"valid_targets_min": 5075
|
|
},
|
|
{
|
|
"epoch": 6.143264143264143,
|
|
"grad_norm": 0.3768181446141734,
|
|
"learning_rate": 1.8015412534937438e-06,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07646715641021729,
|
|
"step": 7550,
|
|
"valid_targets_mean": 7221.5,
|
|
"valid_targets_min": 5841
|
|
},
|
|
{
|
|
"epoch": 6.147334147334147,
|
|
"grad_norm": 0.38647441084901085,
|
|
"learning_rate": 1.7847476520904528e-06,
|
|
"loss": 0.1603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07809443771839142,
|
|
"step": 7555,
|
|
"valid_targets_mean": 7287.5,
|
|
"valid_targets_min": 5151
|
|
},
|
|
{
|
|
"epoch": 6.151404151404152,
|
|
"grad_norm": 0.40816468239190296,
|
|
"learning_rate": 1.7680290346628659e-06,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0784003883600235,
|
|
"step": 7560,
|
|
"valid_targets_mean": 6738.4,
|
|
"valid_targets_min": 4042
|
|
},
|
|
{
|
|
"epoch": 6.155474155474155,
|
|
"grad_norm": 0.4323305496400144,
|
|
"learning_rate": 1.7513854700339884e-06,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08970016241073608,
|
|
"step": 7565,
|
|
"valid_targets_mean": 6331.0,
|
|
"valid_targets_min": 5496
|
|
},
|
|
{
|
|
"epoch": 6.15954415954416,
|
|
"grad_norm": 0.43997280921953213,
|
|
"learning_rate": 1.7348170267178655e-06,
|
|
"loss": 0.1631,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07909651845693588,
|
|
"step": 7570,
|
|
"valid_targets_mean": 6271.1,
|
|
"valid_targets_min": 4695
|
|
},
|
|
{
|
|
"epoch": 6.163614163614164,
|
|
"grad_norm": 0.4492944206042751,
|
|
"learning_rate": 1.7183237729193081e-06,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08394847810268402,
|
|
"step": 7575,
|
|
"valid_targets_mean": 5790.8,
|
|
"valid_targets_min": 5021
|
|
},
|
|
{
|
|
"epoch": 6.167684167684167,
|
|
"grad_norm": 0.4731995030501333,
|
|
"learning_rate": 1.7019057765335945e-06,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0583474338054657,
|
|
"step": 7580,
|
|
"valid_targets_mean": 3386.8,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 6.171754171754172,
|
|
"grad_norm": 0.4167788460938727,
|
|
"learning_rate": 1.6855631051462084e-06,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0789489597082138,
|
|
"step": 7585,
|
|
"valid_targets_mean": 6240.5,
|
|
"valid_targets_min": 5093
|
|
},
|
|
{
|
|
"epoch": 6.175824175824176,
|
|
"grad_norm": 0.43712455668316696,
|
|
"learning_rate": 1.6692958260325599e-06,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08045420050621033,
|
|
"step": 7590,
|
|
"valid_targets_mean": 6329.5,
|
|
"valid_targets_min": 4589
|
|
},
|
|
{
|
|
"epoch": 6.1798941798941796,
|
|
"grad_norm": 0.402788614634297,
|
|
"learning_rate": 1.6531040061576909e-06,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08553263545036316,
|
|
"step": 7595,
|
|
"valid_targets_mean": 7589.2,
|
|
"valid_targets_min": 4963
|
|
},
|
|
{
|
|
"epoch": 6.183964183964184,
|
|
"grad_norm": 0.41963035365571566,
|
|
"learning_rate": 1.6369877121760237e-06,
|
|
"loss": 0.158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06892232596874237,
|
|
"step": 7600,
|
|
"valid_targets_mean": 5577.9,
|
|
"valid_targets_min": 4458
|
|
},
|
|
{
|
|
"epoch": 6.188034188034188,
|
|
"grad_norm": 0.45017103457098034,
|
|
"learning_rate": 1.6209470104310666e-06,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0763903558254242,
|
|
"step": 7605,
|
|
"valid_targets_mean": 5432.5,
|
|
"valid_targets_min": 4433
|
|
},
|
|
{
|
|
"epoch": 6.192104192104192,
|
|
"grad_norm": 0.4536922039185944,
|
|
"learning_rate": 1.604981966955157e-06,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08348037302494049,
|
|
"step": 7610,
|
|
"valid_targets_mean": 6361.8,
|
|
"valid_targets_min": 5201
|
|
},
|
|
{
|
|
"epoch": 6.196174196174196,
|
|
"grad_norm": 0.4307255875359923,
|
|
"learning_rate": 1.5890926474691682e-06,
|
|
"loss": 0.1609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07572345435619354,
|
|
"step": 7615,
|
|
"valid_targets_mean": 5824.4,
|
|
"valid_targets_min": 5129
|
|
},
|
|
{
|
|
"epoch": 6.2002442002442,
|
|
"grad_norm": 0.7193073806157995,
|
|
"learning_rate": 1.5732791173822626e-06,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0709250420331955,
|
|
"step": 7620,
|
|
"valid_targets_mean": 1637.6,
|
|
"valid_targets_min": 137
|
|
},
|
|
{
|
|
"epoch": 6.204314204314204,
|
|
"grad_norm": 0.418121085820752,
|
|
"learning_rate": 1.55754144179161e-06,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07970662415027618,
|
|
"step": 7625,
|
|
"valid_targets_mean": 6474.6,
|
|
"valid_targets_min": 4751
|
|
},
|
|
{
|
|
"epoch": 6.208384208384208,
|
|
"grad_norm": 0.41818652414348384,
|
|
"learning_rate": 1.5418796854821239e-06,
|
|
"loss": 0.1545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08583327382802963,
|
|
"step": 7630,
|
|
"valid_targets_mean": 7186.2,
|
|
"valid_targets_min": 5358
|
|
},
|
|
{
|
|
"epoch": 6.212454212454213,
|
|
"grad_norm": 0.4079409474648988,
|
|
"learning_rate": 1.52629391292618e-06,
|
|
"loss": 0.1608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08047682046890259,
|
|
"step": 7635,
|
|
"valid_targets_mean": 7126.8,
|
|
"valid_targets_min": 4078
|
|
},
|
|
{
|
|
"epoch": 6.216524216524217,
|
|
"grad_norm": 0.41113059492418275,
|
|
"learning_rate": 1.5107841882833785e-06,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07577785849571228,
|
|
"step": 7640,
|
|
"valid_targets_mean": 7022.1,
|
|
"valid_targets_min": 4940
|
|
},
|
|
{
|
|
"epoch": 6.22059422059422,
|
|
"grad_norm": 0.4280680246313017,
|
|
"learning_rate": 1.4953505754002562e-06,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08161963522434235,
|
|
"step": 7645,
|
|
"valid_targets_mean": 5900.6,
|
|
"valid_targets_min": 4779
|
|
},
|
|
{
|
|
"epoch": 6.224664224664225,
|
|
"grad_norm": 0.40255377466943804,
|
|
"learning_rate": 1.4799931378100386e-06,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08172336220741272,
|
|
"step": 7650,
|
|
"valid_targets_mean": 6290.1,
|
|
"valid_targets_min": 4620
|
|
},
|
|
{
|
|
"epoch": 6.228734228734229,
|
|
"grad_norm": 0.8420012750324607,
|
|
"learning_rate": 1.4647119387323593e-06,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05620827525854111,
|
|
"step": 7655,
|
|
"valid_targets_mean": 1162.2,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 6.232804232804233,
|
|
"grad_norm": 0.8176601544559853,
|
|
"learning_rate": 1.4495070410730238e-06,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06722263246774673,
|
|
"step": 7660,
|
|
"valid_targets_mean": 1500.6,
|
|
"valid_targets_min": 957
|
|
},
|
|
{
|
|
"epoch": 6.236874236874237,
|
|
"grad_norm": 0.7199859456501606,
|
|
"learning_rate": 1.4343785074237393e-06,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05805957317352295,
|
|
"step": 7665,
|
|
"valid_targets_mean": 1371.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 6.240944240944241,
|
|
"grad_norm": 0.8078058347677376,
|
|
"learning_rate": 1.4193264000618511e-06,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06448983401060104,
|
|
"step": 7670,
|
|
"valid_targets_mean": 1433.6,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 6.245014245014245,
|
|
"grad_norm": 0.7165323289126707,
|
|
"learning_rate": 1.4043507809500923e-06,
|
|
"loss": 0.1268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06178100407123566,
|
|
"step": 7675,
|
|
"valid_targets_mean": 1578.9,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 6.249084249084249,
|
|
"grad_norm": 0.7277559295792052,
|
|
"learning_rate": 1.3894517117363294e-06,
|
|
"loss": 0.1248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060996219515800476,
|
|
"step": 7680,
|
|
"valid_targets_mean": 1746.4,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 6.253154253154253,
|
|
"grad_norm": 0.753037215595642,
|
|
"learning_rate": 1.3746292537533145e-06,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04994869977235794,
|
|
"step": 7685,
|
|
"valid_targets_mean": 1309.9,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 6.257224257224257,
|
|
"grad_norm": 0.8053654916578108,
|
|
"learning_rate": 1.3598834680184124e-06,
|
|
"loss": 0.1134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06560010462999344,
|
|
"step": 7690,
|
|
"valid_targets_mean": 1368.6,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 6.261294261294261,
|
|
"grad_norm": 0.795230502585329,
|
|
"learning_rate": 1.3452144152333734e-06,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06102893874049187,
|
|
"step": 7695,
|
|
"valid_targets_mean": 1491.8,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 6.265364265364266,
|
|
"grad_norm": 0.7862793929838975,
|
|
"learning_rate": 1.330622155784067e-06,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07237902283668518,
|
|
"step": 7700,
|
|
"valid_targets_mean": 1994.0,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 6.269434269434269,
|
|
"grad_norm": 0.8055709736092495,
|
|
"learning_rate": 1.316106749740249e-06,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05973295122385025,
|
|
"step": 7705,
|
|
"valid_targets_mean": 1523.4,
|
|
"valid_targets_min": 896
|
|
},
|
|
{
|
|
"epoch": 6.273504273504273,
|
|
"grad_norm": 1.1291501131017656,
|
|
"learning_rate": 1.3016682568552907e-06,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05998287722468376,
|
|
"step": 7710,
|
|
"valid_targets_mean": 1790.1,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 6.277574277574278,
|
|
"grad_norm": 0.7166308018469184,
|
|
"learning_rate": 1.2873067365659519e-06,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05086486041545868,
|
|
"step": 7715,
|
|
"valid_targets_mean": 1394.0,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.281644281644281,
|
|
"grad_norm": 1.0582670753983305,
|
|
"learning_rate": 1.273022247992135e-06,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05162067711353302,
|
|
"step": 7720,
|
|
"valid_targets_mean": 1129.0,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 6.285714285714286,
|
|
"grad_norm": 0.7611563612526064,
|
|
"learning_rate": 1.2588148499366405e-06,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054125986993312836,
|
|
"step": 7725,
|
|
"valid_targets_mean": 1466.9,
|
|
"valid_targets_min": 642
|
|
},
|
|
{
|
|
"epoch": 6.28978428978429,
|
|
"grad_norm": 0.7818498664594019,
|
|
"learning_rate": 1.2446846008849046e-06,
|
|
"loss": 0.1154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04109550267457962,
|
|
"step": 7730,
|
|
"valid_targets_mean": 1129.4,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 6.293854293854293,
|
|
"grad_norm": 0.8675512810239565,
|
|
"learning_rate": 1.2306315590047912e-06,
|
|
"loss": 0.1264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053088150918483734,
|
|
"step": 7735,
|
|
"valid_targets_mean": 1322.8,
|
|
"valid_targets_min": 631
|
|
},
|
|
{
|
|
"epoch": 6.297924297924298,
|
|
"grad_norm": 0.9623907919932141,
|
|
"learning_rate": 1.2166557821463387e-06,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0787411630153656,
|
|
"step": 7740,
|
|
"valid_targets_mean": 1612.0,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 6.301994301994302,
|
|
"grad_norm": 0.7567121449013423,
|
|
"learning_rate": 1.2027573278415129e-06,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046164147555828094,
|
|
"step": 7745,
|
|
"valid_targets_mean": 1402.1,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 6.3060643060643065,
|
|
"grad_norm": 0.7882207258869738,
|
|
"learning_rate": 1.188936253303976e-06,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051682114601135254,
|
|
"step": 7750,
|
|
"valid_targets_mean": 1337.8,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 6.31013431013431,
|
|
"grad_norm": 0.8346483716380687,
|
|
"learning_rate": 1.1751926154288572e-06,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06084754317998886,
|
|
"step": 7755,
|
|
"valid_targets_mean": 1525.5,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 6.314204314204314,
|
|
"grad_norm": 0.7952416348423385,
|
|
"learning_rate": 1.1615264707925178e-06,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056195251643657684,
|
|
"step": 7760,
|
|
"valid_targets_mean": 1441.0,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.318274318274319,
|
|
"grad_norm": 0.7893730311403948,
|
|
"learning_rate": 1.1479378756523008e-06,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03072535991668701,
|
|
"step": 7765,
|
|
"valid_targets_mean": 847.9,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 6.322344322344322,
|
|
"grad_norm": 1.824214283992368,
|
|
"learning_rate": 1.1344268859463292e-06,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05747454613447189,
|
|
"step": 7770,
|
|
"valid_targets_mean": 1524.9,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 6.326414326414326,
|
|
"grad_norm": 0.7591661900171986,
|
|
"learning_rate": 1.1209935572932485e-06,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060703910887241364,
|
|
"step": 7775,
|
|
"valid_targets_mean": 1779.4,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 6.330484330484331,
|
|
"grad_norm": 0.7391670644699448,
|
|
"learning_rate": 1.1076379449920105e-06,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05903908237814903,
|
|
"step": 7780,
|
|
"valid_targets_mean": 1741.2,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 6.334554334554334,
|
|
"grad_norm": 0.8519469218904264,
|
|
"learning_rate": 1.0943601040216522e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0688420832157135,
|
|
"step": 7785,
|
|
"valid_targets_mean": 1875.6,
|
|
"valid_targets_min": 1107
|
|
},
|
|
{
|
|
"epoch": 6.338624338624339,
|
|
"grad_norm": 0.8426833803284528,
|
|
"learning_rate": 1.0811600890410467e-06,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05542324483394623,
|
|
"step": 7790,
|
|
"valid_targets_mean": 1419.5,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 6.342694342694343,
|
|
"grad_norm": 0.7888806915864902,
|
|
"learning_rate": 1.0680379543887032e-06,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07356111705303192,
|
|
"step": 7795,
|
|
"valid_targets_mean": 1968.2,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 6.346764346764346,
|
|
"grad_norm": 0.8256011446110055,
|
|
"learning_rate": 1.054993754082534e-06,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05775183066725731,
|
|
"step": 7800,
|
|
"valid_targets_mean": 1568.9,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 6.350834350834351,
|
|
"grad_norm": 0.8120968604513121,
|
|
"learning_rate": 1.0420275418196168e-06,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06070208176970482,
|
|
"step": 7805,
|
|
"valid_targets_mean": 1645.2,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 6.354904354904355,
|
|
"grad_norm": 0.8680173034124377,
|
|
"learning_rate": 1.0291393709760044e-06,
|
|
"loss": 0.116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057425472885370255,
|
|
"step": 7810,
|
|
"valid_targets_mean": 1574.9,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 6.358974358974359,
|
|
"grad_norm": 0.8493112270869786,
|
|
"learning_rate": 1.0163292946064774e-06,
|
|
"loss": 0.1114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048819221556186676,
|
|
"step": 7815,
|
|
"valid_targets_mean": 1121.6,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 6.363044363044363,
|
|
"grad_norm": 0.7714277556830921,
|
|
"learning_rate": 1.0035973654443466e-06,
|
|
"loss": 0.1085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05912599712610245,
|
|
"step": 7820,
|
|
"valid_targets_mean": 1818.1,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 6.367114367114367,
|
|
"grad_norm": 0.8847457566201997,
|
|
"learning_rate": 9.909436359012182e-07,
|
|
"loss": 0.1039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05455390363931656,
|
|
"step": 7825,
|
|
"valid_targets_mean": 1417.8,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 6.371184371184372,
|
|
"grad_norm": 0.8899939918613994,
|
|
"learning_rate": 9.783681580667825e-07,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04446838051080704,
|
|
"step": 7830,
|
|
"valid_targets_mean": 1051.0,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 6.375254375254375,
|
|
"grad_norm": 0.7355334459190873,
|
|
"learning_rate": 9.658709837086144e-07,
|
|
"loss": 0.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05160418897867203,
|
|
"step": 7835,
|
|
"valid_targets_mean": 1431.1,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 6.3793243793243795,
|
|
"grad_norm": 0.7838797957111434,
|
|
"learning_rate": 9.534521642719375e-07,
|
|
"loss": 0.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05564986169338226,
|
|
"step": 7840,
|
|
"valid_targets_mean": 1486.9,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 6.383394383394384,
|
|
"grad_norm": 0.7965717627384523,
|
|
"learning_rate": 9.411117508794309e-07,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06882425397634506,
|
|
"step": 7845,
|
|
"valid_targets_mean": 1734.5,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 6.387464387464387,
|
|
"grad_norm": 0.8893222531203173,
|
|
"learning_rate": 9.288497943310082e-07,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06830059736967087,
|
|
"step": 7850,
|
|
"valid_targets_mean": 1712.5,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 6.391534391534392,
|
|
"grad_norm": 0.8161513544565203,
|
|
"learning_rate": 9.166663451036118e-07,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06825710833072662,
|
|
"step": 7855,
|
|
"valid_targets_mean": 1723.8,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 6.395604395604396,
|
|
"grad_norm": 0.8646112978766813,
|
|
"learning_rate": 9.045614533510072e-07,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06607664376497269,
|
|
"step": 7860,
|
|
"valid_targets_mean": 1633.1,
|
|
"valid_targets_min": 1019
|
|
},
|
|
{
|
|
"epoch": 6.399674399674399,
|
|
"grad_norm": 0.7626824305794315,
|
|
"learning_rate": 8.925351689035722e-07,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04722012206912041,
|
|
"step": 7865,
|
|
"valid_targets_mean": 1376.1,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 6.403744403744404,
|
|
"grad_norm": 0.840691332087647,
|
|
"learning_rate": 8.80587541268092e-07,
|
|
"loss": 0.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05784667283296585,
|
|
"step": 7870,
|
|
"valid_targets_mean": 1576.5,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 6.407814407814408,
|
|
"grad_norm": 0.8207659704590586,
|
|
"learning_rate": 8.687186196275643e-07,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06569144874811172,
|
|
"step": 7875,
|
|
"valid_targets_mean": 1615.8,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 6.411884411884412,
|
|
"grad_norm": 0.9326833663597143,
|
|
"learning_rate": 8.56928452840975e-07,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06420698761940002,
|
|
"step": 7880,
|
|
"valid_targets_mean": 1881.2,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 6.415954415954416,
|
|
"grad_norm": 0.8554904869093989,
|
|
"learning_rate": 8.452170894431267e-07,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05731596797704697,
|
|
"step": 7885,
|
|
"valid_targets_mean": 1367.2,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 6.42002442002442,
|
|
"grad_norm": 0.8406972937569153,
|
|
"learning_rate": 8.335845776444218e-07,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04333464428782463,
|
|
"step": 7890,
|
|
"valid_targets_mean": 1174.9,
|
|
"valid_targets_min": 679
|
|
},
|
|
{
|
|
"epoch": 6.424094424094424,
|
|
"grad_norm": 0.8085860922583763,
|
|
"learning_rate": 8.22030965330658e-07,
|
|
"loss": 0.1085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054415322840213776,
|
|
"step": 7895,
|
|
"valid_targets_mean": 1556.2,
|
|
"valid_targets_min": 504
|
|
},
|
|
{
|
|
"epoch": 6.428164428164428,
|
|
"grad_norm": 0.7792125188992498,
|
|
"learning_rate": 8.10556300062848e-07,
|
|
"loss": 0.1119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06440182030200958,
|
|
"step": 7900,
|
|
"valid_targets_mean": 1885.0,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 6.4322344322344325,
|
|
"grad_norm": 0.9287716280023119,
|
|
"learning_rate": 7.991606290770093e-07,
|
|
"loss": 0.1095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041307710111141205,
|
|
"step": 7905,
|
|
"valid_targets_mean": 1306.1,
|
|
"valid_targets_min": 661
|
|
},
|
|
{
|
|
"epoch": 6.436304436304436,
|
|
"grad_norm": 0.866709090318928,
|
|
"learning_rate": 7.878439992839815e-07,
|
|
"loss": 0.1149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0448264516890049,
|
|
"step": 7910,
|
|
"valid_targets_mean": 1330.1,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 6.44037444037444,
|
|
"grad_norm": 0.7657298637604557,
|
|
"learning_rate": 7.766064572692178e-07,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04386502504348755,
|
|
"step": 7915,
|
|
"valid_targets_mean": 1465.0,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 6.444444444444445,
|
|
"grad_norm": 0.8273726055440597,
|
|
"learning_rate": 7.65448049292612e-07,
|
|
"loss": 0.1128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05073707178235054,
|
|
"step": 7920,
|
|
"valid_targets_mean": 1428.0,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 6.448514448514448,
|
|
"grad_norm": 0.8400987990779193,
|
|
"learning_rate": 7.54368821288296e-07,
|
|
"loss": 0.1093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06966409087181091,
|
|
"step": 7925,
|
|
"valid_targets_mean": 1633.1,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 6.4525844525844525,
|
|
"grad_norm": 0.781744242763993,
|
|
"learning_rate": 7.433688188644517e-07,
|
|
"loss": 0.1127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04392581433057785,
|
|
"step": 7930,
|
|
"valid_targets_mean": 1246.5,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 6.456654456654457,
|
|
"grad_norm": 0.8099440855665998,
|
|
"learning_rate": 7.324480873031281e-07,
|
|
"loss": 0.1119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05493423342704773,
|
|
"step": 7935,
|
|
"valid_targets_mean": 1903.6,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 6.46072446072446,
|
|
"grad_norm": 0.8303935015673607,
|
|
"learning_rate": 7.216066715600489e-07,
|
|
"loss": 0.1,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04634704440832138,
|
|
"step": 7940,
|
|
"valid_targets_mean": 1391.6,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 6.464794464794465,
|
|
"grad_norm": 0.8580005093462261,
|
|
"learning_rate": 7.1084461626443e-07,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053560368716716766,
|
|
"step": 7945,
|
|
"valid_targets_mean": 1295.0,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 6.468864468864469,
|
|
"grad_norm": 0.7965111105826138,
|
|
"learning_rate": 7.001619657187996e-07,
|
|
"loss": 0.1173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04019693285226822,
|
|
"step": 7950,
|
|
"valid_targets_mean": 1414.8,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 6.472934472934473,
|
|
"grad_norm": 0.8678383738827472,
|
|
"learning_rate": 6.895587638988077e-07,
|
|
"loss": 0.1022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044681429862976074,
|
|
"step": 7955,
|
|
"valid_targets_mean": 1059.4,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 6.477004477004477,
|
|
"grad_norm": 0.8017716642473961,
|
|
"learning_rate": 6.790350544530522e-07,
|
|
"loss": 0.1093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06314340978860855,
|
|
"step": 7960,
|
|
"valid_targets_mean": 1634.2,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 6.481074481074481,
|
|
"grad_norm": 0.8064315437918422,
|
|
"learning_rate": 6.685908807028996e-07,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04939217120409012,
|
|
"step": 7965,
|
|
"valid_targets_mean": 1412.5,
|
|
"valid_targets_min": 902
|
|
},
|
|
{
|
|
"epoch": 6.4851444851444855,
|
|
"grad_norm": 0.8311002011404962,
|
|
"learning_rate": 6.582262856423005e-07,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05105116218328476,
|
|
"step": 7970,
|
|
"valid_targets_mean": 1558.8,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 6.489214489214489,
|
|
"grad_norm": 0.8917053809677671,
|
|
"learning_rate": 6.479413119376143e-07,
|
|
"loss": 0.1131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05821871757507324,
|
|
"step": 7975,
|
|
"valid_targets_mean": 1441.2,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 6.493284493284493,
|
|
"grad_norm": 0.7886247369918117,
|
|
"learning_rate": 6.377360019274425e-07,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04788671433925629,
|
|
"step": 7980,
|
|
"valid_targets_mean": 1449.8,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 6.497354497354498,
|
|
"grad_norm": 0.8165623190693444,
|
|
"learning_rate": 6.276103976224401e-07,
|
|
"loss": 0.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05484423413872719,
|
|
"step": 7985,
|
|
"valid_targets_mean": 1487.4,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 6.501424501424501,
|
|
"grad_norm": 0.9134655853507222,
|
|
"learning_rate": 6.175645407051467e-07,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05332685261964798,
|
|
"step": 7990,
|
|
"valid_targets_mean": 1421.4,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 6.5054945054945055,
|
|
"grad_norm": 0.8551082037730058,
|
|
"learning_rate": 6.075984725298245e-07,
|
|
"loss": 0.1151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06440360844135284,
|
|
"step": 7995,
|
|
"valid_targets_mean": 1822.6,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 6.50956450956451,
|
|
"grad_norm": 0.8385108112647772,
|
|
"learning_rate": 5.977122341222852e-07,
|
|
"loss": 0.1129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046396177262067795,
|
|
"step": 8000,
|
|
"valid_targets_mean": 1327.4,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 6.513634513634513,
|
|
"grad_norm": 0.8465014974527149,
|
|
"learning_rate": 5.879058661797055e-07,
|
|
"loss": 0.1111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05752868950366974,
|
|
"step": 8005,
|
|
"valid_targets_mean": 1823.8,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 6.517704517704518,
|
|
"grad_norm": 0.8181274172180951,
|
|
"learning_rate": 5.781794090704806e-07,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0392225906252861,
|
|
"step": 8010,
|
|
"valid_targets_mean": 924.0,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 6.521774521774522,
|
|
"grad_norm": 0.8248876924646297,
|
|
"learning_rate": 5.685329028340492e-07,
|
|
"loss": 0.1049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045971810817718506,
|
|
"step": 8015,
|
|
"valid_targets_mean": 1480.2,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 6.5258445258445255,
|
|
"grad_norm": 0.8286128442083801,
|
|
"learning_rate": 5.589663871807216e-07,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04419014975428581,
|
|
"step": 8020,
|
|
"valid_targets_mean": 1415.5,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 6.52991452991453,
|
|
"grad_norm": 0.8155469269825053,
|
|
"learning_rate": 5.494799014915275e-07,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049654521048069,
|
|
"step": 8025,
|
|
"valid_targets_mean": 1655.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 6.533984533984534,
|
|
"grad_norm": 0.8136069829867537,
|
|
"learning_rate": 5.400734848180467e-07,
|
|
"loss": 0.1001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04807547852396965,
|
|
"step": 8030,
|
|
"valid_targets_mean": 1553.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 6.5380545380545385,
|
|
"grad_norm": 0.8744821289828539,
|
|
"learning_rate": 5.307471758822557e-07,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052176572382450104,
|
|
"step": 8035,
|
|
"valid_targets_mean": 1353.9,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 6.542124542124542,
|
|
"grad_norm": 0.7918578875775583,
|
|
"learning_rate": 5.215010130763576e-07,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04798971116542816,
|
|
"step": 8040,
|
|
"valid_targets_mean": 1420.9,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 6.546194546194546,
|
|
"grad_norm": 0.8293804879113885,
|
|
"learning_rate": 5.123350344626343e-07,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053255513310432434,
|
|
"step": 8045,
|
|
"valid_targets_mean": 1394.9,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 6.550264550264551,
|
|
"grad_norm": 0.7710088076942784,
|
|
"learning_rate": 5.032492777732856e-07,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04495897516608238,
|
|
"step": 8050,
|
|
"valid_targets_mean": 1383.2,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 6.554334554334554,
|
|
"grad_norm": 0.7408488115343422,
|
|
"learning_rate": 4.942437804102729e-07,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05502689629793167,
|
|
"step": 8055,
|
|
"valid_targets_mean": 1682.4,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 6.5584045584045585,
|
|
"grad_norm": 0.8161047493613602,
|
|
"learning_rate": 4.853185794451643e-07,
|
|
"loss": 0.0979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04552149027585983,
|
|
"step": 8060,
|
|
"valid_targets_mean": 1118.4,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.562474562474563,
|
|
"grad_norm": 0.810699986703739,
|
|
"learning_rate": 4.7647371161898547e-07,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04979178309440613,
|
|
"step": 8065,
|
|
"valid_targets_mean": 1538.2,
|
|
"valid_targets_min": 610
|
|
},
|
|
{
|
|
"epoch": 6.566544566544566,
|
|
"grad_norm": 0.7810924502619642,
|
|
"learning_rate": 4.677092133420647e-07,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05203615128993988,
|
|
"step": 8070,
|
|
"valid_targets_mean": 1570.1,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 6.570614570614571,
|
|
"grad_norm": 0.8664468115638617,
|
|
"learning_rate": 4.5902512069389006e-07,
|
|
"loss": 0.1088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050125155597925186,
|
|
"step": 8075,
|
|
"valid_targets_mean": 1537.8,
|
|
"valid_targets_min": 874
|
|
},
|
|
{
|
|
"epoch": 6.574684574684575,
|
|
"grad_norm": 0.784693140282473,
|
|
"learning_rate": 4.504214694229525e-07,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051420196890830994,
|
|
"step": 8080,
|
|
"valid_targets_mean": 1471.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 6.5787545787545785,
|
|
"grad_norm": 0.8589862191743883,
|
|
"learning_rate": 4.418982949466011e-07,
|
|
"loss": 0.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04462102800607681,
|
|
"step": 8085,
|
|
"valid_targets_mean": 1254.4,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 6.582824582824583,
|
|
"grad_norm": 0.8700836145069042,
|
|
"learning_rate": 4.334556323509009e-07,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04805884137749672,
|
|
"step": 8090,
|
|
"valid_targets_mean": 1482.9,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 6.586894586894587,
|
|
"grad_norm": 0.6773547787375108,
|
|
"learning_rate": 4.25093516390489e-07,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0742184966802597,
|
|
"step": 8095,
|
|
"valid_targets_mean": 1583.2,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 6.590964590964591,
|
|
"grad_norm": 0.49684846547746847,
|
|
"learning_rate": 4.1681198148841415e-07,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04401320964097977,
|
|
"step": 8100,
|
|
"valid_targets_mean": 3710.1,
|
|
"valid_targets_min": 3074
|
|
},
|
|
{
|
|
"epoch": 6.595034595034595,
|
|
"grad_norm": 0.5269822498666787,
|
|
"learning_rate": 4.0861106173602837e-07,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04181273281574249,
|
|
"step": 8105,
|
|
"valid_targets_mean": 2577.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 6.599104599104599,
|
|
"grad_norm": 0.5319878660224543,
|
|
"learning_rate": 4.004907908928135e-07,
|
|
"loss": 0.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03852052986621857,
|
|
"step": 8110,
|
|
"valid_targets_mean": 3309.8,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 6.603174603174603,
|
|
"grad_norm": 0.4436363617952094,
|
|
"learning_rate": 3.9245120238626144e-07,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03728438913822174,
|
|
"step": 8115,
|
|
"valid_targets_mean": 2484.8,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 6.607244607244607,
|
|
"grad_norm": 0.4833719991023813,
|
|
"learning_rate": 3.8449232931173195e-07,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039660267531871796,
|
|
"step": 8120,
|
|
"valid_targets_mean": 3095.1,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 6.6113146113146115,
|
|
"grad_norm": 0.7829899471206379,
|
|
"learning_rate": 3.766142044323129e-07,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06311972439289093,
|
|
"step": 8125,
|
|
"valid_targets_mean": 1663.1,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 6.615384615384615,
|
|
"grad_norm": 0.4509111643360801,
|
|
"learning_rate": 3.688168601786912e-07,
|
|
"loss": 0.0836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0408773347735405,
|
|
"step": 8130,
|
|
"valid_targets_mean": 3443.2,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 6.619454619454619,
|
|
"grad_norm": 0.5161079431546142,
|
|
"learning_rate": 3.6110032864901776e-07,
|
|
"loss": 0.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03457147255539894,
|
|
"step": 8135,
|
|
"valid_targets_mean": 1609.9,
|
|
"valid_targets_min": 859
|
|
},
|
|
{
|
|
"epoch": 6.623524623524624,
|
|
"grad_norm": 0.3376406967796116,
|
|
"learning_rate": 3.5346464160876945e-07,
|
|
"loss": 0.1058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02600034326314926,
|
|
"step": 8140,
|
|
"valid_targets_mean": 5572.1,
|
|
"valid_targets_min": 5150
|
|
},
|
|
{
|
|
"epoch": 6.627594627594627,
|
|
"grad_norm": 0.4907063131191801,
|
|
"learning_rate": 3.459098304906228e-07,
|
|
"loss": 0.0679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04523205757141113,
|
|
"step": 8145,
|
|
"valid_targets_mean": 3950.0,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 6.6316646316646315,
|
|
"grad_norm": 0.4380405824798826,
|
|
"learning_rate": 3.384359263943271e-07,
|
|
"loss": 0.0722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04171394184231758,
|
|
"step": 8150,
|
|
"valid_targets_mean": 3669.2,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 6.635734635734636,
|
|
"grad_norm": 0.41061389847454,
|
|
"learning_rate": 3.310429600865739e-07,
|
|
"loss": 0.0688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030914265662431717,
|
|
"step": 8155,
|
|
"valid_targets_mean": 2980.4,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 6.639804639804639,
|
|
"grad_norm": 0.43198145452754944,
|
|
"learning_rate": 3.237309620008722e-07,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0387565903365612,
|
|
"step": 8160,
|
|
"valid_targets_mean": 3675.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 6.643874643874644,
|
|
"grad_norm": 0.35792992132158885,
|
|
"learning_rate": 3.1649996223741765e-07,
|
|
"loss": 0.0646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027515945956110954,
|
|
"step": 8165,
|
|
"valid_targets_mean": 4013.2,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 6.647944647944648,
|
|
"grad_norm": 0.47471099164218855,
|
|
"learning_rate": 3.093499905629727e-07,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038465503603219986,
|
|
"step": 8170,
|
|
"valid_targets_mean": 2000.5,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 6.652014652014652,
|
|
"grad_norm": 0.32225997700337605,
|
|
"learning_rate": 3.022810764107487e-07,
|
|
"loss": 0.0596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02280164137482643,
|
|
"step": 8175,
|
|
"valid_targets_mean": 2904.4,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.656084656084656,
|
|
"grad_norm": 0.5895755914253757,
|
|
"learning_rate": 2.9529324888027287e-07,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028042782098054886,
|
|
"step": 8180,
|
|
"valid_targets_mean": 1487.6,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 6.66015466015466,
|
|
"grad_norm": 0.5222751795102811,
|
|
"learning_rate": 2.8838653673727514e-07,
|
|
"loss": 0.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048454202711582184,
|
|
"step": 8185,
|
|
"valid_targets_mean": 2006.0,
|
|
"valid_targets_min": 557
|
|
},
|
|
{
|
|
"epoch": 6.6642246642246645,
|
|
"grad_norm": 0.5326599642169098,
|
|
"learning_rate": 2.8156096841357893e-07,
|
|
"loss": 0.0833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04685577005147934,
|
|
"step": 8190,
|
|
"valid_targets_mean": 3359.2,
|
|
"valid_targets_min": 1086
|
|
},
|
|
{
|
|
"epoch": 6.668294668294668,
|
|
"grad_norm": 0.48322360350526905,
|
|
"learning_rate": 2.748165720069684e-07,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05052535608410835,
|
|
"step": 8195,
|
|
"valid_targets_mean": 2866.8,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 6.672364672364672,
|
|
"grad_norm": 0.5129442984365581,
|
|
"learning_rate": 2.6815337528107723e-07,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03285352140665054,
|
|
"step": 8200,
|
|
"valid_targets_mean": 3259.9,
|
|
"valid_targets_min": 2702
|
|
},
|
|
{
|
|
"epoch": 6.676434676434677,
|
|
"grad_norm": 0.3692290656961646,
|
|
"learning_rate": 2.615714056652841e-07,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030034877359867096,
|
|
"step": 8205,
|
|
"valid_targets_mean": 4086.4,
|
|
"valid_targets_min": 3798
|
|
},
|
|
{
|
|
"epoch": 6.68050468050468,
|
|
"grad_norm": 0.8498455958044546,
|
|
"learning_rate": 2.5507069025458855e-07,
|
|
"loss": 0.0841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06665704399347305,
|
|
"step": 8210,
|
|
"valid_targets_mean": 1020.8,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 6.6845746845746845,
|
|
"grad_norm": 0.4283220379269479,
|
|
"learning_rate": 2.486512558095e-07,
|
|
"loss": 0.0925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02849685214459896,
|
|
"step": 8215,
|
|
"valid_targets_mean": 2028.4,
|
|
"valid_targets_min": 590
|
|
},
|
|
{
|
|
"epoch": 6.688644688644689,
|
|
"grad_norm": 0.4312912789027484,
|
|
"learning_rate": 2.423131287559355e-07,
|
|
"loss": 0.0879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0402199849486351,
|
|
"step": 8220,
|
|
"valid_targets_mean": 4046.9,
|
|
"valid_targets_min": 3390
|
|
},
|
|
{
|
|
"epoch": 6.692714692714693,
|
|
"grad_norm": 0.6525867542972212,
|
|
"learning_rate": 2.3605633518510195e-07,
|
|
"loss": 0.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12717895209789276,
|
|
"step": 8225,
|
|
"valid_targets_mean": 2247.4,
|
|
"valid_targets_min": 984
|
|
},
|
|
{
|
|
"epoch": 6.696784696784697,
|
|
"grad_norm": 0.48625518043675997,
|
|
"learning_rate": 2.2988090085339864e-07,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04617800936102867,
|
|
"step": 8230,
|
|
"valid_targets_mean": 3348.9,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 6.700854700854701,
|
|
"grad_norm": 0.5666484347927709,
|
|
"learning_rate": 2.237868511823016e-07,
|
|
"loss": 0.0802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038623686879873276,
|
|
"step": 8235,
|
|
"valid_targets_mean": 2385.1,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 6.704924704924705,
|
|
"grad_norm": 0.4480717010310978,
|
|
"learning_rate": 2.1777421125826593e-07,
|
|
"loss": 0.0848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03942921385169029,
|
|
"step": 8240,
|
|
"valid_targets_mean": 3126.2,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 6.708994708994709,
|
|
"grad_norm": 0.447193589815513,
|
|
"learning_rate": 2.1184300583261263e-07,
|
|
"loss": 0.1726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04053179919719696,
|
|
"step": 8245,
|
|
"valid_targets_mean": 4386.0,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 6.713064713064713,
|
|
"grad_norm": 0.37115709592990564,
|
|
"learning_rate": 2.059932593214442e-07,
|
|
"loss": 0.066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05170625448226929,
|
|
"step": 8250,
|
|
"valid_targets_mean": 3738.9,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 6.7171347171347175,
|
|
"grad_norm": 0.3666621308583359,
|
|
"learning_rate": 2.0022499580552247e-07,
|
|
"loss": 0.08,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033547915518283844,
|
|
"step": 8255,
|
|
"valid_targets_mean": 3522.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 6.721204721204721,
|
|
"grad_norm": 0.41360520779368637,
|
|
"learning_rate": 1.9453823903019086e-07,
|
|
"loss": 0.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04005451872944832,
|
|
"step": 8260,
|
|
"valid_targets_mean": 3383.1,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 6.725274725274725,
|
|
"grad_norm": 0.39520542502071915,
|
|
"learning_rate": 1.8893301240525463e-07,
|
|
"loss": 0.0672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03196336701512337,
|
|
"step": 8265,
|
|
"valid_targets_mean": 3610.1,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 6.72934472934473,
|
|
"grad_norm": 0.37918993485934277,
|
|
"learning_rate": 1.8340933900490965e-07,
|
|
"loss": 0.0652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026241963729262352,
|
|
"step": 8270,
|
|
"valid_targets_mean": 3479.8,
|
|
"valid_targets_min": 886
|
|
},
|
|
{
|
|
"epoch": 6.733414733414733,
|
|
"grad_norm": 0.4561669870328987,
|
|
"learning_rate": 1.7796724156762258e-07,
|
|
"loss": 0.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03602328151464462,
|
|
"step": 8275,
|
|
"valid_targets_mean": 3346.6,
|
|
"valid_targets_min": 2409
|
|
},
|
|
{
|
|
"epoch": 6.7374847374847375,
|
|
"grad_norm": 0.4635371928947697,
|
|
"learning_rate": 1.726067424960576e-07,
|
|
"loss": 0.0654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031619295477867126,
|
|
"step": 8280,
|
|
"valid_targets_mean": 3552.0,
|
|
"valid_targets_min": 2309
|
|
},
|
|
{
|
|
"epoch": 6.741554741554742,
|
|
"grad_norm": 0.7003118747711201,
|
|
"learning_rate": 1.6732786385696754e-07,
|
|
"loss": 0.1105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0696185976266861,
|
|
"step": 8285,
|
|
"valid_targets_mean": 2149.8,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 6.745624745624745,
|
|
"grad_norm": 0.3594856692476842,
|
|
"learning_rate": 1.6213062738111407e-07,
|
|
"loss": 0.06,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02745797298848629,
|
|
"step": 8290,
|
|
"valid_targets_mean": 3310.2,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 6.74969474969475,
|
|
"grad_norm": 0.474912995800518,
|
|
"learning_rate": 1.5701505446317656e-07,
|
|
"loss": 0.0601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03320920467376709,
|
|
"step": 8295,
|
|
"valid_targets_mean": 2458.4,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 6.753764753764754,
|
|
"grad_norm": 0.3593430573149411,
|
|
"learning_rate": 1.5198116616165881e-07,
|
|
"loss": 0.0608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02956734225153923,
|
|
"step": 8300,
|
|
"valid_targets_mean": 3501.5,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 6.7578347578347575,
|
|
"grad_norm": 0.4694070743198895,
|
|
"learning_rate": 1.4702898319881142e-07,
|
|
"loss": 0.0628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04120086506009102,
|
|
"step": 8305,
|
|
"valid_targets_mean": 3220.2,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 6.761904761904762,
|
|
"grad_norm": 0.39254754104169143,
|
|
"learning_rate": 1.421585259605318e-07,
|
|
"loss": 0.1059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027905117720365524,
|
|
"step": 8310,
|
|
"valid_targets_mean": 3818.5,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 6.765974765974766,
|
|
"grad_norm": 0.4934206140709475,
|
|
"learning_rate": 1.3736981449629982e-07,
|
|
"loss": 0.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048690035939216614,
|
|
"step": 8315,
|
|
"valid_targets_mean": 2390.9,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 6.77004477004477,
|
|
"grad_norm": 0.5152362898858448,
|
|
"learning_rate": 1.3266286851907783e-07,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041787609457969666,
|
|
"step": 8320,
|
|
"valid_targets_mean": 1500.6,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 6.774114774114774,
|
|
"grad_norm": 0.7351478599697331,
|
|
"learning_rate": 1.280377074052397e-07,
|
|
"loss": 0.0919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08116742968559265,
|
|
"step": 8325,
|
|
"valid_targets_mean": 1711.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 6.778184778184778,
|
|
"grad_norm": 0.47783788496330504,
|
|
"learning_rate": 1.234943501944863e-07,
|
|
"loss": 0.1044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04755005985498428,
|
|
"step": 8330,
|
|
"valid_targets_mean": 3036.1,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 6.782254782254782,
|
|
"grad_norm": 0.5018792752451303,
|
|
"learning_rate": 1.1903281558976798e-07,
|
|
"loss": 0.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051417913287878036,
|
|
"step": 8335,
|
|
"valid_targets_mean": 3317.2,
|
|
"valid_targets_min": 2265
|
|
},
|
|
{
|
|
"epoch": 6.786324786324786,
|
|
"grad_norm": 0.3439978747787152,
|
|
"learning_rate": 1.1465312195721334e-07,
|
|
"loss": 0.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03338252753019333,
|
|
"step": 8340,
|
|
"valid_targets_mean": 3875.2,
|
|
"valid_targets_min": 3051
|
|
},
|
|
{
|
|
"epoch": 6.7903947903947905,
|
|
"grad_norm": 0.4903813555602027,
|
|
"learning_rate": 1.1035528732604272e-07,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05204244330525398,
|
|
"step": 8345,
|
|
"valid_targets_mean": 2069.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.794464794464794,
|
|
"grad_norm": 0.3832807020853381,
|
|
"learning_rate": 1.0613932938850157e-07,
|
|
"loss": 0.0698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03278394788503647,
|
|
"step": 8350,
|
|
"valid_targets_mean": 1640.8,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 6.798534798534798,
|
|
"grad_norm": 0.3608814015492147,
|
|
"learning_rate": 1.020052654997894e-07,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022406112402677536,
|
|
"step": 8355,
|
|
"valid_targets_mean": 4435.1,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 6.802604802604803,
|
|
"grad_norm": 0.3861502768165619,
|
|
"learning_rate": 9.79531126779798e-08,
|
|
"loss": 0.0654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025468919426202774,
|
|
"step": 8360,
|
|
"valid_targets_mean": 1889.2,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 6.806674806674806,
|
|
"grad_norm": 0.38892792716177965,
|
|
"learning_rate": 9.398288760395836e-08,
|
|
"loss": 0.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03875254467129707,
|
|
"step": 8365,
|
|
"valid_targets_mean": 4257.1,
|
|
"valid_targets_min": 2414
|
|
},
|
|
{
|
|
"epoch": 6.8107448107448105,
|
|
"grad_norm": 0.5353675959388392,
|
|
"learning_rate": 9.009460662134928e-08,
|
|
"loss": 0.079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043166883289813995,
|
|
"step": 8370,
|
|
"valid_targets_mean": 1502.4,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 6.814814814814815,
|
|
"grad_norm": 0.4502618811767403,
|
|
"learning_rate": 8.628828573645554e-08,
|
|
"loss": 0.0813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039157938212156296,
|
|
"step": 8375,
|
|
"valid_targets_mean": 3466.0,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 6.818884818884819,
|
|
"grad_norm": 0.44084545531435015,
|
|
"learning_rate": 8.256394061817663e-08,
|
|
"loss": 0.0841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043219827115535736,
|
|
"step": 8380,
|
|
"valid_targets_mean": 3982.1,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 6.822954822954823,
|
|
"grad_norm": 0.37363497568335224,
|
|
"learning_rate": 7.892158659796422e-08,
|
|
"loss": 0.0682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034288160502910614,
|
|
"step": 8385,
|
|
"valid_targets_mean": 2788.0,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 6.827024827024827,
|
|
"grad_norm": 0.3799289990411093,
|
|
"learning_rate": 7.536123866974665e-08,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02917984500527382,
|
|
"step": 8390,
|
|
"valid_targets_mean": 2120.6,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 6.831094831094831,
|
|
"grad_norm": 0.3835533743706495,
|
|
"learning_rate": 7.188291148986892e-08,
|
|
"loss": 0.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03172875568270683,
|
|
"step": 8395,
|
|
"valid_targets_mean": 3477.1,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 6.835164835164835,
|
|
"grad_norm": 0.34008156095211195,
|
|
"learning_rate": 6.848661937703727e-08,
|
|
"loss": 0.0577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03241172060370445,
|
|
"step": 8400,
|
|
"valid_targets_mean": 3888.4,
|
|
"valid_targets_min": 3540
|
|
},
|
|
{
|
|
"epoch": 6.839234839234839,
|
|
"grad_norm": 0.4090753301091059,
|
|
"learning_rate": 6.517237631225026e-08,
|
|
"loss": 0.0768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027315791696310043,
|
|
"step": 8405,
|
|
"valid_targets_mean": 3086.8,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.843304843304844,
|
|
"grad_norm": 0.39701062406863746,
|
|
"learning_rate": 6.194019593875444e-08,
|
|
"loss": 0.0726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025227008387446404,
|
|
"step": 8410,
|
|
"valid_targets_mean": 3558.6,
|
|
"valid_targets_min": 2587
|
|
},
|
|
{
|
|
"epoch": 6.847374847374847,
|
|
"grad_norm": 0.4392394924145199,
|
|
"learning_rate": 5.879009156197768e-08,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03493019938468933,
|
|
"step": 8415,
|
|
"valid_targets_mean": 2688.9,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 6.851444851444851,
|
|
"grad_norm": 0.5083647411957302,
|
|
"learning_rate": 5.572207614947589e-08,
|
|
"loss": 0.0732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03646247088909149,
|
|
"step": 8420,
|
|
"valid_targets_mean": 3734.4,
|
|
"valid_targets_min": 3068
|
|
},
|
|
{
|
|
"epoch": 6.855514855514856,
|
|
"grad_norm": 0.628171717161323,
|
|
"learning_rate": 5.273616233088641e-08,
|
|
"loss": 0.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04628308117389679,
|
|
"step": 8425,
|
|
"valid_targets_mean": 1816.1,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 6.85958485958486,
|
|
"grad_norm": 0.6087268426251439,
|
|
"learning_rate": 4.983236239787026e-08,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04026108980178833,
|
|
"step": 8430,
|
|
"valid_targets_mean": 1444.4,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 6.8636548636548635,
|
|
"grad_norm": 0.4335028192076854,
|
|
"learning_rate": 4.701068830405886e-08,
|
|
"loss": 0.0804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029476728290319443,
|
|
"step": 8435,
|
|
"valid_targets_mean": 1768.8,
|
|
"valid_targets_min": 617
|
|
},
|
|
{
|
|
"epoch": 6.867724867724868,
|
|
"grad_norm": 0.45556499572135306,
|
|
"learning_rate": 4.4271151665014055e-08,
|
|
"loss": 0.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03315385431051254,
|
|
"step": 8440,
|
|
"valid_targets_mean": 2634.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.871794871794872,
|
|
"grad_norm": 0.5629937228346741,
|
|
"learning_rate": 4.161376375817039e-08,
|
|
"loss": 0.0661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04290713742375374,
|
|
"step": 8445,
|
|
"valid_targets_mean": 1836.2,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 6.875864875864876,
|
|
"grad_norm": 0.5669698114208702,
|
|
"learning_rate": 3.903853552279513e-08,
|
|
"loss": 0.0872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06467582285404205,
|
|
"step": 8450,
|
|
"valid_targets_mean": 2148.2,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 6.87993487993488,
|
|
"grad_norm": 0.30486345935213593,
|
|
"learning_rate": 3.654547755994164e-08,
|
|
"loss": 0.0651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019002530723810196,
|
|
"step": 8455,
|
|
"valid_targets_mean": 3639.2,
|
|
"valid_targets_min": 2863
|
|
},
|
|
{
|
|
"epoch": 6.884004884004884,
|
|
"grad_norm": 0.38454514502865644,
|
|
"learning_rate": 3.413460013240499e-08,
|
|
"loss": 0.0655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026053160429000854,
|
|
"step": 8460,
|
|
"valid_targets_mean": 3033.4,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 6.888074888074888,
|
|
"grad_norm": 0.4941087528400253,
|
|
"learning_rate": 3.180591316467974e-08,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04288317263126373,
|
|
"step": 8465,
|
|
"valid_targets_mean": 2346.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 6.892144892144892,
|
|
"grad_norm": 0.3509155023887329,
|
|
"learning_rate": 2.9559426242919964e-08,
|
|
"loss": 0.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03283192217350006,
|
|
"step": 8470,
|
|
"valid_targets_mean": 4902.1,
|
|
"valid_targets_min": 3640
|
|
},
|
|
{
|
|
"epoch": 6.896214896214897,
|
|
"grad_norm": 0.4037435152095895,
|
|
"learning_rate": 2.7395148614897115e-08,
|
|
"loss": 0.0617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03601628914475441,
|
|
"step": 8475,
|
|
"valid_targets_mean": 4536.6,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 6.9002849002849,
|
|
"grad_norm": 0.34603119742925265,
|
|
"learning_rate": 2.5313089189966665e-08,
|
|
"loss": 0.0775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025459568947553635,
|
|
"step": 8480,
|
|
"valid_targets_mean": 3218.6,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 6.904354904354904,
|
|
"grad_norm": 0.4268602714698763,
|
|
"learning_rate": 2.331325653902816e-08,
|
|
"loss": 0.0945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03908785805106163,
|
|
"step": 8485,
|
|
"valid_targets_mean": 3774.6,
|
|
"valid_targets_min": 2945
|
|
},
|
|
{
|
|
"epoch": 6.908424908424909,
|
|
"grad_norm": 0.4557025570243415,
|
|
"learning_rate": 2.139565889448969e-08,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04192078113555908,
|
|
"step": 8490,
|
|
"valid_targets_mean": 3045.4,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 6.912494912494912,
|
|
"grad_norm": 0.4093961592946955,
|
|
"learning_rate": 1.9560304150234576e-08,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030827539041638374,
|
|
"step": 8495,
|
|
"valid_targets_mean": 2492.8,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 6.916564916564917,
|
|
"grad_norm": 0.4284938895525246,
|
|
"learning_rate": 1.7807199861594736e-08,
|
|
"loss": 0.0906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02964516170322895,
|
|
"step": 8500,
|
|
"valid_targets_mean": 2564.6,
|
|
"valid_targets_min": 485
|
|
},
|
|
{
|
|
"epoch": 6.920634920634921,
|
|
"grad_norm": 0.43820381846708467,
|
|
"learning_rate": 1.613635324530405e-08,
|
|
"loss": 0.0682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03012934699654579,
|
|
"step": 8505,
|
|
"valid_targets_mean": 3002.5,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 6.924704924704924,
|
|
"grad_norm": 0.4182221488232721,
|
|
"learning_rate": 1.4547771179487246e-08,
|
|
"loss": 0.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032236211001873016,
|
|
"step": 8510,
|
|
"valid_targets_mean": 2588.5,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 6.928774928774929,
|
|
"grad_norm": 0.5356394748249438,
|
|
"learning_rate": 1.304146020361996e-08,
|
|
"loss": 0.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04836539924144745,
|
|
"step": 8515,
|
|
"valid_targets_mean": 1443.8,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 6.932844932844933,
|
|
"grad_norm": 0.4971988244053681,
|
|
"learning_rate": 1.1617426518504283e-08,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03724466264247894,
|
|
"step": 8520,
|
|
"valid_targets_mean": 2283.1,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 6.9369149369149365,
|
|
"grad_norm": 0.41438516750308313,
|
|
"learning_rate": 1.0275675986242128e-08,
|
|
"loss": 0.0913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03457161411643028,
|
|
"step": 8525,
|
|
"valid_targets_mean": 3202.0,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 6.940984940984941,
|
|
"grad_norm": 0.4720882472008192,
|
|
"learning_rate": 9.016214130219692e-09,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03335484117269516,
|
|
"step": 8530,
|
|
"valid_targets_mean": 2586.9,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 6.945054945054945,
|
|
"grad_norm": 0.4477158642708563,
|
|
"learning_rate": 7.839046135069695e-09,
|
|
"loss": 0.069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033690642565488815,
|
|
"step": 8535,
|
|
"valid_targets_mean": 3345.4,
|
|
"valid_targets_min": 2017
|
|
},
|
|
{
|
|
"epoch": 6.949124949124949,
|
|
"grad_norm": 0.4310573655106279,
|
|
"learning_rate": 6.744176846664729e-09,
|
|
"loss": 0.0679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029223371297121048,
|
|
"step": 8540,
|
|
"valid_targets_mean": 1937.5,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 6.953194953194953,
|
|
"grad_norm": 0.37061900269005643,
|
|
"learning_rate": 5.731610772083951e-09,
|
|
"loss": 0.0597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029739029705524445,
|
|
"step": 8545,
|
|
"valid_targets_mean": 3302.2,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 6.957264957264957,
|
|
"grad_norm": 0.39293408289174847,
|
|
"learning_rate": 4.801352079606414e-09,
|
|
"loss": 0.0844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03068091720342636,
|
|
"step": 8550,
|
|
"valid_targets_mean": 3287.2,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 6.961334961334961,
|
|
"grad_norm": 0.36550971062080234,
|
|
"learning_rate": 3.9534045986888706e-09,
|
|
"loss": 0.0616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026515811681747437,
|
|
"step": 8555,
|
|
"valid_targets_mean": 3800.8,
|
|
"valid_targets_min": 2602
|
|
},
|
|
{
|
|
"epoch": 6.965404965404965,
|
|
"grad_norm": 0.5094035280811099,
|
|
"learning_rate": 3.1877718199480045e-09,
|
|
"loss": 0.0705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04128776118159294,
|
|
"step": 8560,
|
|
"valid_targets_mean": 2853.9,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 6.96947496947497,
|
|
"grad_norm": 0.43630707115728157,
|
|
"learning_rate": 2.5044568951471114e-09,
|
|
"loss": 0.0678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042031463235616684,
|
|
"step": 8565,
|
|
"valid_targets_mean": 3633.9,
|
|
"valid_targets_min": 2059
|
|
},
|
|
{
|
|
"epoch": 6.973544973544973,
|
|
"grad_norm": 0.4179577364933974,
|
|
"learning_rate": 1.9034626371872147e-09,
|
|
"loss": 0.0713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03878530487418175,
|
|
"step": 8570,
|
|
"valid_targets_mean": 4244.8,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 6.977614977614977,
|
|
"grad_norm": 0.3982770165173577,
|
|
"learning_rate": 1.3847915200893015e-09,
|
|
"loss": 0.0615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04271527752280235,
|
|
"step": 8575,
|
|
"valid_targets_mean": 4650.8,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 6.981684981684982,
|
|
"grad_norm": 0.28047291358880216,
|
|
"learning_rate": 9.484456789876639e-10,
|
|
"loss": 0.065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02087704837322235,
|
|
"step": 8580,
|
|
"valid_targets_mean": 4432.1,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 6.985754985754986,
|
|
"grad_norm": 0.6547087358433034,
|
|
"learning_rate": 5.944269101232358e-10,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09982357919216156,
|
|
"step": 8585,
|
|
"valid_targets_mean": 2023.0,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 6.98982498982499,
|
|
"grad_norm": 0.3577008023192031,
|
|
"learning_rate": 3.2273667083027036e-10,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027526959776878357,
|
|
"step": 8590,
|
|
"valid_targets_mean": 3241.9,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 6.993894993894994,
|
|
"grad_norm": 0.4179136407746658,
|
|
"learning_rate": 1.3337607953634034e-10,
|
|
"loss": 0.0743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04843872785568237,
|
|
"step": 8595,
|
|
"valid_targets_mean": 3313.4,
|
|
"valid_targets_min": 2308
|
|
},
|
|
{
|
|
"epoch": 6.997964997964998,
|
|
"grad_norm": 0.3512093714041848,
|
|
"learning_rate": 2.6345915753456243e-11,
|
|
"loss": 0.0599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0316237136721611,
|
|
"step": 8600,
|
|
"valid_targets_mean": 3463.5,
|
|
"valid_targets_min": 2883
|
|
},
|
|
{
|
|
"epoch": 6.999592999592999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04449460655450821,
|
|
"step": 8602,
|
|
"total_flos": 2.7561038524606054e+18,
|
|
"train_loss": 0.015238219677667456,
|
|
"train_runtime": 26280.0507,
|
|
"train_samples_per_second": 5.235,
|
|
"train_steps_per_second": 0.327,
|
|
"valid_targets_mean": 4028.4,
|
|
"valid_targets_min": 1273
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 8603,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.7561038524606054e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|