18011 lines
501 KiB
JSON
18011 lines
501 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 8169,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.004284490145672665,
|
|
"grad_norm": 13.494672426000244,
|
|
"learning_rate": 1.9583843329253365e-07,
|
|
"loss": 0.6876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3388299345970154,
|
|
"step": 5,
|
|
"valid_targets_mean": 4748.4,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 0.00856898029134533,
|
|
"grad_norm": 13.627970120064575,
|
|
"learning_rate": 4.406364749082008e-07,
|
|
"loss": 0.6849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3521880507469177,
|
|
"step": 10,
|
|
"valid_targets_mean": 5171.9,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 0.012853470437017995,
|
|
"grad_norm": 13.0750137558056,
|
|
"learning_rate": 6.854345165238678e-07,
|
|
"loss": 0.6856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37566128373146057,
|
|
"step": 15,
|
|
"valid_targets_mean": 5947.0,
|
|
"valid_targets_min": 4169
|
|
},
|
|
{
|
|
"epoch": 0.01713796058269066,
|
|
"grad_norm": 11.347303801767657,
|
|
"learning_rate": 9.30232558139535e-07,
|
|
"loss": 0.6682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30158156156539917,
|
|
"step": 20,
|
|
"valid_targets_mean": 5885.1,
|
|
"valid_targets_min": 3731
|
|
},
|
|
{
|
|
"epoch": 0.021422450728363324,
|
|
"grad_norm": 8.458288031130037,
|
|
"learning_rate": 1.175030599755202e-06,
|
|
"loss": 0.6535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3375881314277649,
|
|
"step": 25,
|
|
"valid_targets_mean": 5465.9,
|
|
"valid_targets_min": 2991
|
|
},
|
|
{
|
|
"epoch": 0.02570694087403599,
|
|
"grad_norm": 6.174217503581587,
|
|
"learning_rate": 1.4198286413708693e-06,
|
|
"loss": 0.6146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2947828769683838,
|
|
"step": 30,
|
|
"valid_targets_mean": 6386.5,
|
|
"valid_targets_min": 4563
|
|
},
|
|
{
|
|
"epoch": 0.029991431019708654,
|
|
"grad_norm": 5.113168458961102,
|
|
"learning_rate": 1.6646266829865362e-06,
|
|
"loss": 0.5942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28605085611343384,
|
|
"step": 35,
|
|
"valid_targets_mean": 4906.0,
|
|
"valid_targets_min": 2785
|
|
},
|
|
{
|
|
"epoch": 0.03427592116538132,
|
|
"grad_norm": 4.466096345198193,
|
|
"learning_rate": 1.9094247246022034e-06,
|
|
"loss": 0.5779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28148600459098816,
|
|
"step": 40,
|
|
"valid_targets_mean": 4122.8,
|
|
"valid_targets_min": 2077
|
|
},
|
|
{
|
|
"epoch": 0.038560411311053984,
|
|
"grad_norm": 4.4102843948797235,
|
|
"learning_rate": 2.1542227662178707e-06,
|
|
"loss": 0.5303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24403318762779236,
|
|
"step": 45,
|
|
"valid_targets_mean": 5068.8,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 0.04284490145672665,
|
|
"grad_norm": 2.5838635468953304,
|
|
"learning_rate": 2.3990208078335376e-06,
|
|
"loss": 0.4715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24094364047050476,
|
|
"step": 50,
|
|
"valid_targets_mean": 5781.5,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 0.04712939160239932,
|
|
"grad_norm": 1.66427395007336,
|
|
"learning_rate": 2.6438188494492045e-06,
|
|
"loss": 0.466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24363049864768982,
|
|
"step": 55,
|
|
"valid_targets_mean": 5561.0,
|
|
"valid_targets_min": 2994
|
|
},
|
|
{
|
|
"epoch": 0.05141388174807198,
|
|
"grad_norm": 1.2843484071788496,
|
|
"learning_rate": 2.8886168910648714e-06,
|
|
"loss": 0.4702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18553395569324493,
|
|
"step": 60,
|
|
"valid_targets_mean": 4436.9,
|
|
"valid_targets_min": 2411
|
|
},
|
|
{
|
|
"epoch": 0.055698371893744644,
|
|
"grad_norm": 1.1208170150705083,
|
|
"learning_rate": 3.1334149326805383e-06,
|
|
"loss": 0.4611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20919764041900635,
|
|
"step": 65,
|
|
"valid_targets_mean": 4674.6,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 0.05998286203941731,
|
|
"grad_norm": 0.8862735321798944,
|
|
"learning_rate": 3.378212974296206e-06,
|
|
"loss": 0.4505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17865021526813507,
|
|
"step": 70,
|
|
"valid_targets_mean": 4583.2,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 0.06426735218508997,
|
|
"grad_norm": 0.8100500072395349,
|
|
"learning_rate": 3.623011015911873e-06,
|
|
"loss": 0.4574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20401369035243988,
|
|
"step": 75,
|
|
"valid_targets_mean": 5121.2,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 0.06855184233076264,
|
|
"grad_norm": 0.7408478984723242,
|
|
"learning_rate": 3.86780905752754e-06,
|
|
"loss": 0.4241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22882798314094543,
|
|
"step": 80,
|
|
"valid_targets_mean": 5859.9,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 0.0728363324764353,
|
|
"grad_norm": 0.6980932774784856,
|
|
"learning_rate": 4.112607099143207e-06,
|
|
"loss": 0.415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2240135371685028,
|
|
"step": 85,
|
|
"valid_targets_mean": 4830.5,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 0.07712082262210797,
|
|
"grad_norm": 0.5915927033833656,
|
|
"learning_rate": 4.357405140758874e-06,
|
|
"loss": 0.4105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2051009237766266,
|
|
"step": 90,
|
|
"valid_targets_mean": 6430.6,
|
|
"valid_targets_min": 4764
|
|
},
|
|
{
|
|
"epoch": 0.08140531276778064,
|
|
"grad_norm": 0.5894205300368119,
|
|
"learning_rate": 4.6022031823745415e-06,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21032562851905823,
|
|
"step": 95,
|
|
"valid_targets_mean": 6892.0,
|
|
"valid_targets_min": 4453
|
|
},
|
|
{
|
|
"epoch": 0.0856898029134533,
|
|
"grad_norm": 0.6598571392919448,
|
|
"learning_rate": 4.847001223990209e-06,
|
|
"loss": 0.3942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21012765169143677,
|
|
"step": 100,
|
|
"valid_targets_mean": 4862.1,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 0.08997429305912596,
|
|
"grad_norm": 0.7406760827901361,
|
|
"learning_rate": 5.091799265605875e-06,
|
|
"loss": 0.3795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21197551488876343,
|
|
"step": 105,
|
|
"valid_targets_mean": 5108.5,
|
|
"valid_targets_min": 2767
|
|
},
|
|
{
|
|
"epoch": 0.09425878320479864,
|
|
"grad_norm": 0.6224090599201313,
|
|
"learning_rate": 5.336597307221543e-06,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.139212965965271,
|
|
"step": 110,
|
|
"valid_targets_mean": 3294.1,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 0.09854327335047129,
|
|
"grad_norm": 0.5436817831194202,
|
|
"learning_rate": 5.58139534883721e-06,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16008134186267853,
|
|
"step": 115,
|
|
"valid_targets_mean": 4258.8,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 0.10282776349614396,
|
|
"grad_norm": 0.527086505076333,
|
|
"learning_rate": 5.8261933904528765e-06,
|
|
"loss": 0.3991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20387375354766846,
|
|
"step": 120,
|
|
"valid_targets_mean": 5987.6,
|
|
"valid_targets_min": 4164
|
|
},
|
|
{
|
|
"epoch": 0.10711225364181662,
|
|
"grad_norm": 0.5288720994634073,
|
|
"learning_rate": 6.070991432068544e-06,
|
|
"loss": 0.3599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16154056787490845,
|
|
"step": 125,
|
|
"valid_targets_mean": 5076.4,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 0.11139674378748929,
|
|
"grad_norm": 0.5757742789735052,
|
|
"learning_rate": 6.31578947368421e-06,
|
|
"loss": 0.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19382911920547485,
|
|
"step": 130,
|
|
"valid_targets_mean": 4982.8,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 0.11568123393316196,
|
|
"grad_norm": 0.4866632046001885,
|
|
"learning_rate": 6.560587515299878e-06,
|
|
"loss": 0.3591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1747215837240219,
|
|
"step": 135,
|
|
"valid_targets_mean": 6455.6,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 0.11996572407883462,
|
|
"grad_norm": 0.5502088562401298,
|
|
"learning_rate": 6.805385556915545e-06,
|
|
"loss": 0.3531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17451369762420654,
|
|
"step": 140,
|
|
"valid_targets_mean": 5308.2,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 0.12425021422450729,
|
|
"grad_norm": 0.566485704846439,
|
|
"learning_rate": 7.050183598531213e-06,
|
|
"loss": 0.354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18449868261814117,
|
|
"step": 145,
|
|
"valid_targets_mean": 4460.1,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 0.12853470437017994,
|
|
"grad_norm": 0.6014576667932432,
|
|
"learning_rate": 7.29498164014688e-06,
|
|
"loss": 0.3503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16550466418266296,
|
|
"step": 150,
|
|
"valid_targets_mean": 4723.2,
|
|
"valid_targets_min": 2563
|
|
},
|
|
{
|
|
"epoch": 0.1328191945158526,
|
|
"grad_norm": 0.5678765320233051,
|
|
"learning_rate": 7.539779681762547e-06,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20662416517734528,
|
|
"step": 155,
|
|
"valid_targets_mean": 6524.4,
|
|
"valid_targets_min": 4442
|
|
},
|
|
{
|
|
"epoch": 0.13710368466152528,
|
|
"grad_norm": 0.5209904193861675,
|
|
"learning_rate": 7.784577723378214e-06,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17364206910133362,
|
|
"step": 160,
|
|
"valid_targets_mean": 5728.1,
|
|
"valid_targets_min": 3117
|
|
},
|
|
{
|
|
"epoch": 0.14138817480719795,
|
|
"grad_norm": 0.5495554877332163,
|
|
"learning_rate": 8.02937576499388e-06,
|
|
"loss": 0.3406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19940133392810822,
|
|
"step": 165,
|
|
"valid_targets_mean": 5301.6,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 0.1456726649528706,
|
|
"grad_norm": 0.6721459306059137,
|
|
"learning_rate": 8.274173806609547e-06,
|
|
"loss": 0.3484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15087860822677612,
|
|
"step": 170,
|
|
"valid_targets_mean": 3913.6,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 0.14995715509854327,
|
|
"grad_norm": 0.5139212368391745,
|
|
"learning_rate": 8.518971848225216e-06,
|
|
"loss": 0.3418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1852315366268158,
|
|
"step": 175,
|
|
"valid_targets_mean": 6394.9,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 0.15424164524421594,
|
|
"grad_norm": 0.6080406124848964,
|
|
"learning_rate": 8.763769889840882e-06,
|
|
"loss": 0.3439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2070283591747284,
|
|
"step": 180,
|
|
"valid_targets_mean": 5282.9,
|
|
"valid_targets_min": 501
|
|
},
|
|
{
|
|
"epoch": 0.1585261353898886,
|
|
"grad_norm": 0.5434520776028683,
|
|
"learning_rate": 9.008567931456548e-06,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13234570622444153,
|
|
"step": 185,
|
|
"valid_targets_mean": 3877.6,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 0.16281062553556128,
|
|
"grad_norm": 0.5084708583445311,
|
|
"learning_rate": 9.253365973072217e-06,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18342074751853943,
|
|
"step": 190,
|
|
"valid_targets_mean": 6325.2,
|
|
"valid_targets_min": 4092
|
|
},
|
|
{
|
|
"epoch": 0.16709511568123395,
|
|
"grad_norm": 0.5710666319007862,
|
|
"learning_rate": 9.498164014687883e-06,
|
|
"loss": 0.3376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1878889948129654,
|
|
"step": 195,
|
|
"valid_targets_mean": 5223.2,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 0.1713796058269066,
|
|
"grad_norm": 0.6479959072854039,
|
|
"learning_rate": 9.74296205630355e-06,
|
|
"loss": 0.3622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16445878148078918,
|
|
"step": 200,
|
|
"valid_targets_mean": 3273.6,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 0.17566409597257926,
|
|
"grad_norm": 0.5202221871171575,
|
|
"learning_rate": 9.987760097919218e-06,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16284489631652832,
|
|
"step": 205,
|
|
"valid_targets_mean": 5055.8,
|
|
"valid_targets_min": 2305
|
|
},
|
|
{
|
|
"epoch": 0.17994858611825193,
|
|
"grad_norm": 0.5524924025549904,
|
|
"learning_rate": 1.0232558139534884e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19672557711601257,
|
|
"step": 210,
|
|
"valid_targets_mean": 5106.8,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 0.1842330762639246,
|
|
"grad_norm": 1.0223498715512376,
|
|
"learning_rate": 1.047735618115055e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19318395853042603,
|
|
"step": 215,
|
|
"valid_targets_mean": 5064.0,
|
|
"valid_targets_min": 2027
|
|
},
|
|
{
|
|
"epoch": 0.18851756640959727,
|
|
"grad_norm": 0.5211527369329275,
|
|
"learning_rate": 1.0722154222766219e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17607557773590088,
|
|
"step": 220,
|
|
"valid_targets_mean": 6074.5,
|
|
"valid_targets_min": 2335
|
|
},
|
|
{
|
|
"epoch": 0.1928020565552699,
|
|
"grad_norm": 0.5123127191188075,
|
|
"learning_rate": 1.0966952264381885e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15591086447238922,
|
|
"step": 225,
|
|
"valid_targets_mean": 6394.2,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 0.19708654670094258,
|
|
"grad_norm": 0.5191104747458265,
|
|
"learning_rate": 1.1211750305997554e-05,
|
|
"loss": 0.3338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14223499596118927,
|
|
"step": 230,
|
|
"valid_targets_mean": 4574.8,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 0.20137103684661525,
|
|
"grad_norm": 0.5699877811233731,
|
|
"learning_rate": 1.1456548347613222e-05,
|
|
"loss": 0.3153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17597973346710205,
|
|
"step": 235,
|
|
"valid_targets_mean": 6795.1,
|
|
"valid_targets_min": 4224
|
|
},
|
|
{
|
|
"epoch": 0.20565552699228792,
|
|
"grad_norm": 0.5970363797480456,
|
|
"learning_rate": 1.1701346389228887e-05,
|
|
"loss": 0.3228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1517130434513092,
|
|
"step": 240,
|
|
"valid_targets_mean": 5152.9,
|
|
"valid_targets_min": 2470
|
|
},
|
|
{
|
|
"epoch": 0.2099400171379606,
|
|
"grad_norm": 0.5127097301248218,
|
|
"learning_rate": 1.1946144430844555e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.159349724650383,
|
|
"step": 245,
|
|
"valid_targets_mean": 5716.4,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 0.21422450728363324,
|
|
"grad_norm": 0.584380958903832,
|
|
"learning_rate": 1.2190942472460221e-05,
|
|
"loss": 0.3276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17096039652824402,
|
|
"step": 250,
|
|
"valid_targets_mean": 5353.8,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 0.2185089974293059,
|
|
"grad_norm": 0.531304528969124,
|
|
"learning_rate": 1.243574051407589e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.176926851272583,
|
|
"step": 255,
|
|
"valid_targets_mean": 5375.8,
|
|
"valid_targets_min": 2384
|
|
},
|
|
{
|
|
"epoch": 0.22279348757497858,
|
|
"grad_norm": 0.5610210920466541,
|
|
"learning_rate": 1.2680538555691554e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15306371450424194,
|
|
"step": 260,
|
|
"valid_targets_mean": 4906.6,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 0.22707797772065125,
|
|
"grad_norm": 0.6099494807045799,
|
|
"learning_rate": 1.2925336597307222e-05,
|
|
"loss": 0.3233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17797164618968964,
|
|
"step": 265,
|
|
"valid_targets_mean": 6262.9,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 0.23136246786632392,
|
|
"grad_norm": 0.5584816599275492,
|
|
"learning_rate": 1.3170134638922889e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14984680712223053,
|
|
"step": 270,
|
|
"valid_targets_mean": 5216.5,
|
|
"valid_targets_min": 3918
|
|
},
|
|
{
|
|
"epoch": 0.23564695801199656,
|
|
"grad_norm": 0.5581317615102905,
|
|
"learning_rate": 1.3414932680538557e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1488761305809021,
|
|
"step": 275,
|
|
"valid_targets_mean": 5854.2,
|
|
"valid_targets_min": 3345
|
|
},
|
|
{
|
|
"epoch": 0.23993144815766923,
|
|
"grad_norm": 0.7063875640569396,
|
|
"learning_rate": 1.3659730722154224e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1853543519973755,
|
|
"step": 280,
|
|
"valid_targets_mean": 3949.1,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 0.2442159383033419,
|
|
"grad_norm": 0.53488254992811,
|
|
"learning_rate": 1.3904528763769892e-05,
|
|
"loss": 0.321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14736245572566986,
|
|
"step": 285,
|
|
"valid_targets_mean": 4980.2,
|
|
"valid_targets_min": 1308
|
|
},
|
|
{
|
|
"epoch": 0.24850042844901457,
|
|
"grad_norm": 0.5414391694510241,
|
|
"learning_rate": 1.4149326805385557e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12899701297283173,
|
|
"step": 290,
|
|
"valid_targets_mean": 4972.6,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 0.2527849185946872,
|
|
"grad_norm": 0.6054725196706271,
|
|
"learning_rate": 1.4394124847001225e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13913224637508392,
|
|
"step": 295,
|
|
"valid_targets_mean": 4399.9,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 0.2570694087403599,
|
|
"grad_norm": 0.5637355859677816,
|
|
"learning_rate": 1.4638922888616893e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1314675658941269,
|
|
"step": 300,
|
|
"valid_targets_mean": 4433.2,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 0.26135389888603255,
|
|
"grad_norm": 0.5867368931627099,
|
|
"learning_rate": 1.488372093023256e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16789467632770538,
|
|
"step": 305,
|
|
"valid_targets_mean": 4875.8,
|
|
"valid_targets_min": 3050
|
|
},
|
|
{
|
|
"epoch": 0.2656383890317052,
|
|
"grad_norm": 0.8512860966478079,
|
|
"learning_rate": 1.5128518971848228e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17016926407814026,
|
|
"step": 310,
|
|
"valid_targets_mean": 5136.0,
|
|
"valid_targets_min": 1296
|
|
},
|
|
{
|
|
"epoch": 0.2699228791773779,
|
|
"grad_norm": 0.5870192100135584,
|
|
"learning_rate": 1.5373317013463894e-05,
|
|
"loss": 0.2978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17785808444023132,
|
|
"step": 315,
|
|
"valid_targets_mean": 4783.2,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 0.27420736932305056,
|
|
"grad_norm": 0.5658522055114601,
|
|
"learning_rate": 1.561811505507956e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.134462371468544,
|
|
"step": 320,
|
|
"valid_targets_mean": 4842.8,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 0.27849185946872324,
|
|
"grad_norm": 0.6039759670188529,
|
|
"learning_rate": 1.5862913096695227e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1433200240135193,
|
|
"step": 325,
|
|
"valid_targets_mean": 4665.6,
|
|
"valid_targets_min": 2454
|
|
},
|
|
{
|
|
"epoch": 0.2827763496143959,
|
|
"grad_norm": 0.6039885202347223,
|
|
"learning_rate": 1.6107711138310894e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13930335640907288,
|
|
"step": 330,
|
|
"valid_targets_mean": 3988.1,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 0.2870608397600686,
|
|
"grad_norm": 0.5444104462871615,
|
|
"learning_rate": 1.635250917992656e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1417446732521057,
|
|
"step": 335,
|
|
"valid_targets_mean": 4539.2,
|
|
"valid_targets_min": 2819
|
|
},
|
|
{
|
|
"epoch": 0.2913453299057412,
|
|
"grad_norm": 0.50219258851867,
|
|
"learning_rate": 1.659730722154223e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12210437655448914,
|
|
"step": 340,
|
|
"valid_targets_mean": 5315.8,
|
|
"valid_targets_min": 2531
|
|
},
|
|
{
|
|
"epoch": 0.29562982005141386,
|
|
"grad_norm": 0.6297237494404057,
|
|
"learning_rate": 1.6842105263157896e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15295211970806122,
|
|
"step": 345,
|
|
"valid_targets_mean": 3552.4,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 0.29991431019708653,
|
|
"grad_norm": 0.5297704310792776,
|
|
"learning_rate": 1.7086903304773563e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15931358933448792,
|
|
"step": 350,
|
|
"valid_targets_mean": 5920.4,
|
|
"valid_targets_min": 2610
|
|
},
|
|
{
|
|
"epoch": 0.3041988003427592,
|
|
"grad_norm": 0.5703340645186171,
|
|
"learning_rate": 1.7331701346389233e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15025386214256287,
|
|
"step": 355,
|
|
"valid_targets_mean": 5074.5,
|
|
"valid_targets_min": 2473
|
|
},
|
|
{
|
|
"epoch": 0.30848329048843187,
|
|
"grad_norm": 0.4830147558403847,
|
|
"learning_rate": 1.7576499388004896e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271696537733078,
|
|
"step": 360,
|
|
"valid_targets_mean": 5975.8,
|
|
"valid_targets_min": 2878
|
|
},
|
|
{
|
|
"epoch": 0.31276778063410454,
|
|
"grad_norm": 0.6325322902133472,
|
|
"learning_rate": 1.7821297429620566e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13799910247325897,
|
|
"step": 365,
|
|
"valid_targets_mean": 6035.0,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 0.3170522707797772,
|
|
"grad_norm": 0.5287748967291697,
|
|
"learning_rate": 1.8066095471236232e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14968207478523254,
|
|
"step": 370,
|
|
"valid_targets_mean": 5034.8,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 0.3213367609254499,
|
|
"grad_norm": 0.669848870896634,
|
|
"learning_rate": 1.83108935128519e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12943169474601746,
|
|
"step": 375,
|
|
"valid_targets_mean": 3075.2,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 0.32562125107112255,
|
|
"grad_norm": 0.5893188910141677,
|
|
"learning_rate": 1.8555691554467565e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1718309074640274,
|
|
"step": 380,
|
|
"valid_targets_mean": 4813.2,
|
|
"valid_targets_min": 2649
|
|
},
|
|
{
|
|
"epoch": 0.3299057412167952,
|
|
"grad_norm": 0.6019329357724723,
|
|
"learning_rate": 1.8800489596083232e-05,
|
|
"loss": 0.2917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15909942984580994,
|
|
"step": 385,
|
|
"valid_targets_mean": 4439.0,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 0.3341902313624679,
|
|
"grad_norm": 0.6064248819899891,
|
|
"learning_rate": 1.9045287637698898e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12156758457422256,
|
|
"step": 390,
|
|
"valid_targets_mean": 3861.8,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 0.3384747215081405,
|
|
"grad_norm": 0.5842534367398564,
|
|
"learning_rate": 1.9290085679314568e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.160810187458992,
|
|
"step": 395,
|
|
"valid_targets_mean": 4659.6,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 0.3427592116538132,
|
|
"grad_norm": 0.5268467691843103,
|
|
"learning_rate": 1.9534883720930235e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15826356410980225,
|
|
"step": 400,
|
|
"valid_targets_mean": 5544.1,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 0.34704370179948585,
|
|
"grad_norm": 0.6958394661491231,
|
|
"learning_rate": 1.97796817625459e-05,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16895712912082672,
|
|
"step": 405,
|
|
"valid_targets_mean": 5782.8,
|
|
"valid_targets_min": 2263
|
|
},
|
|
{
|
|
"epoch": 0.3513281919451585,
|
|
"grad_norm": 0.5948462583056694,
|
|
"learning_rate": 2.002447980416157e-05,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16228261590003967,
|
|
"step": 410,
|
|
"valid_targets_mean": 5028.5,
|
|
"valid_targets_min": 3210
|
|
},
|
|
{
|
|
"epoch": 0.3556126820908312,
|
|
"grad_norm": 0.5420726443350751,
|
|
"learning_rate": 2.0269277845777234e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13688191771507263,
|
|
"step": 415,
|
|
"valid_targets_mean": 5821.4,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 0.35989717223650386,
|
|
"grad_norm": 0.6388615140705679,
|
|
"learning_rate": 2.05140758873929e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14519023895263672,
|
|
"step": 420,
|
|
"valid_targets_mean": 4266.8,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 0.36418166238217653,
|
|
"grad_norm": 0.5663922132128151,
|
|
"learning_rate": 2.075887392900857e-05,
|
|
"loss": 0.2924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1524307131767273,
|
|
"step": 425,
|
|
"valid_targets_mean": 4807.2,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 0.3684661525278492,
|
|
"grad_norm": 0.5688733222055533,
|
|
"learning_rate": 2.1003671970624237e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11472992599010468,
|
|
"step": 430,
|
|
"valid_targets_mean": 3932.0,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 0.37275064267352187,
|
|
"grad_norm": 0.6223142453975377,
|
|
"learning_rate": 2.1248470012239903e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15172141790390015,
|
|
"step": 435,
|
|
"valid_targets_mean": 3571.1,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 0.37703513281919454,
|
|
"grad_norm": 0.5609083997959252,
|
|
"learning_rate": 2.149326805385557e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14583829045295715,
|
|
"step": 440,
|
|
"valid_targets_mean": 5311.2,
|
|
"valid_targets_min": 1171
|
|
},
|
|
{
|
|
"epoch": 0.38131962296486716,
|
|
"grad_norm": 0.5460504697544348,
|
|
"learning_rate": 2.173806609547124e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13169243931770325,
|
|
"step": 445,
|
|
"valid_targets_mean": 5064.8,
|
|
"valid_targets_min": 2680
|
|
},
|
|
{
|
|
"epoch": 0.3856041131105398,
|
|
"grad_norm": 0.5770188612901637,
|
|
"learning_rate": 2.1982864137086906e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12129063904285431,
|
|
"step": 450,
|
|
"valid_targets_mean": 5601.2,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 0.3898886032562125,
|
|
"grad_norm": 0.5993535554284934,
|
|
"learning_rate": 2.2227662178702573e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15587423741817474,
|
|
"step": 455,
|
|
"valid_targets_mean": 6190.5,
|
|
"valid_targets_min": 2977
|
|
},
|
|
{
|
|
"epoch": 0.39417309340188517,
|
|
"grad_norm": 0.5386585493381317,
|
|
"learning_rate": 2.2472460220318236e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14170946180820465,
|
|
"step": 460,
|
|
"valid_targets_mean": 4982.9,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 0.39845758354755784,
|
|
"grad_norm": 0.6195415175294577,
|
|
"learning_rate": 2.271725826193391e-05,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13214179873466492,
|
|
"step": 465,
|
|
"valid_targets_mean": 4013.9,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 0.4027420736932305,
|
|
"grad_norm": 0.5519123387328592,
|
|
"learning_rate": 2.2962056303549572e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13508766889572144,
|
|
"step": 470,
|
|
"valid_targets_mean": 5249.9,
|
|
"valid_targets_min": 3133
|
|
},
|
|
{
|
|
"epoch": 0.4070265638389032,
|
|
"grad_norm": 0.5009869019667028,
|
|
"learning_rate": 2.320685434516524e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14315488934516907,
|
|
"step": 475,
|
|
"valid_targets_mean": 5515.5,
|
|
"valid_targets_min": 3758
|
|
},
|
|
{
|
|
"epoch": 0.41131105398457585,
|
|
"grad_norm": 0.5626625359580815,
|
|
"learning_rate": 2.345165238678091e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16055189073085785,
|
|
"step": 480,
|
|
"valid_targets_mean": 5776.0,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 0.4155955441302485,
|
|
"grad_norm": 0.5449390116780272,
|
|
"learning_rate": 2.3696450428396575e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1312532126903534,
|
|
"step": 485,
|
|
"valid_targets_mean": 5009.4,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 0.4198800342759212,
|
|
"grad_norm": 0.5779586892568972,
|
|
"learning_rate": 2.394124847001224e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14948180317878723,
|
|
"step": 490,
|
|
"valid_targets_mean": 5899.0,
|
|
"valid_targets_min": 2260
|
|
},
|
|
{
|
|
"epoch": 0.4241645244215938,
|
|
"grad_norm": 0.6200627622851805,
|
|
"learning_rate": 2.4186046511627908e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13663563132286072,
|
|
"step": 495,
|
|
"valid_targets_mean": 4235.1,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 0.4284490145672665,
|
|
"grad_norm": 0.5205400982232026,
|
|
"learning_rate": 2.4430844553243578e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11656229943037033,
|
|
"step": 500,
|
|
"valid_targets_mean": 5687.9,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 0.43273350471293914,
|
|
"grad_norm": 0.47871740853032907,
|
|
"learning_rate": 2.4675642594859244e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11609447002410889,
|
|
"step": 505,
|
|
"valid_targets_mean": 5404.6,
|
|
"valid_targets_min": 3686
|
|
},
|
|
{
|
|
"epoch": 0.4370179948586118,
|
|
"grad_norm": 0.5492898934140984,
|
|
"learning_rate": 2.492044063647491e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14045745134353638,
|
|
"step": 510,
|
|
"valid_targets_mean": 5541.1,
|
|
"valid_targets_min": 3317
|
|
},
|
|
{
|
|
"epoch": 0.4413024850042845,
|
|
"grad_norm": 0.5071268634243311,
|
|
"learning_rate": 2.5165238678090574e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.128058522939682,
|
|
"step": 515,
|
|
"valid_targets_mean": 5796.8,
|
|
"valid_targets_min": 2077
|
|
},
|
|
{
|
|
"epoch": 0.44558697514995715,
|
|
"grad_norm": 0.5655382730838427,
|
|
"learning_rate": 2.5410036719706247e-05,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11492526531219482,
|
|
"step": 520,
|
|
"valid_targets_mean": 4854.9,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 0.4498714652956298,
|
|
"grad_norm": 0.5424893346687838,
|
|
"learning_rate": 2.565483476132191e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12241284549236298,
|
|
"step": 525,
|
|
"valid_targets_mean": 4384.1,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 0.4541559554413025,
|
|
"grad_norm": 0.5448333250427129,
|
|
"learning_rate": 2.5899632802937577e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13487297296524048,
|
|
"step": 530,
|
|
"valid_targets_mean": 5340.8,
|
|
"valid_targets_min": 2939
|
|
},
|
|
{
|
|
"epoch": 0.45844044558697516,
|
|
"grad_norm": 0.6187884073389983,
|
|
"learning_rate": 2.6144430844553243e-05,
|
|
"loss": 0.2807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13935431838035583,
|
|
"step": 535,
|
|
"valid_targets_mean": 4248.1,
|
|
"valid_targets_min": 1438
|
|
},
|
|
{
|
|
"epoch": 0.46272493573264784,
|
|
"grad_norm": 0.6001804492599332,
|
|
"learning_rate": 2.6389228886168913e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1647961139678955,
|
|
"step": 540,
|
|
"valid_targets_mean": 5800.1,
|
|
"valid_targets_min": 4068
|
|
},
|
|
{
|
|
"epoch": 0.4670094258783205,
|
|
"grad_norm": 0.5600479101604777,
|
|
"learning_rate": 2.663402692778458e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14143353700637817,
|
|
"step": 545,
|
|
"valid_targets_mean": 5761.4,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 0.4712939160239931,
|
|
"grad_norm": 0.5673387034583495,
|
|
"learning_rate": 2.6878824969400246e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14500755071640015,
|
|
"step": 550,
|
|
"valid_targets_mean": 4903.9,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 0.4755784061696658,
|
|
"grad_norm": 0.5691470882956529,
|
|
"learning_rate": 2.7123623011015916e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1349785029888153,
|
|
"step": 555,
|
|
"valid_targets_mean": 4998.4,
|
|
"valid_targets_min": 2318
|
|
},
|
|
{
|
|
"epoch": 0.47986289631533846,
|
|
"grad_norm": 0.5709082137504153,
|
|
"learning_rate": 2.7368421052631583e-05,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14183221757411957,
|
|
"step": 560,
|
|
"valid_targets_mean": 5866.0,
|
|
"valid_targets_min": 3081
|
|
},
|
|
{
|
|
"epoch": 0.48414738646101113,
|
|
"grad_norm": 0.5442903898637672,
|
|
"learning_rate": 2.761321909424725e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13423144817352295,
|
|
"step": 565,
|
|
"valid_targets_mean": 5355.0,
|
|
"valid_targets_min": 2979
|
|
},
|
|
{
|
|
"epoch": 0.4884318766066838,
|
|
"grad_norm": 0.5145090110407444,
|
|
"learning_rate": 2.7858017135862912e-05,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13521131873130798,
|
|
"step": 570,
|
|
"valid_targets_mean": 5537.5,
|
|
"valid_targets_min": 1981
|
|
},
|
|
{
|
|
"epoch": 0.49271636675235647,
|
|
"grad_norm": 0.5126476525977094,
|
|
"learning_rate": 2.8102815177478585e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2233107089996338,
|
|
"step": 575,
|
|
"valid_targets_mean": 5748.8,
|
|
"valid_targets_min": 2260
|
|
},
|
|
{
|
|
"epoch": 0.49700085689802914,
|
|
"grad_norm": 0.49860856959951844,
|
|
"learning_rate": 2.834761321909425e-05,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13818103075027466,
|
|
"step": 580,
|
|
"valid_targets_mean": 5683.2,
|
|
"valid_targets_min": 3056
|
|
},
|
|
{
|
|
"epoch": 0.5012853470437018,
|
|
"grad_norm": 0.6185441022844439,
|
|
"learning_rate": 2.8592411260709915e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13868600130081177,
|
|
"step": 585,
|
|
"valid_targets_mean": 4239.5,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 0.5055698371893744,
|
|
"grad_norm": 0.4868317952453862,
|
|
"learning_rate": 2.883720930232558e-05,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12314537912607193,
|
|
"step": 590,
|
|
"valid_targets_mean": 7541.9,
|
|
"valid_targets_min": 4887
|
|
},
|
|
{
|
|
"epoch": 0.5098543273350471,
|
|
"grad_norm": 0.5956933077864743,
|
|
"learning_rate": 2.908200734394125e-05,
|
|
"loss": 0.2736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16916868090629578,
|
|
"step": 595,
|
|
"valid_targets_mean": 5266.6,
|
|
"valid_targets_min": 2106
|
|
},
|
|
{
|
|
"epoch": 0.5141388174807198,
|
|
"grad_norm": 0.5145182472434158,
|
|
"learning_rate": 2.9326805385556918e-05,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12932124733924866,
|
|
"step": 600,
|
|
"valid_targets_mean": 5444.1,
|
|
"valid_targets_min": 2584
|
|
},
|
|
{
|
|
"epoch": 0.5184233076263924,
|
|
"grad_norm": 0.5148978628448854,
|
|
"learning_rate": 2.9571603427172584e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10980220139026642,
|
|
"step": 605,
|
|
"valid_targets_mean": 4968.5,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 0.5227077977720651,
|
|
"grad_norm": 0.6546541289847614,
|
|
"learning_rate": 2.9816401468788254e-05,
|
|
"loss": 0.2837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17208120226860046,
|
|
"step": 610,
|
|
"valid_targets_mean": 4161.1,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 0.5269922879177378,
|
|
"grad_norm": 0.5713763418705968,
|
|
"learning_rate": 3.006119951040392e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14918869733810425,
|
|
"step": 615,
|
|
"valid_targets_mean": 4642.0,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 0.5312767780634104,
|
|
"grad_norm": 0.582802473484628,
|
|
"learning_rate": 3.0305997552019587e-05,
|
|
"loss": 0.2789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13957634568214417,
|
|
"step": 620,
|
|
"valid_targets_mean": 4580.8,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 0.5355612682090831,
|
|
"grad_norm": 0.5074017899375523,
|
|
"learning_rate": 3.0550795593635254e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11979982256889343,
|
|
"step": 625,
|
|
"valid_targets_mean": 4226.1,
|
|
"valid_targets_min": 3320
|
|
},
|
|
{
|
|
"epoch": 0.5398457583547558,
|
|
"grad_norm": 0.7080288701071616,
|
|
"learning_rate": 3.079559363525092e-05,
|
|
"loss": 0.2948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12488646060228348,
|
|
"step": 630,
|
|
"valid_targets_mean": 3806.6,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 0.5441302485004285,
|
|
"grad_norm": 0.5523074527657701,
|
|
"learning_rate": 3.104039167686659e-05,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14442437887191772,
|
|
"step": 635,
|
|
"valid_targets_mean": 6145.4,
|
|
"valid_targets_min": 4748
|
|
},
|
|
{
|
|
"epoch": 0.5484147386461011,
|
|
"grad_norm": 0.5839924266213236,
|
|
"learning_rate": 3.128518971848225e-05,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16150975227355957,
|
|
"step": 640,
|
|
"valid_targets_mean": 5677.8,
|
|
"valid_targets_min": 2736
|
|
},
|
|
{
|
|
"epoch": 0.5526992287917738,
|
|
"grad_norm": 0.5513476463866779,
|
|
"learning_rate": 3.152998776009792e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13722610473632812,
|
|
"step": 645,
|
|
"valid_targets_mean": 4844.5,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 0.5569837189374465,
|
|
"grad_norm": 0.5616002902466735,
|
|
"learning_rate": 3.177478580171359e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1205633282661438,
|
|
"step": 650,
|
|
"valid_targets_mean": 4022.0,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 0.5612682090831191,
|
|
"grad_norm": 0.5025577976134684,
|
|
"learning_rate": 3.201958384332925e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1423630565404892,
|
|
"step": 655,
|
|
"valid_targets_mean": 6668.2,
|
|
"valid_targets_min": 3320
|
|
},
|
|
{
|
|
"epoch": 0.5655526992287918,
|
|
"grad_norm": 0.8192137076290354,
|
|
"learning_rate": 3.226438188494492e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16772106289863586,
|
|
"step": 660,
|
|
"valid_targets_mean": 4754.0,
|
|
"valid_targets_min": 2355
|
|
},
|
|
{
|
|
"epoch": 0.5698371893744645,
|
|
"grad_norm": 0.5727839053126536,
|
|
"learning_rate": 3.250917992656059e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1537611484527588,
|
|
"step": 665,
|
|
"valid_targets_mean": 5351.6,
|
|
"valid_targets_min": 3234
|
|
},
|
|
{
|
|
"epoch": 0.5741216795201372,
|
|
"grad_norm": 0.5444238697417997,
|
|
"learning_rate": 3.275397796817626e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14869078993797302,
|
|
"step": 670,
|
|
"valid_targets_mean": 5715.8,
|
|
"valid_targets_min": 3830
|
|
},
|
|
{
|
|
"epoch": 0.5784061696658098,
|
|
"grad_norm": 0.5836983468782506,
|
|
"learning_rate": 3.2998776009791925e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14694061875343323,
|
|
"step": 675,
|
|
"valid_targets_mean": 4126.9,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 0.5826906598114824,
|
|
"grad_norm": 0.6568378900260874,
|
|
"learning_rate": 3.324357405140759e-05,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15956830978393555,
|
|
"step": 680,
|
|
"valid_targets_mean": 4040.9,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 0.586975149957155,
|
|
"grad_norm": 0.5145475286597274,
|
|
"learning_rate": 3.348837209302326e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12406280636787415,
|
|
"step": 685,
|
|
"valid_targets_mean": 5950.8,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 0.5912596401028277,
|
|
"grad_norm": 0.5558673446263661,
|
|
"learning_rate": 3.3733170134638925e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12211956083774567,
|
|
"step": 690,
|
|
"valid_targets_mean": 6111.4,
|
|
"valid_targets_min": 3072
|
|
},
|
|
{
|
|
"epoch": 0.5955441302485004,
|
|
"grad_norm": 0.4747838546803119,
|
|
"learning_rate": 3.397796817625459e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12097050994634628,
|
|
"step": 695,
|
|
"valid_targets_mean": 7185.4,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 0.5998286203941731,
|
|
"grad_norm": 0.5194524552843034,
|
|
"learning_rate": 3.422276621787026e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12255051732063293,
|
|
"step": 700,
|
|
"valid_targets_mean": 5736.5,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 0.6041131105398457,
|
|
"grad_norm": 0.5106674807425143,
|
|
"learning_rate": 3.446756425948593e-05,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11906369030475616,
|
|
"step": 705,
|
|
"valid_targets_mean": 5204.1,
|
|
"valid_targets_min": 3741
|
|
},
|
|
{
|
|
"epoch": 0.6083976006855184,
|
|
"grad_norm": 0.6010561442816965,
|
|
"learning_rate": 3.471236230110159e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466081142425537,
|
|
"step": 710,
|
|
"valid_targets_mean": 3954.8,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 0.6126820908311911,
|
|
"grad_norm": 0.5256518508777503,
|
|
"learning_rate": 3.495716034271726e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12180833518505096,
|
|
"step": 715,
|
|
"valid_targets_mean": 5649.9,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 0.6169665809768637,
|
|
"grad_norm": 0.5968635555634715,
|
|
"learning_rate": 3.5201958384332924e-05,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16054174304008484,
|
|
"step": 720,
|
|
"valid_targets_mean": 5424.4,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 0.6212510711225364,
|
|
"grad_norm": 0.5087542046238016,
|
|
"learning_rate": 3.54467564259486e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13530471920967102,
|
|
"step": 725,
|
|
"valid_targets_mean": 5822.2,
|
|
"valid_targets_min": 3292
|
|
},
|
|
{
|
|
"epoch": 0.6255355612682091,
|
|
"grad_norm": 0.5308493101137274,
|
|
"learning_rate": 3.5691554467564264e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12818732857704163,
|
|
"step": 730,
|
|
"valid_targets_mean": 4503.0,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 0.6298200514138818,
|
|
"grad_norm": 0.531906996873097,
|
|
"learning_rate": 3.593635250917993e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1301267445087433,
|
|
"step": 735,
|
|
"valid_targets_mean": 4668.6,
|
|
"valid_targets_min": 2431
|
|
},
|
|
{
|
|
"epoch": 0.6341045415595544,
|
|
"grad_norm": 0.5963099527547568,
|
|
"learning_rate": 3.6181150550795597e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16435176134109497,
|
|
"step": 740,
|
|
"valid_targets_mean": 5506.2,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 0.6383890317052271,
|
|
"grad_norm": 0.6155164579618286,
|
|
"learning_rate": 3.642594859241126e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13344869017601013,
|
|
"step": 745,
|
|
"valid_targets_mean": 5700.5,
|
|
"valid_targets_min": 3180
|
|
},
|
|
{
|
|
"epoch": 0.6426735218508998,
|
|
"grad_norm": 0.501126635501298,
|
|
"learning_rate": 3.667074663402693e-05,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12969595193862915,
|
|
"step": 750,
|
|
"valid_targets_mean": 5380.1,
|
|
"valid_targets_min": 2965
|
|
},
|
|
{
|
|
"epoch": 0.6469580119965724,
|
|
"grad_norm": 0.6452184273012488,
|
|
"learning_rate": 3.6915544675642596e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391134411096573,
|
|
"step": 755,
|
|
"valid_targets_mean": 4782.4,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 0.6512425021422451,
|
|
"grad_norm": 0.5469747322011657,
|
|
"learning_rate": 3.716034271725827e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12635910511016846,
|
|
"step": 760,
|
|
"valid_targets_mean": 4953.2,
|
|
"valid_targets_min": 2284
|
|
},
|
|
{
|
|
"epoch": 0.6555269922879178,
|
|
"grad_norm": 0.9231721523981166,
|
|
"learning_rate": 3.740514075887393e-05,
|
|
"loss": 0.2799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1526474952697754,
|
|
"step": 765,
|
|
"valid_targets_mean": 3834.5,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 0.6598114824335904,
|
|
"grad_norm": 0.5371949962004401,
|
|
"learning_rate": 3.7649938800489595e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.123946413397789,
|
|
"step": 770,
|
|
"valid_targets_mean": 3959.9,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 0.6640959725792631,
|
|
"grad_norm": 0.515907944612913,
|
|
"learning_rate": 3.789473684210526e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11019599437713623,
|
|
"step": 775,
|
|
"valid_targets_mean": 5306.2,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 0.6683804627249358,
|
|
"grad_norm": 0.535884454819786,
|
|
"learning_rate": 3.8139534883720935e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11634519696235657,
|
|
"step": 780,
|
|
"valid_targets_mean": 4438.4,
|
|
"valid_targets_min": 2185
|
|
},
|
|
{
|
|
"epoch": 0.6726649528706083,
|
|
"grad_norm": 0.5779089412080396,
|
|
"learning_rate": 3.83843329253366e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12202630937099457,
|
|
"step": 785,
|
|
"valid_targets_mean": 4871.4,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 0.676949443016281,
|
|
"grad_norm": 0.7054778397195679,
|
|
"learning_rate": 3.862913096695227e-05,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19049608707427979,
|
|
"step": 790,
|
|
"valid_targets_mean": 4367.0,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 0.6812339331619537,
|
|
"grad_norm": 0.5847261365100348,
|
|
"learning_rate": 3.8873929008567935e-05,
|
|
"loss": 0.2775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13841959834098816,
|
|
"step": 795,
|
|
"valid_targets_mean": 4756.6,
|
|
"valid_targets_min": 2879
|
|
},
|
|
{
|
|
"epoch": 0.6855184233076264,
|
|
"grad_norm": 0.48740615813189136,
|
|
"learning_rate": 3.91187270501836e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12658631801605225,
|
|
"step": 800,
|
|
"valid_targets_mean": 5949.4,
|
|
"valid_targets_min": 2546
|
|
},
|
|
{
|
|
"epoch": 0.689802913453299,
|
|
"grad_norm": 0.520041364867478,
|
|
"learning_rate": 3.936352509179927e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271926909685135,
|
|
"step": 805,
|
|
"valid_targets_mean": 4749.2,
|
|
"valid_targets_min": 2381
|
|
},
|
|
{
|
|
"epoch": 0.6940874035989717,
|
|
"grad_norm": 0.5101189199156749,
|
|
"learning_rate": 3.9608323133414934e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11503781378269196,
|
|
"step": 810,
|
|
"valid_targets_mean": 4865.8,
|
|
"valid_targets_min": 1847
|
|
},
|
|
{
|
|
"epoch": 0.6983718937446444,
|
|
"grad_norm": 0.5265392292861037,
|
|
"learning_rate": 3.985312117503061e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12135123461484909,
|
|
"step": 815,
|
|
"valid_targets_mean": 6132.9,
|
|
"valid_targets_min": 4072
|
|
},
|
|
{
|
|
"epoch": 0.702656383890317,
|
|
"grad_norm": 0.47800731568934585,
|
|
"learning_rate": 3.999999269620268e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12147536873817444,
|
|
"step": 820,
|
|
"valid_targets_mean": 6568.1,
|
|
"valid_targets_min": 4664
|
|
},
|
|
{
|
|
"epoch": 0.7069408740359897,
|
|
"grad_norm": 0.537235188878352,
|
|
"learning_rate": 3.9999910528544e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.104698546230793,
|
|
"step": 825,
|
|
"valid_targets_mean": 5815.1,
|
|
"valid_targets_min": 4353
|
|
},
|
|
{
|
|
"epoch": 0.7112253641816624,
|
|
"grad_norm": 0.5159489954530644,
|
|
"learning_rate": 3.999973706385632e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13141869008541107,
|
|
"step": 830,
|
|
"valid_targets_mean": 5879.0,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 0.715509854327335,
|
|
"grad_norm": 0.5585389197621717,
|
|
"learning_rate": 3.999947230293148e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14570122957229614,
|
|
"step": 835,
|
|
"valid_targets_mean": 5374.4,
|
|
"valid_targets_min": 3055
|
|
},
|
|
{
|
|
"epoch": 0.7197943444730077,
|
|
"grad_norm": 0.48092488487972956,
|
|
"learning_rate": 3.999911624697808e-05,
|
|
"loss": 0.269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12411829084157944,
|
|
"step": 840,
|
|
"valid_targets_mean": 5068.4,
|
|
"valid_targets_min": 2888
|
|
},
|
|
{
|
|
"epoch": 0.7240788346186804,
|
|
"grad_norm": 0.5805619507516684,
|
|
"learning_rate": 3.9998668897621475e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12677715718746185,
|
|
"step": 845,
|
|
"valid_targets_mean": 4666.0,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 0.7283633247643531,
|
|
"grad_norm": 0.46425962080468136,
|
|
"learning_rate": 3.999813025690375e-05,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14648611843585968,
|
|
"step": 850,
|
|
"valid_targets_mean": 6841.4,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 0.7326478149100257,
|
|
"grad_norm": 0.6069969870776635,
|
|
"learning_rate": 3.999750032728374e-05,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12701371312141418,
|
|
"step": 855,
|
|
"valid_targets_mean": 4912.1,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 0.7369323050556984,
|
|
"grad_norm": 0.6481210450101693,
|
|
"learning_rate": 3.999677911163699e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17122164368629456,
|
|
"step": 860,
|
|
"valid_targets_mean": 6041.9,
|
|
"valid_targets_min": 3392
|
|
},
|
|
{
|
|
"epoch": 0.7412167952013711,
|
|
"grad_norm": 0.527730875932154,
|
|
"learning_rate": 3.9995966613255745e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13581043481826782,
|
|
"step": 865,
|
|
"valid_targets_mean": 5484.5,
|
|
"valid_targets_min": 3171
|
|
},
|
|
{
|
|
"epoch": 0.7455012853470437,
|
|
"grad_norm": 0.5952293340519983,
|
|
"learning_rate": 3.999506283584897e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.152851864695549,
|
|
"step": 870,
|
|
"valid_targets_mean": 5642.9,
|
|
"valid_targets_min": 3182
|
|
},
|
|
{
|
|
"epoch": 0.7497857754927164,
|
|
"grad_norm": 0.5850366617796318,
|
|
"learning_rate": 3.999406778354229e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15019948780536652,
|
|
"step": 875,
|
|
"valid_targets_mean": 6524.1,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 0.7540702656383891,
|
|
"grad_norm": 0.575250820294863,
|
|
"learning_rate": 3.999298146087799e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15052559971809387,
|
|
"step": 880,
|
|
"valid_targets_mean": 4841.0,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 0.7583547557840618,
|
|
"grad_norm": 0.5493149132399983,
|
|
"learning_rate": 3.9991803872814984e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13368487358093262,
|
|
"step": 885,
|
|
"valid_targets_mean": 5288.2,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 0.7626392459297343,
|
|
"grad_norm": 0.6160371683158595,
|
|
"learning_rate": 3.9990535024728834e-05,
|
|
"loss": 0.2663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12840642035007477,
|
|
"step": 890,
|
|
"valid_targets_mean": 4293.9,
|
|
"valid_targets_min": 2469
|
|
},
|
|
{
|
|
"epoch": 0.766923736075407,
|
|
"grad_norm": 0.5101580762550667,
|
|
"learning_rate": 3.9989174922411645e-05,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10666569322347641,
|
|
"step": 895,
|
|
"valid_targets_mean": 5572.5,
|
|
"valid_targets_min": 2629
|
|
},
|
|
{
|
|
"epoch": 0.7712082262210797,
|
|
"grad_norm": 0.5267886267732871,
|
|
"learning_rate": 3.998772357207212e-05,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15602833032608032,
|
|
"step": 900,
|
|
"valid_targets_mean": 5127.2,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 0.7754927163667523,
|
|
"grad_norm": 0.6179003743095123,
|
|
"learning_rate": 3.9986180980335495e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11543315649032593,
|
|
"step": 905,
|
|
"valid_targets_mean": 4137.4,
|
|
"valid_targets_min": 2362
|
|
},
|
|
{
|
|
"epoch": 0.779777206512425,
|
|
"grad_norm": 0.7245223925500395,
|
|
"learning_rate": 3.998454715424349e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14181703329086304,
|
|
"step": 910,
|
|
"valid_targets_mean": 4639.0,
|
|
"valid_targets_min": 2314
|
|
},
|
|
{
|
|
"epoch": 0.7840616966580977,
|
|
"grad_norm": 0.566536142916355,
|
|
"learning_rate": 3.9982822101254325e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14244404435157776,
|
|
"step": 915,
|
|
"valid_targets_mean": 5085.1,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 0.7883461868037703,
|
|
"grad_norm": 0.5345840659362985,
|
|
"learning_rate": 3.998100582924263e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11293502151966095,
|
|
"step": 920,
|
|
"valid_targets_mean": 4794.4,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 0.792630676949443,
|
|
"grad_norm": 0.5659991348613919,
|
|
"learning_rate": 3.997909834649947e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12308050692081451,
|
|
"step": 925,
|
|
"valid_targets_mean": 4838.2,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 0.7969151670951157,
|
|
"grad_norm": 0.6133976264224211,
|
|
"learning_rate": 3.997709966173224e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13634806871414185,
|
|
"step": 930,
|
|
"valid_targets_mean": 5017.0,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 0.8011996572407883,
|
|
"grad_norm": 0.5073324638405171,
|
|
"learning_rate": 3.99750097840647e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12453116476535797,
|
|
"step": 935,
|
|
"valid_targets_mean": 4123.8,
|
|
"valid_targets_min": 2410
|
|
},
|
|
{
|
|
"epoch": 0.805484147386461,
|
|
"grad_norm": 0.629776260395206,
|
|
"learning_rate": 3.9972828723036866e-05,
|
|
"loss": 0.2635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17246073484420776,
|
|
"step": 940,
|
|
"valid_targets_mean": 5477.5,
|
|
"valid_targets_min": 2430
|
|
},
|
|
{
|
|
"epoch": 0.8097686375321337,
|
|
"grad_norm": 0.536197964984172,
|
|
"learning_rate": 3.9970556488605005e-05,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1453799456357956,
|
|
"step": 945,
|
|
"valid_targets_mean": 5394.2,
|
|
"valid_targets_min": 2969
|
|
},
|
|
{
|
|
"epoch": 0.8140531276778064,
|
|
"grad_norm": 0.519512629373522,
|
|
"learning_rate": 3.996819309114157e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13137950003147125,
|
|
"step": 950,
|
|
"valid_targets_mean": 5013.2,
|
|
"valid_targets_min": 208
|
|
},
|
|
{
|
|
"epoch": 0.818337617823479,
|
|
"grad_norm": 0.5279654224402252,
|
|
"learning_rate": 3.996573854143517e-05,
|
|
"loss": 0.2415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12292104959487915,
|
|
"step": 955,
|
|
"valid_targets_mean": 5456.9,
|
|
"valid_targets_min": 2518
|
|
},
|
|
{
|
|
"epoch": 0.8226221079691517,
|
|
"grad_norm": 0.530627611144268,
|
|
"learning_rate": 3.9963192850690505e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14889578521251678,
|
|
"step": 960,
|
|
"valid_targets_mean": 5652.2,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 0.8269065981148244,
|
|
"grad_norm": 0.5292718935261117,
|
|
"learning_rate": 3.996055603052834e-05,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426638662815094,
|
|
"step": 965,
|
|
"valid_targets_mean": 5372.0,
|
|
"valid_targets_min": 2310
|
|
},
|
|
{
|
|
"epoch": 0.831191088260497,
|
|
"grad_norm": 0.49393157060692316,
|
|
"learning_rate": 3.995782809298541e-05,
|
|
"loss": 0.2672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13456028699874878,
|
|
"step": 970,
|
|
"valid_targets_mean": 5692.2,
|
|
"valid_targets_min": 3007
|
|
},
|
|
{
|
|
"epoch": 0.8354755784061697,
|
|
"grad_norm": 0.5093574922007108,
|
|
"learning_rate": 3.99550090505144e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11772598326206207,
|
|
"step": 975,
|
|
"valid_targets_mean": 4665.6,
|
|
"valid_targets_min": 2811
|
|
},
|
|
{
|
|
"epoch": 0.8397600685518424,
|
|
"grad_norm": 0.5389340710537845,
|
|
"learning_rate": 3.995209891598389e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10856447368860245,
|
|
"step": 980,
|
|
"valid_targets_mean": 4917.9,
|
|
"valid_targets_min": 2593
|
|
},
|
|
{
|
|
"epoch": 0.844044558697515,
|
|
"grad_norm": 0.5633173022193055,
|
|
"learning_rate": 3.994909770267826e-05,
|
|
"loss": 0.2388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0946207195520401,
|
|
"step": 985,
|
|
"valid_targets_mean": 5132.9,
|
|
"valid_targets_min": 2355
|
|
},
|
|
{
|
|
"epoch": 0.8483290488431876,
|
|
"grad_norm": 0.5661047160597428,
|
|
"learning_rate": 3.994600542429766e-05,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13344813883304596,
|
|
"step": 990,
|
|
"valid_targets_mean": 5267.4,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 0.8526135389888603,
|
|
"grad_norm": 0.4974712864932079,
|
|
"learning_rate": 3.994282209495796e-05,
|
|
"loss": 0.25,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10783732682466507,
|
|
"step": 995,
|
|
"valid_targets_mean": 6089.4,
|
|
"valid_targets_min": 2375
|
|
},
|
|
{
|
|
"epoch": 0.856898029134533,
|
|
"grad_norm": 0.5203686372043815,
|
|
"learning_rate": 3.9939547729190635e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11107154190540314,
|
|
"step": 1000,
|
|
"valid_targets_mean": 4804.4,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 0.8611825192802056,
|
|
"grad_norm": 0.5095587249033884,
|
|
"learning_rate": 3.993618234194276e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12677043676376343,
|
|
"step": 1005,
|
|
"valid_targets_mean": 5111.6,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 0.8654670094258783,
|
|
"grad_norm": 0.5583450696312183,
|
|
"learning_rate": 3.993272594857688e-05,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10360285639762878,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3846.5,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 0.869751499571551,
|
|
"grad_norm": 0.52221496646814,
|
|
"learning_rate": 3.9929178564871e-05,
|
|
"loss": 0.28,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11834236234426498,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4285.2,
|
|
"valid_targets_min": 2182
|
|
},
|
|
{
|
|
"epoch": 0.8740359897172236,
|
|
"grad_norm": 0.4630768401778714,
|
|
"learning_rate": 3.9925540207018476e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11245059967041016,
|
|
"step": 1020,
|
|
"valid_targets_mean": 5297.0,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 0.8783204798628963,
|
|
"grad_norm": 0.5543771552921872,
|
|
"learning_rate": 3.992181089162793e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11139364540576935,
|
|
"step": 1025,
|
|
"valid_targets_mean": 5065.1,
|
|
"valid_targets_min": 2356
|
|
},
|
|
{
|
|
"epoch": 0.882604970008569,
|
|
"grad_norm": 0.4831163743420262,
|
|
"learning_rate": 3.991799063572323e-05,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1225896030664444,
|
|
"step": 1030,
|
|
"valid_targets_mean": 5435.6,
|
|
"valid_targets_min": 2743
|
|
},
|
|
{
|
|
"epoch": 0.8868894601542416,
|
|
"grad_norm": 0.48868600865654355,
|
|
"learning_rate": 3.991407945674333e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14089226722717285,
|
|
"step": 1035,
|
|
"valid_targets_mean": 4377.1,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 0.8911739502999143,
|
|
"grad_norm": 0.56988270931451,
|
|
"learning_rate": 3.9910077372542275e-05,
|
|
"loss": 0.2612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14983482658863068,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4440.5,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 0.895458440445587,
|
|
"grad_norm": 0.6135335700902131,
|
|
"learning_rate": 3.990598440138906e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11355569958686829,
|
|
"step": 1045,
|
|
"valid_targets_mean": 5958.2,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 0.8997429305912596,
|
|
"grad_norm": 0.5907089677929637,
|
|
"learning_rate": 3.9901800561967585e-05,
|
|
"loss": 0.2701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.142267107963562,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5085.5,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 0.9040274207369323,
|
|
"grad_norm": 0.5304317761747265,
|
|
"learning_rate": 3.9897525873376536e-05,
|
|
"loss": 0.2643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1555720418691635,
|
|
"step": 1055,
|
|
"valid_targets_mean": 5944.8,
|
|
"valid_targets_min": 2371
|
|
},
|
|
{
|
|
"epoch": 0.908311910882605,
|
|
"grad_norm": 0.5288262253537347,
|
|
"learning_rate": 3.9893160355129314e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10222537070512772,
|
|
"step": 1060,
|
|
"valid_targets_mean": 4336.6,
|
|
"valid_targets_min": 1612
|
|
},
|
|
{
|
|
"epoch": 0.9125964010282777,
|
|
"grad_norm": 0.516164532221851,
|
|
"learning_rate": 3.988870402715395e-05,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09846851974725723,
|
|
"step": 1065,
|
|
"valid_targets_mean": 4531.5,
|
|
"valid_targets_min": 2632
|
|
},
|
|
{
|
|
"epoch": 0.9168808911739503,
|
|
"grad_norm": 0.5836994718154224,
|
|
"learning_rate": 3.988415690979302e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16352909803390503,
|
|
"step": 1070,
|
|
"valid_targets_mean": 5046.2,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 0.921165381319623,
|
|
"grad_norm": 0.504311532246728,
|
|
"learning_rate": 3.987951902380352e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1278531849384308,
|
|
"step": 1075,
|
|
"valid_targets_mean": 4596.0,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 0.9254498714652957,
|
|
"grad_norm": 0.5683140754140763,
|
|
"learning_rate": 3.987479039035681e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14287438988685608,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3734.8,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 0.9297343616109683,
|
|
"grad_norm": 0.6658402907336981,
|
|
"learning_rate": 3.986997103103851e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1435733586549759,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4683.5,
|
|
"valid_targets_min": 2230
|
|
},
|
|
{
|
|
"epoch": 0.934018851756641,
|
|
"grad_norm": 0.5336490395553605,
|
|
"learning_rate": 3.986506096784835e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09944146126508713,
|
|
"step": 1090,
|
|
"valid_targets_mean": 5346.2,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 0.9383033419023136,
|
|
"grad_norm": 0.5146882277747881,
|
|
"learning_rate": 3.986006022320016e-05,
|
|
"loss": 0.259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1226465106010437,
|
|
"step": 1095,
|
|
"valid_targets_mean": 5573.5,
|
|
"valid_targets_min": 2145
|
|
},
|
|
{
|
|
"epoch": 0.9425878320479862,
|
|
"grad_norm": 0.5248136140897455,
|
|
"learning_rate": 3.985496881992169e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310267150402069,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5006.2,
|
|
"valid_targets_min": 2506
|
|
},
|
|
{
|
|
"epoch": 0.9468723221936589,
|
|
"grad_norm": 0.4963079106449318,
|
|
"learning_rate": 3.9849786781254545e-05,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11951654404401779,
|
|
"step": 1105,
|
|
"valid_targets_mean": 5100.5,
|
|
"valid_targets_min": 1529
|
|
},
|
|
{
|
|
"epoch": 0.9511568123393316,
|
|
"grad_norm": 0.5099594473574132,
|
|
"learning_rate": 3.9844514130854074e-05,
|
|
"loss": 0.2546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16315767168998718,
|
|
"step": 1110,
|
|
"valid_targets_mean": 6070.2,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 0.9554413024850043,
|
|
"grad_norm": 0.5985623194305978,
|
|
"learning_rate": 3.9839150892789246e-05,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10299926996231079,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3485.2,
|
|
"valid_targets_min": 182
|
|
},
|
|
{
|
|
"epoch": 0.9597257926306769,
|
|
"grad_norm": 0.4687631245915332,
|
|
"learning_rate": 3.983369709154255e-05,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09602198004722595,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4476.5,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 0.9640102827763496,
|
|
"grad_norm": 0.5635958054071022,
|
|
"learning_rate": 3.98281527520099e-05,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11026757955551147,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4010.4,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 0.9682947729220223,
|
|
"grad_norm": 0.5665722329143809,
|
|
"learning_rate": 3.9822517899500486e-05,
|
|
"loss": 0.2675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1259326934814453,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4350.8,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 0.9725792630676949,
|
|
"grad_norm": 0.5059137742295718,
|
|
"learning_rate": 3.981679255973669e-05,
|
|
"loss": 0.2559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1195632740855217,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5862.1,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 0.9768637532133676,
|
|
"grad_norm": 0.4913048915853541,
|
|
"learning_rate": 3.981097675885396e-05,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1371796727180481,
|
|
"step": 1140,
|
|
"valid_targets_mean": 5177.4,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 0.9811482433590403,
|
|
"grad_norm": 0.5254857739233036,
|
|
"learning_rate": 3.980507052340068e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.120770163834095,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5489.5,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 0.9854327335047129,
|
|
"grad_norm": 0.5748653058226206,
|
|
"learning_rate": 3.9799073880338055e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903972923755646,
|
|
"step": 1150,
|
|
"valid_targets_mean": 5864.4,
|
|
"valid_targets_min": 3012
|
|
},
|
|
{
|
|
"epoch": 0.9897172236503856,
|
|
"grad_norm": 0.47846029169056215,
|
|
"learning_rate": 3.979298685703999e-05,
|
|
"loss": 0.2414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09142991900444031,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4676.9,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 0.9940017137960583,
|
|
"grad_norm": 0.49721265958922034,
|
|
"learning_rate": 3.978680948129297e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12009415030479431,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4889.5,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 0.998286203941731,
|
|
"grad_norm": 0.5997068908898251,
|
|
"learning_rate": 3.978054178129593e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16987952589988708,
|
|
"step": 1165,
|
|
"valid_targets_mean": 5021.4,
|
|
"valid_targets_min": 3299
|
|
},
|
|
{
|
|
"epoch": 1.0025706940874035,
|
|
"grad_norm": 0.5576183640944067,
|
|
"learning_rate": 3.977418378566011e-05,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12192939221858978,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4643.8,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 1.0068551842330762,
|
|
"grad_norm": 0.6035479518237012,
|
|
"learning_rate": 3.976773552340894e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1091715395450592,
|
|
"step": 1175,
|
|
"valid_targets_mean": 5468.6,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 1.0111396743787489,
|
|
"grad_norm": 0.5524878515251019,
|
|
"learning_rate": 3.9761197023977924e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10132533311843872,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4217.8,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 1.0154241645244215,
|
|
"grad_norm": 0.5302166537716149,
|
|
"learning_rate": 3.975456831721447e-05,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12239775061607361,
|
|
"step": 1185,
|
|
"valid_targets_mean": 4477.5,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 1.0197086546700942,
|
|
"grad_norm": 0.8143657797961059,
|
|
"learning_rate": 3.974784943337776e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11950390040874481,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3528.0,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 1.0239931448157669,
|
|
"grad_norm": 0.5145644173556422,
|
|
"learning_rate": 3.9741040403138664e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11581888794898987,
|
|
"step": 1195,
|
|
"valid_targets_mean": 5046.9,
|
|
"valid_targets_min": 3252
|
|
},
|
|
{
|
|
"epoch": 1.0282776349614395,
|
|
"grad_norm": 0.5096075629051414,
|
|
"learning_rate": 3.9734141257579516e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11322258412837982,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5094.9,
|
|
"valid_targets_min": 1764
|
|
},
|
|
{
|
|
"epoch": 1.0325621251071122,
|
|
"grad_norm": 0.5452129236572999,
|
|
"learning_rate": 3.972715202819403e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1197786033153534,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4397.4,
|
|
"valid_targets_min": 1365
|
|
},
|
|
{
|
|
"epoch": 1.0368466152527849,
|
|
"grad_norm": 0.4836356248918647,
|
|
"learning_rate": 3.9720072746887154e-05,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11884791404008865,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5713.0,
|
|
"valid_targets_min": 3133
|
|
},
|
|
{
|
|
"epoch": 1.0411311053984575,
|
|
"grad_norm": 0.5286827396131738,
|
|
"learning_rate": 3.9712903445974894e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11177793145179749,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4789.9,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 1.0454155955441302,
|
|
"grad_norm": 0.44979507798907414,
|
|
"learning_rate": 3.9705644158184184e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08846048265695572,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5322.9,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 1.0497000856898029,
|
|
"grad_norm": 0.48284818931206736,
|
|
"learning_rate": 3.969829491665275e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1184084340929985,
|
|
"step": 1225,
|
|
"valid_targets_mean": 5108.2,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 1.0539845758354756,
|
|
"grad_norm": 0.5530452468519074,
|
|
"learning_rate": 3.9690855754928936e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10823780298233032,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3682.9,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 1.0582690659811482,
|
|
"grad_norm": 0.6477676224717895,
|
|
"learning_rate": 3.968332670697157e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15097667276859283,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5090.2,
|
|
"valid_targets_min": 1598
|
|
},
|
|
{
|
|
"epoch": 1.062553556126821,
|
|
"grad_norm": 0.4747704952236916,
|
|
"learning_rate": 3.9675707807149774e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11814823746681213,
|
|
"step": 1240,
|
|
"valid_targets_mean": 5738.4,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 1.0668380462724936,
|
|
"grad_norm": 0.48119695863094186,
|
|
"learning_rate": 3.966799909024286e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10638413578271866,
|
|
"step": 1245,
|
|
"valid_targets_mean": 5727.0,
|
|
"valid_targets_min": 3135
|
|
},
|
|
{
|
|
"epoch": 1.0711225364181662,
|
|
"grad_norm": 0.5072465021718784,
|
|
"learning_rate": 3.9660200591440136e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09678170084953308,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4749.6,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 1.075407026563839,
|
|
"grad_norm": 0.47861655127047764,
|
|
"learning_rate": 3.9652312346340746e-05,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08683246374130249,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4310.6,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 1.0796915167095116,
|
|
"grad_norm": 0.5520967853377322,
|
|
"learning_rate": 3.9644334390953514e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12760165333747864,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4486.6,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 1.0839760068551842,
|
|
"grad_norm": 0.5076375139897981,
|
|
"learning_rate": 3.963626676169679e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11930255591869354,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4789.0,
|
|
"valid_targets_min": 2907
|
|
},
|
|
{
|
|
"epoch": 1.088260497000857,
|
|
"grad_norm": 0.5987933195720649,
|
|
"learning_rate": 3.962810949539826e-05,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12798230350017548,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4584.6,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 1.0925449871465296,
|
|
"grad_norm": 0.5516383003650653,
|
|
"learning_rate": 3.961986262929481e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14063888788223267,
|
|
"step": 1275,
|
|
"valid_targets_mean": 5532.2,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 1.0968294772922023,
|
|
"grad_norm": 0.5399383782336239,
|
|
"learning_rate": 3.9611526201032336e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11722411215305328,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5993.0,
|
|
"valid_targets_min": 4716
|
|
},
|
|
{
|
|
"epoch": 1.101113967437875,
|
|
"grad_norm": 0.5164083843201357,
|
|
"learning_rate": 3.9603100248665536e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13713915646076202,
|
|
"step": 1285,
|
|
"valid_targets_mean": 5326.6,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 1.1053984575835476,
|
|
"grad_norm": 0.5589242182917155,
|
|
"learning_rate": 3.959458481065782e-05,
|
|
"loss": 0.2422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11845184862613678,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4219.1,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 1.1096829477292203,
|
|
"grad_norm": 0.49734016852841817,
|
|
"learning_rate": 3.9585979925881075e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256842315196991,
|
|
"step": 1295,
|
|
"valid_targets_mean": 5600.0,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 1.113967437874893,
|
|
"grad_norm": 1.0796013091355239,
|
|
"learning_rate": 3.9577285633615485e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11703415215015411,
|
|
"step": 1300,
|
|
"valid_targets_mean": 4676.5,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 1.1182519280205656,
|
|
"grad_norm": 0.5352032156238867,
|
|
"learning_rate": 3.956850197354939e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12584301829338074,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4500.8,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 1.1225364181662383,
|
|
"grad_norm": 0.5369977973721654,
|
|
"learning_rate": 3.9559628985779075e-05,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06521826982498169,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2152.4,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.126820908311911,
|
|
"grad_norm": 0.5394440535514909,
|
|
"learning_rate": 3.955066671080858e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09284225106239319,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4841.8,
|
|
"valid_targets_min": 3460
|
|
},
|
|
{
|
|
"epoch": 1.1311053984575836,
|
|
"grad_norm": 0.5167458668393816,
|
|
"learning_rate": 3.954161518954956e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13846097886562347,
|
|
"step": 1320,
|
|
"valid_targets_mean": 5263.6,
|
|
"valid_targets_min": 2850
|
|
},
|
|
{
|
|
"epoch": 1.1353898886032563,
|
|
"grad_norm": 0.5249378449576371,
|
|
"learning_rate": 3.9532474463321036e-05,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10796986520290375,
|
|
"step": 1325,
|
|
"valid_targets_mean": 5340.2,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 1.139674378748929,
|
|
"grad_norm": 0.5158354334741792,
|
|
"learning_rate": 3.952324457384925e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13506674766540527,
|
|
"step": 1330,
|
|
"valid_targets_mean": 4589.1,
|
|
"valid_targets_min": 2922
|
|
},
|
|
{
|
|
"epoch": 1.1439588688946016,
|
|
"grad_norm": 0.5350786311548035,
|
|
"learning_rate": 3.951392556326747e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.114400215446949,
|
|
"step": 1335,
|
|
"valid_targets_mean": 3964.2,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 1.1482433590402743,
|
|
"grad_norm": 0.507025132940053,
|
|
"learning_rate": 3.950451747411579e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14559516310691833,
|
|
"step": 1340,
|
|
"valid_targets_mean": 5644.9,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 1.152527849185947,
|
|
"grad_norm": 0.5534439903907535,
|
|
"learning_rate": 3.949502034934092e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10887180268764496,
|
|
"step": 1345,
|
|
"valid_targets_mean": 5214.2,
|
|
"valid_targets_min": 3494
|
|
},
|
|
{
|
|
"epoch": 1.1568123393316196,
|
|
"grad_norm": 0.4897888427615898,
|
|
"learning_rate": 3.9485434232296034e-05,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12154433131217957,
|
|
"step": 1350,
|
|
"valid_targets_mean": 6807.4,
|
|
"valid_targets_min": 3141
|
|
},
|
|
{
|
|
"epoch": 1.1610968294772923,
|
|
"grad_norm": 0.5142647411009782,
|
|
"learning_rate": 3.947575916674051e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15283022820949554,
|
|
"step": 1355,
|
|
"valid_targets_mean": 5998.6,
|
|
"valid_targets_min": 4767
|
|
},
|
|
{
|
|
"epoch": 1.165381319622965,
|
|
"grad_norm": 0.578472327995002,
|
|
"learning_rate": 3.94659951968398e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16904443502426147,
|
|
"step": 1360,
|
|
"valid_targets_mean": 4652.2,
|
|
"valid_targets_min": 2686
|
|
},
|
|
{
|
|
"epoch": 1.1696658097686377,
|
|
"grad_norm": 0.5012200266051032,
|
|
"learning_rate": 3.9456142367165165e-05,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13777001202106476,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5621.0,
|
|
"valid_targets_min": 2270
|
|
},
|
|
{
|
|
"epoch": 1.17395029991431,
|
|
"grad_norm": 0.5059125605439242,
|
|
"learning_rate": 3.944620072269351e-05,
|
|
"loss": 0.2418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10521627962589264,
|
|
"step": 1370,
|
|
"valid_targets_mean": 6382.9,
|
|
"valid_targets_min": 2711
|
|
},
|
|
{
|
|
"epoch": 1.1782347900599828,
|
|
"grad_norm": 0.5461561352834591,
|
|
"learning_rate": 3.9436170308807164e-05,
|
|
"loss": 0.2308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.120382159948349,
|
|
"step": 1375,
|
|
"valid_targets_mean": 5325.1,
|
|
"valid_targets_min": 3385
|
|
},
|
|
{
|
|
"epoch": 1.1825192802056554,
|
|
"grad_norm": 0.5037824311138134,
|
|
"learning_rate": 3.94260511712937e-05,
|
|
"loss": 0.238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12292107194662094,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4843.8,
|
|
"valid_targets_min": 3123
|
|
},
|
|
{
|
|
"epoch": 1.1868037703513281,
|
|
"grad_norm": 0.5102819898803347,
|
|
"learning_rate": 3.9415843356345664e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10522742569446564,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4137.6,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 1.1910882604970008,
|
|
"grad_norm": 0.5069734026849441,
|
|
"learning_rate": 3.940554691056043e-05,
|
|
"loss": 0.2432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11096564680337906,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4371.0,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 1.1953727506426735,
|
|
"grad_norm": 0.6163302866315783,
|
|
"learning_rate": 3.939516188093996e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13463535904884338,
|
|
"step": 1395,
|
|
"valid_targets_mean": 5707.9,
|
|
"valid_targets_min": 2948
|
|
},
|
|
{
|
|
"epoch": 1.1996572407883461,
|
|
"grad_norm": 0.4759043815168194,
|
|
"learning_rate": 3.938468831489057e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10264989733695984,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3913.9,
|
|
"valid_targets_min": 1941
|
|
},
|
|
{
|
|
"epoch": 1.2039417309340188,
|
|
"grad_norm": 0.5280325447990015,
|
|
"learning_rate": 3.937412626022276e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11383386701345444,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4232.0,
|
|
"valid_targets_min": 2762
|
|
},
|
|
{
|
|
"epoch": 1.2082262210796915,
|
|
"grad_norm": 0.4313710336430502,
|
|
"learning_rate": 3.936347576515096e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11712110042572021,
|
|
"step": 1410,
|
|
"valid_targets_mean": 5636.2,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 1.2125107112253641,
|
|
"grad_norm": 0.5678079911208448,
|
|
"learning_rate": 3.935273687829329e-05,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14850357174873352,
|
|
"step": 1415,
|
|
"valid_targets_mean": 4743.9,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 1.2167952013710368,
|
|
"grad_norm": 0.5436449188870729,
|
|
"learning_rate": 3.934190964867142e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10638877749443054,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4788.4,
|
|
"valid_targets_min": 2395
|
|
},
|
|
{
|
|
"epoch": 1.2210796915167095,
|
|
"grad_norm": 0.5836160715561696,
|
|
"learning_rate": 3.933099412571026e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1167445033788681,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3805.0,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 1.2253641816623821,
|
|
"grad_norm": 0.5470623808192315,
|
|
"learning_rate": 3.9319990359237754e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10465378314256668,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5702.0,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 1.2296486718080548,
|
|
"grad_norm": 0.5805817722056696,
|
|
"learning_rate": 3.930889839948472e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13942649960517883,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5853.0,
|
|
"valid_targets_min": 3992
|
|
},
|
|
{
|
|
"epoch": 1.2339331619537275,
|
|
"grad_norm": 0.6131744635856514,
|
|
"learning_rate": 3.92977182970845e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11150114983320236,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3655.6,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 1.2382176520994002,
|
|
"grad_norm": 0.5838711652791473,
|
|
"learning_rate": 3.9286450103072845e-05,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11811933666467667,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5601.4,
|
|
"valid_targets_min": 1721
|
|
},
|
|
{
|
|
"epoch": 1.2425021422450728,
|
|
"grad_norm": 0.5154752965394229,
|
|
"learning_rate": 3.927509386888762e-05,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08512001484632492,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5162.0,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 1.2467866323907455,
|
|
"grad_norm": 0.4974023561818171,
|
|
"learning_rate": 3.9263649646368574e-05,
|
|
"loss": 0.2325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12567369639873505,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5258.8,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 1.2510711225364182,
|
|
"grad_norm": 0.5739391983178774,
|
|
"learning_rate": 3.925211748775711e-05,
|
|
"loss": 0.2265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11506156623363495,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5422.0,
|
|
"valid_targets_min": 2651
|
|
},
|
|
{
|
|
"epoch": 1.2553556126820908,
|
|
"grad_norm": 0.4588309784359663,
|
|
"learning_rate": 3.924049744569606e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10045787692070007,
|
|
"step": 1465,
|
|
"valid_targets_mean": 5433.0,
|
|
"valid_targets_min": 2926
|
|
},
|
|
{
|
|
"epoch": 1.2596401028277635,
|
|
"grad_norm": 0.6336871374145083,
|
|
"learning_rate": 3.9228789573229435e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1141088455915451,
|
|
"step": 1470,
|
|
"valid_targets_mean": 6424.4,
|
|
"valid_targets_min": 3386
|
|
},
|
|
{
|
|
"epoch": 1.2639245929734362,
|
|
"grad_norm": 0.5888888726481188,
|
|
"learning_rate": 3.921699392380217e-05,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11039966344833374,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3917.6,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 1.2682090831191088,
|
|
"grad_norm": 0.4739663401324403,
|
|
"learning_rate": 3.9205110551259887e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10644856095314026,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5735.8,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 1.2724935732647815,
|
|
"grad_norm": 0.4744561144457731,
|
|
"learning_rate": 3.919313950984865e-05,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09622097760438919,
|
|
"step": 1485,
|
|
"valid_targets_mean": 5583.1,
|
|
"valid_targets_min": 3786
|
|
},
|
|
{
|
|
"epoch": 1.2767780634104542,
|
|
"grad_norm": 0.46507247473662877,
|
|
"learning_rate": 3.918108085421475e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10571260750293732,
|
|
"step": 1490,
|
|
"valid_targets_mean": 4983.6,
|
|
"valid_targets_min": 3087
|
|
},
|
|
{
|
|
"epoch": 1.2810625535561269,
|
|
"grad_norm": 0.520596404130355,
|
|
"learning_rate": 3.916893463940439e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10671904683113098,
|
|
"step": 1495,
|
|
"valid_targets_mean": 5082.2,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 1.2853470437017995,
|
|
"grad_norm": 0.6672099202048909,
|
|
"learning_rate": 3.91567009208635e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09037558734416962,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4235.4,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 1.2896315338474722,
|
|
"grad_norm": 0.5003672248035446,
|
|
"learning_rate": 3.914437975443741e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10948409140110016,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4737.6,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 1.2939160239931449,
|
|
"grad_norm": 0.6007120324301214,
|
|
"learning_rate": 3.91319711963707e-05,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12115178257226944,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3282.4,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 1.2982005141388175,
|
|
"grad_norm": 0.44395621445516503,
|
|
"learning_rate": 3.911947530330683e-05,
|
|
"loss": 0.2234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13494271039962769,
|
|
"step": 1515,
|
|
"valid_targets_mean": 5397.8,
|
|
"valid_targets_min": 2902
|
|
},
|
|
{
|
|
"epoch": 1.3024850042844902,
|
|
"grad_norm": 0.4727033453216096,
|
|
"learning_rate": 3.910689213228795e-05,
|
|
"loss": 0.2383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11796768754720688,
|
|
"step": 1520,
|
|
"valid_targets_mean": 5473.4,
|
|
"valid_targets_min": 3285
|
|
},
|
|
{
|
|
"epoch": 1.3067694944301629,
|
|
"grad_norm": 0.4409730908944917,
|
|
"learning_rate": 3.909422174075462e-05,
|
|
"loss": 0.2375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09463514387607574,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4376.8,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 1.3110539845758356,
|
|
"grad_norm": 0.4902923230647443,
|
|
"learning_rate": 3.908146418654556e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09760729223489761,
|
|
"step": 1530,
|
|
"valid_targets_mean": 5181.0,
|
|
"valid_targets_min": 2707
|
|
},
|
|
{
|
|
"epoch": 1.3153384747215082,
|
|
"grad_norm": 0.5157944969179804,
|
|
"learning_rate": 3.9068619527897366e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07778498530387878,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3545.9,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 1.3196229648671807,
|
|
"grad_norm": 0.5078174223015692,
|
|
"learning_rate": 3.905568782344426e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10518595576286316,
|
|
"step": 1540,
|
|
"valid_targets_mean": 4583.6,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 1.3239074550128533,
|
|
"grad_norm": 0.5457009602166955,
|
|
"learning_rate": 3.904266913221784e-05,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12312053143978119,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4720.4,
|
|
"valid_targets_min": 1836
|
|
},
|
|
{
|
|
"epoch": 1.328191945158526,
|
|
"grad_norm": 0.5099687710270526,
|
|
"learning_rate": 3.9029563513646724e-05,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10449822247028351,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4636.4,
|
|
"valid_targets_min": 1042
|
|
},
|
|
{
|
|
"epoch": 1.3324764353041987,
|
|
"grad_norm": 0.5287799852413793,
|
|
"learning_rate": 3.9016371027556396e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11712635308504105,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4263.0,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 1.3367609254498714,
|
|
"grad_norm": 0.5810619731197988,
|
|
"learning_rate": 3.900309173416888e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13848112523555756,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4872.2,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 1.341045415595544,
|
|
"grad_norm": 0.5273047760121564,
|
|
"learning_rate": 3.898972569410243e-05,
|
|
"loss": 0.2277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11683964729309082,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5287.6,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 1.3453299057412167,
|
|
"grad_norm": 0.49578100784275103,
|
|
"learning_rate": 3.897627296837131e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11568988859653473,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5398.9,
|
|
"valid_targets_min": 3649
|
|
},
|
|
{
|
|
"epoch": 1.3496143958868894,
|
|
"grad_norm": 0.46855836444786286,
|
|
"learning_rate": 3.8962733618385506e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11565341055393219,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4907.6,
|
|
"valid_targets_min": 2337
|
|
},
|
|
{
|
|
"epoch": 1.353898886032562,
|
|
"grad_norm": 0.4958189472696427,
|
|
"learning_rate": 3.894910770595039e-05,
|
|
"loss": 0.2433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09800922870635986,
|
|
"step": 1580,
|
|
"valid_targets_mean": 6002.4,
|
|
"valid_targets_min": 1912
|
|
},
|
|
{
|
|
"epoch": 1.3581833761782347,
|
|
"grad_norm": 0.5894328386022706,
|
|
"learning_rate": 3.8935395293266516e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0854383036494255,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4064.6,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 1.3624678663239074,
|
|
"grad_norm": 0.49166187454600596,
|
|
"learning_rate": 3.89215964429293e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11674100160598755,
|
|
"step": 1590,
|
|
"valid_targets_mean": 5408.1,
|
|
"valid_targets_min": 1931
|
|
},
|
|
{
|
|
"epoch": 1.36675235646958,
|
|
"grad_norm": 0.5772449814015101,
|
|
"learning_rate": 3.890771121792872e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15250825881958008,
|
|
"step": 1595,
|
|
"valid_targets_mean": 5092.9,
|
|
"valid_targets_min": 2054
|
|
},
|
|
{
|
|
"epoch": 1.3710368466152527,
|
|
"grad_norm": 0.5085778895382281,
|
|
"learning_rate": 3.889373968164905e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09528356790542603,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4869.2,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 1.3753213367609254,
|
|
"grad_norm": 0.5243776347992599,
|
|
"learning_rate": 3.887968189786856e-05,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0885411947965622,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4016.6,
|
|
"valid_targets_min": 830
|
|
},
|
|
{
|
|
"epoch": 1.379605826906598,
|
|
"grad_norm": 0.5220292901952734,
|
|
"learning_rate": 3.886553793075924e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11466914415359497,
|
|
"step": 1610,
|
|
"valid_targets_mean": 5926.9,
|
|
"valid_targets_min": 4034
|
|
},
|
|
{
|
|
"epoch": 1.3838903170522707,
|
|
"grad_norm": 0.5076872075997929,
|
|
"learning_rate": 3.885130784488647e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10209037363529205,
|
|
"step": 1615,
|
|
"valid_targets_mean": 5117.1,
|
|
"valid_targets_min": 2323
|
|
},
|
|
{
|
|
"epoch": 1.3881748071979434,
|
|
"grad_norm": 0.5633778381637327,
|
|
"learning_rate": 3.8836991705208776e-05,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13901841640472412,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5037.9,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 1.392459297343616,
|
|
"grad_norm": 0.5172796487217647,
|
|
"learning_rate": 3.8822589577077516e-05,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11917863041162491,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4597.6,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 1.3967437874892887,
|
|
"grad_norm": 0.5836629834083695,
|
|
"learning_rate": 3.880810152623655e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1187589019536972,
|
|
"step": 1630,
|
|
"valid_targets_mean": 5212.9,
|
|
"valid_targets_min": 2639
|
|
},
|
|
{
|
|
"epoch": 1.4010282776349614,
|
|
"grad_norm": 0.46315449970055106,
|
|
"learning_rate": 3.879352761882196e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11266039311885834,
|
|
"step": 1635,
|
|
"valid_targets_mean": 5534.8,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 1.405312767780634,
|
|
"grad_norm": 0.48359082235776496,
|
|
"learning_rate": 3.877886792136179e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11315010488033295,
|
|
"step": 1640,
|
|
"valid_targets_mean": 5587.9,
|
|
"valid_targets_min": 3047
|
|
},
|
|
{
|
|
"epoch": 1.4095972579263067,
|
|
"grad_norm": 0.5036320475550751,
|
|
"learning_rate": 3.876412250077566e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11478479206562042,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4597.0,
|
|
"valid_targets_min": 2566
|
|
},
|
|
{
|
|
"epoch": 1.4138817480719794,
|
|
"grad_norm": 0.5576450862795339,
|
|
"learning_rate": 3.874929142437454e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15003828704357147,
|
|
"step": 1650,
|
|
"valid_targets_mean": 6018.1,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 1.418166238217652,
|
|
"grad_norm": 0.5004516414353842,
|
|
"learning_rate": 3.8734374759860384e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11132154613733292,
|
|
"step": 1655,
|
|
"valid_targets_mean": 5050.9,
|
|
"valid_targets_min": 1796
|
|
},
|
|
{
|
|
"epoch": 1.4224507283633248,
|
|
"grad_norm": 0.5009886073584217,
|
|
"learning_rate": 3.8719372575325856e-05,
|
|
"loss": 0.213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10907307267189026,
|
|
"step": 1660,
|
|
"valid_targets_mean": 6238.2,
|
|
"valid_targets_min": 3435
|
|
},
|
|
{
|
|
"epoch": 1.4267352185089974,
|
|
"grad_norm": 0.7075590218522548,
|
|
"learning_rate": 3.8704284939254016e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11796370148658752,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4848.0,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 1.43101970865467,
|
|
"grad_norm": 0.4737684683625076,
|
|
"learning_rate": 3.868911192051798e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12450909614562988,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4871.0,
|
|
"valid_targets_min": 2607
|
|
},
|
|
{
|
|
"epoch": 1.4353041988003428,
|
|
"grad_norm": 0.49240871949752624,
|
|
"learning_rate": 3.8673853588380636e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1182079166173935,
|
|
"step": 1675,
|
|
"valid_targets_mean": 5182.0,
|
|
"valid_targets_min": 3373
|
|
},
|
|
{
|
|
"epoch": 1.4395886889460154,
|
|
"grad_norm": 0.5645221728062096,
|
|
"learning_rate": 3.8658510012494315e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12042039632797241,
|
|
"step": 1680,
|
|
"valid_targets_mean": 5086.6,
|
|
"valid_targets_min": 2631
|
|
},
|
|
{
|
|
"epoch": 1.443873179091688,
|
|
"grad_norm": 0.4866729059138686,
|
|
"learning_rate": 3.864308126290048e-05,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11386478692293167,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5221.1,
|
|
"valid_targets_min": 3102
|
|
},
|
|
{
|
|
"epoch": 1.4481576692373608,
|
|
"grad_norm": 0.4658043009744602,
|
|
"learning_rate": 3.8627567410029384e-05,
|
|
"loss": 0.2245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09366113692522049,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4225.1,
|
|
"valid_targets_min": 2334
|
|
},
|
|
{
|
|
"epoch": 1.4524421593830334,
|
|
"grad_norm": 0.6858090737961489,
|
|
"learning_rate": 3.8611968524699785e-05,
|
|
"loss": 0.2303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11841071397066116,
|
|
"step": 1695,
|
|
"valid_targets_mean": 5201.1,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 1.4567266495287061,
|
|
"grad_norm": 0.4830762365406248,
|
|
"learning_rate": 3.85962846781186e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08972778916358948,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3782.6,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 1.4610111396743788,
|
|
"grad_norm": 0.5067013636299821,
|
|
"learning_rate": 3.8580515941880575e-05,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10617825388908386,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4790.6,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 1.4652956298200515,
|
|
"grad_norm": 0.4674148374739586,
|
|
"learning_rate": 3.856466238796797e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11428733170032501,
|
|
"step": 1710,
|
|
"valid_targets_mean": 5591.4,
|
|
"valid_targets_min": 2066
|
|
},
|
|
{
|
|
"epoch": 1.4695801199657241,
|
|
"grad_norm": 0.5011619719025958,
|
|
"learning_rate": 3.8548724088750226e-05,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11932536959648132,
|
|
"step": 1715,
|
|
"valid_targets_mean": 5395.4,
|
|
"valid_targets_min": 2465
|
|
},
|
|
{
|
|
"epoch": 1.4738646101113968,
|
|
"grad_norm": 0.43993044430943745,
|
|
"learning_rate": 3.853270111698364e-05,
|
|
"loss": 0.2188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10304902493953705,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5551.4,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 1.4781491002570695,
|
|
"grad_norm": 0.45745867204854895,
|
|
"learning_rate": 3.851659354581102e-05,
|
|
"loss": 0.2214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10135609656572342,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5176.1,
|
|
"valid_targets_min": 3026
|
|
},
|
|
{
|
|
"epoch": 1.4824335904027421,
|
|
"grad_norm": 0.4826171730375347,
|
|
"learning_rate": 3.850040144876138e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11102210730314255,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5723.2,
|
|
"valid_targets_min": 2724
|
|
},
|
|
{
|
|
"epoch": 1.4867180805484148,
|
|
"grad_norm": 0.5631587425192481,
|
|
"learning_rate": 3.8484124899749546e-05,
|
|
"loss": 0.2431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14050203561782837,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4802.1,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 1.4910025706940875,
|
|
"grad_norm": 0.4778645712533244,
|
|
"learning_rate": 3.846776397307589e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13428761065006256,
|
|
"step": 1740,
|
|
"valid_targets_mean": 6715.9,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 1.4952870608397602,
|
|
"grad_norm": 0.47558774254834896,
|
|
"learning_rate": 3.8451318743425956e-05,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14050710201263428,
|
|
"step": 1745,
|
|
"valid_targets_mean": 5461.2,
|
|
"valid_targets_min": 2104
|
|
},
|
|
{
|
|
"epoch": 1.4995715509854328,
|
|
"grad_norm": 0.5020280960692198,
|
|
"learning_rate": 3.8434789285870086e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12446136027574539,
|
|
"step": 1750,
|
|
"valid_targets_mean": 4883.2,
|
|
"valid_targets_min": 2589
|
|
},
|
|
{
|
|
"epoch": 1.5038560411311055,
|
|
"grad_norm": 0.46458323275607555,
|
|
"learning_rate": 3.8418175675863156e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11468029022216797,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5307.2,
|
|
"valid_targets_min": 2860
|
|
},
|
|
{
|
|
"epoch": 1.5081405312767782,
|
|
"grad_norm": 0.46789760447295525,
|
|
"learning_rate": 3.840147798924416e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11490252614021301,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5207.2,
|
|
"valid_targets_min": 3608
|
|
},
|
|
{
|
|
"epoch": 1.5124250214224508,
|
|
"grad_norm": 0.5021006585006861,
|
|
"learning_rate": 3.83846963022359e-05,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.110697902739048,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5780.4,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 1.5167095115681235,
|
|
"grad_norm": 0.469853039317634,
|
|
"learning_rate": 3.836783069144463e-05,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1256060153245926,
|
|
"step": 1770,
|
|
"valid_targets_mean": 6065.6,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 1.5209940017137962,
|
|
"grad_norm": 0.4716821318464838,
|
|
"learning_rate": 3.8350881233859685e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11349677294492722,
|
|
"step": 1775,
|
|
"valid_targets_mean": 4324.6,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 1.5252784918594688,
|
|
"grad_norm": 0.536991970869005,
|
|
"learning_rate": 3.833384800685319e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10904942452907562,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3615.6,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 1.5295629820051415,
|
|
"grad_norm": 0.485747456917392,
|
|
"learning_rate": 3.831673108817963e-05,
|
|
"loss": 0.2482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10863450169563293,
|
|
"step": 1785,
|
|
"valid_targets_mean": 5838.9,
|
|
"valid_targets_min": 4549
|
|
},
|
|
{
|
|
"epoch": 1.5338474721508142,
|
|
"grad_norm": 0.5198449290197883,
|
|
"learning_rate": 3.829953055597555e-05,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12037435919046402,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4294.2,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 1.5381319622964869,
|
|
"grad_norm": 0.4606032330821934,
|
|
"learning_rate": 3.8282246488759176e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10967078804969788,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5616.6,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 1.5424164524421595,
|
|
"grad_norm": 0.5099176146994311,
|
|
"learning_rate": 3.8264878965430054e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12301696091890335,
|
|
"step": 1800,
|
|
"valid_targets_mean": 5868.0,
|
|
"valid_targets_min": 2649
|
|
},
|
|
{
|
|
"epoch": 1.5467009425878322,
|
|
"grad_norm": 0.46299916466979724,
|
|
"learning_rate": 3.824742806526871e-05,
|
|
"loss": 0.2215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09365905821323395,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5420.4,
|
|
"valid_targets_min": 1664
|
|
},
|
|
{
|
|
"epoch": 1.5509854327335049,
|
|
"grad_norm": 0.4617740195208103,
|
|
"learning_rate": 3.8229893867936276e-05,
|
|
"loss": 0.2368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11876237392425537,
|
|
"step": 1810,
|
|
"valid_targets_mean": 5358.9,
|
|
"valid_targets_min": 3369
|
|
},
|
|
{
|
|
"epoch": 1.5552699228791775,
|
|
"grad_norm": 0.5626159117550897,
|
|
"learning_rate": 3.821227645347409e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15046226978302002,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5776.0,
|
|
"valid_targets_min": 3152
|
|
},
|
|
{
|
|
"epoch": 1.5595544130248502,
|
|
"grad_norm": 0.4959252152313045,
|
|
"learning_rate": 3.819457590230342e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14104165136814117,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5804.4,
|
|
"valid_targets_min": 3132
|
|
},
|
|
{
|
|
"epoch": 1.5638389031705227,
|
|
"grad_norm": 0.48077074587515184,
|
|
"learning_rate": 3.8176792295224994e-05,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13259726762771606,
|
|
"step": 1825,
|
|
"valid_targets_mean": 4807.0,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 1.5681233933161953,
|
|
"grad_norm": 0.5242143074489919,
|
|
"learning_rate": 3.815892571341871e-05,
|
|
"loss": 0.2467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1247769221663475,
|
|
"step": 1830,
|
|
"valid_targets_mean": 5371.4,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 1.572407883461868,
|
|
"grad_norm": 0.4281594807768039,
|
|
"learning_rate": 3.814097623844322e-05,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11494001746177673,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5965.5,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 1.5766923736075407,
|
|
"grad_norm": 0.422218804665581,
|
|
"learning_rate": 3.8122943952235576e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09081430733203888,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4779.8,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 1.5809768637532133,
|
|
"grad_norm": 0.5163048678096387,
|
|
"learning_rate": 3.810482893711086e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15263834595680237,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5439.9,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 1.585261353898886,
|
|
"grad_norm": 0.5418579237620981,
|
|
"learning_rate": 3.8086631275761795e-05,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10625886172056198,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4995.9,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 1.5895458440445587,
|
|
"grad_norm": 0.47484588455080984,
|
|
"learning_rate": 3.806835105125837e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10894256830215454,
|
|
"step": 1855,
|
|
"valid_targets_mean": 5525.2,
|
|
"valid_targets_min": 2468
|
|
},
|
|
{
|
|
"epoch": 1.5938303341902313,
|
|
"grad_norm": 0.5207951893762186,
|
|
"learning_rate": 3.804998834704746e-05,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1276807188987732,
|
|
"step": 1860,
|
|
"valid_targets_mean": 4585.6,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 1.598114824335904,
|
|
"grad_norm": 0.514875419878507,
|
|
"learning_rate": 3.803154324695248e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11865563690662384,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4956.0,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 1.6023993144815767,
|
|
"grad_norm": 0.4886307508960448,
|
|
"learning_rate": 3.801301583517292e-05,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09256631880998611,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4049.8,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 1.6066838046272494,
|
|
"grad_norm": 0.5480634986060973,
|
|
"learning_rate": 3.799440619628406e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27145320177078247,
|
|
"step": 1875,
|
|
"valid_targets_mean": 6011.6,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 1.610968294772922,
|
|
"grad_norm": 0.48254595774152576,
|
|
"learning_rate": 3.797571441523652e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11581829190254211,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4414.2,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 1.6152527849185947,
|
|
"grad_norm": 0.4647566333863733,
|
|
"learning_rate": 3.795694057735586e-05,
|
|
"loss": 0.2208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1067863404750824,
|
|
"step": 1885,
|
|
"valid_targets_mean": 5442.4,
|
|
"valid_targets_min": 3921
|
|
},
|
|
{
|
|
"epoch": 1.6195372750642674,
|
|
"grad_norm": 0.5357115187409527,
|
|
"learning_rate": 3.793808476834228e-05,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1260157823562622,
|
|
"step": 1890,
|
|
"valid_targets_mean": 5031.4,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 1.62382176520994,
|
|
"grad_norm": 0.44411910008494787,
|
|
"learning_rate": 3.79191470742701e-05,
|
|
"loss": 0.2272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08977937698364258,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4903.1,
|
|
"valid_targets_min": 3398
|
|
},
|
|
{
|
|
"epoch": 1.6281062553556127,
|
|
"grad_norm": 0.6058848653295341,
|
|
"learning_rate": 3.790012758158749e-05,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1174095869064331,
|
|
"step": 1900,
|
|
"valid_targets_mean": 6338.4,
|
|
"valid_targets_min": 3480
|
|
},
|
|
{
|
|
"epoch": 1.6323907455012854,
|
|
"grad_norm": 0.46118509931806967,
|
|
"learning_rate": 3.7881026377115985e-05,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11545247584581375,
|
|
"step": 1905,
|
|
"valid_targets_mean": 5339.1,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 1.636675235646958,
|
|
"grad_norm": 0.47188924482314787,
|
|
"learning_rate": 3.786184354805014e-05,
|
|
"loss": 0.2287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1037750393152237,
|
|
"step": 1910,
|
|
"valid_targets_mean": 5157.8,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 1.6409597257926307,
|
|
"grad_norm": 0.4760249550925058,
|
|
"learning_rate": 3.784257918195711e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.104103222489357,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4309.8,
|
|
"valid_targets_min": 2781
|
|
},
|
|
{
|
|
"epoch": 1.6452442159383034,
|
|
"grad_norm": 0.737205338310611,
|
|
"learning_rate": 3.782323336677626e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11420521140098572,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3715.8,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 1.649528706083976,
|
|
"grad_norm": 0.527920669307789,
|
|
"learning_rate": 3.780380619081876e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10416075587272644,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4639.4,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 1.6538131962296485,
|
|
"grad_norm": 0.4736895787140529,
|
|
"learning_rate": 3.778429774276717e-05,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11791656166315079,
|
|
"step": 1930,
|
|
"valid_targets_mean": 5350.2,
|
|
"valid_targets_min": 2054
|
|
},
|
|
{
|
|
"epoch": 1.6580976863753212,
|
|
"grad_norm": 0.5005815340680339,
|
|
"learning_rate": 3.7764708111675056e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10442407429218292,
|
|
"step": 1935,
|
|
"valid_targets_mean": 7008.9,
|
|
"valid_targets_min": 1658
|
|
},
|
|
{
|
|
"epoch": 1.6623821765209938,
|
|
"grad_norm": 0.5195739036849526,
|
|
"learning_rate": 3.774503738696659e-05,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12746137380599976,
|
|
"step": 1940,
|
|
"valid_targets_mean": 4134.6,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.4847015952377785,
|
|
"learning_rate": 3.772528565843609e-05,
|
|
"loss": 0.2297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13392658531665802,
|
|
"step": 1945,
|
|
"valid_targets_mean": 6174.6,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 1.6709511568123392,
|
|
"grad_norm": 0.4797232277084971,
|
|
"learning_rate": 3.770545301624768e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12461329251527786,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4713.5,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 1.6752356469580119,
|
|
"grad_norm": 0.4168554029651549,
|
|
"learning_rate": 3.7685539550934824e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10524686425924301,
|
|
"step": 1955,
|
|
"valid_targets_mean": 5712.0,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 1.6795201371036845,
|
|
"grad_norm": 0.4359677674072398,
|
|
"learning_rate": 3.766554535339995e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09910120069980621,
|
|
"step": 1960,
|
|
"valid_targets_mean": 5346.0,
|
|
"valid_targets_min": 3455
|
|
},
|
|
{
|
|
"epoch": 1.6838046272493572,
|
|
"grad_norm": 0.5279298884270155,
|
|
"learning_rate": 3.764547051491399e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12458719313144684,
|
|
"step": 1965,
|
|
"valid_targets_mean": 5258.2,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 1.6880891173950299,
|
|
"grad_norm": 0.5009594685257963,
|
|
"learning_rate": 3.762531512711602e-05,
|
|
"loss": 0.2241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10473014414310455,
|
|
"step": 1970,
|
|
"valid_targets_mean": 5807.4,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 1.6923736075407025,
|
|
"grad_norm": 0.4489171770839222,
|
|
"learning_rate": 3.76050792820128e-05,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09549927711486816,
|
|
"step": 1975,
|
|
"valid_targets_mean": 5834.2,
|
|
"valid_targets_min": 1428
|
|
},
|
|
{
|
|
"epoch": 1.6966580976863752,
|
|
"grad_norm": 0.4874910810752834,
|
|
"learning_rate": 3.758476307197837e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11774912476539612,
|
|
"step": 1980,
|
|
"valid_targets_mean": 5110.4,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 1.7009425878320479,
|
|
"grad_norm": 0.45409870644959277,
|
|
"learning_rate": 3.756436658975363e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13284586369991302,
|
|
"step": 1985,
|
|
"valid_targets_mean": 5634.4,
|
|
"valid_targets_min": 2901
|
|
},
|
|
{
|
|
"epoch": 1.7052270779777206,
|
|
"grad_norm": 0.5009345769885237,
|
|
"learning_rate": 3.754388992844591e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14606676995754242,
|
|
"step": 1990,
|
|
"valid_targets_mean": 5623.9,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 1.7095115681233932,
|
|
"grad_norm": 0.465963716185065,
|
|
"learning_rate": 3.7523333181528536e-05,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12039413303136826,
|
|
"step": 1995,
|
|
"valid_targets_mean": 4879.8,
|
|
"valid_targets_min": 2333
|
|
},
|
|
{
|
|
"epoch": 1.713796058269066,
|
|
"grad_norm": 0.49285296134297085,
|
|
"learning_rate": 3.7502696442840424e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11541670560836792,
|
|
"step": 2000,
|
|
"valid_targets_mean": 5865.6,
|
|
"valid_targets_min": 2986
|
|
},
|
|
{
|
|
"epoch": 1.7180805484147386,
|
|
"grad_norm": 0.48303222679304736,
|
|
"learning_rate": 3.7481979806585654e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10086794197559357,
|
|
"step": 2005,
|
|
"valid_targets_mean": 5970.1,
|
|
"valid_targets_min": 2852
|
|
},
|
|
{
|
|
"epoch": 1.7223650385604112,
|
|
"grad_norm": 0.4583874773594575,
|
|
"learning_rate": 3.746118336733301e-05,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14557163417339325,
|
|
"step": 2010,
|
|
"valid_targets_mean": 7418.1,
|
|
"valid_targets_min": 2612
|
|
},
|
|
{
|
|
"epoch": 1.726649528706084,
|
|
"grad_norm": 0.5839102741671757,
|
|
"learning_rate": 3.744030722001556e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12097503244876862,
|
|
"step": 2015,
|
|
"valid_targets_mean": 5231.9,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 1.7309340188517566,
|
|
"grad_norm": 0.4679557970967644,
|
|
"learning_rate": 3.741935145993026e-05,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12258268147706985,
|
|
"step": 2020,
|
|
"valid_targets_mean": 5119.0,
|
|
"valid_targets_min": 2507
|
|
},
|
|
{
|
|
"epoch": 1.7352185089974292,
|
|
"grad_norm": 0.4916485486601471,
|
|
"learning_rate": 3.739831618273746e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10012927651405334,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4079.2,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 1.739502999143102,
|
|
"grad_norm": 0.5414661267399626,
|
|
"learning_rate": 3.7377201484460516e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11908154934644699,
|
|
"step": 2030,
|
|
"valid_targets_mean": 4558.2,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 1.7437874892887746,
|
|
"grad_norm": 0.48355263559472655,
|
|
"learning_rate": 3.73560074614853e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1211392879486084,
|
|
"step": 2035,
|
|
"valid_targets_mean": 5351.6,
|
|
"valid_targets_min": 3687
|
|
},
|
|
{
|
|
"epoch": 1.7480719794344473,
|
|
"grad_norm": 0.454722234661219,
|
|
"learning_rate": 3.733473421055983e-05,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09652171283960342,
|
|
"step": 2040,
|
|
"valid_targets_mean": 5261.1,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 1.75235646958012,
|
|
"grad_norm": 0.5511450722174028,
|
|
"learning_rate": 3.731338182879376e-05,
|
|
"loss": 0.2421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11845758557319641,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3688.6,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 1.7566409597257926,
|
|
"grad_norm": 0.4282004392379052,
|
|
"learning_rate": 3.729195041365798e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10152602195739746,
|
|
"step": 2050,
|
|
"valid_targets_mean": 5710.8,
|
|
"valid_targets_min": 3035
|
|
},
|
|
{
|
|
"epoch": 1.7609254498714653,
|
|
"grad_norm": 0.44868158141916553,
|
|
"learning_rate": 3.7270440062984155e-05,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09024541825056076,
|
|
"step": 2055,
|
|
"valid_targets_mean": 5247.6,
|
|
"valid_targets_min": 1394
|
|
},
|
|
{
|
|
"epoch": 1.765209940017138,
|
|
"grad_norm": 0.4723931524742898,
|
|
"learning_rate": 3.7248850874964284e-05,
|
|
"loss": 0.2109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10666035115718842,
|
|
"step": 2060,
|
|
"valid_targets_mean": 5078.1,
|
|
"valid_targets_min": 3050
|
|
},
|
|
{
|
|
"epoch": 1.7694944301628106,
|
|
"grad_norm": 0.4823220352807819,
|
|
"learning_rate": 3.7227182948150235e-05,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11269587278366089,
|
|
"step": 2065,
|
|
"valid_targets_mean": 5477.6,
|
|
"valid_targets_min": 3002
|
|
},
|
|
{
|
|
"epoch": 1.7737789203084833,
|
|
"grad_norm": 0.5474865566919354,
|
|
"learning_rate": 3.7205436381453325e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12877508997917175,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4364.1,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 1.778063410454156,
|
|
"grad_norm": 0.4596942382402779,
|
|
"learning_rate": 3.718361127414384e-05,
|
|
"loss": 0.2146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10272775590419769,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5257.1,
|
|
"valid_targets_min": 3299
|
|
},
|
|
{
|
|
"epoch": 1.7823479005998286,
|
|
"grad_norm": 0.6641809993375568,
|
|
"learning_rate": 3.71617077258506e-05,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.125893235206604,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4647.0,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 1.7866323907455013,
|
|
"grad_norm": 0.5016510769637406,
|
|
"learning_rate": 3.71397258365605e-05,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11946600675582886,
|
|
"step": 2085,
|
|
"valid_targets_mean": 5398.1,
|
|
"valid_targets_min": 2260
|
|
},
|
|
{
|
|
"epoch": 1.790916880891174,
|
|
"grad_norm": 0.40018325885445133,
|
|
"learning_rate": 3.711766570661805e-05,
|
|
"loss": 0.2159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09395642578601837,
|
|
"step": 2090,
|
|
"valid_targets_mean": 5911.4,
|
|
"valid_targets_min": 2664
|
|
},
|
|
{
|
|
"epoch": 1.7952013710368466,
|
|
"grad_norm": 0.48624128496277386,
|
|
"learning_rate": 3.70955274367249e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1076192706823349,
|
|
"step": 2095,
|
|
"valid_targets_mean": 5477.1,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 1.7994858611825193,
|
|
"grad_norm": 0.6049349014182495,
|
|
"learning_rate": 3.7073311127939434e-05,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15382909774780273,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4850.2,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 1.803770351328192,
|
|
"grad_norm": 0.4602861225585512,
|
|
"learning_rate": 3.705101688167625e-05,
|
|
"loss": 0.221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10993652045726776,
|
|
"step": 2105,
|
|
"valid_targets_mean": 5117.2,
|
|
"valid_targets_min": 3344
|
|
},
|
|
{
|
|
"epoch": 1.8080548414738646,
|
|
"grad_norm": 0.4573276660119421,
|
|
"learning_rate": 3.702864479970572e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09111273288726807,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4376.4,
|
|
"valid_targets_min": 1914
|
|
},
|
|
{
|
|
"epoch": 1.8123393316195373,
|
|
"grad_norm": 0.45701318648217,
|
|
"learning_rate": 3.7006194984153536e-05,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09573175013065338,
|
|
"step": 2115,
|
|
"valid_targets_mean": 5533.2,
|
|
"valid_targets_min": 3051
|
|
},
|
|
{
|
|
"epoch": 1.81662382176521,
|
|
"grad_norm": 0.5098679742063379,
|
|
"learning_rate": 3.6983667537500226e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13827547430992126,
|
|
"step": 2120,
|
|
"valid_targets_mean": 4585.8,
|
|
"valid_targets_min": 2714
|
|
},
|
|
{
|
|
"epoch": 1.8209083119108826,
|
|
"grad_norm": 0.46205848208851363,
|
|
"learning_rate": 3.696106256258071e-05,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10701566934585571,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4941.9,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 1.8251928020565553,
|
|
"grad_norm": 0.5012775704899749,
|
|
"learning_rate": 3.6938380162583784e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1033095270395279,
|
|
"step": 2130,
|
|
"valid_targets_mean": 5638.0,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 1.829477292202228,
|
|
"grad_norm": 0.4747025798124583,
|
|
"learning_rate": 3.6915620441051706e-05,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1287415474653244,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5183.9,
|
|
"valid_targets_min": 2699
|
|
},
|
|
{
|
|
"epoch": 1.8337617823479007,
|
|
"grad_norm": 0.5375748900531672,
|
|
"learning_rate": 3.689278350187969e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1322089433670044,
|
|
"step": 2140,
|
|
"valid_targets_mean": 5262.1,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 1.8380462724935733,
|
|
"grad_norm": 0.520531827518314,
|
|
"learning_rate": 3.686986944931544e-05,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12290707975625992,
|
|
"step": 2145,
|
|
"valid_targets_mean": 5127.6,
|
|
"valid_targets_min": 2904
|
|
},
|
|
{
|
|
"epoch": 1.842330762639246,
|
|
"grad_norm": 0.468452706681642,
|
|
"learning_rate": 3.684687838795867e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10318892449140549,
|
|
"step": 2150,
|
|
"valid_targets_mean": 5195.9,
|
|
"valid_targets_min": 2905
|
|
},
|
|
{
|
|
"epoch": 1.8466152527849187,
|
|
"grad_norm": 0.4894073955207035,
|
|
"learning_rate": 3.682381042276062e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13634109497070312,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4727.5,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 1.8508997429305913,
|
|
"grad_norm": 0.5063303905317564,
|
|
"learning_rate": 3.68006656590236e-05,
|
|
"loss": 0.2355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11118605732917786,
|
|
"step": 2160,
|
|
"valid_targets_mean": 5055.9,
|
|
"valid_targets_min": 3056
|
|
},
|
|
{
|
|
"epoch": 1.855184233076264,
|
|
"grad_norm": 0.413860193373722,
|
|
"learning_rate": 3.67774442024005e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10318203270435333,
|
|
"step": 2165,
|
|
"valid_targets_mean": 6153.2,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 1.8594687232219367,
|
|
"grad_norm": 0.5664320802607056,
|
|
"learning_rate": 3.6754146158894274e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13304372131824493,
|
|
"step": 2170,
|
|
"valid_targets_mean": 4987.5,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 1.8637532133676094,
|
|
"grad_norm": 0.49595955527913155,
|
|
"learning_rate": 3.673077163485753e-05,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12880939245224,
|
|
"step": 2175,
|
|
"valid_targets_mean": 5107.0,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 1.868037703513282,
|
|
"grad_norm": 0.46081595732819347,
|
|
"learning_rate": 3.670732073699196e-05,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11992570012807846,
|
|
"step": 2180,
|
|
"valid_targets_mean": 5616.6,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 1.8723221936589547,
|
|
"grad_norm": 0.452165893709852,
|
|
"learning_rate": 3.6683793572347905e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08756551146507263,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4324.8,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 1.8766066838046274,
|
|
"grad_norm": 0.598100612012415,
|
|
"learning_rate": 3.666019024832387e-05,
|
|
"loss": 0.232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12989911437034607,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4225.9,
|
|
"valid_targets_min": 2347
|
|
},
|
|
{
|
|
"epoch": 1.8808911739503,
|
|
"grad_norm": 0.5526053715780299,
|
|
"learning_rate": 3.6636510872665995e-05,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12627767026424408,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4753.0,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 1.8851756640959727,
|
|
"grad_norm": 0.4847762020789575,
|
|
"learning_rate": 3.66127555534676e-05,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132424995303154,
|
|
"step": 2200,
|
|
"valid_targets_mean": 5307.9,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 1.8894601542416454,
|
|
"grad_norm": 0.5046411208346194,
|
|
"learning_rate": 3.6588924399168664e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09791286289691925,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4715.8,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 1.893744644387318,
|
|
"grad_norm": 0.5338025713849385,
|
|
"learning_rate": 3.656501751855537e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1205880269408226,
|
|
"step": 2210,
|
|
"valid_targets_mean": 5110.4,
|
|
"valid_targets_min": 2460
|
|
},
|
|
{
|
|
"epoch": 1.8980291345329907,
|
|
"grad_norm": 0.511539137370197,
|
|
"learning_rate": 3.6541035020759555e-05,
|
|
"loss": 0.2334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10615777969360352,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4384.9,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 1.9023136246786634,
|
|
"grad_norm": 0.5360317696651323,
|
|
"learning_rate": 3.651697701525825e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09040658921003342,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4914.2,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 1.906598114824336,
|
|
"grad_norm": 0.5170648753277196,
|
|
"learning_rate": 3.6492843611873164e-05,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1084122285246849,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4081.0,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 1.9108826049700087,
|
|
"grad_norm": 0.4745365155640128,
|
|
"learning_rate": 3.64686349207702e-05,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10846118628978729,
|
|
"step": 2230,
|
|
"valid_targets_mean": 4990.8,
|
|
"valid_targets_min": 3201
|
|
},
|
|
{
|
|
"epoch": 1.9151670951156814,
|
|
"grad_norm": 0.4758545058789654,
|
|
"learning_rate": 3.6444351052458924e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10121801495552063,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4110.4,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 1.919451585261354,
|
|
"grad_norm": 0.4514826598697308,
|
|
"learning_rate": 3.641999211779208e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10626526921987534,
|
|
"step": 2240,
|
|
"valid_targets_mean": 6423.8,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 1.9237360754070265,
|
|
"grad_norm": 0.49670895196681053,
|
|
"learning_rate": 3.6395558227965094e-05,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11679775267839432,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4890.6,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 1.9280205655526992,
|
|
"grad_norm": 0.4838984020881071,
|
|
"learning_rate": 3.637104949451554e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09723664820194244,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4169.4,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 1.9323050556983719,
|
|
"grad_norm": 0.44509857111678414,
|
|
"learning_rate": 3.6346466029322636e-05,
|
|
"loss": 0.2262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11195631325244904,
|
|
"step": 2255,
|
|
"valid_targets_mean": 4828.4,
|
|
"valid_targets_min": 2573
|
|
},
|
|
{
|
|
"epoch": 1.9365895458440445,
|
|
"grad_norm": 0.4858864351791111,
|
|
"learning_rate": 3.632180794460676e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11188924312591553,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4487.5,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 1.9408740359897172,
|
|
"grad_norm": 0.413600120478062,
|
|
"learning_rate": 3.629707535292891e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10609625279903412,
|
|
"step": 2265,
|
|
"valid_targets_mean": 6128.2,
|
|
"valid_targets_min": 4528
|
|
},
|
|
{
|
|
"epoch": 1.9451585261353899,
|
|
"grad_norm": 0.4494778547641328,
|
|
"learning_rate": 3.627226836719019e-05,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11913739144802094,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4749.6,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 1.9494430162810625,
|
|
"grad_norm": 0.5304785163709616,
|
|
"learning_rate": 3.624738710063132e-05,
|
|
"loss": 0.2205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13347581028938293,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4066.4,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 1.9537275064267352,
|
|
"grad_norm": 0.45353166493326297,
|
|
"learning_rate": 3.622243166683209e-05,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0952456071972847,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4990.4,
|
|
"valid_targets_min": 4051
|
|
},
|
|
{
|
|
"epoch": 1.9580119965724079,
|
|
"grad_norm": 0.4539289969108042,
|
|
"learning_rate": 3.619740217971086e-05,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09723306447267532,
|
|
"step": 2285,
|
|
"valid_targets_mean": 5118.4,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 1.9622964867180805,
|
|
"grad_norm": 0.46233942047553034,
|
|
"learning_rate": 3.617229875352403e-05,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09375040233135223,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4558.5,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 1.9665809768637532,
|
|
"grad_norm": 0.4218514068432325,
|
|
"learning_rate": 3.614712150286552e-05,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11099795252084732,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5717.6,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 1.9708654670094259,
|
|
"grad_norm": 0.45789692025390966,
|
|
"learning_rate": 3.6121870542666256e-05,
|
|
"loss": 0.2293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13194692134857178,
|
|
"step": 2300,
|
|
"valid_targets_mean": 6161.2,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 1.9751499571550986,
|
|
"grad_norm": 0.47632168461685737,
|
|
"learning_rate": 3.609654598819364e-05,
|
|
"loss": 0.233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1115507110953331,
|
|
"step": 2305,
|
|
"valid_targets_mean": 5124.5,
|
|
"valid_targets_min": 2677
|
|
},
|
|
{
|
|
"epoch": 1.9794344473007712,
|
|
"grad_norm": 0.45583728590859696,
|
|
"learning_rate": 3.6071147955051016e-05,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10582319647073746,
|
|
"step": 2310,
|
|
"valid_targets_mean": 5469.5,
|
|
"valid_targets_min": 2316
|
|
},
|
|
{
|
|
"epoch": 1.983718937446444,
|
|
"grad_norm": 0.48697460061754,
|
|
"learning_rate": 3.6045676559177156e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11527565121650696,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4943.4,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 1.9880034275921166,
|
|
"grad_norm": 0.4731822430894365,
|
|
"learning_rate": 3.60201319168457e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10451348125934601,
|
|
"step": 2320,
|
|
"valid_targets_mean": 4202.4,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 1.9922879177377892,
|
|
"grad_norm": 0.4556151371510566,
|
|
"learning_rate": 3.599451414466469e-05,
|
|
"loss": 0.2269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10072088986635208,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5196.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 1.996572407883462,
|
|
"grad_norm": 0.4193240280740944,
|
|
"learning_rate": 3.5968823359575965e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10769035667181015,
|
|
"step": 2330,
|
|
"valid_targets_mean": 6380.5,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 2.0008568980291344,
|
|
"grad_norm": 0.4798142878195261,
|
|
"learning_rate": 3.594305967885466e-05,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13428811728954315,
|
|
"step": 2335,
|
|
"valid_targets_mean": 6203.6,
|
|
"valid_targets_min": 2349
|
|
},
|
|
{
|
|
"epoch": 2.005141388174807,
|
|
"grad_norm": 0.4839034707974094,
|
|
"learning_rate": 3.591722322010869e-05,
|
|
"loss": 0.1998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0971929281949997,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5707.4,
|
|
"valid_targets_min": 2730
|
|
},
|
|
{
|
|
"epoch": 2.0094258783204797,
|
|
"grad_norm": 0.48112037071217567,
|
|
"learning_rate": 3.589131410127817e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10458128899335861,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5635.6,
|
|
"valid_targets_min": 2717
|
|
},
|
|
{
|
|
"epoch": 2.0137103684661524,
|
|
"grad_norm": 0.4420284708052282,
|
|
"learning_rate": 3.5865332440634906e-05,
|
|
"loss": 0.206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07776182889938354,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5238.4,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 2.017994858611825,
|
|
"grad_norm": 0.4930492802262695,
|
|
"learning_rate": 3.583927835678186e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07519136369228363,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3908.9,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 2.0222793487574977,
|
|
"grad_norm": 0.48606257170744127,
|
|
"learning_rate": 3.581315196865257e-05,
|
|
"loss": 0.2024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10364539921283722,
|
|
"step": 2360,
|
|
"valid_targets_mean": 5116.8,
|
|
"valid_targets_min": 3772
|
|
},
|
|
{
|
|
"epoch": 2.0265638389031704,
|
|
"grad_norm": 0.5146962374677706,
|
|
"learning_rate": 3.5786953395510664e-05,
|
|
"loss": 0.206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10978688299655914,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4928.8,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 2.030848329048843,
|
|
"grad_norm": 0.4378443621517526,
|
|
"learning_rate": 3.576068275694927e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08051362633705139,
|
|
"step": 2370,
|
|
"valid_targets_mean": 6912.5,
|
|
"valid_targets_min": 3311
|
|
},
|
|
{
|
|
"epoch": 2.0351328191945157,
|
|
"grad_norm": 0.5386581392854123,
|
|
"learning_rate": 3.573434017289048e-05,
|
|
"loss": 0.2066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1055622398853302,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4388.2,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 2.0394173093401884,
|
|
"grad_norm": 0.5240571401301507,
|
|
"learning_rate": 3.570792576358482e-05,
|
|
"loss": 0.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09677842259407043,
|
|
"step": 2380,
|
|
"valid_targets_mean": 4769.2,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 2.043701799485861,
|
|
"grad_norm": 0.478161437533347,
|
|
"learning_rate": 3.568143964961069e-05,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08123594522476196,
|
|
"step": 2385,
|
|
"valid_targets_mean": 6699.8,
|
|
"valid_targets_min": 4104
|
|
},
|
|
{
|
|
"epoch": 2.0479862896315337,
|
|
"grad_norm": 0.4583846295744667,
|
|
"learning_rate": 3.56548819518738e-05,
|
|
"loss": 0.2048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11680682003498077,
|
|
"step": 2390,
|
|
"valid_targets_mean": 5929.0,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 2.0522707797772064,
|
|
"grad_norm": 0.4488261353496457,
|
|
"learning_rate": 3.562825279160663e-05,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10441552102565765,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5957.9,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 2.056555269922879,
|
|
"grad_norm": 0.5419790810037427,
|
|
"learning_rate": 3.560155229036789e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1211346834897995,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4245.8,
|
|
"valid_targets_min": 3184
|
|
},
|
|
{
|
|
"epoch": 2.0608397600685517,
|
|
"grad_norm": 0.48698288645147914,
|
|
"learning_rate": 3.557478057004196e-05,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12603498995304108,
|
|
"step": 2405,
|
|
"valid_targets_mean": 4773.8,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 2.0651242502142244,
|
|
"grad_norm": 0.45972267451212057,
|
|
"learning_rate": 3.5547937752838296e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12184608727693558,
|
|
"step": 2410,
|
|
"valid_targets_mean": 5739.6,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 2.069408740359897,
|
|
"grad_norm": 0.5052359876559305,
|
|
"learning_rate": 3.552102396129093e-05,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12169013917446136,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4769.2,
|
|
"valid_targets_min": 1612
|
|
},
|
|
{
|
|
"epoch": 2.0736932305055698,
|
|
"grad_norm": 0.46583683006954374,
|
|
"learning_rate": 3.549403931825787e-05,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09529776126146317,
|
|
"step": 2420,
|
|
"valid_targets_mean": 6170.0,
|
|
"valid_targets_min": 2466
|
|
},
|
|
{
|
|
"epoch": 2.0779777206512424,
|
|
"grad_norm": 0.5103020607049007,
|
|
"learning_rate": 3.546698394692054e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10125349462032318,
|
|
"step": 2425,
|
|
"valid_targets_mean": 5250.1,
|
|
"valid_targets_min": 3092
|
|
},
|
|
{
|
|
"epoch": 2.082262210796915,
|
|
"grad_norm": 0.6724602759927233,
|
|
"learning_rate": 3.543985797078326e-05,
|
|
"loss": 0.2076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09732373058795929,
|
|
"step": 2430,
|
|
"valid_targets_mean": 4869.6,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 2.0865467009425878,
|
|
"grad_norm": 0.4710418734332259,
|
|
"learning_rate": 3.5412661513672635e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07645601034164429,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5544.9,
|
|
"valid_targets_min": 2866
|
|
},
|
|
{
|
|
"epoch": 2.0908311910882604,
|
|
"grad_norm": 0.5696628205752358,
|
|
"learning_rate": 3.538539469973699e-05,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12250518798828125,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3794.1,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 2.095115681233933,
|
|
"grad_norm": 0.4713107612195895,
|
|
"learning_rate": 3.535805765344587e-05,
|
|
"loss": 0.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09606672078371048,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4874.5,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 2.0994001713796058,
|
|
"grad_norm": 0.5120423087865809,
|
|
"learning_rate": 3.533065049958936e-05,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10608918964862823,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4431.9,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 2.1036846615252784,
|
|
"grad_norm": 0.44536739766857203,
|
|
"learning_rate": 3.530317336327761e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1180947870016098,
|
|
"step": 2455,
|
|
"valid_targets_mean": 6261.0,
|
|
"valid_targets_min": 2680
|
|
},
|
|
{
|
|
"epoch": 2.107969151670951,
|
|
"grad_norm": 0.48777347702473567,
|
|
"learning_rate": 3.5275626369940226e-05,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09988237917423248,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5259.1,
|
|
"valid_targets_min": 2737
|
|
},
|
|
{
|
|
"epoch": 2.112253641816624,
|
|
"grad_norm": 0.53585728228521,
|
|
"learning_rate": 3.5248009645325715e-05,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1207893043756485,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4266.6,
|
|
"valid_targets_min": 1976
|
|
},
|
|
{
|
|
"epoch": 2.1165381319622965,
|
|
"grad_norm": 0.5083574721252143,
|
|
"learning_rate": 3.522032331550088e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10587166994810104,
|
|
"step": 2470,
|
|
"valid_targets_mean": 4353.4,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 2.120822622107969,
|
|
"grad_norm": 0.8426373316811898,
|
|
"learning_rate": 3.519256750685027e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0982590913772583,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5417.4,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 2.125107112253642,
|
|
"grad_norm": 0.4797781453173742,
|
|
"learning_rate": 3.5164742346075586e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1159694492816925,
|
|
"step": 2480,
|
|
"valid_targets_mean": 5555.8,
|
|
"valid_targets_min": 2557
|
|
},
|
|
{
|
|
"epoch": 2.1293916023993145,
|
|
"grad_norm": 0.5759990368448268,
|
|
"learning_rate": 3.5136847960195133e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1255708783864975,
|
|
"step": 2485,
|
|
"valid_targets_mean": 5154.9,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 2.133676092544987,
|
|
"grad_norm": 0.5142715372317823,
|
|
"learning_rate": 3.5108884476543204e-05,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11523468792438507,
|
|
"step": 2490,
|
|
"valid_targets_mean": 5533.1,
|
|
"valid_targets_min": 3318
|
|
},
|
|
{
|
|
"epoch": 2.13796058269066,
|
|
"grad_norm": 0.5401336125170897,
|
|
"learning_rate": 3.508085202276952e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1292748749256134,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4863.1,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 2.1422450728363325,
|
|
"grad_norm": 0.5151114715150518,
|
|
"learning_rate": 3.505275072683864e-05,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08936254680156708,
|
|
"step": 2500,
|
|
"valid_targets_mean": 5512.5,
|
|
"valid_targets_min": 2314
|
|
},
|
|
{
|
|
"epoch": 2.146529562982005,
|
|
"grad_norm": 0.44524209721883445,
|
|
"learning_rate": 3.502458071702938e-05,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08574520796537399,
|
|
"step": 2505,
|
|
"valid_targets_mean": 5010.2,
|
|
"valid_targets_min": 3171
|
|
},
|
|
{
|
|
"epoch": 2.150814053127678,
|
|
"grad_norm": 0.4745350777906336,
|
|
"learning_rate": 3.4996342121934225e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12945052981376648,
|
|
"step": 2510,
|
|
"valid_targets_mean": 6108.2,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 2.1550985432733505,
|
|
"grad_norm": 0.5461131598088633,
|
|
"learning_rate": 3.4968035070458745e-05,
|
|
"loss": 0.217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13106781244277954,
|
|
"step": 2515,
|
|
"valid_targets_mean": 3841.6,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 2.159383033419023,
|
|
"grad_norm": 0.4937762829697131,
|
|
"learning_rate": 3.4939659691821005e-05,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11381436884403229,
|
|
"step": 2520,
|
|
"valid_targets_mean": 6040.9,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 2.163667523564696,
|
|
"grad_norm": 0.47189409527732573,
|
|
"learning_rate": 3.491121611555096e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08889122307300568,
|
|
"step": 2525,
|
|
"valid_targets_mean": 5620.0,
|
|
"valid_targets_min": 3232
|
|
},
|
|
{
|
|
"epoch": 2.1679520137103685,
|
|
"grad_norm": 0.6019995157437676,
|
|
"learning_rate": 3.488270447148991e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13497303426265717,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3912.2,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 2.172236503856041,
|
|
"grad_norm": 0.48550078769805255,
|
|
"learning_rate": 3.485412488978984e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1056138426065445,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4961.4,
|
|
"valid_targets_min": 1818
|
|
},
|
|
{
|
|
"epoch": 2.176520994001714,
|
|
"grad_norm": 0.4629235375663915,
|
|
"learning_rate": 3.4825477500912894e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0837879553437233,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4675.6,
|
|
"valid_targets_min": 980
|
|
},
|
|
{
|
|
"epoch": 2.1808054841473865,
|
|
"grad_norm": 0.4928325443486428,
|
|
"learning_rate": 3.479676243563072e-05,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11683500558137894,
|
|
"step": 2545,
|
|
"valid_targets_mean": 5289.5,
|
|
"valid_targets_min": 3087
|
|
},
|
|
{
|
|
"epoch": 2.185089974293059,
|
|
"grad_norm": 0.4893658082842753,
|
|
"learning_rate": 3.476797982502391e-05,
|
|
"loss": 0.2096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11225201934576035,
|
|
"step": 2550,
|
|
"valid_targets_mean": 4724.6,
|
|
"valid_targets_min": 3685
|
|
},
|
|
{
|
|
"epoch": 2.189374464438732,
|
|
"grad_norm": 0.5695749570671081,
|
|
"learning_rate": 3.4739129800481406e-05,
|
|
"loss": 0.1905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0752904862165451,
|
|
"step": 2555,
|
|
"valid_targets_mean": 4190.8,
|
|
"valid_targets_min": 1558
|
|
},
|
|
{
|
|
"epoch": 2.1936589545844045,
|
|
"grad_norm": 0.5777670022037187,
|
|
"learning_rate": 3.4710212493699867e-05,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1446160078048706,
|
|
"step": 2560,
|
|
"valid_targets_mean": 5293.1,
|
|
"valid_targets_min": 2412
|
|
},
|
|
{
|
|
"epoch": 2.197943444730077,
|
|
"grad_norm": 0.4326574462697901,
|
|
"learning_rate": 3.4681228036683085e-05,
|
|
"loss": 0.209,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07848373800516129,
|
|
"step": 2565,
|
|
"valid_targets_mean": 5972.1,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 2.20222793487575,
|
|
"grad_norm": 0.4840416081525319,
|
|
"learning_rate": 3.465217656174141e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10920464247465134,
|
|
"step": 2570,
|
|
"valid_targets_mean": 4751.2,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 2.2065124250214225,
|
|
"grad_norm": 0.47442024176086667,
|
|
"learning_rate": 3.462305820149109e-05,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08478060364723206,
|
|
"step": 2575,
|
|
"valid_targets_mean": 4709.0,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 2.210796915167095,
|
|
"grad_norm": 0.5073332229567192,
|
|
"learning_rate": 3.4593873088853716e-05,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11226142197847366,
|
|
"step": 2580,
|
|
"valid_targets_mean": 4234.8,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 2.215081405312768,
|
|
"grad_norm": 0.4721289019762803,
|
|
"learning_rate": 3.4564621357055573e-05,
|
|
"loss": 0.2081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09325413405895233,
|
|
"step": 2585,
|
|
"valid_targets_mean": 5800.1,
|
|
"valid_targets_min": 3763
|
|
},
|
|
{
|
|
"epoch": 2.2193658954584405,
|
|
"grad_norm": 0.48760680488450414,
|
|
"learning_rate": 3.453530313962709e-05,
|
|
"loss": 0.2135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09359925240278244,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4446.0,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 2.223650385604113,
|
|
"grad_norm": 0.532150303219305,
|
|
"learning_rate": 3.450591857040215e-05,
|
|
"loss": 0.2104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10448680818080902,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4132.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 2.227934875749786,
|
|
"grad_norm": 0.4818587245261894,
|
|
"learning_rate": 3.4476467783517576e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08007673174142838,
|
|
"step": 2600,
|
|
"valid_targets_mean": 4857.9,
|
|
"valid_targets_min": 1727
|
|
},
|
|
{
|
|
"epoch": 2.2322193658954586,
|
|
"grad_norm": 0.5052456440668673,
|
|
"learning_rate": 3.4446950913412406e-05,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390840858221054,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4809.4,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 2.236503856041131,
|
|
"grad_norm": 0.5909119312145202,
|
|
"learning_rate": 3.441736809482739e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12171690165996552,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4481.1,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 2.240788346186804,
|
|
"grad_norm": 0.5198439433298654,
|
|
"learning_rate": 3.4387719462804296e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10234121233224869,
|
|
"step": 2615,
|
|
"valid_targets_mean": 4727.2,
|
|
"valid_targets_min": 2308
|
|
},
|
|
{
|
|
"epoch": 2.2450728363324766,
|
|
"grad_norm": 0.524554938779893,
|
|
"learning_rate": 3.435800515268533e-05,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14251066744327545,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5830.2,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 2.2493573264781492,
|
|
"grad_norm": 0.4516713254788256,
|
|
"learning_rate": 3.432822530011252e-05,
|
|
"loss": 0.212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11865200102329254,
|
|
"step": 2625,
|
|
"valid_targets_mean": 5070.9,
|
|
"valid_targets_min": 728
|
|
},
|
|
{
|
|
"epoch": 2.253641816623822,
|
|
"grad_norm": 0.48771244356564863,
|
|
"learning_rate": 3.429838004102707e-05,
|
|
"loss": 0.2115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1007542833685875,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4801.2,
|
|
"valid_targets_min": 2899
|
|
},
|
|
{
|
|
"epoch": 2.2579263067694946,
|
|
"grad_norm": 0.4941928389134593,
|
|
"learning_rate": 3.426846951166876e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11054898798465729,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4868.9,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 2.2622107969151672,
|
|
"grad_norm": 0.504860332302527,
|
|
"learning_rate": 3.423849384857532e-05,
|
|
"loss": 0.2112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11777180433273315,
|
|
"step": 2640,
|
|
"valid_targets_mean": 5324.9,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 2.26649528706084,
|
|
"grad_norm": 0.459939251999804,
|
|
"learning_rate": 3.420845318858184e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0937754213809967,
|
|
"step": 2645,
|
|
"valid_targets_mean": 5160.6,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 2.2707797772065126,
|
|
"grad_norm": 0.5132192935657194,
|
|
"learning_rate": 3.417834766882005e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09987785667181015,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4889.0,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 2.2750642673521853,
|
|
"grad_norm": 0.4960047556364684,
|
|
"learning_rate": 3.41481774267178e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09927482903003693,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5076.6,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 2.279348757497858,
|
|
"grad_norm": 0.4696575743098189,
|
|
"learning_rate": 3.411794259999839e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07672121375799179,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4167.1,
|
|
"valid_targets_min": 1731
|
|
},
|
|
{
|
|
"epoch": 2.2836332476435306,
|
|
"grad_norm": 0.44652313999302545,
|
|
"learning_rate": 3.408764332667992e-05,
|
|
"loss": 0.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09379899501800537,
|
|
"step": 2665,
|
|
"valid_targets_mean": 5656.6,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 2.2879177377892033,
|
|
"grad_norm": 0.45677683642611405,
|
|
"learning_rate": 3.4057279745074696e-05,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09468834847211838,
|
|
"step": 2670,
|
|
"valid_targets_mean": 5627.0,
|
|
"valid_targets_min": 2577
|
|
},
|
|
{
|
|
"epoch": 2.292202227934876,
|
|
"grad_norm": 0.4628339227087412,
|
|
"learning_rate": 3.402685199378857e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08526807278394699,
|
|
"step": 2675,
|
|
"valid_targets_mean": 5128.9,
|
|
"valid_targets_min": 1877
|
|
},
|
|
{
|
|
"epoch": 2.2964867180805486,
|
|
"grad_norm": 0.48877181789580515,
|
|
"learning_rate": 3.399636021172034e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.092775359749794,
|
|
"step": 2680,
|
|
"valid_targets_mean": 4849.2,
|
|
"valid_targets_min": 2735
|
|
},
|
|
{
|
|
"epoch": 2.3007712082262213,
|
|
"grad_norm": 0.4854806416818893,
|
|
"learning_rate": 3.396580453806107e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10871044546365738,
|
|
"step": 2685,
|
|
"valid_targets_mean": 6293.9,
|
|
"valid_targets_min": 3431
|
|
},
|
|
{
|
|
"epoch": 2.305055698371894,
|
|
"grad_norm": 0.5323163601667085,
|
|
"learning_rate": 3.393518511229351e-05,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13361677527427673,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4992.9,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 2.3093401885175666,
|
|
"grad_norm": 0.46863816721964147,
|
|
"learning_rate": 3.3904502074191405e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0967712551355362,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4828.0,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 2.3136246786632393,
|
|
"grad_norm": 0.4937021877713454,
|
|
"learning_rate": 3.38737555638189e-05,
|
|
"loss": 0.212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11895988881587982,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4830.0,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 2.317909168808912,
|
|
"grad_norm": 0.5425049113594594,
|
|
"learning_rate": 3.384294572152986e-05,
|
|
"loss": 0.2114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11120740324258804,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3873.9,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 2.3221936589545846,
|
|
"grad_norm": 0.46221874375889965,
|
|
"learning_rate": 3.3812072687967286e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10035630315542221,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4683.9,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 2.3264781491002573,
|
|
"grad_norm": 0.4358452001193529,
|
|
"learning_rate": 3.378113660406261e-05,
|
|
"loss": 0.2026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09401911497116089,
|
|
"step": 2715,
|
|
"valid_targets_mean": 6308.2,
|
|
"valid_targets_min": 4481
|
|
},
|
|
{
|
|
"epoch": 2.33076263924593,
|
|
"grad_norm": 0.4971478285651246,
|
|
"learning_rate": 3.3750137611035096e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10292759537696838,
|
|
"step": 2720,
|
|
"valid_targets_mean": 4957.5,
|
|
"valid_targets_min": 2267
|
|
},
|
|
{
|
|
"epoch": 2.3350471293916026,
|
|
"grad_norm": 0.4829059484138484,
|
|
"learning_rate": 3.3719075850391174e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09821857511997223,
|
|
"step": 2725,
|
|
"valid_targets_mean": 6403.6,
|
|
"valid_targets_min": 3302
|
|
},
|
|
{
|
|
"epoch": 2.3393316195372753,
|
|
"grad_norm": 0.42495114876627665,
|
|
"learning_rate": 3.36879514639238e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08856787532567978,
|
|
"step": 2730,
|
|
"valid_targets_mean": 5714.2,
|
|
"valid_targets_min": 3273
|
|
},
|
|
{
|
|
"epoch": 2.343616109682948,
|
|
"grad_norm": 0.4919645273602842,
|
|
"learning_rate": 3.365676459371181e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09872014075517654,
|
|
"step": 2735,
|
|
"valid_targets_mean": 5452.1,
|
|
"valid_targets_min": 2308
|
|
},
|
|
{
|
|
"epoch": 2.34790059982862,
|
|
"grad_norm": 0.49482547215601524,
|
|
"learning_rate": 3.362551538211927e-05,
|
|
"loss": 0.2034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08632953464984894,
|
|
"step": 2740,
|
|
"valid_targets_mean": 5981.2,
|
|
"valid_targets_min": 1924
|
|
},
|
|
{
|
|
"epoch": 2.352185089974293,
|
|
"grad_norm": 0.44873956381323665,
|
|
"learning_rate": 3.359420397179483e-05,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09699660539627075,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4944.5,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 2.3564695801199655,
|
|
"grad_norm": 0.49552476816804186,
|
|
"learning_rate": 3.3562830505671065e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10512469708919525,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5890.2,
|
|
"valid_targets_min": 3051
|
|
},
|
|
{
|
|
"epoch": 2.360754070265638,
|
|
"grad_norm": 0.48348438529624993,
|
|
"learning_rate": 3.353139512696383e-05,
|
|
"loss": 0.2075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07310528308153152,
|
|
"step": 2755,
|
|
"valid_targets_mean": 4501.5,
|
|
"valid_targets_min": 2621
|
|
},
|
|
{
|
|
"epoch": 2.365038560411311,
|
|
"grad_norm": 0.4828132476454937,
|
|
"learning_rate": 3.34998979791716e-05,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11672252416610718,
|
|
"step": 2760,
|
|
"valid_targets_mean": 5034.0,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 2.3693230505569836,
|
|
"grad_norm": 0.47308712313036416,
|
|
"learning_rate": 3.3468339206074815e-05,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10690588504076004,
|
|
"step": 2765,
|
|
"valid_targets_mean": 5897.6,
|
|
"valid_targets_min": 2412
|
|
},
|
|
{
|
|
"epoch": 2.3736075407026562,
|
|
"grad_norm": 0.4653042324351704,
|
|
"learning_rate": 3.343671895173524e-05,
|
|
"loss": 0.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08077119290828705,
|
|
"step": 2770,
|
|
"valid_targets_mean": 5471.0,
|
|
"valid_targets_min": 2227
|
|
},
|
|
{
|
|
"epoch": 2.377892030848329,
|
|
"grad_norm": 0.437950561918184,
|
|
"learning_rate": 3.340503736049527e-05,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08129626512527466,
|
|
"step": 2775,
|
|
"valid_targets_mean": 5579.0,
|
|
"valid_targets_min": 3308
|
|
},
|
|
{
|
|
"epoch": 2.3821765209940016,
|
|
"grad_norm": 0.509302804118846,
|
|
"learning_rate": 3.3373294576977313e-05,
|
|
"loss": 0.1977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11761146783828735,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4584.9,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 2.3864610111396742,
|
|
"grad_norm": 0.4814389942194716,
|
|
"learning_rate": 3.3341490746083125e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13504838943481445,
|
|
"step": 2785,
|
|
"valid_targets_mean": 5767.0,
|
|
"valid_targets_min": 2377
|
|
},
|
|
{
|
|
"epoch": 2.390745501285347,
|
|
"grad_norm": 0.45161396380264546,
|
|
"learning_rate": 3.33096260129931e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08189062774181366,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4240.6,
|
|
"valid_targets_min": 842
|
|
},
|
|
{
|
|
"epoch": 2.3950299914310196,
|
|
"grad_norm": 0.5218339855169469,
|
|
"learning_rate": 3.327770052316567e-05,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1058935672044754,
|
|
"step": 2795,
|
|
"valid_targets_mean": 5277.1,
|
|
"valid_targets_min": 2821
|
|
},
|
|
{
|
|
"epoch": 2.3993144815766922,
|
|
"grad_norm": 0.5190837299505044,
|
|
"learning_rate": 3.3245714422336615e-05,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1227574422955513,
|
|
"step": 2800,
|
|
"valid_targets_mean": 5310.6,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 2.403598971722365,
|
|
"grad_norm": 0.4771751306499786,
|
|
"learning_rate": 3.321366785651837e-05,
|
|
"loss": 0.2036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09484189748764038,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4509.6,
|
|
"valid_targets_min": 2628
|
|
},
|
|
{
|
|
"epoch": 2.4078834618680376,
|
|
"grad_norm": 0.5173351776105665,
|
|
"learning_rate": 3.31815609719994e-05,
|
|
"loss": 0.1945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11100342869758606,
|
|
"step": 2810,
|
|
"valid_targets_mean": 6206.4,
|
|
"valid_targets_min": 4923
|
|
},
|
|
{
|
|
"epoch": 2.4121679520137103,
|
|
"grad_norm": 0.4357793385013147,
|
|
"learning_rate": 3.314939391534354e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10164409875869751,
|
|
"step": 2815,
|
|
"valid_targets_mean": 6839.2,
|
|
"valid_targets_min": 4419
|
|
},
|
|
{
|
|
"epoch": 2.416452442159383,
|
|
"grad_norm": 0.5023728125638247,
|
|
"learning_rate": 3.311716683338927e-05,
|
|
"loss": 0.1973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10916950553655624,
|
|
"step": 2820,
|
|
"valid_targets_mean": 5020.1,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 2.4207369323050556,
|
|
"grad_norm": 0.44497761259747304,
|
|
"learning_rate": 3.308487987324908e-05,
|
|
"loss": 0.2014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08513899147510529,
|
|
"step": 2825,
|
|
"valid_targets_mean": 4916.0,
|
|
"valid_targets_min": 2516
|
|
},
|
|
{
|
|
"epoch": 2.4250214224507283,
|
|
"grad_norm": 0.499349034804837,
|
|
"learning_rate": 3.305253318230882e-05,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1289609670639038,
|
|
"step": 2830,
|
|
"valid_targets_mean": 5271.5,
|
|
"valid_targets_min": 3094
|
|
},
|
|
{
|
|
"epoch": 2.429305912596401,
|
|
"grad_norm": 0.5463948445725036,
|
|
"learning_rate": 3.302012690822698e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10212835669517517,
|
|
"step": 2835,
|
|
"valid_targets_mean": 4947.0,
|
|
"valid_targets_min": 2836
|
|
},
|
|
{
|
|
"epoch": 2.4335904027420736,
|
|
"grad_norm": 0.4086429133907161,
|
|
"learning_rate": 3.2987661198934066e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09139082580804825,
|
|
"step": 2840,
|
|
"valid_targets_mean": 7052.8,
|
|
"valid_targets_min": 5042
|
|
},
|
|
{
|
|
"epoch": 2.4378748928877463,
|
|
"grad_norm": 0.4876443641286558,
|
|
"learning_rate": 3.295513620263187e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12413141876459122,
|
|
"step": 2845,
|
|
"valid_targets_mean": 5646.8,
|
|
"valid_targets_min": 2418
|
|
},
|
|
{
|
|
"epoch": 2.442159383033419,
|
|
"grad_norm": 0.49316864024616114,
|
|
"learning_rate": 3.292255206779282e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.096271812915802,
|
|
"step": 2850,
|
|
"valid_targets_mean": 4577.0,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 2.4464438731790916,
|
|
"grad_norm": 0.4964219050365118,
|
|
"learning_rate": 3.2889908943159335e-05,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1023876965045929,
|
|
"step": 2855,
|
|
"valid_targets_mean": 7298.4,
|
|
"valid_targets_min": 4788
|
|
},
|
|
{
|
|
"epoch": 2.4507283633247643,
|
|
"grad_norm": 0.4990305688617052,
|
|
"learning_rate": 3.285720697774309e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11171339452266693,
|
|
"step": 2860,
|
|
"valid_targets_mean": 5601.4,
|
|
"valid_targets_min": 2756
|
|
},
|
|
{
|
|
"epoch": 2.455012853470437,
|
|
"grad_norm": 0.41817374740108504,
|
|
"learning_rate": 3.282444632082437e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08068175613880157,
|
|
"step": 2865,
|
|
"valid_targets_mean": 5683.9,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 2.4592973436161096,
|
|
"grad_norm": 0.43554569977825025,
|
|
"learning_rate": 3.279162712195137e-05,
|
|
"loss": 0.2053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11963269114494324,
|
|
"step": 2870,
|
|
"valid_targets_mean": 7243.9,
|
|
"valid_targets_min": 4833
|
|
},
|
|
{
|
|
"epoch": 2.4635818337617823,
|
|
"grad_norm": 0.48826855861952745,
|
|
"learning_rate": 3.275874953093953e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10686782747507095,
|
|
"step": 2875,
|
|
"valid_targets_mean": 6170.8,
|
|
"valid_targets_min": 3494
|
|
},
|
|
{
|
|
"epoch": 2.467866323907455,
|
|
"grad_norm": 0.5068519289504925,
|
|
"learning_rate": 3.272581369787085e-05,
|
|
"loss": 0.192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11042851209640503,
|
|
"step": 2880,
|
|
"valid_targets_mean": 4609.8,
|
|
"valid_targets_min": 2343
|
|
},
|
|
{
|
|
"epoch": 2.4721508140531276,
|
|
"grad_norm": 0.46615869185497966,
|
|
"learning_rate": 3.2692819773093176e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09782660007476807,
|
|
"step": 2885,
|
|
"valid_targets_mean": 5556.2,
|
|
"valid_targets_min": 3460
|
|
},
|
|
{
|
|
"epoch": 2.4764353041988003,
|
|
"grad_norm": 0.4967868181924323,
|
|
"learning_rate": 3.265976790721955e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09430047124624252,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3529.8,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 2.480719794344473,
|
|
"grad_norm": 0.5205596403786918,
|
|
"learning_rate": 3.2626658251127504e-05,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0845278799533844,
|
|
"step": 2895,
|
|
"valid_targets_mean": 5316.0,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 2.4850042844901457,
|
|
"grad_norm": 0.587813292998456,
|
|
"learning_rate": 3.2593490955958374e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1236010491847992,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4464.0,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 2.4892887746358183,
|
|
"grad_norm": 0.5067347323119288,
|
|
"learning_rate": 3.256026617311662e-05,
|
|
"loss": 0.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09447930008172989,
|
|
"step": 2905,
|
|
"valid_targets_mean": 4661.0,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 2.493573264781491,
|
|
"grad_norm": 0.5698817677028789,
|
|
"learning_rate": 3.252698405426912e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09549528360366821,
|
|
"step": 2910,
|
|
"valid_targets_mean": 4545.1,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 2.4978577549271637,
|
|
"grad_norm": 0.5216276128702821,
|
|
"learning_rate": 3.249364475134447e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1125568151473999,
|
|
"step": 2915,
|
|
"valid_targets_mean": 4951.6,
|
|
"valid_targets_min": 2860
|
|
},
|
|
{
|
|
"epoch": 2.5021422450728363,
|
|
"grad_norm": 0.42280291475452436,
|
|
"learning_rate": 3.246024841653233e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0859423577785492,
|
|
"step": 2920,
|
|
"valid_targets_mean": 5395.1,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 2.506426735218509,
|
|
"grad_norm": 0.5223758050374674,
|
|
"learning_rate": 3.2426795202282675e-05,
|
|
"loss": 0.1963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08532857149839401,
|
|
"step": 2925,
|
|
"valid_targets_mean": 6232.2,
|
|
"valid_targets_min": 4735
|
|
},
|
|
{
|
|
"epoch": 2.5107112253641817,
|
|
"grad_norm": 0.4856689543602733,
|
|
"learning_rate": 3.239328526130516e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09928176552057266,
|
|
"step": 2930,
|
|
"valid_targets_mean": 5423.1,
|
|
"valid_targets_min": 1613
|
|
},
|
|
{
|
|
"epoch": 2.5149957155098543,
|
|
"grad_norm": 0.4321558846322626,
|
|
"learning_rate": 3.235971874656835e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08418859541416168,
|
|
"step": 2935,
|
|
"valid_targets_mean": 5001.8,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 2.519280205655527,
|
|
"grad_norm": 0.48854417396408445,
|
|
"learning_rate": 3.2326095811299096e-05,
|
|
"loss": 0.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10312269628047943,
|
|
"step": 2940,
|
|
"valid_targets_mean": 4326.8,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 2.5235646958011997,
|
|
"grad_norm": 0.47592753685481903,
|
|
"learning_rate": 3.22924166089818e-05,
|
|
"loss": 0.2075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10614067316055298,
|
|
"step": 2945,
|
|
"valid_targets_mean": 5605.1,
|
|
"valid_targets_min": 2827
|
|
},
|
|
{
|
|
"epoch": 2.5278491859468724,
|
|
"grad_norm": 0.48528302022832537,
|
|
"learning_rate": 3.225868129335768e-05,
|
|
"loss": 0.2013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09091340005397797,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4797.1,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 2.532133676092545,
|
|
"grad_norm": 0.5251980386220312,
|
|
"learning_rate": 3.2224890018424156e-05,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09622952342033386,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4853.0,
|
|
"valid_targets_min": 2149
|
|
},
|
|
{
|
|
"epoch": 2.5364181662382177,
|
|
"grad_norm": 0.541458693239247,
|
|
"learning_rate": 3.219104293843405e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10172342509031296,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4165.2,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 2.5407026563838904,
|
|
"grad_norm": 0.5178417669621618,
|
|
"learning_rate": 3.215714020789495e-05,
|
|
"loss": 0.2124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09176942706108093,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3891.8,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 2.544987146529563,
|
|
"grad_norm": 0.4483728443085302,
|
|
"learning_rate": 3.212318198156848e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09753090143203735,
|
|
"step": 2970,
|
|
"valid_targets_mean": 5039.6,
|
|
"valid_targets_min": 2813
|
|
},
|
|
{
|
|
"epoch": 2.5492716366752357,
|
|
"grad_norm": 0.43906680838014805,
|
|
"learning_rate": 3.208916841446959e-05,
|
|
"loss": 0.212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08158387988805771,
|
|
"step": 2975,
|
|
"valid_targets_mean": 5588.9,
|
|
"valid_targets_min": 3214
|
|
},
|
|
{
|
|
"epoch": 2.5535561268209084,
|
|
"grad_norm": 0.42515686500054095,
|
|
"learning_rate": 3.2055099661865854e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09098470211029053,
|
|
"step": 2980,
|
|
"valid_targets_mean": 5731.4,
|
|
"valid_targets_min": 3643
|
|
},
|
|
{
|
|
"epoch": 2.557840616966581,
|
|
"grad_norm": 0.4667107050928945,
|
|
"learning_rate": 3.202097587927676e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11131277680397034,
|
|
"step": 2985,
|
|
"valid_targets_mean": 4814.0,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 2.5621251071122537,
|
|
"grad_norm": 0.5191660325554805,
|
|
"learning_rate": 3.1986797222473016e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.102913036942482,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4120.5,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 2.5664095972579264,
|
|
"grad_norm": 0.5098529158119127,
|
|
"learning_rate": 3.1952563847475805e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10675621777772903,
|
|
"step": 2995,
|
|
"valid_targets_mean": 5657.0,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 2.570694087403599,
|
|
"grad_norm": 0.4757706576119366,
|
|
"learning_rate": 3.191827591055609e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08147279173135757,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4523.4,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 2.5749785775492717,
|
|
"grad_norm": 0.47439322539254747,
|
|
"learning_rate": 3.1883933568233915e-05,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08240111172199249,
|
|
"step": 3005,
|
|
"valid_targets_mean": 6329.6,
|
|
"valid_targets_min": 4234
|
|
},
|
|
{
|
|
"epoch": 2.5792630676949444,
|
|
"grad_norm": 0.4301275527510319,
|
|
"learning_rate": 3.184953697727767e-05,
|
|
"loss": 0.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09618514031171799,
|
|
"step": 3010,
|
|
"valid_targets_mean": 6511.9,
|
|
"valid_targets_min": 3879
|
|
},
|
|
{
|
|
"epoch": 2.583547557840617,
|
|
"grad_norm": 0.4585872283916217,
|
|
"learning_rate": 3.181508629470339e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1063903272151947,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3837.1,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 2.5878320479862897,
|
|
"grad_norm": 0.4767544940704192,
|
|
"learning_rate": 3.1780581677774026e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10895279049873352,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5447.4,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 2.5921165381319624,
|
|
"grad_norm": 0.505750162963359,
|
|
"learning_rate": 3.1746023283998736e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0998040959239006,
|
|
"step": 3025,
|
|
"valid_targets_mean": 3948.4,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 2.596401028277635,
|
|
"grad_norm": 0.4324680394219439,
|
|
"learning_rate": 3.1711411271132156e-05,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09612485021352768,
|
|
"step": 3030,
|
|
"valid_targets_mean": 5286.2,
|
|
"valid_targets_min": 1612
|
|
},
|
|
{
|
|
"epoch": 2.6006855184233078,
|
|
"grad_norm": 0.44679380951456976,
|
|
"learning_rate": 3.1676745797173685e-05,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08475670218467712,
|
|
"step": 3035,
|
|
"valid_targets_mean": 5412.5,
|
|
"valid_targets_min": 2685
|
|
},
|
|
{
|
|
"epoch": 2.6049700085689804,
|
|
"grad_norm": 0.45529448999326766,
|
|
"learning_rate": 3.164202702036677e-05,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11184944212436676,
|
|
"step": 3040,
|
|
"valid_targets_mean": 6625.9,
|
|
"valid_targets_min": 4048
|
|
},
|
|
{
|
|
"epoch": 2.609254498714653,
|
|
"grad_norm": 0.5200508408646883,
|
|
"learning_rate": 3.16072550991982e-05,
|
|
"loss": 0.2133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10833759605884552,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4254.2,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 2.6135389888603258,
|
|
"grad_norm": 0.47583075311418843,
|
|
"learning_rate": 3.157243019239731e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09005630761384964,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4993.1,
|
|
"valid_targets_min": 1900
|
|
},
|
|
{
|
|
"epoch": 2.6178234790059984,
|
|
"grad_norm": 0.47116305006250425,
|
|
"learning_rate": 3.1537552458935356e-05,
|
|
"loss": 0.1964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08517856150865555,
|
|
"step": 3055,
|
|
"valid_targets_mean": 5333.1,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 2.622107969151671,
|
|
"grad_norm": 0.46550629698322077,
|
|
"learning_rate": 3.150262205802472e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09626109898090363,
|
|
"step": 3060,
|
|
"valid_targets_mean": 5502.6,
|
|
"valid_targets_min": 3489
|
|
},
|
|
{
|
|
"epoch": 2.6263924592973438,
|
|
"grad_norm": 0.5190463480248941,
|
|
"learning_rate": 3.1467639149118195e-05,
|
|
"loss": 0.1979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08040817081928253,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4047.1,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 2.6306769494430164,
|
|
"grad_norm": 0.47565052334057345,
|
|
"learning_rate": 3.1432603891908295e-05,
|
|
"loss": 0.2064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10391212999820709,
|
|
"step": 3070,
|
|
"valid_targets_mean": 6069.5,
|
|
"valid_targets_min": 2558
|
|
},
|
|
{
|
|
"epoch": 2.6349614395886887,
|
|
"grad_norm": 0.49248862544332433,
|
|
"learning_rate": 3.1397516446326464e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10490263998508453,
|
|
"step": 3075,
|
|
"valid_targets_mean": 5443.2,
|
|
"valid_targets_min": 2735
|
|
},
|
|
{
|
|
"epoch": 2.6392459297343613,
|
|
"grad_norm": 0.4861909277172617,
|
|
"learning_rate": 3.136237697254241e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11516415327787399,
|
|
"step": 3080,
|
|
"valid_targets_mean": 5015.1,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 2.643530419880034,
|
|
"grad_norm": 0.6286859927618421,
|
|
"learning_rate": 3.132718563096332e-05,
|
|
"loss": 0.2013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09952767938375473,
|
|
"step": 3085,
|
|
"valid_targets_mean": 5225.5,
|
|
"valid_targets_min": 2651
|
|
},
|
|
{
|
|
"epoch": 2.6478149100257067,
|
|
"grad_norm": 0.5524151436612424,
|
|
"learning_rate": 3.129194258223316e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11292801052331924,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4077.6,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 2.6520994001713794,
|
|
"grad_norm": 0.495231500813035,
|
|
"learning_rate": 3.125664798723193e-05,
|
|
"loss": 0.1899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10587464272975922,
|
|
"step": 3095,
|
|
"valid_targets_mean": 5196.5,
|
|
"valid_targets_min": 3772
|
|
},
|
|
{
|
|
"epoch": 2.656383890317052,
|
|
"grad_norm": 0.4357989088506872,
|
|
"learning_rate": 3.122130200707494e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09586100280284882,
|
|
"step": 3100,
|
|
"valid_targets_mean": 5426.6,
|
|
"valid_targets_min": 1967
|
|
},
|
|
{
|
|
"epoch": 2.6606683804627247,
|
|
"grad_norm": 0.5325689265202237,
|
|
"learning_rate": 3.118590480311206e-05,
|
|
"loss": 0.2071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08797873556613922,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3848.8,
|
|
"valid_targets_min": 1716
|
|
},
|
|
{
|
|
"epoch": 2.6649528706083974,
|
|
"grad_norm": 0.4396920701228743,
|
|
"learning_rate": 3.115045653692698e-05,
|
|
"loss": 0.1957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09114794433116913,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4718.9,
|
|
"valid_targets_min": 2123
|
|
},
|
|
{
|
|
"epoch": 2.66923736075407,
|
|
"grad_norm": 0.550751313514682,
|
|
"learning_rate": 3.111495737033651e-05,
|
|
"loss": 0.2054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10032795369625092,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3613.1,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 2.6735218508997427,
|
|
"grad_norm": 0.41079175675526175,
|
|
"learning_rate": 3.107940746538978e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08481456339359283,
|
|
"step": 3120,
|
|
"valid_targets_mean": 5954.8,
|
|
"valid_targets_min": 3243
|
|
},
|
|
{
|
|
"epoch": 2.6778063410454154,
|
|
"grad_norm": 0.5076145326822086,
|
|
"learning_rate": 3.104380698436757e-05,
|
|
"loss": 0.2062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12480606138706207,
|
|
"step": 3125,
|
|
"valid_targets_mean": 5253.0,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 2.682090831191088,
|
|
"grad_norm": 0.49899317879815436,
|
|
"learning_rate": 3.100815608978149e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09135354310274124,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4251.4,
|
|
"valid_targets_min": 2493
|
|
},
|
|
{
|
|
"epoch": 2.6863753213367607,
|
|
"grad_norm": 0.6388457405969222,
|
|
"learning_rate": 3.097245494437333e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09539501368999481,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4684.8,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 2.6906598114824334,
|
|
"grad_norm": 0.4922925574234516,
|
|
"learning_rate": 3.093670371111423e-05,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09869565814733505,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3889.5,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 2.694944301628106,
|
|
"grad_norm": 0.4788156391819075,
|
|
"learning_rate": 3.0900902553204e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0883324146270752,
|
|
"step": 3145,
|
|
"valid_targets_mean": 6716.8,
|
|
"valid_targets_min": 3263
|
|
},
|
|
{
|
|
"epoch": 2.6992287917737787,
|
|
"grad_norm": 0.5726806445817842,
|
|
"learning_rate": 3.086505163407032e-05,
|
|
"loss": 0.1973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12586042284965515,
|
|
"step": 3150,
|
|
"valid_targets_mean": 5087.8,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 2.7035132819194514,
|
|
"grad_norm": 0.4559348559533937,
|
|
"learning_rate": 3.082915111736807e-05,
|
|
"loss": 0.2052,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09658126533031464,
|
|
"step": 3155,
|
|
"valid_targets_mean": 5267.8,
|
|
"valid_targets_min": 2678
|
|
},
|
|
{
|
|
"epoch": 2.707797772065124,
|
|
"grad_norm": 0.5227607109278021,
|
|
"learning_rate": 3.0793201166978476e-05,
|
|
"loss": 0.1999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11674916744232178,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5233.1,
|
|
"valid_targets_min": 2110
|
|
},
|
|
{
|
|
"epoch": 2.7120822622107967,
|
|
"grad_norm": 0.5381499374038914,
|
|
"learning_rate": 3.0757201947008483e-05,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280943900346756,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5339.1,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 2.7163667523564694,
|
|
"grad_norm": 0.4876029858247357,
|
|
"learning_rate": 3.07211536217899e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09767451882362366,
|
|
"step": 3170,
|
|
"valid_targets_mean": 4989.1,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 2.720651242502142,
|
|
"grad_norm": 0.5259217745782021,
|
|
"learning_rate": 3.0685056355878726e-05,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12681160867214203,
|
|
"step": 3175,
|
|
"valid_targets_mean": 4674.9,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 2.7249357326478147,
|
|
"grad_norm": 0.4762818016196657,
|
|
"learning_rate": 3.064891031405434e-05,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11337507516145706,
|
|
"step": 3180,
|
|
"valid_targets_mean": 5564.9,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 2.7292202227934874,
|
|
"grad_norm": 0.453669260564161,
|
|
"learning_rate": 3.0612715661318805e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1022944450378418,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5294.9,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 2.73350471293916,
|
|
"grad_norm": 0.4889641599281156,
|
|
"learning_rate": 3.0576472562896075e-05,
|
|
"loss": 0.2022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08817283809185028,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5229.1,
|
|
"valid_targets_min": 2106
|
|
},
|
|
{
|
|
"epoch": 2.7377892030848328,
|
|
"grad_norm": 0.49903719744109054,
|
|
"learning_rate": 3.054018118423124e-05,
|
|
"loss": 0.2094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1024971455335617,
|
|
"step": 3195,
|
|
"valid_targets_mean": 5159.2,
|
|
"valid_targets_min": 2495
|
|
},
|
|
{
|
|
"epoch": 2.7420736932305054,
|
|
"grad_norm": 0.49119777255841934,
|
|
"learning_rate": 3.0503841690989796e-05,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09236447513103485,
|
|
"step": 3200,
|
|
"valid_targets_mean": 4263.9,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 2.746358183376178,
|
|
"grad_norm": 0.42139121040252897,
|
|
"learning_rate": 3.046745424905688e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0806296318769455,
|
|
"step": 3205,
|
|
"valid_targets_mean": 5324.8,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 2.7506426735218508,
|
|
"grad_norm": 0.8660851118774108,
|
|
"learning_rate": 3.043101902453649e-05,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09605230391025543,
|
|
"step": 3210,
|
|
"valid_targets_mean": 4066.8,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 2.7549271636675234,
|
|
"grad_norm": 0.5879290547420875,
|
|
"learning_rate": 3.0394536183750767e-05,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09734377264976501,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4280.2,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 2.759211653813196,
|
|
"grad_norm": 0.4906383846943346,
|
|
"learning_rate": 3.03580058932392e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07829025387763977,
|
|
"step": 3220,
|
|
"valid_targets_mean": 4240.2,
|
|
"valid_targets_min": 2774
|
|
},
|
|
{
|
|
"epoch": 2.763496143958869,
|
|
"grad_norm": 0.4477719224428516,
|
|
"learning_rate": 3.0321428319757893e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08995410054922104,
|
|
"step": 3225,
|
|
"valid_targets_mean": 5337.2,
|
|
"valid_targets_min": 2546
|
|
},
|
|
{
|
|
"epoch": 2.7677806341045414,
|
|
"grad_norm": 0.48047544514516427,
|
|
"learning_rate": 3.0284803630278775e-05,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.091777004301548,
|
|
"step": 3230,
|
|
"valid_targets_mean": 4903.8,
|
|
"valid_targets_min": 3388
|
|
},
|
|
{
|
|
"epoch": 2.772065124250214,
|
|
"grad_norm": 0.47975164003492826,
|
|
"learning_rate": 3.0248131991988857e-05,
|
|
"loss": 0.198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1095355898141861,
|
|
"step": 3235,
|
|
"valid_targets_mean": 5540.9,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 2.776349614395887,
|
|
"grad_norm": 0.465247942884362,
|
|
"learning_rate": 3.0211413572289482e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10163888335227966,
|
|
"step": 3240,
|
|
"valid_targets_mean": 5639.5,
|
|
"valid_targets_min": 3602
|
|
},
|
|
{
|
|
"epoch": 2.7806341045415595,
|
|
"grad_norm": 0.4641303234289437,
|
|
"learning_rate": 3.0174648538795515e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10476160049438477,
|
|
"step": 3245,
|
|
"valid_targets_mean": 5816.5,
|
|
"valid_targets_min": 3071
|
|
},
|
|
{
|
|
"epoch": 2.784918594687232,
|
|
"grad_norm": 0.505653288338379,
|
|
"learning_rate": 3.013783705933463e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10704667121171951,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5718.9,
|
|
"valid_targets_min": 2908
|
|
},
|
|
{
|
|
"epoch": 2.789203084832905,
|
|
"grad_norm": 0.4816018494525304,
|
|
"learning_rate": 3.0100979301946532e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10598535090684891,
|
|
"step": 3255,
|
|
"valid_targets_mean": 4577.1,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 2.7934875749785775,
|
|
"grad_norm": 0.4266350764785256,
|
|
"learning_rate": 3.0064075434882132e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09043243527412415,
|
|
"step": 3260,
|
|
"valid_targets_mean": 6281.5,
|
|
"valid_targets_min": 3937
|
|
},
|
|
{
|
|
"epoch": 2.79777206512425,
|
|
"grad_norm": 0.4644479409123575,
|
|
"learning_rate": 3.0027125626602873e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08926934003829956,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5050.8,
|
|
"valid_targets_min": 2544
|
|
},
|
|
{
|
|
"epoch": 2.802056555269923,
|
|
"grad_norm": 0.4396938445335989,
|
|
"learning_rate": 2.9990130045779883e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0825769230723381,
|
|
"step": 3270,
|
|
"valid_targets_mean": 5007.4,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 2.8063410454155955,
|
|
"grad_norm": 0.5118083821075619,
|
|
"learning_rate": 2.9953088861293262e-05,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10184841603040695,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3770.6,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 2.810625535561268,
|
|
"grad_norm": 0.5105152513424626,
|
|
"learning_rate": 2.9916002242231262e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.132453054189682,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5373.8,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 2.814910025706941,
|
|
"grad_norm": 0.49738165894659575,
|
|
"learning_rate": 2.9878870357889544e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1020285040140152,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5117.5,
|
|
"valid_targets_min": 1751
|
|
},
|
|
{
|
|
"epoch": 2.8191945158526135,
|
|
"grad_norm": 0.4634789342087233,
|
|
"learning_rate": 2.9841693377770416e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08905909955501556,
|
|
"step": 3290,
|
|
"valid_targets_mean": 5624.5,
|
|
"valid_targets_min": 2650
|
|
},
|
|
{
|
|
"epoch": 2.823479005998286,
|
|
"grad_norm": 0.4864734023517098,
|
|
"learning_rate": 2.980447147158202e-05,
|
|
"loss": 0.2013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12124571949243546,
|
|
"step": 3295,
|
|
"valid_targets_mean": 5532.0,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 2.827763496143959,
|
|
"grad_norm": 0.4764156499362963,
|
|
"learning_rate": 2.9767204809237588e-05,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10643521696329117,
|
|
"step": 3300,
|
|
"valid_targets_mean": 6071.6,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 2.8320479862896315,
|
|
"grad_norm": 0.5007611593322627,
|
|
"learning_rate": 2.972989356085466e-05,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09378144145011902,
|
|
"step": 3305,
|
|
"valid_targets_mean": 4657.8,
|
|
"valid_targets_min": 1111
|
|
},
|
|
{
|
|
"epoch": 2.836332476435304,
|
|
"grad_norm": 0.5005017672048564,
|
|
"learning_rate": 2.9692537896754298e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11224367469549179,
|
|
"step": 3310,
|
|
"valid_targets_mean": 4564.1,
|
|
"valid_targets_min": 2544
|
|
},
|
|
{
|
|
"epoch": 2.840616966580977,
|
|
"grad_norm": 0.48187504122875946,
|
|
"learning_rate": 2.9655137987460335e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08884987235069275,
|
|
"step": 3315,
|
|
"valid_targets_mean": 5739.2,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 2.8449014567266495,
|
|
"grad_norm": 0.4741640143584193,
|
|
"learning_rate": 2.9617694003698562e-05,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08888500183820724,
|
|
"step": 3320,
|
|
"valid_targets_mean": 4935.2,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 2.849185946872322,
|
|
"grad_norm": 0.5052854112998219,
|
|
"learning_rate": 2.958020611639596e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12267468869686127,
|
|
"step": 3325,
|
|
"valid_targets_mean": 6198.9,
|
|
"valid_targets_min": 4193
|
|
},
|
|
{
|
|
"epoch": 2.853470437017995,
|
|
"grad_norm": 0.5101750578437945,
|
|
"learning_rate": 2.9542674496679934e-05,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09153853356838226,
|
|
"step": 3330,
|
|
"valid_targets_mean": 4406.9,
|
|
"valid_targets_min": 958
|
|
},
|
|
{
|
|
"epoch": 2.8577549271636675,
|
|
"grad_norm": 0.49964993139780745,
|
|
"learning_rate": 2.9505099315877523e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09591518342494965,
|
|
"step": 3335,
|
|
"valid_targets_mean": 5327.9,
|
|
"valid_targets_min": 2086
|
|
},
|
|
{
|
|
"epoch": 2.86203941730934,
|
|
"grad_norm": 0.4801936085794442,
|
|
"learning_rate": 2.9467480745514602e-05,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10366001725196838,
|
|
"step": 3340,
|
|
"valid_targets_mean": 5311.1,
|
|
"valid_targets_min": 2174
|
|
},
|
|
{
|
|
"epoch": 2.866323907455013,
|
|
"grad_norm": 0.5427472421890712,
|
|
"learning_rate": 2.9429818957315136e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1099645346403122,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4822.6,
|
|
"valid_targets_min": 2990
|
|
},
|
|
{
|
|
"epoch": 2.8706083976006855,
|
|
"grad_norm": 0.4766043311462836,
|
|
"learning_rate": 2.9392114123200364e-05,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10909044742584229,
|
|
"step": 3350,
|
|
"valid_targets_mean": 5208.8,
|
|
"valid_targets_min": 3584
|
|
},
|
|
{
|
|
"epoch": 2.874892887746358,
|
|
"grad_norm": 0.5119800749708363,
|
|
"learning_rate": 2.9354366415288014e-05,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09974499046802521,
|
|
"step": 3355,
|
|
"valid_targets_mean": 4476.0,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 2.879177377892031,
|
|
"grad_norm": 0.48951315745386836,
|
|
"learning_rate": 2.9316576005891542e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08528132736682892,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4576.9,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 2.8834618680377035,
|
|
"grad_norm": 0.49140619995629536,
|
|
"learning_rate": 2.9278743067519326e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13168025016784668,
|
|
"step": 3365,
|
|
"valid_targets_mean": 6030.4,
|
|
"valid_targets_min": 2371
|
|
},
|
|
{
|
|
"epoch": 2.887746358183376,
|
|
"grad_norm": 0.4819206963434024,
|
|
"learning_rate": 2.9240867772873874e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1023363322019577,
|
|
"step": 3370,
|
|
"valid_targets_mean": 4449.2,
|
|
"valid_targets_min": 3215
|
|
},
|
|
{
|
|
"epoch": 2.892030848329049,
|
|
"grad_norm": 0.5091994742881791,
|
|
"learning_rate": 2.920295029485106e-05,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10762618482112885,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5931.4,
|
|
"valid_targets_min": 2190
|
|
},
|
|
{
|
|
"epoch": 2.8963153384747216,
|
|
"grad_norm": 0.45100342040875835,
|
|
"learning_rate": 2.916499080653931e-05,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10775713622570038,
|
|
"step": 3380,
|
|
"valid_targets_mean": 5796.4,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 2.9005998286203942,
|
|
"grad_norm": 0.4190962708075657,
|
|
"learning_rate": 2.9126989481218825e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08602378517389297,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5787.2,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 2.904884318766067,
|
|
"grad_norm": 0.5697440698751479,
|
|
"learning_rate": 2.9088946492360785e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1274511069059372,
|
|
"step": 3390,
|
|
"valid_targets_mean": 4970.1,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 2.9091688089117396,
|
|
"grad_norm": 0.4196775106056009,
|
|
"learning_rate": 2.9050862013626564e-05,
|
|
"loss": 0.1893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0831018015742302,
|
|
"step": 3395,
|
|
"valid_targets_mean": 5513.4,
|
|
"valid_targets_min": 1654
|
|
},
|
|
{
|
|
"epoch": 2.9134532990574122,
|
|
"grad_norm": 0.5081613353431708,
|
|
"learning_rate": 2.901273621886693e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10517710447311401,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3801.2,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 2.917737789203085,
|
|
"grad_norm": 0.45930708882009963,
|
|
"learning_rate": 2.8974569282121234e-05,
|
|
"loss": 0.197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06961196660995483,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4856.4,
|
|
"valid_targets_min": 2652
|
|
},
|
|
{
|
|
"epoch": 2.9220222793487576,
|
|
"grad_norm": 0.481696740533411,
|
|
"learning_rate": 2.893636137761667e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09374944865703583,
|
|
"step": 3410,
|
|
"valid_targets_mean": 5881.6,
|
|
"valid_targets_min": 3134
|
|
},
|
|
{
|
|
"epoch": 2.9263067694944302,
|
|
"grad_norm": 0.4503892609201548,
|
|
"learning_rate": 2.8898112679767423e-05,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08698620647192001,
|
|
"step": 3415,
|
|
"valid_targets_mean": 6453.8,
|
|
"valid_targets_min": 3057
|
|
},
|
|
{
|
|
"epoch": 2.930591259640103,
|
|
"grad_norm": 0.5118223470574057,
|
|
"learning_rate": 2.8859823363173895e-05,
|
|
"loss": 0.1877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0849127247929573,
|
|
"step": 3420,
|
|
"valid_targets_mean": 5139.4,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 2.9348757497857756,
|
|
"grad_norm": 0.6604188562733819,
|
|
"learning_rate": 2.8821493602621905e-05,
|
|
"loss": 0.1866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0802006646990776,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3835.0,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 2.9391602399314483,
|
|
"grad_norm": 0.4628759246456681,
|
|
"learning_rate": 2.8783123573081914e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08029916882514954,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4832.9,
|
|
"valid_targets_min": 2019
|
|
},
|
|
{
|
|
"epoch": 2.943444730077121,
|
|
"grad_norm": 0.46129914147611034,
|
|
"learning_rate": 2.8744713449708177e-05,
|
|
"loss": 0.1899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10295387357473373,
|
|
"step": 3435,
|
|
"valid_targets_mean": 5271.5,
|
|
"valid_targets_min": 2478
|
|
},
|
|
{
|
|
"epoch": 2.9477292202227936,
|
|
"grad_norm": 0.441159023257155,
|
|
"learning_rate": 2.8706263407837987e-05,
|
|
"loss": 0.1923,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0948648527264595,
|
|
"step": 3440,
|
|
"valid_targets_mean": 5663.5,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 2.9520137103684663,
|
|
"grad_norm": 0.45438783348338496,
|
|
"learning_rate": 2.8667773622990866e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07966434955596924,
|
|
"step": 3445,
|
|
"valid_targets_mean": 4331.8,
|
|
"valid_targets_min": 1691
|
|
},
|
|
{
|
|
"epoch": 2.956298200514139,
|
|
"grad_norm": 0.4711518580687224,
|
|
"learning_rate": 2.862924427086774e-05,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08603475242853165,
|
|
"step": 3450,
|
|
"valid_targets_mean": 4567.8,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 2.9605826906598116,
|
|
"grad_norm": 0.47190988623418867,
|
|
"learning_rate": 2.8590675527350176e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09044405072927475,
|
|
"step": 3455,
|
|
"valid_targets_mean": 5541.8,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 2.9648671808054843,
|
|
"grad_norm": 0.4720059395646641,
|
|
"learning_rate": 2.855206756849954e-05,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10311125218868256,
|
|
"step": 3460,
|
|
"valid_targets_mean": 6031.2,
|
|
"valid_targets_min": 3389
|
|
},
|
|
{
|
|
"epoch": 2.969151670951157,
|
|
"grad_norm": 0.4655184637568635,
|
|
"learning_rate": 2.8513420570556215e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1196562647819519,
|
|
"step": 3465,
|
|
"valid_targets_mean": 6100.0,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 2.9734361610968296,
|
|
"grad_norm": 0.47509143615627447,
|
|
"learning_rate": 2.84747347099388e-05,
|
|
"loss": 0.193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10468809306621552,
|
|
"step": 3470,
|
|
"valid_targets_mean": 5777.6,
|
|
"valid_targets_min": 3849
|
|
},
|
|
{
|
|
"epoch": 2.9777206512425023,
|
|
"grad_norm": 0.498289537677379,
|
|
"learning_rate": 2.8436010163243287e-05,
|
|
"loss": 0.1943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09609770029783249,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4283.6,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 2.982005141388175,
|
|
"grad_norm": 0.507469384352556,
|
|
"learning_rate": 2.8397247107242277e-05,
|
|
"loss": 0.1869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08562733232975006,
|
|
"step": 3480,
|
|
"valid_targets_mean": 4300.0,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 2.9862896315338476,
|
|
"grad_norm": 0.47299832417200927,
|
|
"learning_rate": 2.835844571888415e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07681527733802795,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4179.5,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 2.9905741216795203,
|
|
"grad_norm": 0.4667157029346048,
|
|
"learning_rate": 2.8319606175292268e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09336496889591217,
|
|
"step": 3490,
|
|
"valid_targets_mean": 5837.1,
|
|
"valid_targets_min": 3571
|
|
},
|
|
{
|
|
"epoch": 2.994858611825193,
|
|
"grad_norm": 0.4634512544107229,
|
|
"learning_rate": 2.828072865376418e-05,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08310113102197647,
|
|
"step": 3495,
|
|
"valid_targets_mean": 5144.8,
|
|
"valid_targets_min": 2286
|
|
},
|
|
{
|
|
"epoch": 2.9991431019708656,
|
|
"grad_norm": 0.49616404871527503,
|
|
"learning_rate": 2.824181333177078e-05,
|
|
"loss": 0.1888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06787382811307907,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4116.4,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 3.0034275921165383,
|
|
"grad_norm": 0.4456602655142074,
|
|
"learning_rate": 2.8202860386955534e-05,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06987389177083969,
|
|
"step": 3505,
|
|
"valid_targets_mean": 4272.9,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 3.007712082262211,
|
|
"grad_norm": 0.5110710630191438,
|
|
"learning_rate": 2.816386999713365e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09876535832881927,
|
|
"step": 3510,
|
|
"valid_targets_mean": 5605.4,
|
|
"valid_targets_min": 2389
|
|
},
|
|
{
|
|
"epoch": 3.0119965724078837,
|
|
"grad_norm": 0.5107222623720482,
|
|
"learning_rate": 2.812484234029124e-05,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0746680349111557,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4642.9,
|
|
"valid_targets_min": 2104
|
|
},
|
|
{
|
|
"epoch": 3.0162810625535563,
|
|
"grad_norm": 0.5128074506697414,
|
|
"learning_rate": 2.808577759458458e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07372597604990005,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4467.4,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 3.020565552699229,
|
|
"grad_norm": 0.4834848424338338,
|
|
"learning_rate": 2.8046675938339225e-05,
|
|
"loss": 0.186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08008388429880142,
|
|
"step": 3525,
|
|
"valid_targets_mean": 5414.8,
|
|
"valid_targets_min": 2880
|
|
},
|
|
{
|
|
"epoch": 3.0248500428449017,
|
|
"grad_norm": 0.4846106856666557,
|
|
"learning_rate": 2.8007537550049217e-05,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0926634818315506,
|
|
"step": 3530,
|
|
"valid_targets_mean": 5325.6,
|
|
"valid_targets_min": 3692
|
|
},
|
|
{
|
|
"epoch": 3.0291345329905743,
|
|
"grad_norm": 0.5380320550491385,
|
|
"learning_rate": 2.7968362608376286e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09920945763587952,
|
|
"step": 3535,
|
|
"valid_targets_mean": 4871.8,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 3.033419023136247,
|
|
"grad_norm": 0.5268363683831023,
|
|
"learning_rate": 2.792915129214902e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1121433675289154,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5245.8,
|
|
"valid_targets_min": 2337
|
|
},
|
|
{
|
|
"epoch": 3.0377035132819197,
|
|
"grad_norm": 0.5235815428498671,
|
|
"learning_rate": 2.7889903780362063e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07933693379163742,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3886.6,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 3.041988003427592,
|
|
"grad_norm": 0.4745143364972243,
|
|
"learning_rate": 2.785062025217526e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06564758718013763,
|
|
"step": 3550,
|
|
"valid_targets_mean": 5061.2,
|
|
"valid_targets_min": 3020
|
|
},
|
|
{
|
|
"epoch": 3.0462724935732646,
|
|
"grad_norm": 0.5901591936966151,
|
|
"learning_rate": 2.78113008869129e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11617127805948257,
|
|
"step": 3555,
|
|
"valid_targets_mean": 4354.5,
|
|
"valid_targets_min": 2177
|
|
},
|
|
{
|
|
"epoch": 3.0505569837189372,
|
|
"grad_norm": 0.5031895790828588,
|
|
"learning_rate": 2.7771945864062847e-05,
|
|
"loss": 0.1889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09542632848024368,
|
|
"step": 3560,
|
|
"valid_targets_mean": 4467.2,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 3.05484147386461,
|
|
"grad_norm": 0.5067028918137597,
|
|
"learning_rate": 2.773255536327573e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1154802069067955,
|
|
"step": 3565,
|
|
"valid_targets_mean": 4982.1,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 3.0591259640102826,
|
|
"grad_norm": 0.5295274273062762,
|
|
"learning_rate": 2.7693129564364138e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10568901896476746,
|
|
"step": 3570,
|
|
"valid_targets_mean": 4102.6,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 3.0634104541559553,
|
|
"grad_norm": 0.5184377694152152,
|
|
"learning_rate": 2.7653668647301797e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08287379145622253,
|
|
"step": 3575,
|
|
"valid_targets_mean": 5492.0,
|
|
"valid_targets_min": 2635
|
|
},
|
|
{
|
|
"epoch": 3.067694944301628,
|
|
"grad_norm": 0.4426511184640844,
|
|
"learning_rate": 2.7614172792222737e-05,
|
|
"loss": 0.1714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07739445567131042,
|
|
"step": 3580,
|
|
"valid_targets_mean": 5844.0,
|
|
"valid_targets_min": 1437
|
|
},
|
|
{
|
|
"epoch": 3.0719794344473006,
|
|
"grad_norm": 0.6163269967783526,
|
|
"learning_rate": 2.7574642179420467e-05,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09435909986495972,
|
|
"step": 3585,
|
|
"valid_targets_mean": 5930.0,
|
|
"valid_targets_min": 1412
|
|
},
|
|
{
|
|
"epoch": 3.0762639245929733,
|
|
"grad_norm": 0.4912971796430249,
|
|
"learning_rate": 2.7535076989347182e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10539242625236511,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5639.5,
|
|
"valid_targets_min": 3804
|
|
},
|
|
{
|
|
"epoch": 3.080548414738646,
|
|
"grad_norm": 0.49155347164096125,
|
|
"learning_rate": 2.7495477402612905e-05,
|
|
"loss": 0.1845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09726318717002869,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4792.0,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 3.0848329048843186,
|
|
"grad_norm": 0.46870008979654276,
|
|
"learning_rate": 2.7455843599984658e-05,
|
|
"loss": 0.1808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08482338488101959,
|
|
"step": 3600,
|
|
"valid_targets_mean": 5219.8,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 3.0891173950299913,
|
|
"grad_norm": 0.5315024980579985,
|
|
"learning_rate": 2.7416175762385687e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.105237677693367,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5123.6,
|
|
"valid_targets_min": 3354
|
|
},
|
|
{
|
|
"epoch": 3.093401885175664,
|
|
"grad_norm": 0.5074302541744373,
|
|
"learning_rate": 2.737647407089459e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08297023177146912,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4989.2,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 3.0976863753213366,
|
|
"grad_norm": 0.5224620565734408,
|
|
"learning_rate": 2.7336738706744494e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07492747157812119,
|
|
"step": 3615,
|
|
"valid_targets_mean": 4522.4,
|
|
"valid_targets_min": 964
|
|
},
|
|
{
|
|
"epoch": 3.1019708654670093,
|
|
"grad_norm": 0.44188115834719727,
|
|
"learning_rate": 2.7296969851322246e-05,
|
|
"loss": 0.1818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07636836171150208,
|
|
"step": 3620,
|
|
"valid_targets_mean": 5345.1,
|
|
"valid_targets_min": 1656
|
|
},
|
|
{
|
|
"epoch": 3.106255355612682,
|
|
"grad_norm": 0.48164647718897613,
|
|
"learning_rate": 2.7257167686167577e-05,
|
|
"loss": 0.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07908610999584198,
|
|
"step": 3625,
|
|
"valid_targets_mean": 4995.0,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 3.1105398457583546,
|
|
"grad_norm": 0.46223745921085535,
|
|
"learning_rate": 2.721733239297228e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08104974031448364,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5590.5,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 3.1148243359040273,
|
|
"grad_norm": 0.5550320490512014,
|
|
"learning_rate": 2.7177464153579345e-05,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07732957601547241,
|
|
"step": 3635,
|
|
"valid_targets_mean": 4547.8,
|
|
"valid_targets_min": 2008
|
|
},
|
|
{
|
|
"epoch": 3.1191088260497,
|
|
"grad_norm": 0.5493528497955329,
|
|
"learning_rate": 2.713756314998219e-05,
|
|
"loss": 0.1811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09979130327701569,
|
|
"step": 3640,
|
|
"valid_targets_mean": 4378.2,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 3.1233933161953726,
|
|
"grad_norm": 0.5292553656306752,
|
|
"learning_rate": 2.7097629564323784e-05,
|
|
"loss": 0.1774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09737001359462738,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3762.6,
|
|
"valid_targets_min": 2046
|
|
},
|
|
{
|
|
"epoch": 3.1276778063410453,
|
|
"grad_norm": 0.5153566581021852,
|
|
"learning_rate": 2.705766357889582e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08181529492139816,
|
|
"step": 3650,
|
|
"valid_targets_mean": 3764.9,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 3.131962296486718,
|
|
"grad_norm": 0.533212309221076,
|
|
"learning_rate": 2.701766537613791e-05,
|
|
"loss": 0.196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10756025463342667,
|
|
"step": 3655,
|
|
"valid_targets_mean": 4934.1,
|
|
"valid_targets_min": 3003
|
|
},
|
|
{
|
|
"epoch": 3.1362467866323906,
|
|
"grad_norm": 0.4730794348004389,
|
|
"learning_rate": 2.6977635138636725e-05,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08242323994636536,
|
|
"step": 3660,
|
|
"valid_targets_mean": 5205.4,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.1405312767780633,
|
|
"grad_norm": 0.5082206695922894,
|
|
"learning_rate": 2.693757304912516e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08571913838386536,
|
|
"step": 3665,
|
|
"valid_targets_mean": 5586.2,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 3.144815766923736,
|
|
"grad_norm": 0.5194249572852877,
|
|
"learning_rate": 2.6897479290481537e-05,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08359972387552261,
|
|
"step": 3670,
|
|
"valid_targets_mean": 5197.6,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 3.1491002570694087,
|
|
"grad_norm": 0.5024393062086439,
|
|
"learning_rate": 2.685735404572871e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07878033071756363,
|
|
"step": 3675,
|
|
"valid_targets_mean": 4835.9,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 3.1533847472150813,
|
|
"grad_norm": 0.4909761470748089,
|
|
"learning_rate": 2.6817197498033282e-05,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09674154222011566,
|
|
"step": 3680,
|
|
"valid_targets_mean": 5215.1,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 3.157669237360754,
|
|
"grad_norm": 0.5894183379504294,
|
|
"learning_rate": 2.6777009830704767e-05,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09668425470590591,
|
|
"step": 3685,
|
|
"valid_targets_mean": 3794.5,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 3.1619537275064267,
|
|
"grad_norm": 0.53445260586619,
|
|
"learning_rate": 2.6736791227194694e-05,
|
|
"loss": 0.1712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07560717314481735,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4421.9,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 3.1662382176520993,
|
|
"grad_norm": 0.4985541351909853,
|
|
"learning_rate": 2.669654187109585e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08976177871227264,
|
|
"step": 3695,
|
|
"valid_targets_mean": 6416.4,
|
|
"valid_targets_min": 3572
|
|
},
|
|
{
|
|
"epoch": 3.170522707797772,
|
|
"grad_norm": 0.5327057742708573,
|
|
"learning_rate": 2.665626194614138e-05,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09565296024084091,
|
|
"step": 3700,
|
|
"valid_targets_mean": 6072.0,
|
|
"valid_targets_min": 3138
|
|
},
|
|
{
|
|
"epoch": 3.1748071979434447,
|
|
"grad_norm": 0.5530367088343197,
|
|
"learning_rate": 2.6615951636203983e-05,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09422317147254944,
|
|
"step": 3705,
|
|
"valid_targets_mean": 3588.1,
|
|
"valid_targets_min": 1818
|
|
},
|
|
{
|
|
"epoch": 3.1790916880891174,
|
|
"grad_norm": 0.4790691676535246,
|
|
"learning_rate": 2.657561112529506e-05,
|
|
"loss": 0.1675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09259481728076935,
|
|
"step": 3710,
|
|
"valid_targets_mean": 5763.2,
|
|
"valid_targets_min": 3117
|
|
},
|
|
{
|
|
"epoch": 3.18337617823479,
|
|
"grad_norm": 0.49104439000892763,
|
|
"learning_rate": 2.653524059756387e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1050562709569931,
|
|
"step": 3715,
|
|
"valid_targets_mean": 5120.1,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 3.1876606683804627,
|
|
"grad_norm": 0.5244108830562542,
|
|
"learning_rate": 2.649484023729671e-05,
|
|
"loss": 0.1726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10317362844944,
|
|
"step": 3720,
|
|
"valid_targets_mean": 4561.6,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 3.1919451585261354,
|
|
"grad_norm": 0.5382322149811951,
|
|
"learning_rate": 2.6454410228916037e-05,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07304810732603073,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4988.5,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 3.196229648671808,
|
|
"grad_norm": 0.480710934454029,
|
|
"learning_rate": 2.6413950756979662e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07215426862239838,
|
|
"step": 3730,
|
|
"valid_targets_mean": 5174.0,
|
|
"valid_targets_min": 3332
|
|
},
|
|
{
|
|
"epoch": 3.2005141388174807,
|
|
"grad_norm": 0.48834989667916534,
|
|
"learning_rate": 2.6373462006179895e-05,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09519222378730774,
|
|
"step": 3735,
|
|
"valid_targets_mean": 6658.1,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 3.2047986289631534,
|
|
"grad_norm": 0.49688774009942777,
|
|
"learning_rate": 2.6332944161342697e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09624415636062622,
|
|
"step": 3740,
|
|
"valid_targets_mean": 6799.4,
|
|
"valid_targets_min": 3828
|
|
},
|
|
{
|
|
"epoch": 3.209083119108826,
|
|
"grad_norm": 0.4506399758467214,
|
|
"learning_rate": 2.6292397407426826e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07406134903430939,
|
|
"step": 3745,
|
|
"valid_targets_mean": 5605.9,
|
|
"valid_targets_min": 3460
|
|
},
|
|
{
|
|
"epoch": 3.2133676092544987,
|
|
"grad_norm": 0.5409584270562771,
|
|
"learning_rate": 2.625182192952303e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08308728039264679,
|
|
"step": 3750,
|
|
"valid_targets_mean": 3723.2,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 3.2176520994001714,
|
|
"grad_norm": 0.5487106876988163,
|
|
"learning_rate": 2.621121791285316e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0809471383690834,
|
|
"step": 3755,
|
|
"valid_targets_mean": 3911.8,
|
|
"valid_targets_min": 2035
|
|
},
|
|
{
|
|
"epoch": 3.221936589545844,
|
|
"grad_norm": 0.4873880044416427,
|
|
"learning_rate": 2.6170585542769353e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08481766283512115,
|
|
"step": 3760,
|
|
"valid_targets_mean": 4059.0,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 3.2262210796915167,
|
|
"grad_norm": 0.5884861590780336,
|
|
"learning_rate": 2.6129925004753173e-05,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10550187528133392,
|
|
"step": 3765,
|
|
"valid_targets_mean": 4256.6,
|
|
"valid_targets_min": 2465
|
|
},
|
|
{
|
|
"epoch": 3.2305055698371894,
|
|
"grad_norm": 0.4904994688232888,
|
|
"learning_rate": 2.6089236484414767e-05,
|
|
"loss": 0.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08721703290939331,
|
|
"step": 3770,
|
|
"valid_targets_mean": 6815.4,
|
|
"valid_targets_min": 2390
|
|
},
|
|
{
|
|
"epoch": 3.234790059982862,
|
|
"grad_norm": 0.49949775985853045,
|
|
"learning_rate": 2.604852016749201e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08349231630563736,
|
|
"step": 3775,
|
|
"valid_targets_mean": 5104.8,
|
|
"valid_targets_min": 3264
|
|
},
|
|
{
|
|
"epoch": 3.2390745501285347,
|
|
"grad_norm": 0.49538203709639544,
|
|
"learning_rate": 2.6007776239849678e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08115263283252716,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5585.4,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 3.2433590402742074,
|
|
"grad_norm": 0.4743043011897055,
|
|
"learning_rate": 2.5967004887478582e-05,
|
|
"loss": 0.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07015199214220047,
|
|
"step": 3785,
|
|
"valid_targets_mean": 5265.1,
|
|
"valid_targets_min": 1307
|
|
},
|
|
{
|
|
"epoch": 3.24764353041988,
|
|
"grad_norm": 0.4774324868777757,
|
|
"learning_rate": 2.592620629649472e-05,
|
|
"loss": 0.1781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08552783727645874,
|
|
"step": 3790,
|
|
"valid_targets_mean": 5696.9,
|
|
"valid_targets_min": 2495
|
|
},
|
|
{
|
|
"epoch": 3.2519280205655527,
|
|
"grad_norm": 0.47726321679965944,
|
|
"learning_rate": 2.588538065313842e-05,
|
|
"loss": 0.1754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09820352494716644,
|
|
"step": 3795,
|
|
"valid_targets_mean": 6080.5,
|
|
"valid_targets_min": 3405
|
|
},
|
|
{
|
|
"epoch": 3.2562125107112254,
|
|
"grad_norm": 0.47184919744601855,
|
|
"learning_rate": 2.5844528143773536e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08200445771217346,
|
|
"step": 3800,
|
|
"valid_targets_mean": 5325.4,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 3.260497000856898,
|
|
"grad_norm": 0.5480095421683794,
|
|
"learning_rate": 2.5803648954886526e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07922104001045227,
|
|
"step": 3805,
|
|
"valid_targets_mean": 4847.6,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 3.2647814910025708,
|
|
"grad_norm": 0.4563494451988351,
|
|
"learning_rate": 2.5762743273085642e-05,
|
|
"loss": 0.1853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07653181999921799,
|
|
"step": 3810,
|
|
"valid_targets_mean": 5683.8,
|
|
"valid_targets_min": 2964
|
|
},
|
|
{
|
|
"epoch": 3.2690659811482434,
|
|
"grad_norm": 0.5664265801776952,
|
|
"learning_rate": 2.5721811285100094e-05,
|
|
"loss": 0.165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08541962504386902,
|
|
"step": 3815,
|
|
"valid_targets_mean": 4073.6,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 3.273350471293916,
|
|
"grad_norm": 0.506078224016259,
|
|
"learning_rate": 2.5680853177779156e-05,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11393250524997711,
|
|
"step": 3820,
|
|
"valid_targets_mean": 6029.9,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 3.2776349614395888,
|
|
"grad_norm": 0.494273020347569,
|
|
"learning_rate": 2.563986913809134e-05,
|
|
"loss": 0.1664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06963160634040833,
|
|
"step": 3825,
|
|
"valid_targets_mean": 4817.8,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 3.2819194515852614,
|
|
"grad_norm": 0.4941550710602732,
|
|
"learning_rate": 2.5598859353123544e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10108497738838196,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4969.1,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 3.286203941730934,
|
|
"grad_norm": 0.44118797019685824,
|
|
"learning_rate": 2.5557824010080167e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07035285234451294,
|
|
"step": 3835,
|
|
"valid_targets_mean": 5435.5,
|
|
"valid_targets_min": 2621
|
|
},
|
|
{
|
|
"epoch": 3.290488431876607,
|
|
"grad_norm": 0.535660164763162,
|
|
"learning_rate": 2.5516763296282304e-05,
|
|
"loss": 0.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08199736475944519,
|
|
"step": 3840,
|
|
"valid_targets_mean": 5702.1,
|
|
"valid_targets_min": 2302
|
|
},
|
|
{
|
|
"epoch": 3.2947729220222794,
|
|
"grad_norm": 0.5406302387494281,
|
|
"learning_rate": 2.5475677399166846e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08630575984716415,
|
|
"step": 3845,
|
|
"valid_targets_mean": 5694.1,
|
|
"valid_targets_min": 1559
|
|
},
|
|
{
|
|
"epoch": 3.299057412167952,
|
|
"grad_norm": 0.5368623601343584,
|
|
"learning_rate": 2.5434566506285652e-05,
|
|
"loss": 0.1781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0820312350988388,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4866.5,
|
|
"valid_targets_min": 2259
|
|
},
|
|
{
|
|
"epoch": 3.303341902313625,
|
|
"grad_norm": 0.42391830685772136,
|
|
"learning_rate": 2.5393430805304684e-05,
|
|
"loss": 0.1656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07458534836769104,
|
|
"step": 3855,
|
|
"valid_targets_mean": 6219.2,
|
|
"valid_targets_min": 4199
|
|
},
|
|
{
|
|
"epoch": 3.3076263924592975,
|
|
"grad_norm": 0.46723573532937834,
|
|
"learning_rate": 2.5352270484003136e-05,
|
|
"loss": 0.1719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06834311783313751,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4890.8,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 3.31191088260497,
|
|
"grad_norm": 0.5042670913751731,
|
|
"learning_rate": 2.5311085730272615e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08892189711332321,
|
|
"step": 3865,
|
|
"valid_targets_mean": 5347.4,
|
|
"valid_targets_min": 3761
|
|
},
|
|
{
|
|
"epoch": 3.316195372750643,
|
|
"grad_norm": 0.5146453940010698,
|
|
"learning_rate": 2.5269876732116232e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08683112263679504,
|
|
"step": 3870,
|
|
"valid_targets_mean": 4728.5,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 3.3204798628963155,
|
|
"grad_norm": 0.5179841404789549,
|
|
"learning_rate": 2.522864367764779e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0970543771982193,
|
|
"step": 3875,
|
|
"valid_targets_mean": 5566.1,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 3.324764353041988,
|
|
"grad_norm": 0.5586468052716015,
|
|
"learning_rate": 2.5187386755090896e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10074944794178009,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4139.8,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 3.329048843187661,
|
|
"grad_norm": 0.5151397548165649,
|
|
"learning_rate": 2.5146106152778123e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08319426327943802,
|
|
"step": 3885,
|
|
"valid_targets_mean": 5424.5,
|
|
"valid_targets_min": 2426
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.4720092194356925,
|
|
"learning_rate": 2.5104802059150116e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07055439800024033,
|
|
"step": 3890,
|
|
"valid_targets_mean": 5038.0,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 3.337617823479006,
|
|
"grad_norm": 0.5657582007478067,
|
|
"learning_rate": 2.506347466275479e-05,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09392206370830536,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3641.5,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 3.341902313624679,
|
|
"grad_norm": 0.46332448819773575,
|
|
"learning_rate": 2.5022124152246408e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08185388147830963,
|
|
"step": 3900,
|
|
"valid_targets_mean": 5194.2,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 3.3461868037703515,
|
|
"grad_norm": 0.5276021301125949,
|
|
"learning_rate": 2.4980750716384742e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11507555842399597,
|
|
"step": 3905,
|
|
"valid_targets_mean": 5921.0,
|
|
"valid_targets_min": 3081
|
|
},
|
|
{
|
|
"epoch": 3.350471293916024,
|
|
"grad_norm": 0.5243579270007013,
|
|
"learning_rate": 2.4939354544034227e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11897942423820496,
|
|
"step": 3910,
|
|
"valid_targets_mean": 5501.2,
|
|
"valid_targets_min": 2765
|
|
},
|
|
{
|
|
"epoch": 3.354755784061697,
|
|
"grad_norm": 0.4397898226348528,
|
|
"learning_rate": 2.48979358241631e-05,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08484429121017456,
|
|
"step": 3915,
|
|
"valid_targets_mean": 6601.0,
|
|
"valid_targets_min": 4025
|
|
},
|
|
{
|
|
"epoch": 3.3590402742073695,
|
|
"grad_norm": 0.48471277085423436,
|
|
"learning_rate": 2.4856494745842494e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09149546921253204,
|
|
"step": 3920,
|
|
"valid_targets_mean": 5834.6,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 3.363324764353042,
|
|
"grad_norm": 0.640251930186407,
|
|
"learning_rate": 2.481503149824563e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09592543542385101,
|
|
"step": 3925,
|
|
"valid_targets_mean": 5464.4,
|
|
"valid_targets_min": 2672
|
|
},
|
|
{
|
|
"epoch": 3.367609254498715,
|
|
"grad_norm": 0.7077370709133005,
|
|
"learning_rate": 2.4773546270646924e-05,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08654675632715225,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3764.9,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 3.371893744644387,
|
|
"grad_norm": 0.5667813121104889,
|
|
"learning_rate": 2.4732039252421124e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10520567744970322,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4461.2,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 3.3761782347900597,
|
|
"grad_norm": 0.5242448683415644,
|
|
"learning_rate": 2.4690510633042434e-05,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08278533816337585,
|
|
"step": 3940,
|
|
"valid_targets_mean": 4752.5,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 3.3804627249357324,
|
|
"grad_norm": 0.5049433307182706,
|
|
"learning_rate": 2.46489606020837e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08539880812168121,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4561.8,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 3.384747215081405,
|
|
"grad_norm": 0.5227073441087726,
|
|
"learning_rate": 2.4607389349215486e-05,
|
|
"loss": 0.1808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09328305721282959,
|
|
"step": 3950,
|
|
"valid_targets_mean": 5030.6,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 3.3890317052270778,
|
|
"grad_norm": 0.5292754389388692,
|
|
"learning_rate": 2.456579706420522e-05,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0860356017947197,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4216.2,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 3.3933161953727504,
|
|
"grad_norm": 0.4865013112852646,
|
|
"learning_rate": 2.4524183936916375e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10694333910942078,
|
|
"step": 3960,
|
|
"valid_targets_mean": 5631.4,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 3.397600685518423,
|
|
"grad_norm": 0.44020607224044916,
|
|
"learning_rate": 2.4482550157307535e-05,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07197045534849167,
|
|
"step": 3965,
|
|
"valid_targets_mean": 5408.6,
|
|
"valid_targets_min": 3985
|
|
},
|
|
{
|
|
"epoch": 3.4018851756640958,
|
|
"grad_norm": 0.5010961069712251,
|
|
"learning_rate": 2.4440895915431564e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07832053303718567,
|
|
"step": 3970,
|
|
"valid_targets_mean": 4119.0,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 3.4061696658097684,
|
|
"grad_norm": 0.460386431994602,
|
|
"learning_rate": 2.4399221401434744e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08432082831859589,
|
|
"step": 3975,
|
|
"valid_targets_mean": 6040.0,
|
|
"valid_targets_min": 2483
|
|
},
|
|
{
|
|
"epoch": 3.410454155955441,
|
|
"grad_norm": 0.46018138510892553,
|
|
"learning_rate": 2.4357526805555898e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07811066508293152,
|
|
"step": 3980,
|
|
"valid_targets_mean": 6039.2,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 3.4147386461011138,
|
|
"grad_norm": 0.44313847646961696,
|
|
"learning_rate": 2.431581231812551e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08324681967496872,
|
|
"step": 3985,
|
|
"valid_targets_mean": 6093.0,
|
|
"valid_targets_min": 1795
|
|
},
|
|
{
|
|
"epoch": 3.4190231362467864,
|
|
"grad_norm": 0.49476268172484367,
|
|
"learning_rate": 2.4274078129564865e-05,
|
|
"loss": 0.1642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08022307604551315,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4712.8,
|
|
"valid_targets_min": 2631
|
|
},
|
|
{
|
|
"epoch": 3.423307626392459,
|
|
"grad_norm": 0.5552227552355836,
|
|
"learning_rate": 2.4232324430385186e-05,
|
|
"loss": 0.1876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1327175348997116,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4551.5,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 3.427592116538132,
|
|
"grad_norm": 0.5185839985435154,
|
|
"learning_rate": 2.4190551411186772e-05,
|
|
"loss": 0.1896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08617275208234787,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4747.9,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 3.4318766066838045,
|
|
"grad_norm": 0.4709991819622267,
|
|
"learning_rate": 2.414875926265809e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07815124839544296,
|
|
"step": 4005,
|
|
"valid_targets_mean": 5581.9,
|
|
"valid_targets_min": 2606
|
|
},
|
|
{
|
|
"epoch": 3.436161096829477,
|
|
"grad_norm": 0.48407163673156856,
|
|
"learning_rate": 2.410694817557495e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1021660715341568,
|
|
"step": 4010,
|
|
"valid_targets_mean": 5507.0,
|
|
"valid_targets_min": 2411
|
|
},
|
|
{
|
|
"epoch": 3.44044558697515,
|
|
"grad_norm": 0.5232737652771369,
|
|
"learning_rate": 2.406511834079961e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07770241796970367,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4428.8,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 3.4447300771208225,
|
|
"grad_norm": 0.5393197148717728,
|
|
"learning_rate": 2.4023269949279896e-05,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10861536860466003,
|
|
"step": 4020,
|
|
"valid_targets_mean": 5138.0,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 3.449014567266495,
|
|
"grad_norm": 0.4959778784971376,
|
|
"learning_rate": 2.398140319204836e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0765412300825119,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4376.9,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 3.453299057412168,
|
|
"grad_norm": 0.44539828071435855,
|
|
"learning_rate": 2.393951826022139e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0800006315112114,
|
|
"step": 4030,
|
|
"valid_targets_mean": 6357.0,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 3.4575835475578405,
|
|
"grad_norm": 0.5009575823756433,
|
|
"learning_rate": 2.3897615344998316e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0751371756196022,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4151.1,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 3.461868037703513,
|
|
"grad_norm": 0.5659099414288352,
|
|
"learning_rate": 2.38556946376606e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08233688771724701,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4852.5,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 3.466152527849186,
|
|
"grad_norm": 0.48964486978436755,
|
|
"learning_rate": 2.381375632957088e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09233547747135162,
|
|
"step": 4045,
|
|
"valid_targets_mean": 6505.9,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 3.4704370179948585,
|
|
"grad_norm": 0.6366684492336043,
|
|
"learning_rate": 2.377180061217217e-05,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10365661233663559,
|
|
"step": 4050,
|
|
"valid_targets_mean": 5608.6,
|
|
"valid_targets_min": 2391
|
|
},
|
|
{
|
|
"epoch": 3.474721508140531,
|
|
"grad_norm": 0.5178869224071108,
|
|
"learning_rate": 2.372982767698694e-05,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09731529653072357,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5259.2,
|
|
"valid_targets_min": 3468
|
|
},
|
|
{
|
|
"epoch": 3.479005998286204,
|
|
"grad_norm": 0.4979000344639998,
|
|
"learning_rate": 2.3687837715616265e-05,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09686636924743652,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5191.6,
|
|
"valid_targets_min": 2305
|
|
},
|
|
{
|
|
"epoch": 3.4832904884318765,
|
|
"grad_norm": 0.4673954502395025,
|
|
"learning_rate": 2.3645830919738934e-05,
|
|
"loss": 0.1615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06930539011955261,
|
|
"step": 4065,
|
|
"valid_targets_mean": 5102.1,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 3.487574978577549,
|
|
"grad_norm": 0.4603399923475969,
|
|
"learning_rate": 2.3603807481110582e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07954011112451553,
|
|
"step": 4070,
|
|
"valid_targets_mean": 4994.6,
|
|
"valid_targets_min": 2915
|
|
},
|
|
{
|
|
"epoch": 3.491859468723222,
|
|
"grad_norm": 0.4598435838646931,
|
|
"learning_rate": 2.3561767591562838e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07119995355606079,
|
|
"step": 4075,
|
|
"valid_targets_mean": 5935.2,
|
|
"valid_targets_min": 3691
|
|
},
|
|
{
|
|
"epoch": 3.4961439588688945,
|
|
"grad_norm": 0.518348370256928,
|
|
"learning_rate": 2.3519711443002403e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08827964216470718,
|
|
"step": 4080,
|
|
"valid_targets_mean": 5031.9,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 3.500428449014567,
|
|
"grad_norm": 0.45898367296651266,
|
|
"learning_rate": 2.3477639227410203e-05,
|
|
"loss": 0.1619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07615832984447479,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4248.0,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 3.50471293916024,
|
|
"grad_norm": 0.5048500740270604,
|
|
"learning_rate": 2.3435551136840516e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08330333232879639,
|
|
"step": 4090,
|
|
"valid_targets_mean": 4459.8,
|
|
"valid_targets_min": 2015
|
|
},
|
|
{
|
|
"epoch": 3.5089974293059125,
|
|
"grad_norm": 0.5215915294355933,
|
|
"learning_rate": 2.3393447363420086e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1037408709526062,
|
|
"step": 4095,
|
|
"valid_targets_mean": 5052.2,
|
|
"valid_targets_min": 3269
|
|
},
|
|
{
|
|
"epoch": 3.513281919451585,
|
|
"grad_norm": 0.4962196427584831,
|
|
"learning_rate": 2.335132809934723e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06860017776489258,
|
|
"step": 4100,
|
|
"valid_targets_mean": 4654.9,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 3.517566409597258,
|
|
"grad_norm": 0.489981385343179,
|
|
"learning_rate": 2.3309193536891005e-05,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08288254588842392,
|
|
"step": 4105,
|
|
"valid_targets_mean": 5823.2,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 3.5218508997429305,
|
|
"grad_norm": 0.656955220664152,
|
|
"learning_rate": 2.3267043868390294e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08202258497476578,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4970.4,
|
|
"valid_targets_min": 2605
|
|
},
|
|
{
|
|
"epoch": 3.526135389888603,
|
|
"grad_norm": 0.5037309614923813,
|
|
"learning_rate": 2.3224879286252912e-05,
|
|
"loss": 0.1812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1043175458908081,
|
|
"step": 4115,
|
|
"valid_targets_mean": 5029.6,
|
|
"valid_targets_min": 2077
|
|
},
|
|
{
|
|
"epoch": 3.530419880034276,
|
|
"grad_norm": 0.5175047493676703,
|
|
"learning_rate": 2.3182699982954798e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07466578483581543,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4483.4,
|
|
"valid_targets_min": 2113
|
|
},
|
|
{
|
|
"epoch": 3.5347043701799485,
|
|
"grad_norm": 0.47635693359402775,
|
|
"learning_rate": 2.3140506151039056e-05,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09035299718379974,
|
|
"step": 4125,
|
|
"valid_targets_mean": 5929.0,
|
|
"valid_targets_min": 3412
|
|
},
|
|
{
|
|
"epoch": 3.538988860325621,
|
|
"grad_norm": 0.4565464264321229,
|
|
"learning_rate": 2.309829798311512e-05,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0775737538933754,
|
|
"step": 4130,
|
|
"valid_targets_mean": 5772.6,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 3.543273350471294,
|
|
"grad_norm": 0.46993348068850305,
|
|
"learning_rate": 2.3056075671857873e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08100612461566925,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4891.2,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 3.5475578406169666,
|
|
"grad_norm": 0.5042039449397279,
|
|
"learning_rate": 2.3013839410006752e-05,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10945829749107361,
|
|
"step": 4140,
|
|
"valid_targets_mean": 4754.5,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 3.551842330762639,
|
|
"grad_norm": 0.5219428536509176,
|
|
"learning_rate": 2.2971589390364883e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08812487125396729,
|
|
"step": 4145,
|
|
"valid_targets_mean": 4773.4,
|
|
"valid_targets_min": 2528
|
|
},
|
|
{
|
|
"epoch": 3.556126820908312,
|
|
"grad_norm": 0.4595032270725784,
|
|
"learning_rate": 2.2929325805798193e-05,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09251895546913147,
|
|
"step": 4150,
|
|
"valid_targets_mean": 5433.2,
|
|
"valid_targets_min": 2510
|
|
},
|
|
{
|
|
"epoch": 3.5604113110539846,
|
|
"grad_norm": 0.592306139102451,
|
|
"learning_rate": 2.288704884923454e-05,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09715113043785095,
|
|
"step": 4155,
|
|
"valid_targets_mean": 4493.5,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 3.5646958011996572,
|
|
"grad_norm": 0.4670380137898987,
|
|
"learning_rate": 2.2844758713662792e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07353829592466354,
|
|
"step": 4160,
|
|
"valid_targets_mean": 6750.5,
|
|
"valid_targets_min": 2466
|
|
},
|
|
{
|
|
"epoch": 3.56898029134533,
|
|
"grad_norm": 0.5225853631870525,
|
|
"learning_rate": 2.2802455592132016e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0953027755022049,
|
|
"step": 4165,
|
|
"valid_targets_mean": 5718.2,
|
|
"valid_targets_min": 3231
|
|
},
|
|
{
|
|
"epoch": 3.5732647814910026,
|
|
"grad_norm": 0.4638223398798671,
|
|
"learning_rate": 2.2760139677750535e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09025826305150986,
|
|
"step": 4170,
|
|
"valid_targets_mean": 5769.9,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 3.5775492716366752,
|
|
"grad_norm": 0.5579887756037205,
|
|
"learning_rate": 2.2717811163685083e-05,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07685058563947678,
|
|
"step": 4175,
|
|
"valid_targets_mean": 3841.8,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 3.581833761782348,
|
|
"grad_norm": 0.4948096539474111,
|
|
"learning_rate": 2.267547024315989e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08701218664646149,
|
|
"step": 4180,
|
|
"valid_targets_mean": 5586.2,
|
|
"valid_targets_min": 2833
|
|
},
|
|
{
|
|
"epoch": 3.5861182519280206,
|
|
"grad_norm": 0.4838736011672868,
|
|
"learning_rate": 2.2633117109455842e-05,
|
|
"loss": 0.1777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09145478904247284,
|
|
"step": 4185,
|
|
"valid_targets_mean": 6145.4,
|
|
"valid_targets_min": 2143
|
|
},
|
|
{
|
|
"epoch": 3.5904027420736933,
|
|
"grad_norm": 0.45606871426482026,
|
|
"learning_rate": 2.259075195590957e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07948420941829681,
|
|
"step": 4190,
|
|
"valid_targets_mean": 5401.1,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 3.594687232219366,
|
|
"grad_norm": 0.5025562342810775,
|
|
"learning_rate": 2.2548374975912566e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07993195205926895,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4961.0,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 3.5989717223650386,
|
|
"grad_norm": 0.505770776087417,
|
|
"learning_rate": 2.250598636291032e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08299499750137329,
|
|
"step": 4200,
|
|
"valid_targets_mean": 4817.9,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 3.6032562125107113,
|
|
"grad_norm": 0.4954913757966641,
|
|
"learning_rate": 2.2463586310401415e-05,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08894240856170654,
|
|
"step": 4205,
|
|
"valid_targets_mean": 4411.9,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 3.607540702656384,
|
|
"grad_norm": 0.5418588144667075,
|
|
"learning_rate": 2.242117501193666e-05,
|
|
"loss": 0.1776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11070725321769714,
|
|
"step": 4210,
|
|
"valid_targets_mean": 5323.1,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 3.6118251928020566,
|
|
"grad_norm": 0.531263626146356,
|
|
"learning_rate": 2.2378752661118196e-05,
|
|
"loss": 0.1848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08583134412765503,
|
|
"step": 4215,
|
|
"valid_targets_mean": 4969.5,
|
|
"valid_targets_min": 1849
|
|
},
|
|
{
|
|
"epoch": 3.6161096829477293,
|
|
"grad_norm": 0.5612122432997662,
|
|
"learning_rate": 2.233631945159862e-05,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08974435925483704,
|
|
"step": 4220,
|
|
"valid_targets_mean": 4273.5,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 3.620394173093402,
|
|
"grad_norm": 0.48142060702080514,
|
|
"learning_rate": 2.2293875577080096e-05,
|
|
"loss": 0.1814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07660025358200073,
|
|
"step": 4225,
|
|
"valid_targets_mean": 3994.6,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 3.6246786632390746,
|
|
"grad_norm": 0.4848066208026919,
|
|
"learning_rate": 2.2251421231313476e-05,
|
|
"loss": 0.2024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07478651404380798,
|
|
"step": 4230,
|
|
"valid_targets_mean": 5517.0,
|
|
"valid_targets_min": 3239
|
|
},
|
|
{
|
|
"epoch": 3.6289631533847473,
|
|
"grad_norm": 0.49899057473509,
|
|
"learning_rate": 2.2208956608097404e-05,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06353067606687546,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5502.9,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 3.63324764353042,
|
|
"grad_norm": 0.5813828678411174,
|
|
"learning_rate": 2.2166481901277445e-05,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1088082492351532,
|
|
"step": 4240,
|
|
"valid_targets_mean": 3780.8,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 3.6375321336760926,
|
|
"grad_norm": 0.4817039168502182,
|
|
"learning_rate": 2.2123997304745197e-05,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09596549719572067,
|
|
"step": 4245,
|
|
"valid_targets_mean": 4660.6,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 3.6418166238217653,
|
|
"grad_norm": 0.4792499434827214,
|
|
"learning_rate": 2.20815030124374e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08193084597587585,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4666.0,
|
|
"valid_targets_min": 2073
|
|
},
|
|
{
|
|
"epoch": 3.646101113967438,
|
|
"grad_norm": 0.5304247034774922,
|
|
"learning_rate": 2.2038999218335052e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11344555020332336,
|
|
"step": 4255,
|
|
"valid_targets_mean": 4205.0,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 3.6503856041131106,
|
|
"grad_norm": 0.561201430702449,
|
|
"learning_rate": 2.1996486116462518e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10015340894460678,
|
|
"step": 4260,
|
|
"valid_targets_mean": 4303.4,
|
|
"valid_targets_min": 976
|
|
},
|
|
{
|
|
"epoch": 3.6546700942587833,
|
|
"grad_norm": 0.5025165588665476,
|
|
"learning_rate": 2.1953963900886678e-05,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0987139567732811,
|
|
"step": 4265,
|
|
"valid_targets_mean": 6982.6,
|
|
"valid_targets_min": 3682
|
|
},
|
|
{
|
|
"epoch": 3.658954584404456,
|
|
"grad_norm": 0.4734958218505094,
|
|
"learning_rate": 2.1911432765715984e-05,
|
|
"loss": 0.1903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09380441159009933,
|
|
"step": 4270,
|
|
"valid_targets_mean": 4911.9,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 3.6632390745501286,
|
|
"grad_norm": 0.47972395955451463,
|
|
"learning_rate": 2.186889290509963e-05,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08934134244918823,
|
|
"step": 4275,
|
|
"valid_targets_mean": 5794.8,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 3.6675235646958013,
|
|
"grad_norm": 0.5054146734195408,
|
|
"learning_rate": 2.1826344513226612e-05,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0747838243842125,
|
|
"step": 4280,
|
|
"valid_targets_mean": 4473.8,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 3.671808054841474,
|
|
"grad_norm": 0.48848277806211415,
|
|
"learning_rate": 2.178378778432491e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07787490636110306,
|
|
"step": 4285,
|
|
"valid_targets_mean": 5002.2,
|
|
"valid_targets_min": 2578
|
|
},
|
|
{
|
|
"epoch": 3.6760925449871467,
|
|
"grad_norm": 0.5569673045876135,
|
|
"learning_rate": 2.174122291266052e-05,
|
|
"loss": 0.183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10725502669811249,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5099.5,
|
|
"valid_targets_min": 2687
|
|
},
|
|
{
|
|
"epoch": 3.6803770351328193,
|
|
"grad_norm": 0.5166032053971452,
|
|
"learning_rate": 2.169865009253663e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09015043079853058,
|
|
"step": 4295,
|
|
"valid_targets_mean": 5599.1,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 3.684661525278492,
|
|
"grad_norm": 0.5709661002249621,
|
|
"learning_rate": 2.1656069518292724e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10100851207971573,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4439.0,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 3.6889460154241647,
|
|
"grad_norm": 0.543802259685694,
|
|
"learning_rate": 2.161348138430365e-05,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0801519826054573,
|
|
"step": 4305,
|
|
"valid_targets_mean": 4878.2,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 3.6932305055698373,
|
|
"grad_norm": 0.5570095437587615,
|
|
"learning_rate": 2.157088588497879e-05,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08337944746017456,
|
|
"step": 4310,
|
|
"valid_targets_mean": 3964.8,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 3.69751499571551,
|
|
"grad_norm": 0.453897962207074,
|
|
"learning_rate": 2.1528283214761143e-05,
|
|
"loss": 0.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0844380259513855,
|
|
"step": 4315,
|
|
"valid_targets_mean": 5618.8,
|
|
"valid_targets_min": 2713
|
|
},
|
|
{
|
|
"epoch": 3.7017994858611827,
|
|
"grad_norm": 0.5264838129574774,
|
|
"learning_rate": 2.148567356812644e-05,
|
|
"loss": 0.1788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09211684763431549,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4739.2,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 3.7060839760068554,
|
|
"grad_norm": 0.5416369851840257,
|
|
"learning_rate": 2.1443057139582248e-05,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07441074401140213,
|
|
"step": 4325,
|
|
"valid_targets_mean": 4180.2,
|
|
"valid_targets_min": 1716
|
|
},
|
|
{
|
|
"epoch": 3.710368466152528,
|
|
"grad_norm": 0.5233842699548993,
|
|
"learning_rate": 2.140043412366711e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09138879179954529,
|
|
"step": 4330,
|
|
"valid_targets_mean": 5340.5,
|
|
"valid_targets_min": 2420
|
|
},
|
|
{
|
|
"epoch": 3.7146529562982007,
|
|
"grad_norm": 0.5027330097368046,
|
|
"learning_rate": 2.135780471494964e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08350911736488342,
|
|
"step": 4335,
|
|
"valid_targets_mean": 5366.6,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 3.7189374464438734,
|
|
"grad_norm": 0.47526774562585344,
|
|
"learning_rate": 2.1315169108027618e-05,
|
|
"loss": 0.1812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07638335227966309,
|
|
"step": 4340,
|
|
"valid_targets_mean": 5456.4,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 3.723221936589546,
|
|
"grad_norm": 0.5042053368330688,
|
|
"learning_rate": 2.127252749752713e-05,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09122496843338013,
|
|
"step": 4345,
|
|
"valid_targets_mean": 5834.4,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 3.7275064267352187,
|
|
"grad_norm": 0.5289307926314227,
|
|
"learning_rate": 2.122988007810166e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09580802172422409,
|
|
"step": 4350,
|
|
"valid_targets_mean": 5320.8,
|
|
"valid_targets_min": 3011
|
|
},
|
|
{
|
|
"epoch": 3.7317909168808914,
|
|
"grad_norm": 0.4576457670282547,
|
|
"learning_rate": 2.1187227044431218e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05943462625145912,
|
|
"step": 4355,
|
|
"valid_targets_mean": 4858.8,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 3.736075407026564,
|
|
"grad_norm": 0.45540481367302393,
|
|
"learning_rate": 2.1144568591221435e-05,
|
|
"loss": 0.1806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08127421140670776,
|
|
"step": 4360,
|
|
"valid_targets_mean": 5734.1,
|
|
"valid_targets_min": 2680
|
|
},
|
|
{
|
|
"epoch": 3.7403598971722367,
|
|
"grad_norm": 0.5362612179693383,
|
|
"learning_rate": 2.1101904913202683e-05,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11337935924530029,
|
|
"step": 4365,
|
|
"valid_targets_mean": 5713.0,
|
|
"valid_targets_min": 3480
|
|
},
|
|
{
|
|
"epoch": 3.7446443873179094,
|
|
"grad_norm": 0.4704003263016836,
|
|
"learning_rate": 2.1059236205129193e-05,
|
|
"loss": 0.1832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08135171979665756,
|
|
"step": 4370,
|
|
"valid_targets_mean": 6026.6,
|
|
"valid_targets_min": 3211
|
|
},
|
|
{
|
|
"epoch": 3.748928877463582,
|
|
"grad_norm": 0.5097814181014321,
|
|
"learning_rate": 2.101656266177814e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07321173697710037,
|
|
"step": 4375,
|
|
"valid_targets_mean": 4055.1,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 3.7532133676092547,
|
|
"grad_norm": 0.5058831165511345,
|
|
"learning_rate": 2.0973884477948788e-05,
|
|
"loss": 0.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07657093554735184,
|
|
"step": 4380,
|
|
"valid_targets_mean": 5092.8,
|
|
"valid_targets_min": 2508
|
|
},
|
|
{
|
|
"epoch": 3.7574978577549274,
|
|
"grad_norm": 0.48377529739196745,
|
|
"learning_rate": 2.093120184846158e-05,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07396461069583893,
|
|
"step": 4385,
|
|
"valid_targets_mean": 4718.1,
|
|
"valid_targets_min": 3365
|
|
},
|
|
{
|
|
"epoch": 3.7617823479006,
|
|
"grad_norm": 0.5858845859525321,
|
|
"learning_rate": 2.0888514968157242e-05,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11814798414707184,
|
|
"step": 4390,
|
|
"valid_targets_mean": 5390.4,
|
|
"valid_targets_min": 3563
|
|
},
|
|
{
|
|
"epoch": 3.7660668380462727,
|
|
"grad_norm": 0.5009081909970391,
|
|
"learning_rate": 2.0845824031895922e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07869275659322739,
|
|
"step": 4395,
|
|
"valid_targets_mean": 5265.8,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 3.7703513281919454,
|
|
"grad_norm": 0.46579194546844566,
|
|
"learning_rate": 2.080312923455627e-05,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06215429678559303,
|
|
"step": 4400,
|
|
"valid_targets_mean": 5621.1,
|
|
"valid_targets_min": 2765
|
|
},
|
|
{
|
|
"epoch": 3.774635818337618,
|
|
"grad_norm": 0.4811822376668833,
|
|
"learning_rate": 2.0760430771034567e-05,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07595115900039673,
|
|
"step": 4405,
|
|
"valid_targets_mean": 5120.1,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 3.7789203084832907,
|
|
"grad_norm": 0.5625193799505521,
|
|
"learning_rate": 2.0717728836243828e-05,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07109188288450241,
|
|
"step": 4410,
|
|
"valid_targets_mean": 4732.2,
|
|
"valid_targets_min": 2687
|
|
},
|
|
{
|
|
"epoch": 3.7832047986289634,
|
|
"grad_norm": 0.5103516926344571,
|
|
"learning_rate": 2.0675023625112908e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10274773091077805,
|
|
"step": 4415,
|
|
"valid_targets_mean": 5381.0,
|
|
"valid_targets_min": 3303
|
|
},
|
|
{
|
|
"epoch": 3.787489288774636,
|
|
"grad_norm": 0.5353559159650585,
|
|
"learning_rate": 2.063231533258563e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09648248553276062,
|
|
"step": 4420,
|
|
"valid_targets_mean": 4760.2,
|
|
"valid_targets_min": 2482
|
|
},
|
|
{
|
|
"epoch": 3.7917737789203088,
|
|
"grad_norm": 0.5362956724018828,
|
|
"learning_rate": 2.058960415361988e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0884857177734375,
|
|
"step": 4425,
|
|
"valid_targets_mean": 4564.9,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 3.796058269065981,
|
|
"grad_norm": 0.6503106392647596,
|
|
"learning_rate": 2.0546890283186717e-05,
|
|
"loss": 0.1554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10990839451551437,
|
|
"step": 4430,
|
|
"valid_targets_mean": 3900.4,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 3.8003427592116537,
|
|
"grad_norm": 0.5916435853895476,
|
|
"learning_rate": 2.050417391626948e-05,
|
|
"loss": 0.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08456003665924072,
|
|
"step": 4435,
|
|
"valid_targets_mean": 7036.9,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 3.8046272493573263,
|
|
"grad_norm": 0.5036276852885103,
|
|
"learning_rate": 2.0461455247862912e-05,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06545372307300568,
|
|
"step": 4440,
|
|
"valid_targets_mean": 4662.1,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 3.808911739502999,
|
|
"grad_norm": 0.44765849535218993,
|
|
"learning_rate": 2.0418734472972262e-05,
|
|
"loss": 0.1662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06430330872535706,
|
|
"step": 4445,
|
|
"valid_targets_mean": 4303.9,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 3.8131962296486717,
|
|
"grad_norm": 0.5222825963604962,
|
|
"learning_rate": 2.0376011786612396e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08797872811555862,
|
|
"step": 4450,
|
|
"valid_targets_mean": 4519.4,
|
|
"valid_targets_min": 2156
|
|
},
|
|
{
|
|
"epoch": 3.8174807197943443,
|
|
"grad_norm": 0.49236298315659555,
|
|
"learning_rate": 2.0333287383806907e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08118836581707001,
|
|
"step": 4455,
|
|
"valid_targets_mean": 4663.1,
|
|
"valid_targets_min": 2595
|
|
},
|
|
{
|
|
"epoch": 3.821765209940017,
|
|
"grad_norm": 0.4782325683501973,
|
|
"learning_rate": 2.029056145958721e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08120326697826385,
|
|
"step": 4460,
|
|
"valid_targets_mean": 6560.8,
|
|
"valid_targets_min": 3023
|
|
},
|
|
{
|
|
"epoch": 3.8260497000856897,
|
|
"grad_norm": 0.473132007473985,
|
|
"learning_rate": 2.0247834208991684e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07763845473527908,
|
|
"step": 4465,
|
|
"valid_targets_mean": 5648.8,
|
|
"valid_targets_min": 2787
|
|
},
|
|
{
|
|
"epoch": 3.8303341902313623,
|
|
"grad_norm": 0.5139351501622985,
|
|
"learning_rate": 2.0205105827064736e-05,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09254143387079239,
|
|
"step": 4470,
|
|
"valid_targets_mean": 5020.5,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 3.834618680377035,
|
|
"grad_norm": 0.5778317488055335,
|
|
"learning_rate": 2.0162376508855978e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09841796010732651,
|
|
"step": 4475,
|
|
"valid_targets_mean": 4625.0,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 3.8389031705227077,
|
|
"grad_norm": 0.48508807741509924,
|
|
"learning_rate": 2.0119646449419265e-05,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09713976830244064,
|
|
"step": 4480,
|
|
"valid_targets_mean": 5486.1,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 3.8431876606683804,
|
|
"grad_norm": 0.5020491833115472,
|
|
"learning_rate": 2.0076915843811835e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09118911623954773,
|
|
"step": 4485,
|
|
"valid_targets_mean": 4502.4,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 3.847472150814053,
|
|
"grad_norm": 0.4907616526259011,
|
|
"learning_rate": 2.0034184887093433e-05,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09922203421592712,
|
|
"step": 4490,
|
|
"valid_targets_mean": 5388.1,
|
|
"valid_targets_min": 3170
|
|
},
|
|
{
|
|
"epoch": 3.8517566409597257,
|
|
"grad_norm": 0.4565291844177587,
|
|
"learning_rate": 1.9991453774325402e-05,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10273808240890503,
|
|
"step": 4495,
|
|
"valid_targets_mean": 6592.6,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 3.8560411311053984,
|
|
"grad_norm": 0.5732545604455834,
|
|
"learning_rate": 1.9948722700569795e-05,
|
|
"loss": 0.199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10075698792934418,
|
|
"step": 4500,
|
|
"valid_targets_mean": 3010.5,
|
|
"valid_targets_min": 845
|
|
},
|
|
{
|
|
"epoch": 3.860325621251071,
|
|
"grad_norm": 0.4860731335191765,
|
|
"learning_rate": 1.9905991860888496e-05,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08558526635169983,
|
|
"step": 4505,
|
|
"valid_targets_mean": 5031.2,
|
|
"valid_targets_min": 3410
|
|
},
|
|
{
|
|
"epoch": 3.8646101113967437,
|
|
"grad_norm": 0.5088911251158567,
|
|
"learning_rate": 1.9863261450342305e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06963960826396942,
|
|
"step": 4510,
|
|
"valid_targets_mean": 5028.4,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 3.8688946015424164,
|
|
"grad_norm": 0.4879977625126342,
|
|
"learning_rate": 1.9820531663990065e-05,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07305698841810226,
|
|
"step": 4515,
|
|
"valid_targets_mean": 5932.9,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 3.873179091688089,
|
|
"grad_norm": 0.5372417512132406,
|
|
"learning_rate": 1.9777802696887783e-05,
|
|
"loss": 0.1888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10021747648715973,
|
|
"step": 4520,
|
|
"valid_targets_mean": 5266.9,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 3.8774635818337617,
|
|
"grad_norm": 0.4610327967140201,
|
|
"learning_rate": 1.9735074744087733e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12863047420978546,
|
|
"step": 4525,
|
|
"valid_targets_mean": 7164.8,
|
|
"valid_targets_min": 4651
|
|
},
|
|
{
|
|
"epoch": 3.8817480719794344,
|
|
"grad_norm": 0.5019450406903498,
|
|
"learning_rate": 1.9692348000637527e-05,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07446804642677307,
|
|
"step": 4530,
|
|
"valid_targets_mean": 5320.9,
|
|
"valid_targets_min": 3110
|
|
},
|
|
{
|
|
"epoch": 3.886032562125107,
|
|
"grad_norm": 0.5112673713240492,
|
|
"learning_rate": 1.9649622661579274e-05,
|
|
"loss": 0.1591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09042879939079285,
|
|
"step": 4535,
|
|
"valid_targets_mean": 5653.9,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 3.8903170522707797,
|
|
"grad_norm": 0.5369515631810194,
|
|
"learning_rate": 1.9606898921948683e-05,
|
|
"loss": 0.1977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0947985053062439,
|
|
"step": 4540,
|
|
"valid_targets_mean": 4051.1,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 3.8946015424164524,
|
|
"grad_norm": 0.5552773847133019,
|
|
"learning_rate": 1.956417697677414e-05,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08706317096948624,
|
|
"step": 4545,
|
|
"valid_targets_mean": 4993.9,
|
|
"valid_targets_min": 3037
|
|
},
|
|
{
|
|
"epoch": 3.898886032562125,
|
|
"grad_norm": 0.4915528438499653,
|
|
"learning_rate": 1.9521457021075855e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08254234492778778,
|
|
"step": 4550,
|
|
"valid_targets_mean": 5879.4,
|
|
"valid_targets_min": 3345
|
|
},
|
|
{
|
|
"epoch": 3.9031705227077977,
|
|
"grad_norm": 0.5160133103456404,
|
|
"learning_rate": 1.9478739249864962e-05,
|
|
"loss": 0.1644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10425172001123428,
|
|
"step": 4555,
|
|
"valid_targets_mean": 4621.1,
|
|
"valid_targets_min": 1198
|
|
},
|
|
{
|
|
"epoch": 3.9074550128534704,
|
|
"grad_norm": 0.5000307732972977,
|
|
"learning_rate": 1.9436023858142602e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0853274017572403,
|
|
"step": 4560,
|
|
"valid_targets_mean": 4777.2,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 3.911739502999143,
|
|
"grad_norm": 0.5068195746979115,
|
|
"learning_rate": 1.9393311040899067e-05,
|
|
"loss": 0.1847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07205282896757126,
|
|
"step": 4565,
|
|
"valid_targets_mean": 5539.8,
|
|
"valid_targets_min": 3387
|
|
},
|
|
{
|
|
"epoch": 3.9160239931448158,
|
|
"grad_norm": 0.4267534546807796,
|
|
"learning_rate": 1.93506009931129e-05,
|
|
"loss": 0.1688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06260967999696732,
|
|
"step": 4570,
|
|
"valid_targets_mean": 5994.8,
|
|
"valid_targets_min": 4569
|
|
},
|
|
{
|
|
"epoch": 3.9203084832904884,
|
|
"grad_norm": 0.5252719123556089,
|
|
"learning_rate": 1.9307893909749994e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09602794051170349,
|
|
"step": 4575,
|
|
"valid_targets_mean": 4688.0,
|
|
"valid_targets_min": 3086
|
|
},
|
|
{
|
|
"epoch": 3.924592973436161,
|
|
"grad_norm": 0.537363202650325,
|
|
"learning_rate": 1.9265189985762703e-05,
|
|
"loss": 0.1627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07901383191347122,
|
|
"step": 4580,
|
|
"valid_targets_mean": 4856.1,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 3.9288774635818338,
|
|
"grad_norm": 0.4719462842604403,
|
|
"learning_rate": 1.9222489416088975e-05,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07746100425720215,
|
|
"step": 4585,
|
|
"valid_targets_mean": 6851.2,
|
|
"valid_targets_min": 5211
|
|
},
|
|
{
|
|
"epoch": 3.9331619537275064,
|
|
"grad_norm": 0.5353923548310513,
|
|
"learning_rate": 1.917979239565145e-05,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08081245422363281,
|
|
"step": 4590,
|
|
"valid_targets_mean": 4776.4,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 3.937446443873179,
|
|
"grad_norm": 0.5404284267622435,
|
|
"learning_rate": 1.9137099119356545e-05,
|
|
"loss": 0.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07720431685447693,
|
|
"step": 4595,
|
|
"valid_targets_mean": 5211.1,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 3.9417309340188518,
|
|
"grad_norm": 0.5867772663723523,
|
|
"learning_rate": 1.9094409782093597e-05,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0957307517528534,
|
|
"step": 4600,
|
|
"valid_targets_mean": 3721.1,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 3.9460154241645244,
|
|
"grad_norm": 0.5145554246504417,
|
|
"learning_rate": 1.9051724578733962e-05,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09897803515195847,
|
|
"step": 4605,
|
|
"valid_targets_mean": 6185.1,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 3.950299914310197,
|
|
"grad_norm": 0.49010810880745426,
|
|
"learning_rate": 1.9009043704130122e-05,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09428513050079346,
|
|
"step": 4610,
|
|
"valid_targets_mean": 5483.1,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 3.95458440445587,
|
|
"grad_norm": 0.4733695336444945,
|
|
"learning_rate": 1.8966367353114807e-05,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08150522410869598,
|
|
"step": 4615,
|
|
"valid_targets_mean": 5887.5,
|
|
"valid_targets_min": 2650
|
|
},
|
|
{
|
|
"epoch": 3.9588688946015425,
|
|
"grad_norm": 0.5422990185050677,
|
|
"learning_rate": 1.892369572050008e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09762519598007202,
|
|
"step": 4620,
|
|
"valid_targets_mean": 3828.2,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 3.963153384747215,
|
|
"grad_norm": 0.48852927765073745,
|
|
"learning_rate": 1.888102900107649e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10224743187427521,
|
|
"step": 4625,
|
|
"valid_targets_mean": 5479.5,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 3.967437874892888,
|
|
"grad_norm": 0.5723550383231459,
|
|
"learning_rate": 1.8838367389612132e-05,
|
|
"loss": 0.1719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10078905522823334,
|
|
"step": 4630,
|
|
"valid_targets_mean": 4667.5,
|
|
"valid_targets_min": 2156
|
|
},
|
|
{
|
|
"epoch": 3.9717223650385605,
|
|
"grad_norm": 0.5944746392231771,
|
|
"learning_rate": 1.879571108085181e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09629759192466736,
|
|
"step": 4635,
|
|
"valid_targets_mean": 4048.5,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 3.976006855184233,
|
|
"grad_norm": 0.5235308428167299,
|
|
"learning_rate": 1.8753060269516098e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08603832125663757,
|
|
"step": 4640,
|
|
"valid_targets_mean": 5105.2,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 3.980291345329906,
|
|
"grad_norm": 0.6014671147278671,
|
|
"learning_rate": 1.8710415150300485e-05,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12909947335720062,
|
|
"step": 4645,
|
|
"valid_targets_mean": 5040.0,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 3.9845758354755785,
|
|
"grad_norm": 0.5570437745107412,
|
|
"learning_rate": 1.866777591787448e-05,
|
|
"loss": 0.1714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0958973839879036,
|
|
"step": 4650,
|
|
"valid_targets_mean": 4275.1,
|
|
"valid_targets_min": 3053
|
|
},
|
|
{
|
|
"epoch": 3.988860325621251,
|
|
"grad_norm": 0.4838053085641309,
|
|
"learning_rate": 1.8625142766880707e-05,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07679547369480133,
|
|
"step": 4655,
|
|
"valid_targets_mean": 4272.4,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 3.993144815766924,
|
|
"grad_norm": 0.4560089306445956,
|
|
"learning_rate": 1.8582515891934053e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07818187773227692,
|
|
"step": 4660,
|
|
"valid_targets_mean": 5043.8,
|
|
"valid_targets_min": 2346
|
|
},
|
|
{
|
|
"epoch": 3.9974293059125965,
|
|
"grad_norm": 0.4658705104546062,
|
|
"learning_rate": 1.8539895487620728e-05,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07729354500770569,
|
|
"step": 4665,
|
|
"valid_targets_mean": 5242.0,
|
|
"valid_targets_min": 3391
|
|
},
|
|
{
|
|
"epoch": 4.001713796058269,
|
|
"grad_norm": 0.5114580276556692,
|
|
"learning_rate": 1.8497281748497423e-05,
|
|
"loss": 0.162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09246338158845901,
|
|
"step": 4670,
|
|
"valid_targets_mean": 5042.1,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 4.005998286203941,
|
|
"grad_norm": 0.5675860095297649,
|
|
"learning_rate": 1.8454674869090394e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07740327715873718,
|
|
"step": 4675,
|
|
"valid_targets_mean": 4260.1,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 4.010282776349614,
|
|
"grad_norm": 0.4395254253260001,
|
|
"learning_rate": 1.8412075043894587e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05301671475172043,
|
|
"step": 4680,
|
|
"valid_targets_mean": 6729.9,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 4.014567266495287,
|
|
"grad_norm": 0.5305603780007552,
|
|
"learning_rate": 1.8369482467372748e-05,
|
|
"loss": 0.1507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0686296597123146,
|
|
"step": 4685,
|
|
"valid_targets_mean": 4777.6,
|
|
"valid_targets_min": 2383
|
|
},
|
|
{
|
|
"epoch": 4.018851756640959,
|
|
"grad_norm": 0.4427631670793687,
|
|
"learning_rate": 1.8326897333954524e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05530937761068344,
|
|
"step": 4690,
|
|
"valid_targets_mean": 5215.6,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 4.023136246786632,
|
|
"grad_norm": 0.5273081300549666,
|
|
"learning_rate": 1.8284319838035596e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09043410420417786,
|
|
"step": 4695,
|
|
"valid_targets_mean": 5147.4,
|
|
"valid_targets_min": 2946
|
|
},
|
|
{
|
|
"epoch": 4.027420736932305,
|
|
"grad_norm": 0.516072643406447,
|
|
"learning_rate": 1.824175017397679e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07810711860656738,
|
|
"step": 4700,
|
|
"valid_targets_mean": 5571.1,
|
|
"valid_targets_min": 3578
|
|
},
|
|
{
|
|
"epoch": 4.031705227077977,
|
|
"grad_norm": 0.6072196631344798,
|
|
"learning_rate": 1.8199188536103155e-05,
|
|
"loss": 0.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08345551043748856,
|
|
"step": 4705,
|
|
"valid_targets_mean": 3770.1,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 4.03598971722365,
|
|
"grad_norm": 0.5165920468423776,
|
|
"learning_rate": 1.8156635118703113e-05,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1011938825249672,
|
|
"step": 4710,
|
|
"valid_targets_mean": 5820.2,
|
|
"valid_targets_min": 3722
|
|
},
|
|
{
|
|
"epoch": 4.040274207369323,
|
|
"grad_norm": 0.6037846700268825,
|
|
"learning_rate": 1.8114090116027575e-05,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08537499606609344,
|
|
"step": 4715,
|
|
"valid_targets_mean": 3899.5,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 4.044558697514995,
|
|
"grad_norm": 0.564709272472501,
|
|
"learning_rate": 1.8071553722289012e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07162496447563171,
|
|
"step": 4720,
|
|
"valid_targets_mean": 4443.4,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 4.048843187660668,
|
|
"grad_norm": 0.5249602067864373,
|
|
"learning_rate": 1.8029026131660613e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08937022089958191,
|
|
"step": 4725,
|
|
"valid_targets_mean": 5472.2,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 4.053127677806341,
|
|
"grad_norm": 0.5323768288458474,
|
|
"learning_rate": 1.7986507538275388e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09161496162414551,
|
|
"step": 4730,
|
|
"valid_targets_mean": 4799.8,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 4.057412167952013,
|
|
"grad_norm": 0.5424394265463435,
|
|
"learning_rate": 1.794399813622527e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06499495357275009,
|
|
"step": 4735,
|
|
"valid_targets_mean": 4970.8,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 4.061696658097686,
|
|
"grad_norm": 0.47986919896406616,
|
|
"learning_rate": 1.7901498119560224e-05,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06093684583902359,
|
|
"step": 4740,
|
|
"valid_targets_mean": 5482.4,
|
|
"valid_targets_min": 1190
|
|
},
|
|
{
|
|
"epoch": 4.065981148243359,
|
|
"grad_norm": 0.5696299033930009,
|
|
"learning_rate": 1.7859007682287382e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10723207890987396,
|
|
"step": 4745,
|
|
"valid_targets_mean": 5067.1,
|
|
"valid_targets_min": 3381
|
|
},
|
|
{
|
|
"epoch": 4.070265638389031,
|
|
"grad_norm": 0.5399946394195704,
|
|
"learning_rate": 1.7816527018370154e-05,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0841757208108902,
|
|
"step": 4750,
|
|
"valid_targets_mean": 5439.0,
|
|
"valid_targets_min": 2166
|
|
},
|
|
{
|
|
"epoch": 4.074550128534704,
|
|
"grad_norm": 0.49331881552628826,
|
|
"learning_rate": 1.7774056321727318e-05,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06685491651296616,
|
|
"step": 4755,
|
|
"valid_targets_mean": 5076.4,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 4.078834618680377,
|
|
"grad_norm": 0.5323944357504423,
|
|
"learning_rate": 1.7731595786232158e-05,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08423639088869095,
|
|
"step": 4760,
|
|
"valid_targets_mean": 5571.4,
|
|
"valid_targets_min": 2206
|
|
},
|
|
{
|
|
"epoch": 4.0831191088260494,
|
|
"grad_norm": 0.5021387969482174,
|
|
"learning_rate": 1.7689145605711597e-05,
|
|
"loss": 0.1402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06299129128456116,
|
|
"step": 4765,
|
|
"valid_targets_mean": 5146.2,
|
|
"valid_targets_min": 2294
|
|
},
|
|
{
|
|
"epoch": 4.087403598971722,
|
|
"grad_norm": 0.5274689353853328,
|
|
"learning_rate": 1.764670597394526e-05,
|
|
"loss": 0.1638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07674658298492432,
|
|
"step": 4770,
|
|
"valid_targets_mean": 4519.9,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 4.091688089117395,
|
|
"grad_norm": 0.6677711186644161,
|
|
"learning_rate": 1.7604277084664627e-05,
|
|
"loss": 0.1418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07176576554775238,
|
|
"step": 4775,
|
|
"valid_targets_mean": 5555.4,
|
|
"valid_targets_min": 2832
|
|
},
|
|
{
|
|
"epoch": 4.0959725792630675,
|
|
"grad_norm": 0.5595601408846527,
|
|
"learning_rate": 1.7561859131552138e-05,
|
|
"loss": 0.1568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0624658428132534,
|
|
"step": 4780,
|
|
"valid_targets_mean": 4522.5,
|
|
"valid_targets_min": 1209
|
|
},
|
|
{
|
|
"epoch": 4.10025706940874,
|
|
"grad_norm": 0.5455455804748165,
|
|
"learning_rate": 1.7519452308240324e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07068127393722534,
|
|
"step": 4785,
|
|
"valid_targets_mean": 5320.4,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 4.104541559554413,
|
|
"grad_norm": 0.5474643591865783,
|
|
"learning_rate": 1.747705680831089e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07546311616897583,
|
|
"step": 4790,
|
|
"valid_targets_mean": 5132.1,
|
|
"valid_targets_min": 3519
|
|
},
|
|
{
|
|
"epoch": 4.1088260497000855,
|
|
"grad_norm": 0.5385666835894883,
|
|
"learning_rate": 1.7434672825293858e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0874762162566185,
|
|
"step": 4795,
|
|
"valid_targets_mean": 4158.9,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 4.113110539845758,
|
|
"grad_norm": 0.6098356168965273,
|
|
"learning_rate": 1.7392300552666693e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07409660518169403,
|
|
"step": 4800,
|
|
"valid_targets_mean": 3346.6,
|
|
"valid_targets_min": 1078
|
|
},
|
|
{
|
|
"epoch": 4.117395029991431,
|
|
"grad_norm": 0.5600443893585015,
|
|
"learning_rate": 1.7349940183853388e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06515643000602722,
|
|
"step": 4805,
|
|
"valid_targets_mean": 4069.8,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 4.1216795201371035,
|
|
"grad_norm": 0.5302463304522121,
|
|
"learning_rate": 1.730759191222359e-05,
|
|
"loss": 0.1563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07636624574661255,
|
|
"step": 4810,
|
|
"valid_targets_mean": 5003.0,
|
|
"valid_targets_min": 2677
|
|
},
|
|
{
|
|
"epoch": 4.125964010282776,
|
|
"grad_norm": 0.5645410089555536,
|
|
"learning_rate": 1.7265255931091744e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08191752433776855,
|
|
"step": 4815,
|
|
"valid_targets_mean": 5747.6,
|
|
"valid_targets_min": 4079
|
|
},
|
|
{
|
|
"epoch": 4.130248500428449,
|
|
"grad_norm": 0.5577312981087081,
|
|
"learning_rate": 1.7222932433716165e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0870317667722702,
|
|
"step": 4820,
|
|
"valid_targets_mean": 4744.1,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 4.1345329905741215,
|
|
"grad_norm": 0.6047371298572394,
|
|
"learning_rate": 1.718062161329821e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07716318964958191,
|
|
"step": 4825,
|
|
"valid_targets_mean": 3328.6,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 4.138817480719794,
|
|
"grad_norm": 0.5179065735302368,
|
|
"learning_rate": 1.7138323662981342e-05,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055352915078401566,
|
|
"step": 4830,
|
|
"valid_targets_mean": 5146.1,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 4.143101970865467,
|
|
"grad_norm": 0.5772651797610137,
|
|
"learning_rate": 1.7096038775850295e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08402058482170105,
|
|
"step": 4835,
|
|
"valid_targets_mean": 4349.9,
|
|
"valid_targets_min": 2355
|
|
},
|
|
{
|
|
"epoch": 4.1473864610111395,
|
|
"grad_norm": 0.5282643276921688,
|
|
"learning_rate": 1.7053767144930155e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08722338080406189,
|
|
"step": 4840,
|
|
"valid_targets_mean": 5179.6,
|
|
"valid_targets_min": 3034
|
|
},
|
|
{
|
|
"epoch": 4.151670951156812,
|
|
"grad_norm": 0.5683081625420684,
|
|
"learning_rate": 1.7011508963185505e-05,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06397093832492828,
|
|
"step": 4845,
|
|
"valid_targets_mean": 4508.5,
|
|
"valid_targets_min": 2709
|
|
},
|
|
{
|
|
"epoch": 4.155955441302485,
|
|
"grad_norm": 0.5040127948122992,
|
|
"learning_rate": 1.6969264423519525e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07005979120731354,
|
|
"step": 4850,
|
|
"valid_targets_mean": 5723.5,
|
|
"valid_targets_min": 2919
|
|
},
|
|
{
|
|
"epoch": 4.1602399314481575,
|
|
"grad_norm": 0.5126571353902103,
|
|
"learning_rate": 1.6927033718773124e-05,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06733615696430206,
|
|
"step": 4855,
|
|
"valid_targets_mean": 5801.8,
|
|
"valid_targets_min": 3148
|
|
},
|
|
{
|
|
"epoch": 4.16452442159383,
|
|
"grad_norm": 0.5529197226331911,
|
|
"learning_rate": 1.688481704172407e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07038679718971252,
|
|
"step": 4860,
|
|
"valid_targets_mean": 4861.9,
|
|
"valid_targets_min": 2188
|
|
},
|
|
{
|
|
"epoch": 4.168808911739503,
|
|
"grad_norm": 0.5276769475415478,
|
|
"learning_rate": 1.684261458508607e-05,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09177596867084503,
|
|
"step": 4865,
|
|
"valid_targets_mean": 5110.4,
|
|
"valid_targets_min": 3486
|
|
},
|
|
{
|
|
"epoch": 4.1730934018851755,
|
|
"grad_norm": 0.5313106774206768,
|
|
"learning_rate": 1.6800426541507933e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07231737673282623,
|
|
"step": 4870,
|
|
"valid_targets_mean": 4668.4,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 4.177377892030848,
|
|
"grad_norm": 0.4968871910487643,
|
|
"learning_rate": 1.675825310357268e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06075207144021988,
|
|
"step": 4875,
|
|
"valid_targets_mean": 4199.8,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 4.181662382176521,
|
|
"grad_norm": 0.4992102223765819,
|
|
"learning_rate": 1.6716094463796656e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06933930516242981,
|
|
"step": 4880,
|
|
"valid_targets_mean": 6485.5,
|
|
"valid_targets_min": 4589
|
|
},
|
|
{
|
|
"epoch": 4.1859468723221935,
|
|
"grad_norm": 0.49852535954505295,
|
|
"learning_rate": 1.6673950814628638e-05,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06788508594036102,
|
|
"step": 4885,
|
|
"valid_targets_mean": 4713.8,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 4.190231362467866,
|
|
"grad_norm": 0.5428573528512465,
|
|
"learning_rate": 1.663182234844899e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09118878841400146,
|
|
"step": 4890,
|
|
"valid_targets_mean": 4834.9,
|
|
"valid_targets_min": 1226
|
|
},
|
|
{
|
|
"epoch": 4.194515852613539,
|
|
"grad_norm": 0.5399301656937301,
|
|
"learning_rate": 1.6589709257568766e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10401996970176697,
|
|
"step": 4895,
|
|
"valid_targets_mean": 5292.5,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 4.1988003427592115,
|
|
"grad_norm": 0.5475752036164578,
|
|
"learning_rate": 1.6547611734228817e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07695914804935455,
|
|
"step": 4900,
|
|
"valid_targets_mean": 4350.5,
|
|
"valid_targets_min": 1223
|
|
},
|
|
{
|
|
"epoch": 4.203084832904884,
|
|
"grad_norm": 0.4556661261268513,
|
|
"learning_rate": 1.650552997059895e-05,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06842450797557831,
|
|
"step": 4905,
|
|
"valid_targets_mean": 5715.6,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 4.207369323050557,
|
|
"grad_norm": 0.4656726329938494,
|
|
"learning_rate": 1.646346415877703e-05,
|
|
"loss": 0.1489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061458855867385864,
|
|
"step": 4910,
|
|
"valid_targets_mean": 6226.4,
|
|
"valid_targets_min": 4540
|
|
},
|
|
{
|
|
"epoch": 4.21165381319623,
|
|
"grad_norm": 0.5354888183136649,
|
|
"learning_rate": 1.6421414490788098e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09619096666574478,
|
|
"step": 4915,
|
|
"valid_targets_mean": 5275.5,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 4.215938303341902,
|
|
"grad_norm": 0.4975118230489396,
|
|
"learning_rate": 1.6379381158583493e-05,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06388730555772781,
|
|
"step": 4920,
|
|
"valid_targets_mean": 5423.9,
|
|
"valid_targets_min": 3288
|
|
},
|
|
{
|
|
"epoch": 4.220222793487575,
|
|
"grad_norm": 0.5974187769947972,
|
|
"learning_rate": 1.6337364354039994e-05,
|
|
"loss": 0.1551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07961668074131012,
|
|
"step": 4925,
|
|
"valid_targets_mean": 6249.6,
|
|
"valid_targets_min": 3773
|
|
},
|
|
{
|
|
"epoch": 4.224507283633248,
|
|
"grad_norm": 0.5862381544675477,
|
|
"learning_rate": 1.6295364268958936e-05,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08168651163578033,
|
|
"step": 4930,
|
|
"valid_targets_mean": 4983.1,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 4.22879177377892,
|
|
"grad_norm": 0.4911113975456263,
|
|
"learning_rate": 1.6253381095065314e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0776476263999939,
|
|
"step": 4935,
|
|
"valid_targets_mean": 6293.8,
|
|
"valid_targets_min": 2358
|
|
},
|
|
{
|
|
"epoch": 4.233076263924593,
|
|
"grad_norm": 0.5126617824938491,
|
|
"learning_rate": 1.621141502400694e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07559158653020859,
|
|
"step": 4940,
|
|
"valid_targets_mean": 6424.8,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 4.237360754070266,
|
|
"grad_norm": 0.5075151025551148,
|
|
"learning_rate": 1.6169466247353562e-05,
|
|
"loss": 0.1513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06209402531385422,
|
|
"step": 4945,
|
|
"valid_targets_mean": 5486.1,
|
|
"valid_targets_min": 2639
|
|
},
|
|
{
|
|
"epoch": 4.241645244215938,
|
|
"grad_norm": 0.5109370413756867,
|
|
"learning_rate": 1.612753495659596e-05,
|
|
"loss": 0.1633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09271623939275742,
|
|
"step": 4950,
|
|
"valid_targets_mean": 5698.2,
|
|
"valid_targets_min": 3159
|
|
},
|
|
{
|
|
"epoch": 4.245929734361611,
|
|
"grad_norm": 0.4747575594308358,
|
|
"learning_rate": 1.6085621343145113e-05,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07969255745410919,
|
|
"step": 4955,
|
|
"valid_targets_mean": 7038.0,
|
|
"valid_targets_min": 5366
|
|
},
|
|
{
|
|
"epoch": 4.250214224507284,
|
|
"grad_norm": 0.5653444035435231,
|
|
"learning_rate": 1.604372559833129e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07547251880168915,
|
|
"step": 4960,
|
|
"valid_targets_mean": 4756.8,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 4.254498714652956,
|
|
"grad_norm": 0.5581339202049348,
|
|
"learning_rate": 1.6001847913403195e-05,
|
|
"loss": 0.1514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07747235894203186,
|
|
"step": 4965,
|
|
"valid_targets_mean": 5881.0,
|
|
"valid_targets_min": 2908
|
|
},
|
|
{
|
|
"epoch": 4.258783204798629,
|
|
"grad_norm": 0.5703169846987617,
|
|
"learning_rate": 1.5959988479527103e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10156498104333878,
|
|
"step": 4970,
|
|
"valid_targets_mean": 3763.6,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 4.263067694944302,
|
|
"grad_norm": 0.5108130782973002,
|
|
"learning_rate": 1.591814748778595e-05,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06843523681163788,
|
|
"step": 4975,
|
|
"valid_targets_mean": 6338.8,
|
|
"valid_targets_min": 2763
|
|
},
|
|
{
|
|
"epoch": 4.267352185089974,
|
|
"grad_norm": 0.49260975625136155,
|
|
"learning_rate": 1.5876325129178522e-05,
|
|
"loss": 0.1647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061994947493076324,
|
|
"step": 4980,
|
|
"valid_targets_mean": 5452.9,
|
|
"valid_targets_min": 3764
|
|
},
|
|
{
|
|
"epoch": 4.271636675235647,
|
|
"grad_norm": 0.6114248898222204,
|
|
"learning_rate": 1.5834521594618514e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07271035015583038,
|
|
"step": 4985,
|
|
"valid_targets_mean": 4659.2,
|
|
"valid_targets_min": 1087
|
|
},
|
|
{
|
|
"epoch": 4.27592116538132,
|
|
"grad_norm": 0.529292747106911,
|
|
"learning_rate": 1.579273707493372e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0663774311542511,
|
|
"step": 4990,
|
|
"valid_targets_mean": 3475.1,
|
|
"valid_targets_min": 1711
|
|
},
|
|
{
|
|
"epoch": 4.280205655526992,
|
|
"grad_norm": 0.47914632822231956,
|
|
"learning_rate": 1.57509717608651e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058661431074142456,
|
|
"step": 4995,
|
|
"valid_targets_mean": 6168.5,
|
|
"valid_targets_min": 4451
|
|
},
|
|
{
|
|
"epoch": 4.284490145672665,
|
|
"grad_norm": 0.5638502813940276,
|
|
"learning_rate": 1.570922584306597e-05,
|
|
"loss": 0.1598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07884243130683899,
|
|
"step": 5000,
|
|
"valid_targets_mean": 4818.4,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 4.288774635818338,
|
|
"grad_norm": 0.5494126554720642,
|
|
"learning_rate": 1.56674995121011e-05,
|
|
"loss": 0.1689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08957488089799881,
|
|
"step": 5005,
|
|
"valid_targets_mean": 4892.2,
|
|
"valid_targets_min": 1181
|
|
},
|
|
{
|
|
"epoch": 4.29305912596401,
|
|
"grad_norm": 0.5068086295229425,
|
|
"learning_rate": 1.562579295844583e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06114926189184189,
|
|
"step": 5010,
|
|
"valid_targets_mean": 5003.1,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 4.297343616109683,
|
|
"grad_norm": 0.513304107378341,
|
|
"learning_rate": 1.5584106372485248e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07609188556671143,
|
|
"step": 5015,
|
|
"valid_targets_mean": 4886.4,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 4.301628106255356,
|
|
"grad_norm": 0.504771691823541,
|
|
"learning_rate": 1.5542439944513275e-05,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07484638690948486,
|
|
"step": 5020,
|
|
"valid_targets_mean": 4305.2,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 4.305912596401028,
|
|
"grad_norm": 0.5398695617811872,
|
|
"learning_rate": 1.550079386473182e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07757730782032013,
|
|
"step": 5025,
|
|
"valid_targets_mean": 5340.9,
|
|
"valid_targets_min": 3343
|
|
},
|
|
{
|
|
"epoch": 4.310197086546701,
|
|
"grad_norm": 0.550478230847001,
|
|
"learning_rate": 1.545916832324989e-05,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08564269542694092,
|
|
"step": 5030,
|
|
"valid_targets_mean": 4638.2,
|
|
"valid_targets_min": 2890
|
|
},
|
|
{
|
|
"epoch": 4.314481576692374,
|
|
"grad_norm": 0.4889711466495263,
|
|
"learning_rate": 1.5417563510082755e-05,
|
|
"loss": 0.1559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06698659062385559,
|
|
"step": 5035,
|
|
"valid_targets_mean": 5970.6,
|
|
"valid_targets_min": 4406
|
|
},
|
|
{
|
|
"epoch": 4.318766066838046,
|
|
"grad_norm": 0.5347010488287188,
|
|
"learning_rate": 1.5375979615151057e-05,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08710899949073792,
|
|
"step": 5040,
|
|
"valid_targets_mean": 4744.0,
|
|
"valid_targets_min": 2763
|
|
},
|
|
{
|
|
"epoch": 4.323050556983719,
|
|
"grad_norm": 0.5546906054359373,
|
|
"learning_rate": 1.533441682827994e-05,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06388499587774277,
|
|
"step": 5045,
|
|
"valid_targets_mean": 4623.4,
|
|
"valid_targets_min": 1150
|
|
},
|
|
{
|
|
"epoch": 4.327335047129392,
|
|
"grad_norm": 0.5378021703951273,
|
|
"learning_rate": 1.529287533919821e-05,
|
|
"loss": 0.1584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0835384801030159,
|
|
"step": 5050,
|
|
"valid_targets_mean": 4925.8,
|
|
"valid_targets_min": 2589
|
|
},
|
|
{
|
|
"epoch": 4.331619537275064,
|
|
"grad_norm": 0.5046452348781201,
|
|
"learning_rate": 1.5251355337537444e-05,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07628235965967178,
|
|
"step": 5055,
|
|
"valid_targets_mean": 5603.4,
|
|
"valid_targets_min": 2126
|
|
},
|
|
{
|
|
"epoch": 4.335904027420737,
|
|
"grad_norm": 0.493889238390984,
|
|
"learning_rate": 1.5209857012831123e-05,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06308963894844055,
|
|
"step": 5060,
|
|
"valid_targets_mean": 5509.6,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 4.34018851756641,
|
|
"grad_norm": 0.5937885212471326,
|
|
"learning_rate": 1.5168380554513784e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09766152501106262,
|
|
"step": 5065,
|
|
"valid_targets_mean": 3527.0,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 4.344473007712082,
|
|
"grad_norm": 0.5495491492987082,
|
|
"learning_rate": 1.5126926151920147e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07378506660461426,
|
|
"step": 5070,
|
|
"valid_targets_mean": 4329.9,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 4.348757497857755,
|
|
"grad_norm": 0.5484078687151889,
|
|
"learning_rate": 1.5085493994284242e-05,
|
|
"loss": 0.1617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07752133905887604,
|
|
"step": 5075,
|
|
"valid_targets_mean": 4007.2,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 4.353041988003428,
|
|
"grad_norm": 0.6848788002164049,
|
|
"learning_rate": 1.5044084270738564e-05,
|
|
"loss": 0.1655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08638869971036911,
|
|
"step": 5080,
|
|
"valid_targets_mean": 4488.8,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 4.3573264781491,
|
|
"grad_norm": 0.5309526090692271,
|
|
"learning_rate": 1.5002697170313196e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07703551650047302,
|
|
"step": 5085,
|
|
"valid_targets_mean": 6423.1,
|
|
"valid_targets_min": 2527
|
|
},
|
|
{
|
|
"epoch": 4.361610968294773,
|
|
"grad_norm": 0.5847600572908466,
|
|
"learning_rate": 1.4961332881934953e-05,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07591263949871063,
|
|
"step": 5090,
|
|
"valid_targets_mean": 4150.9,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 4.365895458440446,
|
|
"grad_norm": 0.5180251527106797,
|
|
"learning_rate": 1.4919991594426502e-05,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07740815728902817,
|
|
"step": 5095,
|
|
"valid_targets_mean": 5425.0,
|
|
"valid_targets_min": 3707
|
|
},
|
|
{
|
|
"epoch": 4.370179948586118,
|
|
"grad_norm": 0.5000963323863451,
|
|
"learning_rate": 1.4878673496505531e-05,
|
|
"loss": 0.1603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0634835809469223,
|
|
"step": 5100,
|
|
"valid_targets_mean": 5381.6,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 4.374464438731791,
|
|
"grad_norm": 0.5524440824151057,
|
|
"learning_rate": 1.4837378776783861e-05,
|
|
"loss": 0.1498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07013142108917236,
|
|
"step": 5105,
|
|
"valid_targets_mean": 4227.1,
|
|
"valid_targets_min": 2190
|
|
},
|
|
{
|
|
"epoch": 4.378748928877464,
|
|
"grad_norm": 0.5691071845198381,
|
|
"learning_rate": 1.4796107623766594e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08084717392921448,
|
|
"step": 5110,
|
|
"valid_targets_mean": 5505.4,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 4.383033419023136,
|
|
"grad_norm": 0.491822580389387,
|
|
"learning_rate": 1.4754860225851248e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0682189092040062,
|
|
"step": 5115,
|
|
"valid_targets_mean": 5129.0,
|
|
"valid_targets_min": 2418
|
|
},
|
|
{
|
|
"epoch": 4.387317909168809,
|
|
"grad_norm": 0.5001887996202214,
|
|
"learning_rate": 1.4713636771326929e-05,
|
|
"loss": 0.1519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06508070975542068,
|
|
"step": 5120,
|
|
"valid_targets_mean": 5043.5,
|
|
"valid_targets_min": 3149
|
|
},
|
|
{
|
|
"epoch": 4.391602399314482,
|
|
"grad_norm": 0.5022664642918662,
|
|
"learning_rate": 1.4672437448373416e-05,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07056738436222076,
|
|
"step": 5125,
|
|
"valid_targets_mean": 6385.4,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 4.395886889460154,
|
|
"grad_norm": 0.5863443026857277,
|
|
"learning_rate": 1.4631262445060332e-05,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09551360458135605,
|
|
"step": 5130,
|
|
"valid_targets_mean": 3457.2,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 4.400171379605827,
|
|
"grad_norm": 0.5741351775944951,
|
|
"learning_rate": 1.4590111949346306e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08465814590454102,
|
|
"step": 5135,
|
|
"valid_targets_mean": 4954.9,
|
|
"valid_targets_min": 2516
|
|
},
|
|
{
|
|
"epoch": 4.4044558697515,
|
|
"grad_norm": 0.5430047413666953,
|
|
"learning_rate": 1.4548986149078062e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09200140833854675,
|
|
"step": 5140,
|
|
"valid_targets_mean": 5727.6,
|
|
"valid_targets_min": 2481
|
|
},
|
|
{
|
|
"epoch": 4.408740359897172,
|
|
"grad_norm": 0.5646929867221707,
|
|
"learning_rate": 1.4507885231989622e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06149185448884964,
|
|
"step": 5145,
|
|
"valid_targets_mean": 4736.2,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 4.413024850042845,
|
|
"grad_norm": 0.5302072495262342,
|
|
"learning_rate": 1.4466809385701392e-05,
|
|
"loss": 0.1607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05918242782354355,
|
|
"step": 5150,
|
|
"valid_targets_mean": 4760.2,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 4.417309340188518,
|
|
"grad_norm": 0.5252703532678488,
|
|
"learning_rate": 1.4425758797719365e-05,
|
|
"loss": 0.1665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0716017633676529,
|
|
"step": 5155,
|
|
"valid_targets_mean": 4607.8,
|
|
"valid_targets_min": 2376
|
|
},
|
|
{
|
|
"epoch": 4.42159383033419,
|
|
"grad_norm": 0.5844920058156552,
|
|
"learning_rate": 1.4384733655434201e-05,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07548099756240845,
|
|
"step": 5160,
|
|
"valid_targets_mean": 4374.4,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 4.425878320479863,
|
|
"grad_norm": 0.5440444311779218,
|
|
"learning_rate": 1.434373414612043e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08596000075340271,
|
|
"step": 5165,
|
|
"valid_targets_mean": 4538.4,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 4.430162810625536,
|
|
"grad_norm": 0.5227315015316456,
|
|
"learning_rate": 1.4302760456935554e-05,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06954213976860046,
|
|
"step": 5170,
|
|
"valid_targets_mean": 4355.1,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 4.434447300771208,
|
|
"grad_norm": 0.5359733841570606,
|
|
"learning_rate": 1.4261812774919204e-05,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07341448962688446,
|
|
"step": 5175,
|
|
"valid_targets_mean": 5344.9,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 4.438731790916881,
|
|
"grad_norm": 0.5007954035839989,
|
|
"learning_rate": 1.422089128699232e-05,
|
|
"loss": 0.1544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0841982364654541,
|
|
"step": 5180,
|
|
"valid_targets_mean": 5447.6,
|
|
"valid_targets_min": 4387
|
|
},
|
|
{
|
|
"epoch": 4.443016281062554,
|
|
"grad_norm": 0.4942238514175937,
|
|
"learning_rate": 1.417999617995623e-05,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.083289735019207,
|
|
"step": 5185,
|
|
"valid_targets_mean": 6193.6,
|
|
"valid_targets_min": 3138
|
|
},
|
|
{
|
|
"epoch": 4.447300771208226,
|
|
"grad_norm": 0.540560184297077,
|
|
"learning_rate": 1.4139127640491878e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09234566986560822,
|
|
"step": 5190,
|
|
"valid_targets_mean": 5512.8,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 4.451585261353899,
|
|
"grad_norm": 0.5261391646545392,
|
|
"learning_rate": 1.40982858551589e-05,
|
|
"loss": 0.1593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08443096280097961,
|
|
"step": 5195,
|
|
"valid_targets_mean": 4668.5,
|
|
"valid_targets_min": 2560
|
|
},
|
|
{
|
|
"epoch": 4.455869751499572,
|
|
"grad_norm": 0.5522795050476156,
|
|
"learning_rate": 1.4057471010394817e-05,
|
|
"loss": 0.1671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07564914226531982,
|
|
"step": 5200,
|
|
"valid_targets_mean": 4724.5,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 4.460154241645244,
|
|
"grad_norm": 0.567034605022243,
|
|
"learning_rate": 1.4016683292514159e-05,
|
|
"loss": 0.1571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0924619808793068,
|
|
"step": 5205,
|
|
"valid_targets_mean": 4826.9,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 4.464438731790917,
|
|
"grad_norm": 0.5095049144912834,
|
|
"learning_rate": 1.3975922887707642e-05,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08745399862527847,
|
|
"step": 5210,
|
|
"valid_targets_mean": 7177.6,
|
|
"valid_targets_min": 3947
|
|
},
|
|
{
|
|
"epoch": 4.46872322193659,
|
|
"grad_norm": 0.5347971040551971,
|
|
"learning_rate": 1.3935189982041288e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06757889688014984,
|
|
"step": 5215,
|
|
"valid_targets_mean": 4876.9,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 4.473007712082262,
|
|
"grad_norm": 0.5031940372937177,
|
|
"learning_rate": 1.389448476145558e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07829098403453827,
|
|
"step": 5220,
|
|
"valid_targets_mean": 5504.0,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 4.477292202227935,
|
|
"grad_norm": 0.4877850332317292,
|
|
"learning_rate": 1.385380741176465e-05,
|
|
"loss": 0.1491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05520389601588249,
|
|
"step": 5225,
|
|
"valid_targets_mean": 5574.2,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 4.481576692373608,
|
|
"grad_norm": 0.5168689231491843,
|
|
"learning_rate": 1.3813158118655388e-05,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06339316070079803,
|
|
"step": 5230,
|
|
"valid_targets_mean": 4143.9,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 4.4858611825192805,
|
|
"grad_norm": 0.5211426284721579,
|
|
"learning_rate": 1.37725370676866e-05,
|
|
"loss": 0.1552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07790861278772354,
|
|
"step": 5235,
|
|
"valid_targets_mean": 5682.0,
|
|
"valid_targets_min": 1654
|
|
},
|
|
{
|
|
"epoch": 4.490145672664953,
|
|
"grad_norm": 0.5501908060838966,
|
|
"learning_rate": 1.3731944444288176e-05,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07432614266872406,
|
|
"step": 5240,
|
|
"valid_targets_mean": 5329.0,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 4.494430162810626,
|
|
"grad_norm": 0.6814936988247406,
|
|
"learning_rate": 1.3691380433760256e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08724258840084076,
|
|
"step": 5245,
|
|
"valid_targets_mean": 5105.9,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 4.4987146529562985,
|
|
"grad_norm": 0.48575644119426753,
|
|
"learning_rate": 1.3650845221272339e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08099687099456787,
|
|
"step": 5250,
|
|
"valid_targets_mean": 5515.2,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 4.502999143101971,
|
|
"grad_norm": 0.4792202117077872,
|
|
"learning_rate": 1.361033899186247e-05,
|
|
"loss": 0.1583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05577072501182556,
|
|
"step": 5255,
|
|
"valid_targets_mean": 4312.9,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 4.507283633247644,
|
|
"grad_norm": 0.4968967287597206,
|
|
"learning_rate": 1.356986193043641e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056737661361694336,
|
|
"step": 5260,
|
|
"valid_targets_mean": 5970.6,
|
|
"valid_targets_min": 2492
|
|
},
|
|
{
|
|
"epoch": 4.5115681233933165,
|
|
"grad_norm": 0.548191687960978,
|
|
"learning_rate": 1.3529414221766757e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06213381886482239,
|
|
"step": 5265,
|
|
"valid_targets_mean": 6522.9,
|
|
"valid_targets_min": 4398
|
|
},
|
|
{
|
|
"epoch": 4.515852613538989,
|
|
"grad_norm": 0.5321136136220768,
|
|
"learning_rate": 1.3488996050492113e-05,
|
|
"loss": 0.1678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09102845191955566,
|
|
"step": 5270,
|
|
"valid_targets_mean": 6087.0,
|
|
"valid_targets_min": 3210
|
|
},
|
|
{
|
|
"epoch": 4.520137103684662,
|
|
"grad_norm": 0.5061958136840395,
|
|
"learning_rate": 1.344860760111625e-05,
|
|
"loss": 0.1602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0620713010430336,
|
|
"step": 5275,
|
|
"valid_targets_mean": 5473.5,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 4.5244215938303345,
|
|
"grad_norm": 0.6581217807964654,
|
|
"learning_rate": 1.340824905800727e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0733901634812355,
|
|
"step": 5280,
|
|
"valid_targets_mean": 3708.5,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 4.528706083976007,
|
|
"grad_norm": 0.5523848868978788,
|
|
"learning_rate": 1.3367920605396747e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08316996693611145,
|
|
"step": 5285,
|
|
"valid_targets_mean": 5033.2,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 4.53299057412168,
|
|
"grad_norm": 0.5996859005901333,
|
|
"learning_rate": 1.3327622427378898e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07758800685405731,
|
|
"step": 5290,
|
|
"valid_targets_mean": 4266.6,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 4.5372750642673525,
|
|
"grad_norm": 0.4698519477164445,
|
|
"learning_rate": 1.3287354707909749e-05,
|
|
"loss": 0.1539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06927581131458282,
|
|
"step": 5295,
|
|
"valid_targets_mean": 5093.6,
|
|
"valid_targets_min": 2129
|
|
},
|
|
{
|
|
"epoch": 4.541559554413025,
|
|
"grad_norm": 0.5573866431663388,
|
|
"learning_rate": 1.3247117630806278e-05,
|
|
"loss": 0.1635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08232997357845306,
|
|
"step": 5300,
|
|
"valid_targets_mean": 4446.1,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 4.545844044558698,
|
|
"grad_norm": 0.5159305534170026,
|
|
"learning_rate": 1.3206911379745584e-05,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09247186779975891,
|
|
"step": 5305,
|
|
"valid_targets_mean": 4631.2,
|
|
"valid_targets_min": 2132
|
|
},
|
|
{
|
|
"epoch": 4.5501285347043705,
|
|
"grad_norm": 0.5031933574779294,
|
|
"learning_rate": 1.3166736138264055e-05,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06761917471885681,
|
|
"step": 5310,
|
|
"valid_targets_mean": 5934.8,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 4.554413024850043,
|
|
"grad_norm": 0.5472447704799865,
|
|
"learning_rate": 1.3126592089756521e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07344842702150345,
|
|
"step": 5315,
|
|
"valid_targets_mean": 5072.5,
|
|
"valid_targets_min": 2786
|
|
},
|
|
{
|
|
"epoch": 4.558697514995716,
|
|
"grad_norm": 0.577751519518059,
|
|
"learning_rate": 1.3086479417475408e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09713322669267654,
|
|
"step": 5320,
|
|
"valid_targets_mean": 5168.0,
|
|
"valid_targets_min": 2283
|
|
},
|
|
{
|
|
"epoch": 4.5629820051413885,
|
|
"grad_norm": 0.47491486290625706,
|
|
"learning_rate": 1.3046398304529933e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06455229222774506,
|
|
"step": 5325,
|
|
"valid_targets_mean": 5562.4,
|
|
"valid_targets_min": 1848
|
|
},
|
|
{
|
|
"epoch": 4.567266495287061,
|
|
"grad_norm": 0.4797250814827459,
|
|
"learning_rate": 1.3006348933885252e-05,
|
|
"loss": 0.1627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05497722327709198,
|
|
"step": 5330,
|
|
"valid_targets_mean": 5382.9,
|
|
"valid_targets_min": 3161
|
|
},
|
|
{
|
|
"epoch": 4.571550985432734,
|
|
"grad_norm": 0.5414909779456916,
|
|
"learning_rate": 1.2966331488361599e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08572009205818176,
|
|
"step": 5335,
|
|
"valid_targets_mean": 4926.2,
|
|
"valid_targets_min": 770
|
|
},
|
|
{
|
|
"epoch": 4.5758354755784065,
|
|
"grad_norm": 0.5539025467247136,
|
|
"learning_rate": 1.2926346150633484e-05,
|
|
"loss": 0.1532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06572136282920837,
|
|
"step": 5340,
|
|
"valid_targets_mean": 4644.0,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 4.580119965724079,
|
|
"grad_norm": 0.5283538221866442,
|
|
"learning_rate": 1.288639310322886e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07332241535186768,
|
|
"step": 5345,
|
|
"valid_targets_mean": 5910.5,
|
|
"valid_targets_min": 4202
|
|
},
|
|
{
|
|
"epoch": 4.584404455869752,
|
|
"grad_norm": 0.5636559415041953,
|
|
"learning_rate": 1.2846472528528256e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09230844676494598,
|
|
"step": 5350,
|
|
"valid_targets_mean": 5420.8,
|
|
"valid_targets_min": 3139
|
|
},
|
|
{
|
|
"epoch": 4.5886889460154245,
|
|
"grad_norm": 0.4655589846682366,
|
|
"learning_rate": 1.2806584608763994e-05,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0654459148645401,
|
|
"step": 5355,
|
|
"valid_targets_mean": 5589.5,
|
|
"valid_targets_min": 2240
|
|
},
|
|
{
|
|
"epoch": 4.592973436161097,
|
|
"grad_norm": 0.5546221446040871,
|
|
"learning_rate": 1.2766729526019304e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07415308803319931,
|
|
"step": 5360,
|
|
"valid_targets_mean": 5301.8,
|
|
"valid_targets_min": 3329
|
|
},
|
|
{
|
|
"epoch": 4.59725792630677,
|
|
"grad_norm": 0.546669939195082,
|
|
"learning_rate": 1.2726907462227544e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08334168046712875,
|
|
"step": 5365,
|
|
"valid_targets_mean": 5201.9,
|
|
"valid_targets_min": 3041
|
|
},
|
|
{
|
|
"epoch": 4.6015424164524426,
|
|
"grad_norm": 0.5135226747551913,
|
|
"learning_rate": 1.2687118599171327e-05,
|
|
"loss": 0.1612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07777928560972214,
|
|
"step": 5370,
|
|
"valid_targets_mean": 5755.5,
|
|
"valid_targets_min": 3512
|
|
},
|
|
{
|
|
"epoch": 4.605826906598114,
|
|
"grad_norm": 0.5242944087916945,
|
|
"learning_rate": 1.2647363118481717e-05,
|
|
"loss": 0.1445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06749745458364487,
|
|
"step": 5375,
|
|
"valid_targets_mean": 5622.8,
|
|
"valid_targets_min": 3932
|
|
},
|
|
{
|
|
"epoch": 4.610111396743788,
|
|
"grad_norm": 0.5290445805781622,
|
|
"learning_rate": 1.260764120163739e-05,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09278124570846558,
|
|
"step": 5380,
|
|
"valid_targets_mean": 5294.4,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 4.61439588688946,
|
|
"grad_norm": 0.4952292523993502,
|
|
"learning_rate": 1.25679530299638e-05,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07781682163476944,
|
|
"step": 5385,
|
|
"valid_targets_mean": 6209.1,
|
|
"valid_targets_min": 3518
|
|
},
|
|
{
|
|
"epoch": 4.618680377035133,
|
|
"grad_norm": 0.5090845076261995,
|
|
"learning_rate": 1.2528298784632375e-05,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08040069788694382,
|
|
"step": 5390,
|
|
"valid_targets_mean": 6575.0,
|
|
"valid_targets_min": 4930
|
|
},
|
|
{
|
|
"epoch": 4.622964867180805,
|
|
"grad_norm": 0.7138521723562766,
|
|
"learning_rate": 1.2488678646659658e-05,
|
|
"loss": 0.1625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07027311623096466,
|
|
"step": 5395,
|
|
"valid_targets_mean": 4191.6,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 4.627249357326479,
|
|
"grad_norm": 0.5234789422070203,
|
|
"learning_rate": 1.2449092796906497e-05,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06149337440729141,
|
|
"step": 5400,
|
|
"valid_targets_mean": 5699.6,
|
|
"valid_targets_min": 2825
|
|
},
|
|
{
|
|
"epoch": 4.63153384747215,
|
|
"grad_norm": 0.5453103495533085,
|
|
"learning_rate": 1.2409541416077238e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0639188289642334,
|
|
"step": 5405,
|
|
"valid_targets_mean": 4543.4,
|
|
"valid_targets_min": 1354
|
|
},
|
|
{
|
|
"epoch": 4.635818337617824,
|
|
"grad_norm": 0.529250776293999,
|
|
"learning_rate": 1.237002468471886e-05,
|
|
"loss": 0.1693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06333880126476288,
|
|
"step": 5410,
|
|
"valid_targets_mean": 5286.9,
|
|
"valid_targets_min": 4132
|
|
},
|
|
{
|
|
"epoch": 4.640102827763496,
|
|
"grad_norm": 0.4824578549134014,
|
|
"learning_rate": 1.2330542783220177e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06923186033964157,
|
|
"step": 5415,
|
|
"valid_targets_mean": 5960.9,
|
|
"valid_targets_min": 2901
|
|
},
|
|
{
|
|
"epoch": 4.644387317909169,
|
|
"grad_norm": 0.6147699408146292,
|
|
"learning_rate": 1.2291095891811013e-05,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0718100517988205,
|
|
"step": 5420,
|
|
"valid_targets_mean": 5304.1,
|
|
"valid_targets_min": 2365
|
|
},
|
|
{
|
|
"epoch": 4.648671808054841,
|
|
"grad_norm": 0.570110845047798,
|
|
"learning_rate": 1.2251684190561374e-05,
|
|
"loss": 0.1549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11174198985099792,
|
|
"step": 5425,
|
|
"valid_targets_mean": 4865.2,
|
|
"valid_targets_min": 1515
|
|
},
|
|
{
|
|
"epoch": 4.652956298200515,
|
|
"grad_norm": 0.5422990193863433,
|
|
"learning_rate": 1.2212307859380618e-05,
|
|
"loss": 0.1627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08089768141508102,
|
|
"step": 5430,
|
|
"valid_targets_mean": 5349.4,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 4.657240788346186,
|
|
"grad_norm": 0.5224672528642734,
|
|
"learning_rate": 1.2172967078016656e-05,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07316748797893524,
|
|
"step": 5435,
|
|
"valid_targets_mean": 5330.9,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 4.66152527849186,
|
|
"grad_norm": 0.569852849423712,
|
|
"learning_rate": 1.2133662026055125e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.070674367249012,
|
|
"step": 5440,
|
|
"valid_targets_mean": 4763.4,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 4.665809768637532,
|
|
"grad_norm": 0.5424889241152411,
|
|
"learning_rate": 1.209439288291854e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09382478892803192,
|
|
"step": 5445,
|
|
"valid_targets_mean": 4803.8,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 4.670094258783205,
|
|
"grad_norm": 0.5472144017637648,
|
|
"learning_rate": 1.2055159827865505e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0735054463148117,
|
|
"step": 5450,
|
|
"valid_targets_mean": 4434.1,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 4.674378748928877,
|
|
"grad_norm": 0.4995362136721301,
|
|
"learning_rate": 1.2015963039989905e-05,
|
|
"loss": 0.153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06715245544910431,
|
|
"step": 5455,
|
|
"valid_targets_mean": 4374.8,
|
|
"valid_targets_min": 2406
|
|
},
|
|
{
|
|
"epoch": 4.678663239074551,
|
|
"grad_norm": 0.5507431322821279,
|
|
"learning_rate": 1.1976802698220038e-05,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06111610680818558,
|
|
"step": 5460,
|
|
"valid_targets_mean": 4680.6,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 4.682947729220222,
|
|
"grad_norm": 0.5378007298408868,
|
|
"learning_rate": 1.193767898131785e-05,
|
|
"loss": 0.1659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07199619710445404,
|
|
"step": 5465,
|
|
"valid_targets_mean": 4682.8,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 4.687232219365896,
|
|
"grad_norm": 0.5653851351887367,
|
|
"learning_rate": 1.1898592067878095e-05,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06057225912809372,
|
|
"step": 5470,
|
|
"valid_targets_mean": 4277.6,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 4.691516709511568,
|
|
"grad_norm": 0.5696106129355583,
|
|
"learning_rate": 1.1859542136327536e-05,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08045990765094757,
|
|
"step": 5475,
|
|
"valid_targets_mean": 4922.5,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 4.69580119965724,
|
|
"grad_norm": 0.7438822419270039,
|
|
"learning_rate": 1.1820529364924097e-05,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06591324508190155,
|
|
"step": 5480,
|
|
"valid_targets_mean": 4385.4,
|
|
"valid_targets_min": 2395
|
|
},
|
|
{
|
|
"epoch": 4.700085689802913,
|
|
"grad_norm": 0.5508282840037704,
|
|
"learning_rate": 1.1781553931756085e-05,
|
|
"loss": 0.1603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08241961896419525,
|
|
"step": 5485,
|
|
"valid_targets_mean": 4717.6,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 4.704370179948586,
|
|
"grad_norm": 0.5323245501515965,
|
|
"learning_rate": 1.1742616014741357e-05,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08502338826656342,
|
|
"step": 5490,
|
|
"valid_targets_mean": 4517.1,
|
|
"valid_targets_min": 2815
|
|
},
|
|
{
|
|
"epoch": 4.708654670094258,
|
|
"grad_norm": 0.5426241679965548,
|
|
"learning_rate": 1.170371579162651e-05,
|
|
"loss": 0.1653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08141259849071503,
|
|
"step": 5495,
|
|
"valid_targets_mean": 4457.2,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 4.712939160239931,
|
|
"grad_norm": 0.5261494063308156,
|
|
"learning_rate": 1.1664853439986088e-05,
|
|
"loss": 0.1588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0825117975473404,
|
|
"step": 5500,
|
|
"valid_targets_mean": 5681.6,
|
|
"valid_targets_min": 3930
|
|
},
|
|
{
|
|
"epoch": 4.717223650385604,
|
|
"grad_norm": 0.5842224316874969,
|
|
"learning_rate": 1.1626029137221732e-05,
|
|
"loss": 0.1762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.089847132563591,
|
|
"step": 5505,
|
|
"valid_targets_mean": 4607.5,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 4.721508140531276,
|
|
"grad_norm": 0.5451479286311981,
|
|
"learning_rate": 1.158724306056143e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07350163161754608,
|
|
"step": 5510,
|
|
"valid_targets_mean": 5046.6,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 4.725792630676949,
|
|
"grad_norm": 0.5225285581687482,
|
|
"learning_rate": 1.1548495387058642e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08591011166572571,
|
|
"step": 5515,
|
|
"valid_targets_mean": 5717.2,
|
|
"valid_targets_min": 3322
|
|
},
|
|
{
|
|
"epoch": 4.730077120822622,
|
|
"grad_norm": 0.5141714932638097,
|
|
"learning_rate": 1.1509786293591524e-05,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08459824323654175,
|
|
"step": 5520,
|
|
"valid_targets_mean": 5143.5,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 4.734361610968294,
|
|
"grad_norm": 0.5597302521486275,
|
|
"learning_rate": 1.1471115956862145e-05,
|
|
"loss": 0.1649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08023977279663086,
|
|
"step": 5525,
|
|
"valid_targets_mean": 4154.6,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 4.738646101113967,
|
|
"grad_norm": 0.538636287377202,
|
|
"learning_rate": 1.143248455339563e-05,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07752753794193268,
|
|
"step": 5530,
|
|
"valid_targets_mean": 4840.4,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 4.74293059125964,
|
|
"grad_norm": 0.5506968921961911,
|
|
"learning_rate": 1.1393892259539383e-05,
|
|
"loss": 0.1636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08784641325473785,
|
|
"step": 5535,
|
|
"valid_targets_mean": 4550.6,
|
|
"valid_targets_min": 1941
|
|
},
|
|
{
|
|
"epoch": 4.7472150814053125,
|
|
"grad_norm": 0.5163377608613207,
|
|
"learning_rate": 1.1355339251462274e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06592030823230743,
|
|
"step": 5540,
|
|
"valid_targets_mean": 4323.5,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 4.751499571550985,
|
|
"grad_norm": 0.6296357758109399,
|
|
"learning_rate": 1.1316825705153865e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09954133629798889,
|
|
"step": 5545,
|
|
"valid_targets_mean": 4299.6,
|
|
"valid_targets_min": 2768
|
|
},
|
|
{
|
|
"epoch": 4.755784061696658,
|
|
"grad_norm": 0.48267799557988644,
|
|
"learning_rate": 1.1278351796423545e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05708755552768707,
|
|
"step": 5550,
|
|
"valid_targets_mean": 5362.1,
|
|
"valid_targets_min": 2246
|
|
},
|
|
{
|
|
"epoch": 4.7600685518423305,
|
|
"grad_norm": 0.5302101998703135,
|
|
"learning_rate": 1.1239917700899778e-05,
|
|
"loss": 0.1613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0904797613620758,
|
|
"step": 5555,
|
|
"valid_targets_mean": 5171.4,
|
|
"valid_targets_min": 3800
|
|
},
|
|
{
|
|
"epoch": 4.764353041988003,
|
|
"grad_norm": 0.5421815674524496,
|
|
"learning_rate": 1.1201523594029296e-05,
|
|
"loss": 0.1627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07278761267662048,
|
|
"step": 5560,
|
|
"valid_targets_mean": 6107.8,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 4.768637532133676,
|
|
"grad_norm": 0.5281193865224264,
|
|
"learning_rate": 1.1163169651076271e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07599358260631561,
|
|
"step": 5565,
|
|
"valid_targets_mean": 5263.0,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 4.7729220222793485,
|
|
"grad_norm": 0.5672602574684631,
|
|
"learning_rate": 1.1124856047121538e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08513578772544861,
|
|
"step": 5570,
|
|
"valid_targets_mean": 4949.6,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 4.777206512425021,
|
|
"grad_norm": 0.5546920567993101,
|
|
"learning_rate": 1.108658295706178e-05,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07476161420345306,
|
|
"step": 5575,
|
|
"valid_targets_mean": 4970.9,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 4.781491002570694,
|
|
"grad_norm": 0.5495756311638148,
|
|
"learning_rate": 1.104835055560877e-05,
|
|
"loss": 0.159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11116725951433182,
|
|
"step": 5580,
|
|
"valid_targets_mean": 5278.9,
|
|
"valid_targets_min": 3203
|
|
},
|
|
{
|
|
"epoch": 4.7857754927163665,
|
|
"grad_norm": 0.531572898829633,
|
|
"learning_rate": 1.1010159017288503e-05,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06708759069442749,
|
|
"step": 5585,
|
|
"valid_targets_mean": 5266.9,
|
|
"valid_targets_min": 2536
|
|
},
|
|
{
|
|
"epoch": 4.790059982862039,
|
|
"grad_norm": 0.6385122856137114,
|
|
"learning_rate": 1.0972008516440475e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08954238146543503,
|
|
"step": 5590,
|
|
"valid_targets_mean": 4359.5,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 4.794344473007712,
|
|
"grad_norm": 0.5186245977784936,
|
|
"learning_rate": 1.0933899227216825e-05,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0712139829993248,
|
|
"step": 5595,
|
|
"valid_targets_mean": 5009.5,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 4.7986289631533845,
|
|
"grad_norm": 0.5319356056915755,
|
|
"learning_rate": 1.0895831323581576e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07693114131689072,
|
|
"step": 5600,
|
|
"valid_targets_mean": 4645.4,
|
|
"valid_targets_min": 2833
|
|
},
|
|
{
|
|
"epoch": 4.802913453299057,
|
|
"grad_norm": 0.5212979617586241,
|
|
"learning_rate": 1.085780497930983e-05,
|
|
"loss": 0.1491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07081075012683868,
|
|
"step": 5605,
|
|
"valid_targets_mean": 5706.9,
|
|
"valid_targets_min": 2853
|
|
},
|
|
{
|
|
"epoch": 4.80719794344473,
|
|
"grad_norm": 0.535613814118286,
|
|
"learning_rate": 1.0819820367986971e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054836373776197433,
|
|
"step": 5610,
|
|
"valid_targets_mean": 4400.9,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 4.8114824335904025,
|
|
"grad_norm": 0.524310423958414,
|
|
"learning_rate": 1.0781877663007894e-05,
|
|
"loss": 0.1562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08845508098602295,
|
|
"step": 5615,
|
|
"valid_targets_mean": 5411.8,
|
|
"valid_targets_min": 2292
|
|
},
|
|
{
|
|
"epoch": 4.815766923736075,
|
|
"grad_norm": 0.49503651101709395,
|
|
"learning_rate": 1.0743977037576175e-05,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06133238226175308,
|
|
"step": 5620,
|
|
"valid_targets_mean": 6299.9,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 4.820051413881748,
|
|
"grad_norm": 0.5260704382165095,
|
|
"learning_rate": 1.0706118664703325e-05,
|
|
"loss": 0.1531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07649976760149002,
|
|
"step": 5625,
|
|
"valid_targets_mean": 5099.4,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 4.8243359040274205,
|
|
"grad_norm": 0.5417701735704517,
|
|
"learning_rate": 1.0668302717207956e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07010504603385925,
|
|
"step": 5630,
|
|
"valid_targets_mean": 4464.9,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 4.828620394173093,
|
|
"grad_norm": 0.5671176679190546,
|
|
"learning_rate": 1.063052936771503e-05,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06441761553287506,
|
|
"step": 5635,
|
|
"valid_targets_mean": 5495.6,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 4.832904884318766,
|
|
"grad_norm": 0.581886558811821,
|
|
"learning_rate": 1.0592798788655041e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09613919258117676,
|
|
"step": 5640,
|
|
"valid_targets_mean": 5075.4,
|
|
"valid_targets_min": 2716
|
|
},
|
|
{
|
|
"epoch": 4.8371893744644385,
|
|
"grad_norm": 0.7008507535275961,
|
|
"learning_rate": 1.0555111152263242e-05,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1073591411113739,
|
|
"step": 5645,
|
|
"valid_targets_mean": 6467.6,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 4.841473864610111,
|
|
"grad_norm": 0.5070119707443743,
|
|
"learning_rate": 1.0517466630578874e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054898276925086975,
|
|
"step": 5650,
|
|
"valid_targets_mean": 5255.6,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 4.845758354755784,
|
|
"grad_norm": 0.5516486979699894,
|
|
"learning_rate": 1.0479865395444362e-05,
|
|
"loss": 0.1486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059809666126966476,
|
|
"step": 5655,
|
|
"valid_targets_mean": 5287.0,
|
|
"valid_targets_min": 1630
|
|
},
|
|
{
|
|
"epoch": 4.8500428449014565,
|
|
"grad_norm": 0.5398004607320777,
|
|
"learning_rate": 1.044230761850452e-05,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08338692039251328,
|
|
"step": 5660,
|
|
"valid_targets_mean": 6707.8,
|
|
"valid_targets_min": 3387
|
|
},
|
|
{
|
|
"epoch": 4.854327335047129,
|
|
"grad_norm": 0.5668342950363051,
|
|
"learning_rate": 1.040479347120578e-05,
|
|
"loss": 0.1517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07433384656906128,
|
|
"step": 5665,
|
|
"valid_targets_mean": 4323.1,
|
|
"valid_targets_min": 1718
|
|
},
|
|
{
|
|
"epoch": 4.858611825192802,
|
|
"grad_norm": 0.6213106739886147,
|
|
"learning_rate": 1.036732312479543e-05,
|
|
"loss": 0.1639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09529195725917816,
|
|
"step": 5670,
|
|
"valid_targets_mean": 5139.4,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 4.8628963153384746,
|
|
"grad_norm": 0.5177404281691247,
|
|
"learning_rate": 1.0329896750320789e-05,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0704999715089798,
|
|
"step": 5675,
|
|
"valid_targets_mean": 5343.9,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 4.867180805484147,
|
|
"grad_norm": 0.520797862309505,
|
|
"learning_rate": 1.0292514518628462e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05940914899110794,
|
|
"step": 5680,
|
|
"valid_targets_mean": 5120.5,
|
|
"valid_targets_min": 3199
|
|
},
|
|
{
|
|
"epoch": 4.87146529562982,
|
|
"grad_norm": 0.5458396359053364,
|
|
"learning_rate": 1.025517660036355e-05,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06686630845069885,
|
|
"step": 5685,
|
|
"valid_targets_mean": 5368.1,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 4.875749785775493,
|
|
"grad_norm": 0.5143392154065023,
|
|
"learning_rate": 1.0217883165968873e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07533658295869827,
|
|
"step": 5690,
|
|
"valid_targets_mean": 5656.9,
|
|
"valid_targets_min": 4122
|
|
},
|
|
{
|
|
"epoch": 4.880034275921165,
|
|
"grad_norm": 0.5466425536619203,
|
|
"learning_rate": 1.0180634385684179e-05,
|
|
"loss": 0.1572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08453083038330078,
|
|
"step": 5695,
|
|
"valid_targets_mean": 5683.0,
|
|
"valid_targets_min": 2789
|
|
},
|
|
{
|
|
"epoch": 4.884318766066838,
|
|
"grad_norm": 0.5423879147218846,
|
|
"learning_rate": 1.0143430429545367e-05,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07795500010251999,
|
|
"step": 5700,
|
|
"valid_targets_mean": 5769.6,
|
|
"valid_targets_min": 4791
|
|
},
|
|
{
|
|
"epoch": 4.888603256212511,
|
|
"grad_norm": 0.5341530692778058,
|
|
"learning_rate": 1.010627146738374e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0644720196723938,
|
|
"step": 5705,
|
|
"valid_targets_mean": 5012.2,
|
|
"valid_targets_min": 2150
|
|
},
|
|
{
|
|
"epoch": 4.892887746358183,
|
|
"grad_norm": 0.5737432099647293,
|
|
"learning_rate": 1.0069157668825185e-05,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11156918108463287,
|
|
"step": 5710,
|
|
"valid_targets_mean": 4350.5,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 4.897172236503856,
|
|
"grad_norm": 0.526678452776669,
|
|
"learning_rate": 1.0032089203289446e-05,
|
|
"loss": 0.1453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06818336993455887,
|
|
"step": 5715,
|
|
"valid_targets_mean": 4246.8,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 4.901456726649529,
|
|
"grad_norm": 0.47464083861660505,
|
|
"learning_rate": 9.99506623998933e-06,
|
|
"loss": 0.1511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07242852449417114,
|
|
"step": 5720,
|
|
"valid_targets_mean": 6936.0,
|
|
"valid_targets_min": 4491
|
|
},
|
|
{
|
|
"epoch": 4.905741216795201,
|
|
"grad_norm": 0.6043407418923966,
|
|
"learning_rate": 9.958088947929909e-06,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10490664094686508,
|
|
"step": 5725,
|
|
"valid_targets_mean": 4582.8,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 4.910025706940874,
|
|
"grad_norm": 0.5525595588437875,
|
|
"learning_rate": 9.921157495907785e-06,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09463279694318771,
|
|
"step": 5730,
|
|
"valid_targets_mean": 5260.6,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 4.914310197086547,
|
|
"grad_norm": 0.5188449026265485,
|
|
"learning_rate": 9.884272052510306e-06,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08734147250652313,
|
|
"step": 5735,
|
|
"valid_targets_mean": 5719.8,
|
|
"valid_targets_min": 3734
|
|
},
|
|
{
|
|
"epoch": 4.918594687232219,
|
|
"grad_norm": 0.550677240559499,
|
|
"learning_rate": 9.847432786114793e-06,
|
|
"loss": 0.1716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07683046162128448,
|
|
"step": 5740,
|
|
"valid_targets_mean": 5706.1,
|
|
"valid_targets_min": 3867
|
|
},
|
|
{
|
|
"epoch": 4.922879177377892,
|
|
"grad_norm": 0.544349033724898,
|
|
"learning_rate": 9.810639864887767e-06,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07485879212617874,
|
|
"step": 5745,
|
|
"valid_targets_mean": 5523.6,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 4.927163667523565,
|
|
"grad_norm": 0.5541874503249078,
|
|
"learning_rate": 9.773893456784212e-06,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08049365133047104,
|
|
"step": 5750,
|
|
"valid_targets_mean": 4984.2,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 4.931448157669237,
|
|
"grad_norm": 0.49873889321961007,
|
|
"learning_rate": 9.737193729546775e-06,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07165542244911194,
|
|
"step": 5755,
|
|
"valid_targets_mean": 5716.2,
|
|
"valid_targets_min": 2894
|
|
},
|
|
{
|
|
"epoch": 4.93573264781491,
|
|
"grad_norm": 0.5310506565288947,
|
|
"learning_rate": 9.700540850705007e-06,
|
|
"loss": 0.1597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08118843287229538,
|
|
"step": 5760,
|
|
"valid_targets_mean": 4976.8,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 4.940017137960583,
|
|
"grad_norm": 0.581273266255407,
|
|
"learning_rate": 9.663934987574597e-06,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08815907686948776,
|
|
"step": 5765,
|
|
"valid_targets_mean": 5708.8,
|
|
"valid_targets_min": 2736
|
|
},
|
|
{
|
|
"epoch": 4.944301628106255,
|
|
"grad_norm": 0.4753411184941003,
|
|
"learning_rate": 9.627376307256626e-06,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07187001407146454,
|
|
"step": 5770,
|
|
"valid_targets_mean": 4870.6,
|
|
"valid_targets_min": 2266
|
|
},
|
|
{
|
|
"epoch": 4.948586118251928,
|
|
"grad_norm": 0.5255200905440895,
|
|
"learning_rate": 9.590864976636774e-06,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07110698521137238,
|
|
"step": 5775,
|
|
"valid_targets_mean": 4578.6,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 4.952870608397601,
|
|
"grad_norm": 0.5223872032353971,
|
|
"learning_rate": 9.554401162384604e-06,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06410413980484009,
|
|
"step": 5780,
|
|
"valid_targets_mean": 4336.6,
|
|
"valid_targets_min": 2024
|
|
},
|
|
{
|
|
"epoch": 4.957155098543273,
|
|
"grad_norm": 0.546798377896757,
|
|
"learning_rate": 9.517985030952739e-06,
|
|
"loss": 0.1658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0852179229259491,
|
|
"step": 5785,
|
|
"valid_targets_mean": 5628.1,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 4.961439588688946,
|
|
"grad_norm": 0.5161720732576406,
|
|
"learning_rate": 9.481616748576171e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06516799330711365,
|
|
"step": 5790,
|
|
"valid_targets_mean": 4302.5,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 4.965724078834619,
|
|
"grad_norm": 0.565400152380402,
|
|
"learning_rate": 9.44529648127144e-06,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07869059592485428,
|
|
"step": 5795,
|
|
"valid_targets_mean": 3989.9,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 4.970008568980291,
|
|
"grad_norm": 0.48670358415908505,
|
|
"learning_rate": 9.409024394835912e-06,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06296761333942413,
|
|
"step": 5800,
|
|
"valid_targets_mean": 4779.6,
|
|
"valid_targets_min": 1387
|
|
},
|
|
{
|
|
"epoch": 4.974293059125964,
|
|
"grad_norm": 0.5317639102339133,
|
|
"learning_rate": 9.372800654847015e-06,
|
|
"loss": 0.1648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05721823498606682,
|
|
"step": 5805,
|
|
"valid_targets_mean": 4486.0,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 4.978577549271637,
|
|
"grad_norm": 0.5874589278661156,
|
|
"learning_rate": 9.336625426661475e-06,
|
|
"loss": 0.1674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07887572050094604,
|
|
"step": 5810,
|
|
"valid_targets_mean": 4364.9,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 4.982862039417309,
|
|
"grad_norm": 0.5575739274103504,
|
|
"learning_rate": 9.300498875414583e-06,
|
|
"loss": 0.1623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08425137400627136,
|
|
"step": 5815,
|
|
"valid_targets_mean": 4901.0,
|
|
"valid_targets_min": 1233
|
|
},
|
|
{
|
|
"epoch": 4.987146529562982,
|
|
"grad_norm": 0.47598116677152114,
|
|
"learning_rate": 9.264421166019408e-06,
|
|
"loss": 0.1441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052196092903614044,
|
|
"step": 5820,
|
|
"valid_targets_mean": 4768.5,
|
|
"valid_targets_min": 2056
|
|
},
|
|
{
|
|
"epoch": 4.991431019708655,
|
|
"grad_norm": 0.5278491426313316,
|
|
"learning_rate": 9.228392463166082e-06,
|
|
"loss": 0.1573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0845300629734993,
|
|
"step": 5825,
|
|
"valid_targets_mean": 7003.1,
|
|
"valid_targets_min": 3785
|
|
},
|
|
{
|
|
"epoch": 4.995715509854327,
|
|
"grad_norm": 0.575656419690066,
|
|
"learning_rate": 9.192412931321011e-06,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09089523553848267,
|
|
"step": 5830,
|
|
"valid_targets_mean": 4330.6,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.536566562014675,
|
|
"learning_rate": 9.156482734726147e-06,
|
|
"loss": 0.173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07456126809120178,
|
|
"step": 5835,
|
|
"valid_targets_mean": 5001.2,
|
|
"valid_targets_min": 2780
|
|
},
|
|
{
|
|
"epoch": 5.004284490145673,
|
|
"grad_norm": 0.461679823419595,
|
|
"learning_rate": 9.120602037398227e-06,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05347277969121933,
|
|
"step": 5840,
|
|
"valid_targets_mean": 4582.1,
|
|
"valid_targets_min": 2106
|
|
},
|
|
{
|
|
"epoch": 5.008568980291345,
|
|
"grad_norm": 0.5615618946463897,
|
|
"learning_rate": 9.084771003128053e-06,
|
|
"loss": 0.1404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09778857976198196,
|
|
"step": 5845,
|
|
"valid_targets_mean": 5213.6,
|
|
"valid_targets_min": 2510
|
|
},
|
|
{
|
|
"epoch": 5.012853470437018,
|
|
"grad_norm": 0.5224785906609419,
|
|
"learning_rate": 9.048989795479697e-06,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06234997138381004,
|
|
"step": 5850,
|
|
"valid_targets_mean": 5434.2,
|
|
"valid_targets_min": 3402
|
|
},
|
|
{
|
|
"epoch": 5.017137960582691,
|
|
"grad_norm": 0.558131422882269,
|
|
"learning_rate": 9.013258577789778e-06,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06631478667259216,
|
|
"step": 5855,
|
|
"valid_targets_mean": 5814.6,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 5.021422450728363,
|
|
"grad_norm": 0.5007153060775194,
|
|
"learning_rate": 8.977577513166745e-06,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05998099595308304,
|
|
"step": 5860,
|
|
"valid_targets_mean": 5688.1,
|
|
"valid_targets_min": 1945
|
|
},
|
|
{
|
|
"epoch": 5.025706940874036,
|
|
"grad_norm": 0.5457382272762462,
|
|
"learning_rate": 8.941946764490075e-06,
|
|
"loss": 0.1495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05917572230100632,
|
|
"step": 5865,
|
|
"valid_targets_mean": 3900.0,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 5.029991431019709,
|
|
"grad_norm": 0.5654326331634673,
|
|
"learning_rate": 8.906366494409572e-06,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0636155903339386,
|
|
"step": 5870,
|
|
"valid_targets_mean": 3952.5,
|
|
"valid_targets_min": 1962
|
|
},
|
|
{
|
|
"epoch": 5.034275921165381,
|
|
"grad_norm": 0.5558719783166709,
|
|
"learning_rate": 8.870836865344605e-06,
|
|
"loss": 0.1436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0862559899687767,
|
|
"step": 5875,
|
|
"valid_targets_mean": 6003.8,
|
|
"valid_targets_min": 2878
|
|
},
|
|
{
|
|
"epoch": 5.038560411311054,
|
|
"grad_norm": 0.5931457187334572,
|
|
"learning_rate": 8.83535803948339e-06,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06579413264989853,
|
|
"step": 5880,
|
|
"valid_targets_mean": 3968.0,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 5.042844901456727,
|
|
"grad_norm": 0.4926081340892514,
|
|
"learning_rate": 8.799930178782217e-06,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048622652888298035,
|
|
"step": 5885,
|
|
"valid_targets_mean": 5776.8,
|
|
"valid_targets_min": 3088
|
|
},
|
|
{
|
|
"epoch": 5.047129391602399,
|
|
"grad_norm": 0.5043569545461065,
|
|
"learning_rate": 8.764553444964727e-06,
|
|
"loss": 0.1385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06690607964992523,
|
|
"step": 5890,
|
|
"valid_targets_mean": 5154.6,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 5.051413881748072,
|
|
"grad_norm": 0.570691437471372,
|
|
"learning_rate": 8.72922799952117e-06,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0650261640548706,
|
|
"step": 5895,
|
|
"valid_targets_mean": 5009.9,
|
|
"valid_targets_min": 2690
|
|
},
|
|
{
|
|
"epoch": 5.055698371893745,
|
|
"grad_norm": 0.544574579251449,
|
|
"learning_rate": 8.693954003707696e-06,
|
|
"loss": 0.1391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07156353443861008,
|
|
"step": 5900,
|
|
"valid_targets_mean": 5317.2,
|
|
"valid_targets_min": 2609
|
|
},
|
|
{
|
|
"epoch": 5.059982862039417,
|
|
"grad_norm": 0.5129014740792754,
|
|
"learning_rate": 8.658731618545568e-06,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06884966790676117,
|
|
"step": 5905,
|
|
"valid_targets_mean": 5266.9,
|
|
"valid_targets_min": 2015
|
|
},
|
|
{
|
|
"epoch": 5.06426735218509,
|
|
"grad_norm": 0.557367027998699,
|
|
"learning_rate": 8.623561004820453e-06,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06522762775421143,
|
|
"step": 5910,
|
|
"valid_targets_mean": 4625.5,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 5.068551842330763,
|
|
"grad_norm": 0.593029732278665,
|
|
"learning_rate": 8.58844232308171e-06,
|
|
"loss": 0.1381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07211321592330933,
|
|
"step": 5915,
|
|
"valid_targets_mean": 5161.1,
|
|
"valid_targets_min": 2515
|
|
},
|
|
{
|
|
"epoch": 5.072836332476435,
|
|
"grad_norm": 0.565911279068255,
|
|
"learning_rate": 8.553375733641617e-06,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0606425404548645,
|
|
"step": 5920,
|
|
"valid_targets_mean": 4933.0,
|
|
"valid_targets_min": 2762
|
|
},
|
|
{
|
|
"epoch": 5.077120822622108,
|
|
"grad_norm": 0.5706569662387289,
|
|
"learning_rate": 8.518361396574659e-06,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08236978203058243,
|
|
"step": 5925,
|
|
"valid_targets_mean": 4947.5,
|
|
"valid_targets_min": 2784
|
|
},
|
|
{
|
|
"epoch": 5.081405312767781,
|
|
"grad_norm": 0.5089594026928188,
|
|
"learning_rate": 8.483399471716793e-06,
|
|
"loss": 0.124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05879615992307663,
|
|
"step": 5930,
|
|
"valid_targets_mean": 6075.5,
|
|
"valid_targets_min": 2211
|
|
},
|
|
{
|
|
"epoch": 5.085689802913453,
|
|
"grad_norm": 0.5281032014270479,
|
|
"learning_rate": 8.44849011866474e-06,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0665479302406311,
|
|
"step": 5935,
|
|
"valid_targets_mean": 6032.6,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 5.089974293059126,
|
|
"grad_norm": 0.5773129793427175,
|
|
"learning_rate": 8.413633496775209e-06,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07037808746099472,
|
|
"step": 5940,
|
|
"valid_targets_mean": 4567.1,
|
|
"valid_targets_min": 1981
|
|
},
|
|
{
|
|
"epoch": 5.094258783204799,
|
|
"grad_norm": 0.545253432414127,
|
|
"learning_rate": 8.378829765164227e-06,
|
|
"loss": 0.143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06493847072124481,
|
|
"step": 5945,
|
|
"valid_targets_mean": 5403.6,
|
|
"valid_targets_min": 1040
|
|
},
|
|
{
|
|
"epoch": 5.098543273350471,
|
|
"grad_norm": 0.5843283397967569,
|
|
"learning_rate": 8.344079082706365e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06504341959953308,
|
|
"step": 5950,
|
|
"valid_targets_mean": 5195.8,
|
|
"valid_targets_min": 3061
|
|
},
|
|
{
|
|
"epoch": 5.102827763496144,
|
|
"grad_norm": 0.746729007527675,
|
|
"learning_rate": 8.309381608034031e-06,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07642590254545212,
|
|
"step": 5955,
|
|
"valid_targets_mean": 5460.0,
|
|
"valid_targets_min": 3577
|
|
},
|
|
{
|
|
"epoch": 5.107112253641817,
|
|
"grad_norm": 0.5842968919515109,
|
|
"learning_rate": 8.274737499536756e-06,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0753287672996521,
|
|
"step": 5960,
|
|
"valid_targets_mean": 4521.8,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 5.111396743787489,
|
|
"grad_norm": 0.5470619824706553,
|
|
"learning_rate": 8.240146915360446e-06,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0719161406159401,
|
|
"step": 5965,
|
|
"valid_targets_mean": 6245.6,
|
|
"valid_targets_min": 4221
|
|
},
|
|
{
|
|
"epoch": 5.115681233933162,
|
|
"grad_norm": 0.5541965469691894,
|
|
"learning_rate": 8.205610013406703e-06,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07677328586578369,
|
|
"step": 5970,
|
|
"valid_targets_mean": 4794.0,
|
|
"valid_targets_min": 3034
|
|
},
|
|
{
|
|
"epoch": 5.119965724078835,
|
|
"grad_norm": 0.5665391192317706,
|
|
"learning_rate": 8.17112695133204e-06,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06618514657020569,
|
|
"step": 5975,
|
|
"valid_targets_mean": 4742.2,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 5.124250214224507,
|
|
"grad_norm": 0.5762731737652794,
|
|
"learning_rate": 8.136697886547235e-06,
|
|
"loss": 0.1372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07710222899913788,
|
|
"step": 5980,
|
|
"valid_targets_mean": 5326.2,
|
|
"valid_targets_min": 2850
|
|
},
|
|
{
|
|
"epoch": 5.12853470437018,
|
|
"grad_norm": 0.6488573742379213,
|
|
"learning_rate": 8.10232297621655e-06,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06464515626430511,
|
|
"step": 5985,
|
|
"valid_targets_mean": 6132.5,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 5.132819194515853,
|
|
"grad_norm": 0.5668846483094866,
|
|
"learning_rate": 8.068002377257047e-06,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07700810581445694,
|
|
"step": 5990,
|
|
"valid_targets_mean": 5266.4,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 5.1371036846615254,
|
|
"grad_norm": 0.6119562445788617,
|
|
"learning_rate": 8.033736246337861e-06,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08774948120117188,
|
|
"step": 5995,
|
|
"valid_targets_mean": 4714.4,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 5.141388174807198,
|
|
"grad_norm": 0.5803264364747844,
|
|
"learning_rate": 7.999524739879486e-06,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07601134479045868,
|
|
"step": 6000,
|
|
"valid_targets_mean": 4622.2,
|
|
"valid_targets_min": 2434
|
|
},
|
|
{
|
|
"epoch": 5.145672664952871,
|
|
"grad_norm": 0.5669247714040354,
|
|
"learning_rate": 7.965368014053065e-06,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0730358436703682,
|
|
"step": 6005,
|
|
"valid_targets_mean": 4821.9,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 5.1499571550985435,
|
|
"grad_norm": 0.6008925220384994,
|
|
"learning_rate": 7.931266224779688e-06,
|
|
"loss": 0.1406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06482844054698944,
|
|
"step": 6010,
|
|
"valid_targets_mean": 4140.9,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 5.154241645244216,
|
|
"grad_norm": 0.6067676646670379,
|
|
"learning_rate": 7.897219527729647e-06,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060043372213840485,
|
|
"step": 6015,
|
|
"valid_targets_mean": 3742.0,
|
|
"valid_targets_min": 1625
|
|
},
|
|
{
|
|
"epoch": 5.158526135389889,
|
|
"grad_norm": 0.5562801412629258,
|
|
"learning_rate": 7.863228078321748e-06,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08964170515537262,
|
|
"step": 6020,
|
|
"valid_targets_mean": 4821.4,
|
|
"valid_targets_min": 3237
|
|
},
|
|
{
|
|
"epoch": 5.1628106255355615,
|
|
"grad_norm": 0.5677679608679317,
|
|
"learning_rate": 7.829292031722605e-06,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07650621235370636,
|
|
"step": 6025,
|
|
"valid_targets_mean": 4806.0,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 5.167095115681234,
|
|
"grad_norm": 0.6154466258539943,
|
|
"learning_rate": 7.795411542845918e-06,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07820950448513031,
|
|
"step": 6030,
|
|
"valid_targets_mean": 4463.4,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 5.171379605826907,
|
|
"grad_norm": 0.534700907388558,
|
|
"learning_rate": 7.761586766351779e-06,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06325490772724152,
|
|
"step": 6035,
|
|
"valid_targets_mean": 5119.2,
|
|
"valid_targets_min": 2836
|
|
},
|
|
{
|
|
"epoch": 5.1756640959725795,
|
|
"grad_norm": 0.6171116306561318,
|
|
"learning_rate": 7.727817856645959e-06,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0964316874742508,
|
|
"step": 6040,
|
|
"valid_targets_mean": 4525.8,
|
|
"valid_targets_min": 1179
|
|
},
|
|
{
|
|
"epoch": 5.179948586118252,
|
|
"grad_norm": 0.49396649995838965,
|
|
"learning_rate": 7.694104967879213e-06,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0832238420844078,
|
|
"step": 6045,
|
|
"valid_targets_mean": 6017.9,
|
|
"valid_targets_min": 2326
|
|
},
|
|
{
|
|
"epoch": 5.184233076263925,
|
|
"grad_norm": 0.5414578284712983,
|
|
"learning_rate": 7.66044825394655e-06,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09561492502689362,
|
|
"step": 6050,
|
|
"valid_targets_mean": 5810.4,
|
|
"valid_targets_min": 3746
|
|
},
|
|
{
|
|
"epoch": 5.1885175664095975,
|
|
"grad_norm": 0.600524577834584,
|
|
"learning_rate": 7.62684786848656e-06,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05960875004529953,
|
|
"step": 6055,
|
|
"valid_targets_mean": 4689.1,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 5.19280205655527,
|
|
"grad_norm": 0.5979465438640408,
|
|
"learning_rate": 7.593303964880696e-06,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05936659500002861,
|
|
"step": 6060,
|
|
"valid_targets_mean": 4414.9,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 5.197086546700943,
|
|
"grad_norm": 0.5581662135102559,
|
|
"learning_rate": 7.559816696252573e-06,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06237490475177765,
|
|
"step": 6065,
|
|
"valid_targets_mean": 5152.8,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 5.2013710368466155,
|
|
"grad_norm": 0.614944712657222,
|
|
"learning_rate": 7.526386215467287e-06,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05573698878288269,
|
|
"step": 6070,
|
|
"valid_targets_mean": 4619.1,
|
|
"valid_targets_min": 2424
|
|
},
|
|
{
|
|
"epoch": 5.205655526992288,
|
|
"grad_norm": 0.5765074497095739,
|
|
"learning_rate": 7.493012675130711e-06,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06293430924415588,
|
|
"step": 6075,
|
|
"valid_targets_mean": 4066.6,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 5.209940017137961,
|
|
"grad_norm": 0.6194830516991278,
|
|
"learning_rate": 7.459696227588768e-06,
|
|
"loss": 0.1596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08389878273010254,
|
|
"step": 6080,
|
|
"valid_targets_mean": 6030.9,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 5.2142245072836335,
|
|
"grad_norm": 0.5820834048829306,
|
|
"learning_rate": 7.426437024926774e-06,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06466574966907501,
|
|
"step": 6085,
|
|
"valid_targets_mean": 5668.6,
|
|
"valid_targets_min": 3608
|
|
},
|
|
{
|
|
"epoch": 5.218508997429306,
|
|
"grad_norm": 0.6448838593097435,
|
|
"learning_rate": 7.393235218968731e-06,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0774383544921875,
|
|
"step": 6090,
|
|
"valid_targets_mean": 3682.6,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 5.222793487574979,
|
|
"grad_norm": 0.5256021821677503,
|
|
"learning_rate": 7.360090961276625e-06,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06239830330014229,
|
|
"step": 6095,
|
|
"valid_targets_mean": 4077.2,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 5.2270779777206515,
|
|
"grad_norm": 0.5506695042512416,
|
|
"learning_rate": 7.327004403149738e-06,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06670170277357101,
|
|
"step": 6100,
|
|
"valid_targets_mean": 5362.8,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 5.231362467866324,
|
|
"grad_norm": 0.5494435488237343,
|
|
"learning_rate": 7.293975695623981e-06,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0640135109424591,
|
|
"step": 6105,
|
|
"valid_targets_mean": 4585.5,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 5.235646958011997,
|
|
"grad_norm": 0.5741122933274755,
|
|
"learning_rate": 7.261004989471172e-06,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08091049641370773,
|
|
"step": 6110,
|
|
"valid_targets_mean": 5247.6,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 5.2399314481576695,
|
|
"grad_norm": 0.527100645437638,
|
|
"learning_rate": 7.228092435198359e-06,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06724964827299118,
|
|
"step": 6115,
|
|
"valid_targets_mean": 4800.0,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 5.244215938303342,
|
|
"grad_norm": 0.5732389963931994,
|
|
"learning_rate": 7.195238183047133e-06,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07661618292331696,
|
|
"step": 6120,
|
|
"valid_targets_mean": 5228.4,
|
|
"valid_targets_min": 2611
|
|
},
|
|
{
|
|
"epoch": 5.248500428449015,
|
|
"grad_norm": 0.6371677881221045,
|
|
"learning_rate": 7.1624423829929446e-06,
|
|
"loss": 0.1344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09487690031528473,
|
|
"step": 6125,
|
|
"valid_targets_mean": 5642.8,
|
|
"valid_targets_min": 3309
|
|
},
|
|
{
|
|
"epoch": 5.2527849185946875,
|
|
"grad_norm": 0.5587444588268791,
|
|
"learning_rate": 7.129705184744413e-06,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09017151594161987,
|
|
"step": 6130,
|
|
"valid_targets_mean": 5733.6,
|
|
"valid_targets_min": 3604
|
|
},
|
|
{
|
|
"epoch": 5.25706940874036,
|
|
"grad_norm": 0.5574414156904585,
|
|
"learning_rate": 7.097026737742667e-06,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05598699301481247,
|
|
"step": 6135,
|
|
"valid_targets_mean": 3445.1,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 5.261353898886033,
|
|
"grad_norm": 0.5174736669960975,
|
|
"learning_rate": 7.064407191160616e-06,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07012724876403809,
|
|
"step": 6140,
|
|
"valid_targets_mean": 6463.2,
|
|
"valid_targets_min": 3191
|
|
},
|
|
{
|
|
"epoch": 5.265638389031706,
|
|
"grad_norm": 0.5082834218010197,
|
|
"learning_rate": 7.0318466939023264e-06,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05886061489582062,
|
|
"step": 6145,
|
|
"valid_targets_mean": 6100.0,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 5.269922879177378,
|
|
"grad_norm": 0.5548709879767999,
|
|
"learning_rate": 6.999345394602286e-06,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08334115892648697,
|
|
"step": 6150,
|
|
"valid_targets_mean": 4543.4,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 5.274207369323051,
|
|
"grad_norm": 0.5676546777822717,
|
|
"learning_rate": 6.966903441624766e-06,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05629216507077217,
|
|
"step": 6155,
|
|
"valid_targets_mean": 4842.1,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 5.278491859468724,
|
|
"grad_norm": 0.6042159590416843,
|
|
"learning_rate": 6.93452098306312e-06,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06647060811519623,
|
|
"step": 6160,
|
|
"valid_targets_mean": 5053.8,
|
|
"valid_targets_min": 1747
|
|
},
|
|
{
|
|
"epoch": 5.282776349614396,
|
|
"grad_norm": 0.6191709463886721,
|
|
"learning_rate": 6.90219816673912e-06,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0582103431224823,
|
|
"step": 6165,
|
|
"valid_targets_mean": 4129.1,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 5.287060839760069,
|
|
"grad_norm": 0.5249507912952034,
|
|
"learning_rate": 6.869935140202293e-06,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055291514843702316,
|
|
"step": 6170,
|
|
"valid_targets_mean": 5271.0,
|
|
"valid_targets_min": 2146
|
|
},
|
|
{
|
|
"epoch": 5.291345329905742,
|
|
"grad_norm": 0.5156906152579579,
|
|
"learning_rate": 6.837732050729207e-06,
|
|
"loss": 0.1402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06559474766254425,
|
|
"step": 6175,
|
|
"valid_targets_mean": 5878.5,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 5.295629820051414,
|
|
"grad_norm": 0.592944268797725,
|
|
"learning_rate": 6.805589045322854e-06,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0791960209608078,
|
|
"step": 6180,
|
|
"valid_targets_mean": 5405.0,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 5.299914310197087,
|
|
"grad_norm": 0.5999862846915844,
|
|
"learning_rate": 6.773506270711925e-06,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08896322548389435,
|
|
"step": 6185,
|
|
"valid_targets_mean": 5432.4,
|
|
"valid_targets_min": 1440
|
|
},
|
|
{
|
|
"epoch": 5.30419880034276,
|
|
"grad_norm": 0.5263317224741103,
|
|
"learning_rate": 6.741483873350181e-06,
|
|
"loss": 0.1466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05374643951654434,
|
|
"step": 6190,
|
|
"valid_targets_mean": 6210.0,
|
|
"valid_targets_min": 3094
|
|
},
|
|
{
|
|
"epoch": 5.308483290488432,
|
|
"grad_norm": 0.6235588769390094,
|
|
"learning_rate": 6.709521999415753e-06,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06328782439231873,
|
|
"step": 6195,
|
|
"valid_targets_mean": 4310.1,
|
|
"valid_targets_min": 2227
|
|
},
|
|
{
|
|
"epoch": 5.312767780634105,
|
|
"grad_norm": 0.567942608527058,
|
|
"learning_rate": 6.677620794810513e-06,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07411561161279678,
|
|
"step": 6200,
|
|
"valid_targets_mean": 5104.1,
|
|
"valid_targets_min": 2564
|
|
},
|
|
{
|
|
"epoch": 5.317052270779778,
|
|
"grad_norm": 0.6366432620717134,
|
|
"learning_rate": 6.6457804051593675e-06,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06682665646076202,
|
|
"step": 6205,
|
|
"valid_targets_mean": 3957.5,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 5.32133676092545,
|
|
"grad_norm": 0.54498818017496,
|
|
"learning_rate": 6.614000975809611e-06,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06897222250699997,
|
|
"step": 6210,
|
|
"valid_targets_mean": 5864.2,
|
|
"valid_targets_min": 3570
|
|
},
|
|
{
|
|
"epoch": 5.325621251071123,
|
|
"grad_norm": 0.5459683344468065,
|
|
"learning_rate": 6.582282651830274e-06,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.066863052546978,
|
|
"step": 6215,
|
|
"valid_targets_mean": 6395.4,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 5.329905741216796,
|
|
"grad_norm": 0.5503926257381906,
|
|
"learning_rate": 6.55062557801144e-06,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08532165735960007,
|
|
"step": 6220,
|
|
"valid_targets_mean": 4813.0,
|
|
"valid_targets_min": 3996
|
|
},
|
|
{
|
|
"epoch": 5.334190231362468,
|
|
"grad_norm": 0.5895214744593827,
|
|
"learning_rate": 6.519029898863594e-06,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07670275866985321,
|
|
"step": 6225,
|
|
"valid_targets_mean": 4062.2,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 5.33847472150814,
|
|
"grad_norm": 0.5638386205387821,
|
|
"learning_rate": 6.487495758616953e-06,
|
|
"loss": 0.1281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08205410838127136,
|
|
"step": 6230,
|
|
"valid_targets_mean": 6390.1,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 5.342759211653814,
|
|
"grad_norm": 0.5167616637272222,
|
|
"learning_rate": 6.456023301220844e-06,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06270787119865417,
|
|
"step": 6235,
|
|
"valid_targets_mean": 5502.0,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 5.347043701799485,
|
|
"grad_norm": 0.5650143553238214,
|
|
"learning_rate": 6.424612670342993e-06,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07442963123321533,
|
|
"step": 6240,
|
|
"valid_targets_mean": 5757.2,
|
|
"valid_targets_min": 3118
|
|
},
|
|
{
|
|
"epoch": 5.351328191945159,
|
|
"grad_norm": 0.558072143781282,
|
|
"learning_rate": 6.3932640093689e-06,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07415077835321426,
|
|
"step": 6245,
|
|
"valid_targets_mean": 5365.6,
|
|
"valid_targets_min": 2881
|
|
},
|
|
{
|
|
"epoch": 5.355612682090831,
|
|
"grad_norm": 0.5233654550955804,
|
|
"learning_rate": 6.361977461401201e-06,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06110835075378418,
|
|
"step": 6250,
|
|
"valid_targets_mean": 4701.1,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 5.359897172236504,
|
|
"grad_norm": 0.5861788265596787,
|
|
"learning_rate": 6.330753169258967e-06,
|
|
"loss": 0.147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055069178342819214,
|
|
"step": 6255,
|
|
"valid_targets_mean": 4977.6,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 5.364181662382176,
|
|
"grad_norm": 0.5832531571711229,
|
|
"learning_rate": 6.299591275477091e-06,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07630908489227295,
|
|
"step": 6260,
|
|
"valid_targets_mean": 3914.6,
|
|
"valid_targets_min": 2431
|
|
},
|
|
{
|
|
"epoch": 5.36846615252785,
|
|
"grad_norm": 0.5049380798779455,
|
|
"learning_rate": 6.268491922305633e-06,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06887591630220413,
|
|
"step": 6265,
|
|
"valid_targets_mean": 5875.1,
|
|
"valid_targets_min": 4278
|
|
},
|
|
{
|
|
"epoch": 5.372750642673521,
|
|
"grad_norm": 0.6388268871126679,
|
|
"learning_rate": 6.23745525170915e-06,
|
|
"loss": 0.1408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06426230072975159,
|
|
"step": 6270,
|
|
"valid_targets_mean": 5025.6,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 5.377035132819195,
|
|
"grad_norm": 0.5446159780516101,
|
|
"learning_rate": 6.206481405366069e-06,
|
|
"loss": 0.1408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08342628180980682,
|
|
"step": 6275,
|
|
"valid_targets_mean": 5403.2,
|
|
"valid_targets_min": 2126
|
|
},
|
|
{
|
|
"epoch": 5.381319622964867,
|
|
"grad_norm": 0.5607310151212304,
|
|
"learning_rate": 6.175570524668022e-06,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06648731231689453,
|
|
"step": 6280,
|
|
"valid_targets_mean": 5965.5,
|
|
"valid_targets_min": 4029
|
|
},
|
|
{
|
|
"epoch": 5.385604113110539,
|
|
"grad_norm": 0.558085994371478,
|
|
"learning_rate": 6.144722750719232e-06,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07811075448989868,
|
|
"step": 6285,
|
|
"valid_targets_mean": 5506.6,
|
|
"valid_targets_min": 3583
|
|
},
|
|
{
|
|
"epoch": 5.389888603256212,
|
|
"grad_norm": 0.5483846780131174,
|
|
"learning_rate": 6.113938224335827e-06,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07613667845726013,
|
|
"step": 6290,
|
|
"valid_targets_mean": 5975.0,
|
|
"valid_targets_min": 3121
|
|
},
|
|
{
|
|
"epoch": 5.394173093401885,
|
|
"grad_norm": 0.5855194434955084,
|
|
"learning_rate": 6.083217086045219e-06,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08013680577278137,
|
|
"step": 6295,
|
|
"valid_targets_mean": 4852.4,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 5.3984575835475574,
|
|
"grad_norm": 0.5350669250301283,
|
|
"learning_rate": 6.052559476085482e-06,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06474751234054565,
|
|
"step": 6300,
|
|
"valid_targets_mean": 5435.9,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 5.40274207369323,
|
|
"grad_norm": 0.58160460055041,
|
|
"learning_rate": 6.021965534404672e-06,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07002052664756775,
|
|
"step": 6305,
|
|
"valid_targets_mean": 4838.4,
|
|
"valid_targets_min": 2493
|
|
},
|
|
{
|
|
"epoch": 5.407026563838903,
|
|
"grad_norm": 0.5396925657648401,
|
|
"learning_rate": 5.991435400660213e-06,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08394481986761093,
|
|
"step": 6310,
|
|
"valid_targets_mean": 6202.5,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 5.4113110539845755,
|
|
"grad_norm": 0.5694513991424146,
|
|
"learning_rate": 5.960969214218244e-06,
|
|
"loss": 0.1456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08601640164852142,
|
|
"step": 6315,
|
|
"valid_targets_mean": 4850.4,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 5.415595544130248,
|
|
"grad_norm": 0.5836812047602808,
|
|
"learning_rate": 5.930567114153016e-06,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0535474456846714,
|
|
"step": 6320,
|
|
"valid_targets_mean": 5394.1,
|
|
"valid_targets_min": 2081
|
|
},
|
|
{
|
|
"epoch": 5.419880034275921,
|
|
"grad_norm": 0.6996982249932461,
|
|
"learning_rate": 5.900229239246211e-06,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07842926681041718,
|
|
"step": 6325,
|
|
"valid_targets_mean": 4914.5,
|
|
"valid_targets_min": 2608
|
|
},
|
|
{
|
|
"epoch": 5.4241645244215935,
|
|
"grad_norm": 0.5927430432906213,
|
|
"learning_rate": 5.869955727986352e-06,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06343986839056015,
|
|
"step": 6330,
|
|
"valid_targets_mean": 4385.8,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 5.428449014567266,
|
|
"grad_norm": 0.5615288154250935,
|
|
"learning_rate": 5.839746718568131e-06,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07984130084514618,
|
|
"step": 6335,
|
|
"valid_targets_mean": 5323.0,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 5.432733504712939,
|
|
"grad_norm": 0.5541340256670348,
|
|
"learning_rate": 5.809602348891808e-06,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061635490506887436,
|
|
"step": 6340,
|
|
"valid_targets_mean": 4427.9,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 5.4370179948586115,
|
|
"grad_norm": 0.5960063481704528,
|
|
"learning_rate": 5.779522756562563e-06,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06906621158123016,
|
|
"step": 6345,
|
|
"valid_targets_mean": 5128.5,
|
|
"valid_targets_min": 2339
|
|
},
|
|
{
|
|
"epoch": 5.441302485004284,
|
|
"grad_norm": 0.5661976276058973,
|
|
"learning_rate": 5.749508078889881e-06,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06279657036066055,
|
|
"step": 6350,
|
|
"valid_targets_mean": 5162.1,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 5.445586975149957,
|
|
"grad_norm": 0.5497893081216243,
|
|
"learning_rate": 5.719558452886929e-06,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0694722831249237,
|
|
"step": 6355,
|
|
"valid_targets_mean": 4845.1,
|
|
"valid_targets_min": 2309
|
|
},
|
|
{
|
|
"epoch": 5.4498714652956295,
|
|
"grad_norm": 0.5163796487394642,
|
|
"learning_rate": 5.689674015269901e-06,
|
|
"loss": 0.1543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05982083082199097,
|
|
"step": 6360,
|
|
"valid_targets_mean": 6627.6,
|
|
"valid_targets_min": 3133
|
|
},
|
|
{
|
|
"epoch": 5.454155955441302,
|
|
"grad_norm": 0.5471286635981826,
|
|
"learning_rate": 5.6598549024574375e-06,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06936630606651306,
|
|
"step": 6365,
|
|
"valid_targets_mean": 5126.2,
|
|
"valid_targets_min": 1611
|
|
},
|
|
{
|
|
"epoch": 5.458440445586975,
|
|
"grad_norm": 0.5944792407962814,
|
|
"learning_rate": 5.6301012505699615e-06,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07246899604797363,
|
|
"step": 6370,
|
|
"valid_targets_mean": 4889.1,
|
|
"valid_targets_min": 1502
|
|
},
|
|
{
|
|
"epoch": 5.4627249357326475,
|
|
"grad_norm": 0.6361275513468415,
|
|
"learning_rate": 5.600413195429082e-06,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07020856440067291,
|
|
"step": 6375,
|
|
"valid_targets_mean": 5216.9,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 5.46700942587832,
|
|
"grad_norm": 0.5612537422879182,
|
|
"learning_rate": 5.570790872556966e-06,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0894571989774704,
|
|
"step": 6380,
|
|
"valid_targets_mean": 6033.5,
|
|
"valid_targets_min": 3294
|
|
},
|
|
{
|
|
"epoch": 5.471293916023993,
|
|
"grad_norm": 0.5000465417769924,
|
|
"learning_rate": 5.541234417175718e-06,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05969848483800888,
|
|
"step": 6385,
|
|
"valid_targets_mean": 5685.2,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 5.4755784061696655,
|
|
"grad_norm": 0.6351891861499561,
|
|
"learning_rate": 5.5117439642067725e-06,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09923093765974045,
|
|
"step": 6390,
|
|
"valid_targets_mean": 4533.6,
|
|
"valid_targets_min": 2594
|
|
},
|
|
{
|
|
"epoch": 5.479862896315338,
|
|
"grad_norm": 0.5210414729111282,
|
|
"learning_rate": 5.482319648270278e-06,
|
|
"loss": 0.1262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0708557665348053,
|
|
"step": 6395,
|
|
"valid_targets_mean": 5451.4,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 5.484147386461011,
|
|
"grad_norm": 0.5334767406234865,
|
|
"learning_rate": 5.452961603684459e-06,
|
|
"loss": 0.1408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07409349828958511,
|
|
"step": 6400,
|
|
"valid_targets_mean": 6112.6,
|
|
"valid_targets_min": 2179
|
|
},
|
|
{
|
|
"epoch": 5.4884318766066835,
|
|
"grad_norm": 0.5412429447282396,
|
|
"learning_rate": 5.423669964465028e-06,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06016102433204651,
|
|
"step": 6405,
|
|
"valid_targets_mean": 5271.8,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 5.492716366752356,
|
|
"grad_norm": 0.5818658258711826,
|
|
"learning_rate": 5.394444864324564e-06,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0727088674902916,
|
|
"step": 6410,
|
|
"valid_targets_mean": 4549.9,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 5.497000856898029,
|
|
"grad_norm": 0.5784324823124002,
|
|
"learning_rate": 5.365286436671903e-06,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07919305562973022,
|
|
"step": 6415,
|
|
"valid_targets_mean": 4619.6,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 5.5012853470437015,
|
|
"grad_norm": 0.5460056842645928,
|
|
"learning_rate": 5.3361948146115306e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.063385508954525,
|
|
"step": 6420,
|
|
"valid_targets_mean": 4789.6,
|
|
"valid_targets_min": 3603
|
|
},
|
|
{
|
|
"epoch": 5.505569837189374,
|
|
"grad_norm": 0.49938221905703617,
|
|
"learning_rate": 5.307170130942976e-06,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06451307237148285,
|
|
"step": 6425,
|
|
"valid_targets_mean": 6023.5,
|
|
"valid_targets_min": 2947
|
|
},
|
|
{
|
|
"epoch": 5.509854327335047,
|
|
"grad_norm": 0.562066135955173,
|
|
"learning_rate": 5.278212518160208e-06,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07842020690441132,
|
|
"step": 6430,
|
|
"valid_targets_mean": 5180.6,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 5.5141388174807195,
|
|
"grad_norm": 0.5540925996211802,
|
|
"learning_rate": 5.249322108451014e-06,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08028965443372726,
|
|
"step": 6435,
|
|
"valid_targets_mean": 5239.5,
|
|
"valid_targets_min": 1830
|
|
},
|
|
{
|
|
"epoch": 5.518423307626392,
|
|
"grad_norm": 0.5073241633366583,
|
|
"learning_rate": 5.220499033696409e-06,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06025489419698715,
|
|
"step": 6440,
|
|
"valid_targets_mean": 4024.8,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 5.522707797772065,
|
|
"grad_norm": 0.5717332273536765,
|
|
"learning_rate": 5.191743425470039e-06,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07372932136058807,
|
|
"step": 6445,
|
|
"valid_targets_mean": 5477.2,
|
|
"valid_targets_min": 3413
|
|
},
|
|
{
|
|
"epoch": 5.526992287917738,
|
|
"grad_norm": 0.5215747809897383,
|
|
"learning_rate": 5.163055415037565e-06,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0440630242228508,
|
|
"step": 6450,
|
|
"valid_targets_mean": 4580.2,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 5.53127677806341,
|
|
"grad_norm": 0.5442611902593508,
|
|
"learning_rate": 5.134435133356091e-06,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06199388578534126,
|
|
"step": 6455,
|
|
"valid_targets_mean": 5632.6,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 5.535561268209083,
|
|
"grad_norm": 0.6524942377050443,
|
|
"learning_rate": 5.105882711073524e-06,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058554474264383316,
|
|
"step": 6460,
|
|
"valid_targets_mean": 4896.4,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 5.539845758354756,
|
|
"grad_norm": 0.6548298261620451,
|
|
"learning_rate": 5.077398278528023e-06,
|
|
"loss": 0.1317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05502365902066231,
|
|
"step": 6465,
|
|
"valid_targets_mean": 4457.4,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 5.544130248500428,
|
|
"grad_norm": 0.5792243897972685,
|
|
"learning_rate": 5.04898196574737e-06,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0710696280002594,
|
|
"step": 6470,
|
|
"valid_targets_mean": 5837.4,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 5.548414738646101,
|
|
"grad_norm": 0.5385213971717662,
|
|
"learning_rate": 5.020633902448391e-06,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07189559936523438,
|
|
"step": 6475,
|
|
"valid_targets_mean": 5131.1,
|
|
"valid_targets_min": 2320
|
|
},
|
|
{
|
|
"epoch": 5.552699228791774,
|
|
"grad_norm": 0.5951604508945392,
|
|
"learning_rate": 4.992354218036362e-06,
|
|
"loss": 0.1493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07350689172744751,
|
|
"step": 6480,
|
|
"valid_targets_mean": 3616.0,
|
|
"valid_targets_min": 1744
|
|
},
|
|
{
|
|
"epoch": 5.556983718937446,
|
|
"grad_norm": 0.5223294450121244,
|
|
"learning_rate": 4.9641430416044124e-06,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06630503386259079,
|
|
"step": 6485,
|
|
"valid_targets_mean": 5069.4,
|
|
"valid_targets_min": 3190
|
|
},
|
|
{
|
|
"epoch": 5.561268209083119,
|
|
"grad_norm": 0.5302966929442247,
|
|
"learning_rate": 4.93600050193296e-06,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05977999418973923,
|
|
"step": 6490,
|
|
"valid_targets_mean": 5080.2,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 5.565552699228792,
|
|
"grad_norm": 0.5793933577454219,
|
|
"learning_rate": 4.907926727489083e-06,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06778793036937714,
|
|
"step": 6495,
|
|
"valid_targets_mean": 5024.2,
|
|
"valid_targets_min": 3738
|
|
},
|
|
{
|
|
"epoch": 5.569837189374464,
|
|
"grad_norm": 0.5749514354601191,
|
|
"learning_rate": 4.879921846425972e-06,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07842457294464111,
|
|
"step": 6500,
|
|
"valid_targets_mean": 6434.0,
|
|
"valid_targets_min": 2762
|
|
},
|
|
{
|
|
"epoch": 5.574121679520137,
|
|
"grad_norm": 0.5535613872524504,
|
|
"learning_rate": 4.851985986582322e-06,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07504276931285858,
|
|
"step": 6505,
|
|
"valid_targets_mean": 4328.8,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 5.57840616966581,
|
|
"grad_norm": 0.5083713146732084,
|
|
"learning_rate": 4.8241192754817464e-06,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051820166409015656,
|
|
"step": 6510,
|
|
"valid_targets_mean": 4595.5,
|
|
"valid_targets_min": 1220
|
|
},
|
|
{
|
|
"epoch": 5.582690659811482,
|
|
"grad_norm": 0.5143527720674547,
|
|
"learning_rate": 4.7963218403322095e-06,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07099468261003494,
|
|
"step": 6515,
|
|
"valid_targets_mean": 5824.1,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 5.586975149957155,
|
|
"grad_norm": 0.5586273797079392,
|
|
"learning_rate": 4.76859380802545e-06,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07062015682458878,
|
|
"step": 6520,
|
|
"valid_targets_mean": 4951.0,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 5.591259640102828,
|
|
"grad_norm": 0.5659530059931339,
|
|
"learning_rate": 4.74093530513638e-06,
|
|
"loss": 0.1298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07185934484004974,
|
|
"step": 6525,
|
|
"valid_targets_mean": 4485.4,
|
|
"valid_targets_min": 3572
|
|
},
|
|
{
|
|
"epoch": 5.5955441302485,
|
|
"grad_norm": 0.559165573881336,
|
|
"learning_rate": 4.713346457922514e-06,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07086736708879471,
|
|
"step": 6530,
|
|
"valid_targets_mean": 4172.9,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 5.599828620394173,
|
|
"grad_norm": 0.6504390747344749,
|
|
"learning_rate": 4.6858273923234145e-06,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.064616858959198,
|
|
"step": 6535,
|
|
"valid_targets_mean": 4253.6,
|
|
"valid_targets_min": 1969
|
|
},
|
|
{
|
|
"epoch": 5.604113110539846,
|
|
"grad_norm": 0.5689426033257432,
|
|
"learning_rate": 4.658378233960088e-06,
|
|
"loss": 0.1376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05738406628370285,
|
|
"step": 6540,
|
|
"valid_targets_mean": 5430.6,
|
|
"valid_targets_min": 2168
|
|
},
|
|
{
|
|
"epoch": 5.608397600685518,
|
|
"grad_norm": 0.6003852321277522,
|
|
"learning_rate": 4.6309991081344215e-06,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06764773279428482,
|
|
"step": 6545,
|
|
"valid_targets_mean": 4634.0,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 5.612682090831191,
|
|
"grad_norm": 0.5314130423969416,
|
|
"learning_rate": 4.603690139828611e-06,
|
|
"loss": 0.16,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.063227578997612,
|
|
"step": 6550,
|
|
"valid_targets_mean": 5144.6,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 5.616966580976864,
|
|
"grad_norm": 0.5519198942615016,
|
|
"learning_rate": 4.576451453704614e-06,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06084362044930458,
|
|
"step": 6555,
|
|
"valid_targets_mean": 4861.1,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 5.621251071122536,
|
|
"grad_norm": 0.5085490164316521,
|
|
"learning_rate": 4.549283174103529e-06,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05866881459951401,
|
|
"step": 6560,
|
|
"valid_targets_mean": 5328.1,
|
|
"valid_targets_min": 3653
|
|
},
|
|
{
|
|
"epoch": 5.625535561268209,
|
|
"grad_norm": 0.5166410092324383,
|
|
"learning_rate": 4.522185425045074e-06,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07482907176017761,
|
|
"step": 6565,
|
|
"valid_targets_mean": 5045.6,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 5.629820051413882,
|
|
"grad_norm": 0.5735608739843969,
|
|
"learning_rate": 4.495158330227005e-06,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08168354630470276,
|
|
"step": 6570,
|
|
"valid_targets_mean": 5395.8,
|
|
"valid_targets_min": 4029
|
|
},
|
|
{
|
|
"epoch": 5.634104541559554,
|
|
"grad_norm": 0.5850271273841091,
|
|
"learning_rate": 4.468202013024547e-06,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07387813925743103,
|
|
"step": 6575,
|
|
"valid_targets_mean": 4727.8,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 5.638389031705227,
|
|
"grad_norm": 0.6816286906770415,
|
|
"learning_rate": 4.441316596489826e-06,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09496738016605377,
|
|
"step": 6580,
|
|
"valid_targets_mean": 5681.8,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 5.6426735218509,
|
|
"grad_norm": 0.6492212601368968,
|
|
"learning_rate": 4.414502203351336e-06,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08192385733127594,
|
|
"step": 6585,
|
|
"valid_targets_mean": 4593.8,
|
|
"valid_targets_min": 2645
|
|
},
|
|
{
|
|
"epoch": 5.646958011996572,
|
|
"grad_norm": 0.5776686099072865,
|
|
"learning_rate": 4.387758956013342e-06,
|
|
"loss": 0.1509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06738873571157455,
|
|
"step": 6590,
|
|
"valid_targets_mean": 3651.4,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 5.651242502142245,
|
|
"grad_norm": 0.7273725593882753,
|
|
"learning_rate": 4.361086976555336e-06,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055989429354667664,
|
|
"step": 6595,
|
|
"valid_targets_mean": 5591.0,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 5.655526992287918,
|
|
"grad_norm": 0.5714109431160903,
|
|
"learning_rate": 4.334486386731487e-06,
|
|
"loss": 0.1429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06508769094944,
|
|
"step": 6600,
|
|
"valid_targets_mean": 4761.9,
|
|
"valid_targets_min": 1046
|
|
},
|
|
{
|
|
"epoch": 5.65981148243359,
|
|
"grad_norm": 0.5476429325916928,
|
|
"learning_rate": 4.307957307970085e-06,
|
|
"loss": 0.1259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06340515613555908,
|
|
"step": 6605,
|
|
"valid_targets_mean": 4844.9,
|
|
"valid_targets_min": 2410
|
|
},
|
|
{
|
|
"epoch": 5.664095972579263,
|
|
"grad_norm": 0.5006033373634183,
|
|
"learning_rate": 4.2814998613729755e-06,
|
|
"loss": 0.1147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05262615531682968,
|
|
"step": 6610,
|
|
"valid_targets_mean": 6462.6,
|
|
"valid_targets_min": 5640
|
|
},
|
|
{
|
|
"epoch": 5.668380462724936,
|
|
"grad_norm": 0.5933953350490176,
|
|
"learning_rate": 4.255114167715005e-06,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07736065983772278,
|
|
"step": 6615,
|
|
"valid_targets_mean": 5382.6,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 5.672664952870608,
|
|
"grad_norm": 0.5266374655195465,
|
|
"learning_rate": 4.228800347443491e-06,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07201828062534332,
|
|
"step": 6620,
|
|
"valid_targets_mean": 5567.9,
|
|
"valid_targets_min": 3669
|
|
},
|
|
{
|
|
"epoch": 5.676949443016281,
|
|
"grad_norm": 0.5898754476535092,
|
|
"learning_rate": 4.202558520677651e-06,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07570499181747437,
|
|
"step": 6625,
|
|
"valid_targets_mean": 4978.1,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 5.681233933161954,
|
|
"grad_norm": 0.4991332472101233,
|
|
"learning_rate": 4.17638880720806e-06,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05089268088340759,
|
|
"step": 6630,
|
|
"valid_targets_mean": 6274.0,
|
|
"valid_targets_min": 3291
|
|
},
|
|
{
|
|
"epoch": 5.685518423307626,
|
|
"grad_norm": 0.5419759634610473,
|
|
"learning_rate": 4.150291326496101e-06,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07586829364299774,
|
|
"step": 6635,
|
|
"valid_targets_mean": 5088.5,
|
|
"valid_targets_min": 2381
|
|
},
|
|
{
|
|
"epoch": 5.689802913453299,
|
|
"grad_norm": 0.5441222779036342,
|
|
"learning_rate": 4.1242661976734434e-06,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04856753721833229,
|
|
"step": 6640,
|
|
"valid_targets_mean": 4978.9,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 5.694087403598972,
|
|
"grad_norm": 0.6207196786094554,
|
|
"learning_rate": 4.09831353954145e-06,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0738341212272644,
|
|
"step": 6645,
|
|
"valid_targets_mean": 4128.1,
|
|
"valid_targets_min": 1906
|
|
},
|
|
{
|
|
"epoch": 5.698371893744644,
|
|
"grad_norm": 0.5213327231009702,
|
|
"learning_rate": 4.072433470570696e-06,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05955669283866882,
|
|
"step": 6650,
|
|
"valid_targets_mean": 4380.0,
|
|
"valid_targets_min": 2072
|
|
},
|
|
{
|
|
"epoch": 5.702656383890317,
|
|
"grad_norm": 0.5453669018514253,
|
|
"learning_rate": 4.046626108900369e-06,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07227616012096405,
|
|
"step": 6655,
|
|
"valid_targets_mean": 5088.1,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 5.70694087403599,
|
|
"grad_norm": 0.5739041238549919,
|
|
"learning_rate": 4.0208915723377724e-06,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08359837532043457,
|
|
"step": 6660,
|
|
"valid_targets_mean": 4858.0,
|
|
"valid_targets_min": 2031
|
|
},
|
|
{
|
|
"epoch": 5.711225364181662,
|
|
"grad_norm": 0.5221244934963233,
|
|
"learning_rate": 3.995229978357771e-06,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06315900385379791,
|
|
"step": 6665,
|
|
"valid_targets_mean": 5562.8,
|
|
"valid_targets_min": 2966
|
|
},
|
|
{
|
|
"epoch": 5.715509854327335,
|
|
"grad_norm": 0.5293926691188072,
|
|
"learning_rate": 3.969641444102241e-06,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0793573409318924,
|
|
"step": 6670,
|
|
"valid_targets_mean": 4634.9,
|
|
"valid_targets_min": 3504
|
|
},
|
|
{
|
|
"epoch": 5.719794344473008,
|
|
"grad_norm": 0.5911683803276364,
|
|
"learning_rate": 3.944126086379578e-06,
|
|
"loss": 0.1405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059656694531440735,
|
|
"step": 6675,
|
|
"valid_targets_mean": 4381.6,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 5.72407883461868,
|
|
"grad_norm": 0.5868429249145188,
|
|
"learning_rate": 3.9186840216641075e-06,
|
|
"loss": 0.1452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07227806746959686,
|
|
"step": 6680,
|
|
"valid_targets_mean": 2916.5,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 5.728363324764353,
|
|
"grad_norm": 0.5684965723566315,
|
|
"learning_rate": 3.893315366095609e-06,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0693536251783371,
|
|
"step": 6685,
|
|
"valid_targets_mean": 6093.6,
|
|
"valid_targets_min": 2979
|
|
},
|
|
{
|
|
"epoch": 5.732647814910026,
|
|
"grad_norm": 0.5603873006925839,
|
|
"learning_rate": 3.868020235478735e-06,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09165522456169128,
|
|
"step": 6690,
|
|
"valid_targets_mean": 5763.5,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 5.736932305055698,
|
|
"grad_norm": 0.5469489775102141,
|
|
"learning_rate": 3.842798745282521e-06,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058962516486644745,
|
|
"step": 6695,
|
|
"valid_targets_mean": 4978.2,
|
|
"valid_targets_min": 2444
|
|
},
|
|
{
|
|
"epoch": 5.741216795201371,
|
|
"grad_norm": 0.6019679932692666,
|
|
"learning_rate": 3.817651010639833e-06,
|
|
"loss": 0.141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07497408986091614,
|
|
"step": 6700,
|
|
"valid_targets_mean": 3552.8,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 5.745501285347044,
|
|
"grad_norm": 0.6504758349385636,
|
|
"learning_rate": 3.7925771463468564e-06,
|
|
"loss": 0.1484,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07307694852352142,
|
|
"step": 6705,
|
|
"valid_targets_mean": 3517.8,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 5.749785775492716,
|
|
"grad_norm": 0.5541217867040226,
|
|
"learning_rate": 3.7675772668625765e-06,
|
|
"loss": 0.1523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06129279360175133,
|
|
"step": 6710,
|
|
"valid_targets_mean": 5937.2,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 5.754070265638389,
|
|
"grad_norm": 0.5592631375594215,
|
|
"learning_rate": 3.742651486308233e-06,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07175225019454956,
|
|
"step": 6715,
|
|
"valid_targets_mean": 5371.6,
|
|
"valid_targets_min": 2908
|
|
},
|
|
{
|
|
"epoch": 5.758354755784062,
|
|
"grad_norm": 0.5535958969395064,
|
|
"learning_rate": 3.7177999184668267e-06,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09704683721065521,
|
|
"step": 6720,
|
|
"valid_targets_mean": 6141.8,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 5.762639245929734,
|
|
"grad_norm": 0.5287083456633489,
|
|
"learning_rate": 3.693022676782578e-06,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06492763757705688,
|
|
"step": 6725,
|
|
"valid_targets_mean": 5959.8,
|
|
"valid_targets_min": 3981
|
|
},
|
|
{
|
|
"epoch": 5.766923736075407,
|
|
"grad_norm": 0.5235234483770389,
|
|
"learning_rate": 3.6683198743604176e-06,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05290340632200241,
|
|
"step": 6730,
|
|
"valid_targets_mean": 5557.6,
|
|
"valid_targets_min": 2775
|
|
},
|
|
{
|
|
"epoch": 5.77120822622108,
|
|
"grad_norm": 0.5744313280557972,
|
|
"learning_rate": 3.6436916239654775e-06,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07471472024917603,
|
|
"step": 6735,
|
|
"valid_targets_mean": 5893.5,
|
|
"valid_targets_min": 4005
|
|
},
|
|
{
|
|
"epoch": 5.775492716366752,
|
|
"grad_norm": 0.5191831107637773,
|
|
"learning_rate": 3.619138038022558e-06,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05435687303543091,
|
|
"step": 6740,
|
|
"valid_targets_mean": 6395.0,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 5.779777206512425,
|
|
"grad_norm": 0.5542420902763224,
|
|
"learning_rate": 3.594659228615638e-06,
|
|
"loss": 0.1419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06281452625989914,
|
|
"step": 6745,
|
|
"valid_targets_mean": 4446.0,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 5.784061696658098,
|
|
"grad_norm": 0.57109479507259,
|
|
"learning_rate": 3.5702553074873515e-06,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07405635714530945,
|
|
"step": 6750,
|
|
"valid_targets_mean": 5396.8,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 5.78834618680377,
|
|
"grad_norm": 0.5845579805680661,
|
|
"learning_rate": 3.5459263860384676e-06,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0800202414393425,
|
|
"step": 6755,
|
|
"valid_targets_mean": 5868.5,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 5.792630676949443,
|
|
"grad_norm": 0.5083013565986827,
|
|
"learning_rate": 3.521672575327393e-06,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054221805185079575,
|
|
"step": 6760,
|
|
"valid_targets_mean": 6155.0,
|
|
"valid_targets_min": 1994
|
|
},
|
|
{
|
|
"epoch": 5.796915167095116,
|
|
"grad_norm": 0.5761850082494403,
|
|
"learning_rate": 3.4974939860696667e-06,
|
|
"loss": 0.1535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05445557087659836,
|
|
"step": 6765,
|
|
"valid_targets_mean": 3601.5,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 5.8011996572407885,
|
|
"grad_norm": 0.638371981329725,
|
|
"learning_rate": 3.473390728637447e-06,
|
|
"loss": 0.1525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07390281558036804,
|
|
"step": 6770,
|
|
"valid_targets_mean": 3327.8,
|
|
"valid_targets_min": 1259
|
|
},
|
|
{
|
|
"epoch": 5.805484147386461,
|
|
"grad_norm": 0.7196984139985203,
|
|
"learning_rate": 3.4493629130590246e-06,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07923655956983566,
|
|
"step": 6775,
|
|
"valid_targets_mean": 4779.0,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 5.809768637532134,
|
|
"grad_norm": 0.5267892321366978,
|
|
"learning_rate": 3.425410649018288e-06,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05486609786748886,
|
|
"step": 6780,
|
|
"valid_targets_mean": 4125.4,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 5.8140531276778065,
|
|
"grad_norm": 0.4997215023511863,
|
|
"learning_rate": 3.4015340458542667e-06,
|
|
"loss": 0.1374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06113504618406296,
|
|
"step": 6785,
|
|
"valid_targets_mean": 6006.1,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 5.818337617823479,
|
|
"grad_norm": 0.5803591765288956,
|
|
"learning_rate": 3.3777332125605876e-06,
|
|
"loss": 0.1411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0857536643743515,
|
|
"step": 6790,
|
|
"valid_targets_mean": 4885.4,
|
|
"valid_targets_min": 1143
|
|
},
|
|
{
|
|
"epoch": 5.822622107969152,
|
|
"grad_norm": 0.6302037090494906,
|
|
"learning_rate": 3.354008257785004e-06,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06109432876110077,
|
|
"step": 6795,
|
|
"valid_targets_mean": 4721.6,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 5.8269065981148245,
|
|
"grad_norm": 0.5321710621405683,
|
|
"learning_rate": 3.3303592898288996e-06,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06634179502725601,
|
|
"step": 6800,
|
|
"valid_targets_mean": 5669.6,
|
|
"valid_targets_min": 2958
|
|
},
|
|
{
|
|
"epoch": 5.831191088260497,
|
|
"grad_norm": 0.5265118919796228,
|
|
"learning_rate": 3.3067864166467724e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06433935463428497,
|
|
"step": 6805,
|
|
"valid_targets_mean": 5867.8,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 5.83547557840617,
|
|
"grad_norm": 0.6185168294053308,
|
|
"learning_rate": 3.2832897458457746e-06,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0700758695602417,
|
|
"step": 6810,
|
|
"valid_targets_mean": 4015.1,
|
|
"valid_targets_min": 1654
|
|
},
|
|
{
|
|
"epoch": 5.8397600685518425,
|
|
"grad_norm": 0.5574030938626355,
|
|
"learning_rate": 3.2598693846852037e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06746580451726913,
|
|
"step": 6815,
|
|
"valid_targets_mean": 5562.1,
|
|
"valid_targets_min": 3556
|
|
},
|
|
{
|
|
"epoch": 5.844044558697515,
|
|
"grad_norm": 0.544442153355723,
|
|
"learning_rate": 3.2365254400760036e-06,
|
|
"loss": 0.1404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09361623972654343,
|
|
"step": 6820,
|
|
"valid_targets_mean": 5243.6,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 5.848329048843188,
|
|
"grad_norm": 0.5254064816831592,
|
|
"learning_rate": 3.2132580185802876e-06,
|
|
"loss": 0.131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051424961537122726,
|
|
"step": 6825,
|
|
"valid_targets_mean": 5415.0,
|
|
"valid_targets_min": 3837
|
|
},
|
|
{
|
|
"epoch": 5.8526135389888605,
|
|
"grad_norm": 0.5915974965701197,
|
|
"learning_rate": 3.190067226410858e-06,
|
|
"loss": 0.1443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08004424721002579,
|
|
"step": 6830,
|
|
"valid_targets_mean": 5658.2,
|
|
"valid_targets_min": 3150
|
|
},
|
|
{
|
|
"epoch": 5.856898029134533,
|
|
"grad_norm": 0.504995306642246,
|
|
"learning_rate": 3.1669531694307e-06,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041914355009794235,
|
|
"step": 6835,
|
|
"valid_targets_mean": 5900.6,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 5.861182519280206,
|
|
"grad_norm": 0.6108621619033056,
|
|
"learning_rate": 3.1439159531525344e-06,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.068336620926857,
|
|
"step": 6840,
|
|
"valid_targets_mean": 5198.2,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 5.8654670094258785,
|
|
"grad_norm": 0.5372951593834955,
|
|
"learning_rate": 3.1209556827382915e-06,
|
|
"loss": 0.1414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05392158031463623,
|
|
"step": 6845,
|
|
"valid_targets_mean": 4665.5,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 5.869751499571551,
|
|
"grad_norm": 0.5427030654594667,
|
|
"learning_rate": 3.098072462998671e-06,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06130759045481682,
|
|
"step": 6850,
|
|
"valid_targets_mean": 5294.8,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 5.874035989717224,
|
|
"grad_norm": 0.5551297016793235,
|
|
"learning_rate": 3.07526639839264e-06,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07664550840854645,
|
|
"step": 6855,
|
|
"valid_targets_mean": 6137.0,
|
|
"valid_targets_min": 3194
|
|
},
|
|
{
|
|
"epoch": 5.8783204798628965,
|
|
"grad_norm": 0.552587788514632,
|
|
"learning_rate": 3.0525375930269584e-06,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07360447943210602,
|
|
"step": 6860,
|
|
"valid_targets_mean": 5645.5,
|
|
"valid_targets_min": 2589
|
|
},
|
|
{
|
|
"epoch": 5.882604970008569,
|
|
"grad_norm": 0.5800728441639016,
|
|
"learning_rate": 3.0298861506557076e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057409510016441345,
|
|
"step": 6865,
|
|
"valid_targets_mean": 4250.6,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 5.886889460154242,
|
|
"grad_norm": 0.5472337989778477,
|
|
"learning_rate": 3.0073121746798196e-06,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053992968052625656,
|
|
"step": 6870,
|
|
"valid_targets_mean": 5314.0,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 5.8911739502999145,
|
|
"grad_norm": 0.5339616139250007,
|
|
"learning_rate": 2.9848157681466093e-06,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06431794911623001,
|
|
"step": 6875,
|
|
"valid_targets_mean": 4802.9,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 5.895458440445587,
|
|
"grad_norm": 0.5851089093746733,
|
|
"learning_rate": 2.96239703374928e-06,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07572221010923386,
|
|
"step": 6880,
|
|
"valid_targets_mean": 5150.2,
|
|
"valid_targets_min": 3099
|
|
},
|
|
{
|
|
"epoch": 5.89974293059126,
|
|
"grad_norm": 0.5482802919748951,
|
|
"learning_rate": 2.940056073826494e-06,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05965782329440117,
|
|
"step": 6885,
|
|
"valid_targets_mean": 5102.5,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 5.9040274207369325,
|
|
"grad_norm": 0.5859800632364757,
|
|
"learning_rate": 2.917792990361863e-06,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06816573441028595,
|
|
"step": 6890,
|
|
"valid_targets_mean": 4878.9,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 5.908311910882605,
|
|
"grad_norm": 0.5243638339363336,
|
|
"learning_rate": 2.8956078849835133e-06,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05222854018211365,
|
|
"step": 6895,
|
|
"valid_targets_mean": 6770.0,
|
|
"valid_targets_min": 3501
|
|
},
|
|
{
|
|
"epoch": 5.912596401028278,
|
|
"grad_norm": 0.6186882882901905,
|
|
"learning_rate": 2.873500858963607e-06,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04518494755029678,
|
|
"step": 6900,
|
|
"valid_targets_mean": 4287.5,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 5.9168808911739506,
|
|
"grad_norm": 0.5803000774619191,
|
|
"learning_rate": 2.85147201321788e-06,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08812421560287476,
|
|
"step": 6905,
|
|
"valid_targets_mean": 4709.4,
|
|
"valid_targets_min": 2221
|
|
},
|
|
{
|
|
"epoch": 5.921165381319623,
|
|
"grad_norm": 0.6531158925885155,
|
|
"learning_rate": 2.829521448305199e-06,
|
|
"loss": 0.1465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08043619245290756,
|
|
"step": 6910,
|
|
"valid_targets_mean": 4030.8,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 5.925449871465296,
|
|
"grad_norm": 0.6188264080269267,
|
|
"learning_rate": 2.807649264427079e-06,
|
|
"loss": 0.1578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09220334887504578,
|
|
"step": 6915,
|
|
"valid_targets_mean": 4534.5,
|
|
"valid_targets_min": 2656
|
|
},
|
|
{
|
|
"epoch": 5.929734361610969,
|
|
"grad_norm": 0.5381152998325993,
|
|
"learning_rate": 2.785855561427231e-06,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05605997145175934,
|
|
"step": 6920,
|
|
"valid_targets_mean": 5281.2,
|
|
"valid_targets_min": 3046
|
|
},
|
|
{
|
|
"epoch": 5.934018851756641,
|
|
"grad_norm": 0.5777102355387708,
|
|
"learning_rate": 2.7641404387911253e-06,
|
|
"loss": 0.1401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08713674545288086,
|
|
"step": 6925,
|
|
"valid_targets_mean": 5400.8,
|
|
"valid_targets_min": 2651
|
|
},
|
|
{
|
|
"epoch": 5.938303341902314,
|
|
"grad_norm": 0.7710764345542876,
|
|
"learning_rate": 2.7425039956455113e-06,
|
|
"loss": 0.1451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08280148357152939,
|
|
"step": 6930,
|
|
"valid_targets_mean": 5259.6,
|
|
"valid_targets_min": 1085
|
|
},
|
|
{
|
|
"epoch": 5.942587832047987,
|
|
"grad_norm": 0.49364547735421227,
|
|
"learning_rate": 2.720946330757972e-06,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06348837912082672,
|
|
"step": 6935,
|
|
"valid_targets_mean": 5807.9,
|
|
"valid_targets_min": 3536
|
|
},
|
|
{
|
|
"epoch": 5.946872322193659,
|
|
"grad_norm": 0.6205905188458044,
|
|
"learning_rate": 2.699467542536498e-06,
|
|
"loss": 0.1357,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11586824059486389,
|
|
"step": 6940,
|
|
"valid_targets_mean": 6235.1,
|
|
"valid_targets_min": 2918
|
|
},
|
|
{
|
|
"epoch": 5.951156812339332,
|
|
"grad_norm": 0.6084874967670024,
|
|
"learning_rate": 2.678067729028999e-06,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0647452175617218,
|
|
"step": 6945,
|
|
"valid_targets_mean": 3589.9,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 5.955441302485005,
|
|
"grad_norm": 0.5973192224285923,
|
|
"learning_rate": 2.6567469879228824e-06,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07049669325351715,
|
|
"step": 6950,
|
|
"valid_targets_mean": 4123.8,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 5.959725792630677,
|
|
"grad_norm": 0.6208232920444805,
|
|
"learning_rate": 2.635505416544595e-06,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0627826675772667,
|
|
"step": 6955,
|
|
"valid_targets_mean": 4860.9,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 5.96401028277635,
|
|
"grad_norm": 0.5267484487748498,
|
|
"learning_rate": 2.6143431118591967e-06,
|
|
"loss": 0.1504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08430700749158859,
|
|
"step": 6960,
|
|
"valid_targets_mean": 5536.1,
|
|
"valid_targets_min": 4115
|
|
},
|
|
{
|
|
"epoch": 5.968294772922023,
|
|
"grad_norm": 0.5496591230529959,
|
|
"learning_rate": 2.593260170469891e-06,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0636175125837326,
|
|
"step": 6965,
|
|
"valid_targets_mean": 5769.9,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 5.972579263067695,
|
|
"grad_norm": 0.4920257881440762,
|
|
"learning_rate": 2.5722566886176047e-06,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054918207228183746,
|
|
"step": 6970,
|
|
"valid_targets_mean": 5959.6,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 5.976863753213368,
|
|
"grad_norm": 0.6577430899430436,
|
|
"learning_rate": 2.5513327621805473e-06,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07874983549118042,
|
|
"step": 6975,
|
|
"valid_targets_mean": 4578.1,
|
|
"valid_targets_min": 1934
|
|
},
|
|
{
|
|
"epoch": 5.981148243359041,
|
|
"grad_norm": 0.5192838115792693,
|
|
"learning_rate": 2.530488486673757e-06,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06696130335330963,
|
|
"step": 6980,
|
|
"valid_targets_mean": 5333.2,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 5.985432733504713,
|
|
"grad_norm": 0.5527558283403213,
|
|
"learning_rate": 2.50972395724868e-06,
|
|
"loss": 0.1423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0464884415268898,
|
|
"step": 6985,
|
|
"valid_targets_mean": 4953.5,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 5.989717223650386,
|
|
"grad_norm": 0.5352427081717313,
|
|
"learning_rate": 2.48903926869273e-06,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0705413892865181,
|
|
"step": 6990,
|
|
"valid_targets_mean": 5753.9,
|
|
"valid_targets_min": 4252
|
|
},
|
|
{
|
|
"epoch": 5.994001713796059,
|
|
"grad_norm": 0.5714701782964342,
|
|
"learning_rate": 2.468434515428868e-06,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07364567369222641,
|
|
"step": 6995,
|
|
"valid_targets_mean": 4093.5,
|
|
"valid_targets_min": 2187
|
|
},
|
|
{
|
|
"epoch": 5.998286203941731,
|
|
"grad_norm": 0.5603266143505143,
|
|
"learning_rate": 2.4479097915151438e-06,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0819477066397667,
|
|
"step": 7000,
|
|
"valid_targets_mean": 5255.8,
|
|
"valid_targets_min": 2270
|
|
},
|
|
{
|
|
"epoch": 6.002570694087404,
|
|
"grad_norm": 0.5617902857898495,
|
|
"learning_rate": 2.4274651906443026e-06,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07298682630062103,
|
|
"step": 7005,
|
|
"valid_targets_mean": 6007.9,
|
|
"valid_targets_min": 3138
|
|
},
|
|
{
|
|
"epoch": 6.006855184233077,
|
|
"grad_norm": 0.5590701329433548,
|
|
"learning_rate": 2.407100806143321e-06,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056523799896240234,
|
|
"step": 7010,
|
|
"valid_targets_mean": 4618.4,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 6.011139674378749,
|
|
"grad_norm": 0.5314159056896909,
|
|
"learning_rate": 2.386816730973005e-06,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05974011868238449,
|
|
"step": 7015,
|
|
"valid_targets_mean": 5098.0,
|
|
"valid_targets_min": 2011
|
|
},
|
|
{
|
|
"epoch": 6.015424164524422,
|
|
"grad_norm": 0.5636771573961143,
|
|
"learning_rate": 2.3666130577275604e-06,
|
|
"loss": 0.1422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07339408248662949,
|
|
"step": 7020,
|
|
"valid_targets_mean": 4734.5,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 6.019708654670095,
|
|
"grad_norm": 0.5435129863456174,
|
|
"learning_rate": 2.3464898786341615e-06,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06307528913021088,
|
|
"step": 7025,
|
|
"valid_targets_mean": 4226.4,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 6.023993144815767,
|
|
"grad_norm": 0.5315069862433984,
|
|
"learning_rate": 2.3264472855525532e-06,
|
|
"loss": 0.123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06738060712814331,
|
|
"step": 7030,
|
|
"valid_targets_mean": 5815.2,
|
|
"valid_targets_min": 3714
|
|
},
|
|
{
|
|
"epoch": 6.02827763496144,
|
|
"grad_norm": 0.4855041602382719,
|
|
"learning_rate": 2.3064853699745936e-06,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05301181226968765,
|
|
"step": 7035,
|
|
"valid_targets_mean": 6482.8,
|
|
"valid_targets_min": 3550
|
|
},
|
|
{
|
|
"epoch": 6.032562125107113,
|
|
"grad_norm": 0.5294378234912969,
|
|
"learning_rate": 2.2866042230238804e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061131253838539124,
|
|
"step": 7040,
|
|
"valid_targets_mean": 6033.4,
|
|
"valid_targets_min": 3134
|
|
},
|
|
{
|
|
"epoch": 6.036846615252785,
|
|
"grad_norm": 0.5273015137391548,
|
|
"learning_rate": 2.266803935455295e-06,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0604885071516037,
|
|
"step": 7045,
|
|
"valid_targets_mean": 5655.1,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 6.041131105398458,
|
|
"grad_norm": 0.5952816902680278,
|
|
"learning_rate": 2.2470845976546163e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06316784024238586,
|
|
"step": 7050,
|
|
"valid_targets_mean": 5000.8,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 6.045415595544131,
|
|
"grad_norm": 0.5949149831620895,
|
|
"learning_rate": 2.227446299638092e-06,
|
|
"loss": 0.136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08699578046798706,
|
|
"step": 7055,
|
|
"valid_targets_mean": 5022.2,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 6.049700085689803,
|
|
"grad_norm": 0.5432610582498822,
|
|
"learning_rate": 2.2078891310520346e-06,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05387689173221588,
|
|
"step": 7060,
|
|
"valid_targets_mean": 3837.4,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 6.053984575835476,
|
|
"grad_norm": 0.5661465570912795,
|
|
"learning_rate": 2.188413181172415e-06,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06640972197055817,
|
|
"step": 7065,
|
|
"valid_targets_mean": 3869.1,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 6.058269065981149,
|
|
"grad_norm": 0.5711416361143601,
|
|
"learning_rate": 2.169018538904455e-06,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0669720321893692,
|
|
"step": 7070,
|
|
"valid_targets_mean": 6166.6,
|
|
"valid_targets_min": 2563
|
|
},
|
|
{
|
|
"epoch": 6.062553556126821,
|
|
"grad_norm": 0.6060473922843284,
|
|
"learning_rate": 2.149705292782205e-06,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06459236145019531,
|
|
"step": 7075,
|
|
"valid_targets_mean": 4371.8,
|
|
"valid_targets_min": 2106
|
|
},
|
|
{
|
|
"epoch": 6.066838046272494,
|
|
"grad_norm": 0.5258787232106485,
|
|
"learning_rate": 2.13047353096816e-06,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05734770745038986,
|
|
"step": 7080,
|
|
"valid_targets_mean": 4410.9,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 6.071122536418166,
|
|
"grad_norm": 0.5835317414829356,
|
|
"learning_rate": 2.111323341252851e-06,
|
|
"loss": 0.128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07886640727519989,
|
|
"step": 7085,
|
|
"valid_targets_mean": 4713.9,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 6.075407026563839,
|
|
"grad_norm": 0.5824598357112564,
|
|
"learning_rate": 2.092254811054437e-06,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07371959835290909,
|
|
"step": 7090,
|
|
"valid_targets_mean": 5103.1,
|
|
"valid_targets_min": 1558
|
|
},
|
|
{
|
|
"epoch": 6.079691516709511,
|
|
"grad_norm": 0.5772979309118111,
|
|
"learning_rate": 2.073268027418314e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07025423645973206,
|
|
"step": 7095,
|
|
"valid_targets_mean": 4879.0,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 6.083976006855184,
|
|
"grad_norm": 0.5759877981086569,
|
|
"learning_rate": 2.0543630770167166e-06,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09002973884344101,
|
|
"step": 7100,
|
|
"valid_targets_mean": 5280.8,
|
|
"valid_targets_min": 2574
|
|
},
|
|
{
|
|
"epoch": 6.0882604970008565,
|
|
"grad_norm": 0.5319451711483659,
|
|
"learning_rate": 2.0355400461483278e-06,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.057267673313617706,
|
|
"step": 7105,
|
|
"valid_targets_mean": 4528.5,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 6.092544987146529,
|
|
"grad_norm": 0.5722236613500173,
|
|
"learning_rate": 2.0167990207378696e-06,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06940978765487671,
|
|
"step": 7110,
|
|
"valid_targets_mean": 4825.8,
|
|
"valid_targets_min": 793
|
|
},
|
|
{
|
|
"epoch": 6.096829477292202,
|
|
"grad_norm": 0.5990088640600807,
|
|
"learning_rate": 1.998140086335718e-06,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0675596296787262,
|
|
"step": 7115,
|
|
"valid_targets_mean": 4574.1,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.1011139674378745,
|
|
"grad_norm": 0.5801047706770528,
|
|
"learning_rate": 1.97956332811752e-06,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055842798203229904,
|
|
"step": 7120,
|
|
"valid_targets_mean": 4754.4,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 6.105398457583547,
|
|
"grad_norm": 0.6048377130053119,
|
|
"learning_rate": 1.96106883088379e-06,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07217507064342499,
|
|
"step": 7125,
|
|
"valid_targets_mean": 4534.0,
|
|
"valid_targets_min": 2138
|
|
},
|
|
{
|
|
"epoch": 6.10968294772922,
|
|
"grad_norm": 0.520549717902676,
|
|
"learning_rate": 1.942656679059547e-06,
|
|
"loss": 0.1219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06939978897571564,
|
|
"step": 7130,
|
|
"valid_targets_mean": 5840.8,
|
|
"valid_targets_min": 2292
|
|
},
|
|
{
|
|
"epoch": 6.1139674378748925,
|
|
"grad_norm": 0.570791137709158,
|
|
"learning_rate": 1.924326956693905e-06,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07258065789937973,
|
|
"step": 7135,
|
|
"valid_targets_mean": 4443.1,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 6.118251928020565,
|
|
"grad_norm": 0.49546155449057244,
|
|
"learning_rate": 1.906079747459695e-06,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05577601492404938,
|
|
"step": 7140,
|
|
"valid_targets_mean": 5838.4,
|
|
"valid_targets_min": 3043
|
|
},
|
|
{
|
|
"epoch": 6.122536418166238,
|
|
"grad_norm": 0.6098928236472626,
|
|
"learning_rate": 1.8879151346530889e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07786683738231659,
|
|
"step": 7145,
|
|
"valid_targets_mean": 4605.0,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 6.1268209083119105,
|
|
"grad_norm": 0.5765047245134982,
|
|
"learning_rate": 1.8698332011932164e-06,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053650569170713425,
|
|
"step": 7150,
|
|
"valid_targets_mean": 4318.0,
|
|
"valid_targets_min": 1250
|
|
},
|
|
{
|
|
"epoch": 6.131105398457583,
|
|
"grad_norm": 1.6491384383257206,
|
|
"learning_rate": 1.8518340296217907e-06,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05434166640043259,
|
|
"step": 7155,
|
|
"valid_targets_mean": 5819.2,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 6.135389888603256,
|
|
"grad_norm": 0.5471148417542772,
|
|
"learning_rate": 1.8339177021027144e-06,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07945186644792557,
|
|
"step": 7160,
|
|
"valid_targets_mean": 5115.5,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 6.1396743787489285,
|
|
"grad_norm": 0.6043050620509512,
|
|
"learning_rate": 1.8160843004217344e-06,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06661148369312286,
|
|
"step": 7165,
|
|
"valid_targets_mean": 4373.0,
|
|
"valid_targets_min": 1373
|
|
},
|
|
{
|
|
"epoch": 6.143958868894601,
|
|
"grad_norm": 0.5942375631504286,
|
|
"learning_rate": 1.7983339059860472e-06,
|
|
"loss": 0.1519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0706518292427063,
|
|
"step": 7170,
|
|
"valid_targets_mean": 4559.2,
|
|
"valid_targets_min": 3384
|
|
},
|
|
{
|
|
"epoch": 6.148243359040274,
|
|
"grad_norm": 0.6161555629304948,
|
|
"learning_rate": 1.780666599823926e-06,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06862369924783707,
|
|
"step": 7175,
|
|
"valid_targets_mean": 4812.8,
|
|
"valid_targets_min": 2162
|
|
},
|
|
{
|
|
"epoch": 6.1525278491859465,
|
|
"grad_norm": 0.5570310289376202,
|
|
"learning_rate": 1.7630824625843579e-06,
|
|
"loss": 0.1262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05563248693943024,
|
|
"step": 7180,
|
|
"valid_targets_mean": 5118.5,
|
|
"valid_targets_min": 1578
|
|
},
|
|
{
|
|
"epoch": 6.156812339331619,
|
|
"grad_norm": 0.5565818568350045,
|
|
"learning_rate": 1.745581574536679e-06,
|
|
"loss": 0.146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07531631737947464,
|
|
"step": 7185,
|
|
"valid_targets_mean": 5362.5,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 6.161096829477292,
|
|
"grad_norm": 0.5474402626627962,
|
|
"learning_rate": 1.728164015570195e-06,
|
|
"loss": 0.1267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05338769778609276,
|
|
"step": 7190,
|
|
"valid_targets_mean": 5042.5,
|
|
"valid_targets_min": 2566
|
|
},
|
|
{
|
|
"epoch": 6.1653813196229645,
|
|
"grad_norm": 0.5006188609732733,
|
|
"learning_rate": 1.7108298651938393e-06,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04690549895167351,
|
|
"step": 7195,
|
|
"valid_targets_mean": 6448.0,
|
|
"valid_targets_min": 4129
|
|
},
|
|
{
|
|
"epoch": 6.169665809768637,
|
|
"grad_norm": 0.6054830426470936,
|
|
"learning_rate": 1.69357920253578e-06,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052494410425424576,
|
|
"step": 7200,
|
|
"valid_targets_mean": 3395.1,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 6.17395029991431,
|
|
"grad_norm": 0.5458160886088905,
|
|
"learning_rate": 1.6764121063430882e-06,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06598511338233948,
|
|
"step": 7205,
|
|
"valid_targets_mean": 6574.4,
|
|
"valid_targets_min": 1931
|
|
},
|
|
{
|
|
"epoch": 6.1782347900599826,
|
|
"grad_norm": 0.5511129685150017,
|
|
"learning_rate": 1.659328654981356e-06,
|
|
"loss": 0.1365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06158352270722389,
|
|
"step": 7210,
|
|
"valid_targets_mean": 6243.9,
|
|
"valid_targets_min": 4428
|
|
},
|
|
{
|
|
"epoch": 6.182519280205655,
|
|
"grad_norm": 0.5721224946813679,
|
|
"learning_rate": 1.642328926434349e-06,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052804622799158096,
|
|
"step": 7215,
|
|
"valid_targets_mean": 4807.4,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 6.186803770351328,
|
|
"grad_norm": 0.5187633332938161,
|
|
"learning_rate": 1.6254129983036504e-06,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06331296265125275,
|
|
"step": 7220,
|
|
"valid_targets_mean": 5979.9,
|
|
"valid_targets_min": 3483
|
|
},
|
|
{
|
|
"epoch": 6.191088260497001,
|
|
"grad_norm": 0.5538859237064832,
|
|
"learning_rate": 1.6085809478083025e-06,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08845791220664978,
|
|
"step": 7225,
|
|
"valid_targets_mean": 4928.2,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 6.195372750642673,
|
|
"grad_norm": 0.5971124365145998,
|
|
"learning_rate": 1.5918328517844628e-06,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0702032819390297,
|
|
"step": 7230,
|
|
"valid_targets_mean": 4869.4,
|
|
"valid_targets_min": 1197
|
|
},
|
|
{
|
|
"epoch": 6.199657240788346,
|
|
"grad_norm": 0.5376800060207101,
|
|
"learning_rate": 1.5751687866850396e-06,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04543754830956459,
|
|
"step": 7235,
|
|
"valid_targets_mean": 5223.5,
|
|
"valid_targets_min": 1531
|
|
},
|
|
{
|
|
"epoch": 6.203941730934019,
|
|
"grad_norm": 0.52362958791206,
|
|
"learning_rate": 1.5585888285793616e-06,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056802961975336075,
|
|
"step": 7240,
|
|
"valid_targets_mean": 5973.1,
|
|
"valid_targets_min": 4700
|
|
},
|
|
{
|
|
"epoch": 6.208226221079691,
|
|
"grad_norm": 0.5787165339346138,
|
|
"learning_rate": 1.542093053152809e-06,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06048420071601868,
|
|
"step": 7245,
|
|
"valid_targets_mean": 5055.2,
|
|
"valid_targets_min": 2931
|
|
},
|
|
{
|
|
"epoch": 6.212510711225364,
|
|
"grad_norm": 0.522957992600429,
|
|
"learning_rate": 1.5256815357064825e-06,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04589177295565605,
|
|
"step": 7250,
|
|
"valid_targets_mean": 5997.5,
|
|
"valid_targets_min": 3531
|
|
},
|
|
{
|
|
"epoch": 6.216795201371037,
|
|
"grad_norm": 0.5811695311496623,
|
|
"learning_rate": 1.5093543511568531e-06,
|
|
"loss": 0.1271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054738134145736694,
|
|
"step": 7255,
|
|
"valid_targets_mean": 4873.9,
|
|
"valid_targets_min": 2125
|
|
},
|
|
{
|
|
"epoch": 6.221079691516709,
|
|
"grad_norm": 0.5878951744890861,
|
|
"learning_rate": 1.4931115740354352e-06,
|
|
"loss": 0.1428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0675361156463623,
|
|
"step": 7260,
|
|
"valid_targets_mean": 5341.0,
|
|
"valid_targets_min": 2498
|
|
},
|
|
{
|
|
"epoch": 6.225364181662382,
|
|
"grad_norm": 0.5239516616418894,
|
|
"learning_rate": 1.4769532784884199e-06,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054139237850904465,
|
|
"step": 7265,
|
|
"valid_targets_mean": 6078.6,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 6.229648671808055,
|
|
"grad_norm": 0.6125674704012837,
|
|
"learning_rate": 1.4608795382763519e-06,
|
|
"loss": 0.1384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09281733632087708,
|
|
"step": 7270,
|
|
"valid_targets_mean": 3341.8,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 6.233933161953727,
|
|
"grad_norm": 0.5802810544979863,
|
|
"learning_rate": 1.4448904267738062e-06,
|
|
"loss": 0.1419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05857785418629646,
|
|
"step": 7275,
|
|
"valid_targets_mean": 4161.6,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 6.2382176520994,
|
|
"grad_norm": 0.6228348827694725,
|
|
"learning_rate": 1.4289860169690206e-06,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06603018939495087,
|
|
"step": 7280,
|
|
"valid_targets_mean": 3769.4,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 6.242502142245073,
|
|
"grad_norm": 0.5606682274420252,
|
|
"learning_rate": 1.4131663814635888e-06,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07488321512937546,
|
|
"step": 7285,
|
|
"valid_targets_mean": 4953.9,
|
|
"valid_targets_min": 3753
|
|
},
|
|
{
|
|
"epoch": 6.246786632390745,
|
|
"grad_norm": 0.5681033905864885,
|
|
"learning_rate": 1.3974315924721182e-06,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054232463240623474,
|
|
"step": 7290,
|
|
"valid_targets_mean": 4851.4,
|
|
"valid_targets_min": 2171
|
|
},
|
|
{
|
|
"epoch": 6.251071122536418,
|
|
"grad_norm": 0.5528862655721212,
|
|
"learning_rate": 1.381781721821911e-06,
|
|
"loss": 0.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0710495263338089,
|
|
"step": 7295,
|
|
"valid_targets_mean": 5688.9,
|
|
"valid_targets_min": 4264
|
|
},
|
|
{
|
|
"epoch": 6.255355612682091,
|
|
"grad_norm": 0.5967193988030114,
|
|
"learning_rate": 1.3662168409526167e-06,
|
|
"loss": 0.1256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11634612828493118,
|
|
"step": 7300,
|
|
"valid_targets_mean": 6235.5,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 6.259640102827763,
|
|
"grad_norm": 0.5821630227786284,
|
|
"learning_rate": 1.350737020915922e-06,
|
|
"loss": 0.139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052662041038274765,
|
|
"step": 7305,
|
|
"valid_targets_mean": 4289.1,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 6.263924592973436,
|
|
"grad_norm": 0.6765553432922821,
|
|
"learning_rate": 1.3353423323752246e-06,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09214189648628235,
|
|
"step": 7310,
|
|
"valid_targets_mean": 4074.9,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 6.268209083119109,
|
|
"grad_norm": 0.5044662591051317,
|
|
"learning_rate": 1.3200328456053036e-06,
|
|
"loss": 0.1136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04478124529123306,
|
|
"step": 7315,
|
|
"valid_targets_mean": 6636.9,
|
|
"valid_targets_min": 3794
|
|
},
|
|
{
|
|
"epoch": 6.272493573264781,
|
|
"grad_norm": 0.566414312062377,
|
|
"learning_rate": 1.3048086304920026e-06,
|
|
"loss": 0.132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06308507919311523,
|
|
"step": 7320,
|
|
"valid_targets_mean": 5068.5,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 6.276778063410454,
|
|
"grad_norm": 0.6515762665691374,
|
|
"learning_rate": 1.289669756531917e-06,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0729881301522255,
|
|
"step": 7325,
|
|
"valid_targets_mean": 4246.6,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 6.281062553556127,
|
|
"grad_norm": 0.5517482213898717,
|
|
"learning_rate": 1.2746162928320649e-06,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08301884680986404,
|
|
"step": 7330,
|
|
"valid_targets_mean": 5955.0,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 6.285347043701799,
|
|
"grad_norm": 0.5523007068091895,
|
|
"learning_rate": 1.2596483081095778e-06,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06908877938985825,
|
|
"step": 7335,
|
|
"valid_targets_mean": 5983.0,
|
|
"valid_targets_min": 3720
|
|
},
|
|
{
|
|
"epoch": 6.289631533847472,
|
|
"grad_norm": 0.5601682429843172,
|
|
"learning_rate": 1.2447658706913868e-06,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05858379602432251,
|
|
"step": 7340,
|
|
"valid_targets_mean": 5157.9,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 6.293916023993145,
|
|
"grad_norm": 0.5981781004239811,
|
|
"learning_rate": 1.2299690485139193e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07124894857406616,
|
|
"step": 7345,
|
|
"valid_targets_mean": 4882.2,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 6.298200514138817,
|
|
"grad_norm": 0.5628953258224636,
|
|
"learning_rate": 1.2152579091227668e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07427423447370529,
|
|
"step": 7350,
|
|
"valid_targets_mean": 5157.0,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 6.30248500428449,
|
|
"grad_norm": 0.5264402145565338,
|
|
"learning_rate": 1.2006325196723933e-06,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06821203231811523,
|
|
"step": 7355,
|
|
"valid_targets_mean": 5412.6,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 6.306769494430163,
|
|
"grad_norm": 0.5815500627748932,
|
|
"learning_rate": 1.1860929469258341e-06,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052492864429950714,
|
|
"step": 7360,
|
|
"valid_targets_mean": 4367.4,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 6.311053984575835,
|
|
"grad_norm": 0.5825385511943423,
|
|
"learning_rate": 1.1716392572543732e-06,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07757897675037384,
|
|
"step": 7365,
|
|
"valid_targets_mean": 5754.5,
|
|
"valid_targets_min": 4385
|
|
},
|
|
{
|
|
"epoch": 6.315338474721508,
|
|
"grad_norm": 0.6110286527146042,
|
|
"learning_rate": 1.1572715166372506e-06,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06084147468209267,
|
|
"step": 7370,
|
|
"valid_targets_mean": 3635.0,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 6.319622964867181,
|
|
"grad_norm": 0.5483459534045897,
|
|
"learning_rate": 1.1429897906613596e-06,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050404250621795654,
|
|
"step": 7375,
|
|
"valid_targets_mean": 4962.5,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 6.323907455012853,
|
|
"grad_norm": 0.5642870717751584,
|
|
"learning_rate": 1.128794144520955e-06,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05563634634017944,
|
|
"step": 7380,
|
|
"valid_targets_mean": 4469.4,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 6.328191945158526,
|
|
"grad_norm": 0.5588183190716616,
|
|
"learning_rate": 1.1146846430173385e-06,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08249536901712418,
|
|
"step": 7385,
|
|
"valid_targets_mean": 5998.8,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 6.332476435304199,
|
|
"grad_norm": 0.5731674436370081,
|
|
"learning_rate": 1.1006613505585783e-06,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06888927519321442,
|
|
"step": 7390,
|
|
"valid_targets_mean": 4352.9,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 6.336760925449871,
|
|
"grad_norm": 0.5123918560667162,
|
|
"learning_rate": 1.0867243311592079e-06,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05763709917664528,
|
|
"step": 7395,
|
|
"valid_targets_mean": 6003.0,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 6.341045415595544,
|
|
"grad_norm": 0.5100597503907828,
|
|
"learning_rate": 1.072873648439936e-06,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05623047798871994,
|
|
"step": 7400,
|
|
"valid_targets_mean": 5028.2,
|
|
"valid_targets_min": 2742
|
|
},
|
|
{
|
|
"epoch": 6.345329905741217,
|
|
"grad_norm": 0.5811807929396194,
|
|
"learning_rate": 1.059109365627351e-06,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07150489836931229,
|
|
"step": 7405,
|
|
"valid_targets_mean": 5120.2,
|
|
"valid_targets_min": 3356
|
|
},
|
|
{
|
|
"epoch": 6.349614395886889,
|
|
"grad_norm": 0.4895410328153705,
|
|
"learning_rate": 1.0454315455536436e-06,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05738158896565437,
|
|
"step": 7410,
|
|
"valid_targets_mean": 5229.6,
|
|
"valid_targets_min": 2671
|
|
},
|
|
{
|
|
"epoch": 6.353898886032562,
|
|
"grad_norm": 0.5404284191963216,
|
|
"learning_rate": 1.0318402506563062e-06,
|
|
"loss": 0.1253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0597027949988842,
|
|
"step": 7415,
|
|
"valid_targets_mean": 5363.9,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 6.358183376178235,
|
|
"grad_norm": 0.5501116303269473,
|
|
"learning_rate": 1.0183355429778595e-06,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06805878132581711,
|
|
"step": 7420,
|
|
"valid_targets_mean": 5213.6,
|
|
"valid_targets_min": 3005
|
|
},
|
|
{
|
|
"epoch": 6.362467866323907,
|
|
"grad_norm": 0.5576493461479655,
|
|
"learning_rate": 1.0049174841655685e-06,
|
|
"loss": 0.1325,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07751034200191498,
|
|
"step": 7425,
|
|
"valid_targets_mean": 5268.6,
|
|
"valid_targets_min": 3439
|
|
},
|
|
{
|
|
"epoch": 6.36675235646958,
|
|
"grad_norm": 0.5548633392053545,
|
|
"learning_rate": 9.915861354711498e-07,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07571675628423691,
|
|
"step": 7430,
|
|
"valid_targets_mean": 6207.0,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 6.371036846615253,
|
|
"grad_norm": 0.526980009994755,
|
|
"learning_rate": 9.783415577505018e-07,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053638383746147156,
|
|
"step": 7435,
|
|
"valid_targets_mean": 6888.4,
|
|
"valid_targets_min": 4719
|
|
},
|
|
{
|
|
"epoch": 6.375321336760925,
|
|
"grad_norm": 0.5729456153802657,
|
|
"learning_rate": 9.651838114634216e-07,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06362077593803406,
|
|
"step": 7440,
|
|
"valid_targets_mean": 4871.2,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 6.379605826906598,
|
|
"grad_norm": 0.5184410617626088,
|
|
"learning_rate": 9.521129566733389e-07,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06541168689727783,
|
|
"step": 7445,
|
|
"valid_targets_mean": 5580.0,
|
|
"valid_targets_min": 3229
|
|
},
|
|
{
|
|
"epoch": 6.383890317052271,
|
|
"grad_norm": 0.6255470828970859,
|
|
"learning_rate": 9.391290530470277e-07,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07521027326583862,
|
|
"step": 7450,
|
|
"valid_targets_mean": 5401.9,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 6.388174807197943,
|
|
"grad_norm": 0.5194885483403754,
|
|
"learning_rate": 9.2623215985435e-07,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09347446262836456,
|
|
"step": 7455,
|
|
"valid_targets_mean": 5763.8,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 6.392459297343616,
|
|
"grad_norm": 0.5600928570709444,
|
|
"learning_rate": 9.134223359679683e-07,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06300924718379974,
|
|
"step": 7460,
|
|
"valid_targets_mean": 4393.2,
|
|
"valid_targets_min": 2933
|
|
},
|
|
{
|
|
"epoch": 6.396743787489289,
|
|
"grad_norm": 0.5369824063885026,
|
|
"learning_rate": 9.006996398630851e-07,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0863390564918518,
|
|
"step": 7465,
|
|
"valid_targets_mean": 6428.6,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 6.401028277634961,
|
|
"grad_norm": 0.5773689496250954,
|
|
"learning_rate": 8.88064129617181e-07,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05433430150151253,
|
|
"step": 7470,
|
|
"valid_targets_mean": 5142.5,
|
|
"valid_targets_min": 2347
|
|
},
|
|
{
|
|
"epoch": 6.405312767780634,
|
|
"grad_norm": 0.5256807129221563,
|
|
"learning_rate": 8.755158629097393e-07,
|
|
"loss": 0.1281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07254520058631897,
|
|
"step": 7475,
|
|
"valid_targets_mean": 6583.0,
|
|
"valid_targets_min": 3019
|
|
},
|
|
{
|
|
"epoch": 6.409597257926307,
|
|
"grad_norm": 0.5655039871931183,
|
|
"learning_rate": 8.630548970219888e-07,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08246055990457535,
|
|
"step": 7480,
|
|
"valid_targets_mean": 7249.6,
|
|
"valid_targets_min": 3680
|
|
},
|
|
{
|
|
"epoch": 6.413881748071979,
|
|
"grad_norm": 0.5463016827169638,
|
|
"learning_rate": 8.506812888366412e-07,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059731028974056244,
|
|
"step": 7485,
|
|
"valid_targets_mean": 5100.2,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 6.418166238217652,
|
|
"grad_norm": 0.5832410787055895,
|
|
"learning_rate": 8.383950948376385e-07,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09012992680072784,
|
|
"step": 7490,
|
|
"valid_targets_mean": 4581.4,
|
|
"valid_targets_min": 1210
|
|
},
|
|
{
|
|
"epoch": 6.422450728363325,
|
|
"grad_norm": 0.5373271133685443,
|
|
"learning_rate": 8.261963711098798e-07,
|
|
"loss": 0.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05034567415714264,
|
|
"step": 7495,
|
|
"valid_targets_mean": 5593.4,
|
|
"valid_targets_min": 3512
|
|
},
|
|
{
|
|
"epoch": 6.426735218508997,
|
|
"grad_norm": 0.49924700407085665,
|
|
"learning_rate": 8.140851733389743e-07,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05929369851946831,
|
|
"step": 7500,
|
|
"valid_targets_mean": 5676.0,
|
|
"valid_targets_min": 3019
|
|
},
|
|
{
|
|
"epoch": 6.43101970865467,
|
|
"grad_norm": 0.5566287946269606,
|
|
"learning_rate": 8.020615568109868e-07,
|
|
"loss": 0.1304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07680325210094452,
|
|
"step": 7505,
|
|
"valid_targets_mean": 4787.9,
|
|
"valid_targets_min": 3468
|
|
},
|
|
{
|
|
"epoch": 6.435304198800343,
|
|
"grad_norm": 0.5718185055108692,
|
|
"learning_rate": 7.901255764121862e-07,
|
|
"loss": 0.1159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049524642527103424,
|
|
"step": 7510,
|
|
"valid_targets_mean": 5347.8,
|
|
"valid_targets_min": 2413
|
|
},
|
|
{
|
|
"epoch": 6.439588688946015,
|
|
"grad_norm": 0.5617952603517002,
|
|
"learning_rate": 7.782772866287968e-07,
|
|
"loss": 0.1312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07754452526569366,
|
|
"step": 7515,
|
|
"valid_targets_mean": 4205.2,
|
|
"valid_targets_min": 746
|
|
},
|
|
{
|
|
"epoch": 6.443873179091688,
|
|
"grad_norm": 0.5382909893432877,
|
|
"learning_rate": 7.66516741546739e-07,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06745108962059021,
|
|
"step": 7520,
|
|
"valid_targets_mean": 5163.9,
|
|
"valid_targets_min": 3205
|
|
},
|
|
{
|
|
"epoch": 6.448157669237361,
|
|
"grad_norm": 0.6324837900871627,
|
|
"learning_rate": 7.548439948514019e-07,
|
|
"loss": 0.1268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09697423875331879,
|
|
"step": 7525,
|
|
"valid_targets_mean": 5868.6,
|
|
"valid_targets_min": 2030
|
|
},
|
|
{
|
|
"epoch": 6.4524421593830334,
|
|
"grad_norm": 0.6297388208752188,
|
|
"learning_rate": 7.432590998273714e-07,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05477753281593323,
|
|
"step": 7530,
|
|
"valid_targets_mean": 4286.4,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 6.456726649528706,
|
|
"grad_norm": 0.6145786549613212,
|
|
"learning_rate": 7.317621093582117e-07,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07500009983778,
|
|
"step": 7535,
|
|
"valid_targets_mean": 3922.9,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 6.461011139674379,
|
|
"grad_norm": 0.5521848259032168,
|
|
"learning_rate": 7.2035307592621e-07,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06406620144844055,
|
|
"step": 7540,
|
|
"valid_targets_mean": 4464.2,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 6.4652956298200515,
|
|
"grad_norm": 0.5833023790742793,
|
|
"learning_rate": 7.090320516121418e-07,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06913968175649643,
|
|
"step": 7545,
|
|
"valid_targets_mean": 5600.9,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 6.469580119965724,
|
|
"grad_norm": 0.6100203970157438,
|
|
"learning_rate": 6.977990880950348e-07,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06103619933128357,
|
|
"step": 7550,
|
|
"valid_targets_mean": 4139.6,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 6.473864610111397,
|
|
"grad_norm": 0.5633400073336338,
|
|
"learning_rate": 6.866542366519247e-07,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06431341916322708,
|
|
"step": 7555,
|
|
"valid_targets_mean": 5054.8,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 6.4781491002570695,
|
|
"grad_norm": 0.6041224027325699,
|
|
"learning_rate": 6.755975481576338e-07,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0816982090473175,
|
|
"step": 7560,
|
|
"valid_targets_mean": 4527.1,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 6.482433590402742,
|
|
"grad_norm": 0.5881212273674449,
|
|
"learning_rate": 6.646290730845285e-07,
|
|
"loss": 0.1303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07361535727977753,
|
|
"step": 7565,
|
|
"valid_targets_mean": 5084.4,
|
|
"valid_targets_min": 2417
|
|
},
|
|
{
|
|
"epoch": 6.486718080548415,
|
|
"grad_norm": 0.6028451231337074,
|
|
"learning_rate": 6.537488615022902e-07,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06748676300048828,
|
|
"step": 7570,
|
|
"valid_targets_mean": 4147.8,
|
|
"valid_targets_min": 2788
|
|
},
|
|
{
|
|
"epoch": 6.4910025706940875,
|
|
"grad_norm": 0.6049291595728633,
|
|
"learning_rate": 6.429569630776899e-07,
|
|
"loss": 0.1281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07137708365917206,
|
|
"step": 7575,
|
|
"valid_targets_mean": 6559.4,
|
|
"valid_targets_min": 3194
|
|
},
|
|
{
|
|
"epoch": 6.49528706083976,
|
|
"grad_norm": 0.5724152711580965,
|
|
"learning_rate": 6.322534270743653e-07,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04908524081110954,
|
|
"step": 7580,
|
|
"valid_targets_mean": 5618.8,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 6.499571550985433,
|
|
"grad_norm": 0.5284775742025518,
|
|
"learning_rate": 6.216383023525829e-07,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06110971421003342,
|
|
"step": 7585,
|
|
"valid_targets_mean": 6379.1,
|
|
"valid_targets_min": 3952
|
|
},
|
|
{
|
|
"epoch": 6.5038560411311055,
|
|
"grad_norm": 0.5412105324257526,
|
|
"learning_rate": 6.111116373690262e-07,
|
|
"loss": 0.1302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06972099840641022,
|
|
"step": 7590,
|
|
"valid_targets_mean": 5871.4,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 6.508140531276778,
|
|
"grad_norm": 0.5803291024390985,
|
|
"learning_rate": 6.006734801765746e-07,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05369029939174652,
|
|
"step": 7595,
|
|
"valid_targets_mean": 4633.1,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 6.512425021422451,
|
|
"grad_norm": 0.5703864930855584,
|
|
"learning_rate": 5.903238784240794e-07,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07159294188022614,
|
|
"step": 7600,
|
|
"valid_targets_mean": 5685.8,
|
|
"valid_targets_min": 3429
|
|
},
|
|
{
|
|
"epoch": 6.5167095115681235,
|
|
"grad_norm": 0.5489229421473321,
|
|
"learning_rate": 5.800628793561447e-07,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06936562061309814,
|
|
"step": 7605,
|
|
"valid_targets_mean": 5826.9,
|
|
"valid_targets_min": 3011
|
|
},
|
|
{
|
|
"epoch": 6.520994001713796,
|
|
"grad_norm": 0.5421183921246827,
|
|
"learning_rate": 5.698905298129154e-07,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07958382368087769,
|
|
"step": 7610,
|
|
"valid_targets_mean": 6062.5,
|
|
"valid_targets_min": 3845
|
|
},
|
|
{
|
|
"epoch": 6.525278491859469,
|
|
"grad_norm": 0.5649633947170538,
|
|
"learning_rate": 5.598068762298647e-07,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06852822005748749,
|
|
"step": 7615,
|
|
"valid_targets_mean": 5788.6,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 6.5295629820051415,
|
|
"grad_norm": 0.5844565816018681,
|
|
"learning_rate": 5.49811964637581e-07,
|
|
"loss": 0.1385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07468803226947784,
|
|
"step": 7620,
|
|
"valid_targets_mean": 4700.9,
|
|
"valid_targets_min": 2566
|
|
},
|
|
{
|
|
"epoch": 6.533847472150814,
|
|
"grad_norm": 0.5759158172395592,
|
|
"learning_rate": 5.399058406615498e-07,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09095199406147003,
|
|
"step": 7625,
|
|
"valid_targets_mean": 4950.9,
|
|
"valid_targets_min": 2768
|
|
},
|
|
{
|
|
"epoch": 6.538131962296487,
|
|
"grad_norm": 0.6098207410965979,
|
|
"learning_rate": 5.300885495219654e-07,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07239219546318054,
|
|
"step": 7630,
|
|
"valid_targets_mean": 6228.8,
|
|
"valid_targets_min": 2836
|
|
},
|
|
{
|
|
"epoch": 6.5424164524421595,
|
|
"grad_norm": 0.6448334666116833,
|
|
"learning_rate": 5.203601360334998e-07,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0904010683298111,
|
|
"step": 7635,
|
|
"valid_targets_mean": 4510.2,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 6.546700942587832,
|
|
"grad_norm": 0.6303612947945207,
|
|
"learning_rate": 5.107206446051138e-07,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07945692539215088,
|
|
"step": 7640,
|
|
"valid_targets_mean": 4986.5,
|
|
"valid_targets_min": 1948
|
|
},
|
|
{
|
|
"epoch": 6.550985432733505,
|
|
"grad_norm": 0.6436318797393943,
|
|
"learning_rate": 5.01170119239851e-07,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07079534232616425,
|
|
"step": 7645,
|
|
"valid_targets_mean": 4854.4,
|
|
"valid_targets_min": 1728
|
|
},
|
|
{
|
|
"epoch": 6.5552699228791775,
|
|
"grad_norm": 0.5134784672643112,
|
|
"learning_rate": 4.917086035346374e-07,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05049274116754532,
|
|
"step": 7650,
|
|
"valid_targets_mean": 6183.4,
|
|
"valid_targets_min": 3825
|
|
},
|
|
{
|
|
"epoch": 6.55955441302485,
|
|
"grad_norm": 0.5325514964818551,
|
|
"learning_rate": 4.823361406800775e-07,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06300629675388336,
|
|
"step": 7655,
|
|
"valid_targets_mean": 5562.2,
|
|
"valid_targets_min": 2461
|
|
},
|
|
{
|
|
"epoch": 6.563838903170523,
|
|
"grad_norm": 0.5355193257799574,
|
|
"learning_rate": 4.7305277346026523e-07,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06088012084364891,
|
|
"step": 7660,
|
|
"valid_targets_mean": 6078.8,
|
|
"valid_targets_min": 3357
|
|
},
|
|
{
|
|
"epoch": 6.5681233933161955,
|
|
"grad_norm": 0.5743674784749225,
|
|
"learning_rate": 4.6385854425258225e-07,
|
|
"loss": 0.1328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06708808243274689,
|
|
"step": 7665,
|
|
"valid_targets_mean": 4031.1,
|
|
"valid_targets_min": 2246
|
|
},
|
|
{
|
|
"epoch": 6.572407883461868,
|
|
"grad_norm": 0.48895775992421786,
|
|
"learning_rate": 4.5475349502750675e-07,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04776772856712341,
|
|
"step": 7670,
|
|
"valid_targets_mean": 5182.9,
|
|
"valid_targets_min": 2608
|
|
},
|
|
{
|
|
"epoch": 6.576692373607541,
|
|
"grad_norm": 0.5923442658960232,
|
|
"learning_rate": 4.457376673484204e-07,
|
|
"loss": 0.1274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06151628866791725,
|
|
"step": 7675,
|
|
"valid_targets_mean": 4924.8,
|
|
"valid_targets_min": 2762
|
|
},
|
|
{
|
|
"epoch": 6.580976863753214,
|
|
"grad_norm": 0.6082249406191534,
|
|
"learning_rate": 4.3681110237142165e-07,
|
|
"loss": 0.1314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06975458562374115,
|
|
"step": 7680,
|
|
"valid_targets_mean": 4497.1,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 6.585261353898886,
|
|
"grad_norm": 0.5650799881285941,
|
|
"learning_rate": 4.279738408451395e-07,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06257934868335724,
|
|
"step": 7685,
|
|
"valid_targets_mean": 4092.9,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 6.589545844044559,
|
|
"grad_norm": 0.655822224913595,
|
|
"learning_rate": 4.1922592311053776e-07,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06709827482700348,
|
|
"step": 7690,
|
|
"valid_targets_mean": 5060.4,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 6.593830334190232,
|
|
"grad_norm": 0.5030085090665306,
|
|
"learning_rate": 4.105673891007378e-07,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049611542373895645,
|
|
"step": 7695,
|
|
"valid_targets_mean": 6162.4,
|
|
"valid_targets_min": 2679
|
|
},
|
|
{
|
|
"epoch": 6.598114824335904,
|
|
"grad_norm": 0.6102081049129495,
|
|
"learning_rate": 4.0199827834084047e-07,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09663502871990204,
|
|
"step": 7700,
|
|
"valid_targets_mean": 4532.8,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 6.602399314481577,
|
|
"grad_norm": 0.5895159499997753,
|
|
"learning_rate": 3.9351862994774e-07,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07301288843154907,
|
|
"step": 7705,
|
|
"valid_targets_mean": 4698.4,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 6.60668380462725,
|
|
"grad_norm": 0.5314397820314964,
|
|
"learning_rate": 3.8512848262994175e-07,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07113306224346161,
|
|
"step": 7710,
|
|
"valid_targets_mean": 5652.4,
|
|
"valid_targets_min": 4421
|
|
},
|
|
{
|
|
"epoch": 6.610968294772922,
|
|
"grad_norm": 0.5677507631274338,
|
|
"learning_rate": 3.7682787468739544e-07,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07104374468326569,
|
|
"step": 7715,
|
|
"valid_targets_mean": 4550.2,
|
|
"valid_targets_min": 2510
|
|
},
|
|
{
|
|
"epoch": 6.615252784918595,
|
|
"grad_norm": 0.6096034474741296,
|
|
"learning_rate": 3.6861684401131135e-07,
|
|
"loss": 0.1301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09399035573005676,
|
|
"step": 7720,
|
|
"valid_targets_mean": 6537.1,
|
|
"valid_targets_min": 3034
|
|
},
|
|
{
|
|
"epoch": 6.619537275064268,
|
|
"grad_norm": 0.5217524607050796,
|
|
"learning_rate": 3.604954280839934e-07,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06967628002166748,
|
|
"step": 7725,
|
|
"valid_targets_mean": 5787.9,
|
|
"valid_targets_min": 3968
|
|
},
|
|
{
|
|
"epoch": 6.62382176520994,
|
|
"grad_norm": 0.6152416458895353,
|
|
"learning_rate": 3.524636639786616e-07,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059186115860939026,
|
|
"step": 7730,
|
|
"valid_targets_mean": 4107.1,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 6.628106255355613,
|
|
"grad_norm": 0.5668561641800082,
|
|
"learning_rate": 3.445215883592945e-07,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0830339565873146,
|
|
"step": 7735,
|
|
"valid_targets_mean": 6530.4,
|
|
"valid_targets_min": 2104
|
|
},
|
|
{
|
|
"epoch": 6.632390745501286,
|
|
"grad_norm": 0.6248303748819045,
|
|
"learning_rate": 3.36669237480447e-07,
|
|
"loss": 0.1382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08806224167346954,
|
|
"step": 7740,
|
|
"valid_targets_mean": 5062.6,
|
|
"valid_targets_min": 3322
|
|
},
|
|
{
|
|
"epoch": 6.636675235646958,
|
|
"grad_norm": 0.537812883564012,
|
|
"learning_rate": 3.289066471870972e-07,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06440138071775436,
|
|
"step": 7745,
|
|
"valid_targets_mean": 6161.6,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.640959725792631,
|
|
"grad_norm": 0.6367096859599471,
|
|
"learning_rate": 3.2123385291447315e-07,
|
|
"loss": 0.1396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06297184526920319,
|
|
"step": 7750,
|
|
"valid_targets_mean": 5560.1,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 6.645244215938304,
|
|
"grad_norm": 0.5174128452501829,
|
|
"learning_rate": 3.136508896878976e-07,
|
|
"loss": 0.1249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046948693692684174,
|
|
"step": 7755,
|
|
"valid_targets_mean": 5429.2,
|
|
"valid_targets_min": 2134
|
|
},
|
|
{
|
|
"epoch": 6.649528706083976,
|
|
"grad_norm": 0.5502673569976809,
|
|
"learning_rate": 3.0615779212262773e-07,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054427504539489746,
|
|
"step": 7760,
|
|
"valid_targets_mean": 5898.8,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 6.653813196229649,
|
|
"grad_norm": 0.5942641291785041,
|
|
"learning_rate": 2.9875459442368915e-07,
|
|
"loss": 0.1272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06269437074661255,
|
|
"step": 7765,
|
|
"valid_targets_mean": 5309.4,
|
|
"valid_targets_min": 3817
|
|
},
|
|
{
|
|
"epoch": 6.658097686375322,
|
|
"grad_norm": 0.5808692095958206,
|
|
"learning_rate": 2.914413303857377e-07,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06568677723407745,
|
|
"step": 7770,
|
|
"valid_targets_mean": 4647.8,
|
|
"valid_targets_min": 2544
|
|
},
|
|
{
|
|
"epoch": 6.662382176520994,
|
|
"grad_norm": 0.557804612223383,
|
|
"learning_rate": 2.8421803339288236e-07,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06331031024456024,
|
|
"step": 7775,
|
|
"valid_targets_mean": 5791.6,
|
|
"valid_targets_min": 3055
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.5493010270576719,
|
|
"learning_rate": 2.7708473641854917e-07,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05088922753930092,
|
|
"step": 7780,
|
|
"valid_targets_mean": 4945.6,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 6.67095115681234,
|
|
"grad_norm": 0.538355272547983,
|
|
"learning_rate": 2.7004147202532416e-07,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07455355674028397,
|
|
"step": 7785,
|
|
"valid_targets_mean": 5992.4,
|
|
"valid_targets_min": 3400
|
|
},
|
|
{
|
|
"epoch": 6.675235646958012,
|
|
"grad_norm": 0.5608671946650889,
|
|
"learning_rate": 2.630882723648087e-07,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04839465022087097,
|
|
"step": 7790,
|
|
"valid_targets_mean": 4834.1,
|
|
"valid_targets_min": 1084
|
|
},
|
|
{
|
|
"epoch": 6.679520137103685,
|
|
"grad_norm": 0.5725307968184742,
|
|
"learning_rate": 2.5622516917746644e-07,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06177856773138046,
|
|
"step": 7795,
|
|
"valid_targets_mean": 4075.1,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 6.683804627249358,
|
|
"grad_norm": 0.5668999443014117,
|
|
"learning_rate": 2.494521937924854e-07,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07443411648273468,
|
|
"step": 7800,
|
|
"valid_targets_mean": 5455.5,
|
|
"valid_targets_min": 3160
|
|
},
|
|
{
|
|
"epoch": 6.68808911739503,
|
|
"grad_norm": 0.5604181051156855,
|
|
"learning_rate": 2.427693771276274e-07,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0571829155087471,
|
|
"step": 7805,
|
|
"valid_targets_mean": 5181.1,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 6.692373607540703,
|
|
"grad_norm": 0.5947177144498428,
|
|
"learning_rate": 2.3617674968909876e-07,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09753529727458954,
|
|
"step": 7810,
|
|
"valid_targets_mean": 4709.2,
|
|
"valid_targets_min": 2501
|
|
},
|
|
{
|
|
"epoch": 6.696658097686376,
|
|
"grad_norm": 0.5618804740404388,
|
|
"learning_rate": 2.2967434157139756e-07,
|
|
"loss": 0.1118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0628480538725853,
|
|
"step": 7815,
|
|
"valid_targets_mean": 4900.4,
|
|
"valid_targets_min": 2355
|
|
},
|
|
{
|
|
"epoch": 6.700942587832048,
|
|
"grad_norm": 0.5927186775042097,
|
|
"learning_rate": 2.2326218245718455e-07,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07199184596538544,
|
|
"step": 7820,
|
|
"valid_targets_mean": 4698.0,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 6.705227077977721,
|
|
"grad_norm": 0.6156643349117058,
|
|
"learning_rate": 2.1694030161714118e-07,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055360130965709686,
|
|
"step": 7825,
|
|
"valid_targets_mean": 4293.8,
|
|
"valid_targets_min": 1427
|
|
},
|
|
{
|
|
"epoch": 6.709511568123394,
|
|
"grad_norm": 0.6098212433325052,
|
|
"learning_rate": 2.107087279098452e-07,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08166077733039856,
|
|
"step": 7830,
|
|
"valid_targets_mean": 5018.5,
|
|
"valid_targets_min": 2294
|
|
},
|
|
{
|
|
"epoch": 6.713796058269066,
|
|
"grad_norm": 0.5356389161718971,
|
|
"learning_rate": 2.0456748978163299e-07,
|
|
"loss": 0.127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05675205588340759,
|
|
"step": 7835,
|
|
"valid_targets_mean": 4544.2,
|
|
"valid_targets_min": 2282
|
|
},
|
|
{
|
|
"epoch": 6.718080548414739,
|
|
"grad_norm": 0.5858343176283142,
|
|
"learning_rate": 1.9851661526646638e-07,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08815167844295502,
|
|
"step": 7840,
|
|
"valid_targets_mean": 4550.6,
|
|
"valid_targets_min": 2884
|
|
},
|
|
{
|
|
"epoch": 6.722365038560412,
|
|
"grad_norm": 0.5595700704199968,
|
|
"learning_rate": 1.9255613198581934e-07,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060909878462553024,
|
|
"step": 7845,
|
|
"valid_targets_mean": 5540.4,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 6.726649528706084,
|
|
"grad_norm": 0.5191269758152909,
|
|
"learning_rate": 1.866860671485271e-07,
|
|
"loss": 0.1229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06576729565858841,
|
|
"step": 7850,
|
|
"valid_targets_mean": 5806.9,
|
|
"valid_targets_min": 2533
|
|
},
|
|
{
|
|
"epoch": 6.730934018851757,
|
|
"grad_norm": 0.5758310377261832,
|
|
"learning_rate": 1.809064475506883e-07,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07471377402544022,
|
|
"step": 7855,
|
|
"valid_targets_mean": 5287.5,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 6.73521850899743,
|
|
"grad_norm": 0.5278397932305886,
|
|
"learning_rate": 1.7521729957552302e-07,
|
|
"loss": 0.1371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06098167225718498,
|
|
"step": 7860,
|
|
"valid_targets_mean": 5044.8,
|
|
"valid_targets_min": 2857
|
|
},
|
|
{
|
|
"epoch": 6.739502999143102,
|
|
"grad_norm": 0.5395419129018436,
|
|
"learning_rate": 1.6961864919326166e-07,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05927133187651634,
|
|
"step": 7865,
|
|
"valid_targets_mean": 5738.5,
|
|
"valid_targets_min": 2026
|
|
},
|
|
{
|
|
"epoch": 6.743787489288774,
|
|
"grad_norm": 0.5304905943053638,
|
|
"learning_rate": 1.641105219610295e-07,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07619032263755798,
|
|
"step": 7870,
|
|
"valid_targets_mean": 6232.8,
|
|
"valid_targets_min": 2850
|
|
},
|
|
{
|
|
"epoch": 6.748071979434448,
|
|
"grad_norm": 0.5553095352748566,
|
|
"learning_rate": 1.586929430227202e-07,
|
|
"loss": 0.1322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06535205990076065,
|
|
"step": 7875,
|
|
"valid_targets_mean": 4887.5,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 6.7523564695801195,
|
|
"grad_norm": 0.5534113588647671,
|
|
"learning_rate": 1.5336593710888914e-07,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06758899241685867,
|
|
"step": 7880,
|
|
"valid_targets_mean": 4739.1,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 6.756640959725793,
|
|
"grad_norm": 0.5601728067274346,
|
|
"learning_rate": 1.4812952853663132e-07,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05526921898126602,
|
|
"step": 7885,
|
|
"valid_targets_mean": 3589.0,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 6.760925449871465,
|
|
"grad_norm": 0.5724487101335303,
|
|
"learning_rate": 1.4298374120948588e-07,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07227601855993271,
|
|
"step": 7890,
|
|
"valid_targets_mean": 6033.5,
|
|
"valid_targets_min": 1545
|
|
},
|
|
{
|
|
"epoch": 6.765209940017138,
|
|
"grad_norm": 0.5629625045477072,
|
|
"learning_rate": 1.3792859861730955e-07,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06101224198937416,
|
|
"step": 7895,
|
|
"valid_targets_mean": 5031.9,
|
|
"valid_targets_min": 2327
|
|
},
|
|
{
|
|
"epoch": 6.76949443016281,
|
|
"grad_norm": 0.5682786914829596,
|
|
"learning_rate": 1.3296412383617896e-07,
|
|
"loss": 0.1246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06303523480892181,
|
|
"step": 7900,
|
|
"valid_targets_mean": 5499.0,
|
|
"valid_targets_min": 3762
|
|
},
|
|
{
|
|
"epoch": 6.773778920308484,
|
|
"grad_norm": 0.53063945226954,
|
|
"learning_rate": 1.2809033952829065e-07,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06398984789848328,
|
|
"step": 7905,
|
|
"valid_targets_mean": 6115.8,
|
|
"valid_targets_min": 3463
|
|
},
|
|
{
|
|
"epoch": 6.7780634104541555,
|
|
"grad_norm": 0.5683228046451378,
|
|
"learning_rate": 1.2330726794184124e-07,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06533034145832062,
|
|
"step": 7910,
|
|
"valid_targets_mean": 4878.9,
|
|
"valid_targets_min": 537
|
|
},
|
|
{
|
|
"epoch": 6.782347900599829,
|
|
"grad_norm": 0.6162313267351365,
|
|
"learning_rate": 1.1861493091094078e-07,
|
|
"loss": 0.1286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05130893737077713,
|
|
"step": 7915,
|
|
"valid_targets_mean": 3761.9,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 6.786632390745501,
|
|
"grad_norm": 0.5364938112255343,
|
|
"learning_rate": 1.1401334985550849e-07,
|
|
"loss": 0.1333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05069788917899132,
|
|
"step": 7920,
|
|
"valid_targets_mean": 4648.5,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 6.790916880891174,
|
|
"grad_norm": 0.6759019110702623,
|
|
"learning_rate": 1.0950254578117047e-07,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08072400093078613,
|
|
"step": 7925,
|
|
"valid_targets_mean": 4252.4,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 6.795201371036846,
|
|
"grad_norm": 0.6010547033753089,
|
|
"learning_rate": 1.0508253927916878e-07,
|
|
"loss": 0.1215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07396875321865082,
|
|
"step": 7930,
|
|
"valid_targets_mean": 4642.6,
|
|
"valid_targets_min": 2359
|
|
},
|
|
{
|
|
"epoch": 6.79948586118252,
|
|
"grad_norm": 0.5306208560777944,
|
|
"learning_rate": 1.0075335052626811e-07,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09541085362434387,
|
|
"step": 7935,
|
|
"valid_targets_mean": 4518.5,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 6.8037703513281915,
|
|
"grad_norm": 0.5503742533160736,
|
|
"learning_rate": 9.651499928465812e-08,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045133013278245926,
|
|
"step": 7940,
|
|
"valid_targets_mean": 5171.4,
|
|
"valid_targets_min": 2204
|
|
},
|
|
{
|
|
"epoch": 6.808054841473865,
|
|
"grad_norm": 0.6369200662485703,
|
|
"learning_rate": 9.23675049018713e-08,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08669963479042053,
|
|
"step": 7945,
|
|
"valid_targets_mean": 5398.8,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 6.812339331619537,
|
|
"grad_norm": 0.5797105544129365,
|
|
"learning_rate": 8.831088631068962e-08,
|
|
"loss": 0.1296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08598867058753967,
|
|
"step": 7950,
|
|
"valid_targets_mean": 5427.9,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 6.81662382176521,
|
|
"grad_norm": 0.5892362865899328,
|
|
"learning_rate": 8.434516202905585e-08,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04355989396572113,
|
|
"step": 7955,
|
|
"valid_targets_mean": 4668.5,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 6.820908311910882,
|
|
"grad_norm": 0.5259449130439358,
|
|
"learning_rate": 8.047035015999127e-08,
|
|
"loss": 0.1293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062147438526153564,
|
|
"step": 7960,
|
|
"valid_targets_mean": 6266.0,
|
|
"valid_targets_min": 3304
|
|
},
|
|
{
|
|
"epoch": 6.825192802056556,
|
|
"grad_norm": 0.5980788196035809,
|
|
"learning_rate": 7.668646839151584e-08,
|
|
"loss": 0.1342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07914112508296967,
|
|
"step": 7965,
|
|
"valid_targets_mean": 4330.0,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 6.8294772922022275,
|
|
"grad_norm": 0.5679019198989551,
|
|
"learning_rate": 7.299353399656817e-08,
|
|
"loss": 0.1343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06378501653671265,
|
|
"step": 7970,
|
|
"valid_targets_mean": 5255.6,
|
|
"valid_targets_min": 2568
|
|
},
|
|
{
|
|
"epoch": 6.8337617823479,
|
|
"grad_norm": 0.5466815453187154,
|
|
"learning_rate": 6.939156383291679e-08,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0462757870554924,
|
|
"step": 7975,
|
|
"valid_targets_mean": 6509.4,
|
|
"valid_targets_min": 2957
|
|
},
|
|
{
|
|
"epoch": 6.838046272493573,
|
|
"grad_norm": 0.5441314114108987,
|
|
"learning_rate": 6.58805743430957e-08,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07762189954519272,
|
|
"step": 7980,
|
|
"valid_targets_mean": 6555.9,
|
|
"valid_targets_min": 1836
|
|
},
|
|
{
|
|
"epoch": 6.842330762639246,
|
|
"grad_norm": 0.50096448496228,
|
|
"learning_rate": 6.246058155432444e-08,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04757378250360489,
|
|
"step": 7985,
|
|
"valid_targets_mean": 5298.4,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 6.846615252784918,
|
|
"grad_norm": 0.4878422198549988,
|
|
"learning_rate": 5.913160107842819e-08,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0475429967045784,
|
|
"step": 7990,
|
|
"valid_targets_mean": 5907.8,
|
|
"valid_targets_min": 1951
|
|
},
|
|
{
|
|
"epoch": 6.850899742930591,
|
|
"grad_norm": 0.49936103592353775,
|
|
"learning_rate": 5.5893648111777774e-08,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0695628896355629,
|
|
"step": 7995,
|
|
"valid_targets_mean": 6045.9,
|
|
"valid_targets_min": 3008
|
|
},
|
|
{
|
|
"epoch": 6.855184233076264,
|
|
"grad_norm": 0.5665138724150077,
|
|
"learning_rate": 5.274673743521197e-08,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05693498253822327,
|
|
"step": 8000,
|
|
"valid_targets_mean": 4705.0,
|
|
"valid_targets_min": 887
|
|
},
|
|
{
|
|
"epoch": 6.859468723221936,
|
|
"grad_norm": 0.5203110760122742,
|
|
"learning_rate": 4.969088341397976e-08,
|
|
"loss": 0.1291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04779854789376259,
|
|
"step": 8005,
|
|
"valid_targets_mean": 4978.9,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 6.863753213367609,
|
|
"grad_norm": 0.5627332408917793,
|
|
"learning_rate": 4.6726099997655985e-08,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06360907107591629,
|
|
"step": 8010,
|
|
"valid_targets_mean": 4355.4,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 6.868037703513282,
|
|
"grad_norm": 0.6210799134015753,
|
|
"learning_rate": 4.385240072010355e-08,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07875913381576538,
|
|
"step": 8015,
|
|
"valid_targets_mean": 5311.0,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 6.872322193658954,
|
|
"grad_norm": 0.6250033404142441,
|
|
"learning_rate": 4.1069798699389094e-08,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06782643496990204,
|
|
"step": 8020,
|
|
"valid_targets_mean": 4566.6,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 6.876606683804627,
|
|
"grad_norm": 0.5422644649871644,
|
|
"learning_rate": 3.83783066377319e-08,
|
|
"loss": 0.1243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0571243017911911,
|
|
"step": 8025,
|
|
"valid_targets_mean": 4521.6,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 6.8808911739503,
|
|
"grad_norm": 0.5636592346059632,
|
|
"learning_rate": 3.5777936821450584e-08,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07804582267999649,
|
|
"step": 8030,
|
|
"valid_targets_mean": 6277.8,
|
|
"valid_targets_min": 2562
|
|
},
|
|
{
|
|
"epoch": 6.885175664095972,
|
|
"grad_norm": 0.5444328936008808,
|
|
"learning_rate": 3.326870112090097e-08,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060951389372348785,
|
|
"step": 8035,
|
|
"valid_targets_mean": 5473.1,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 6.889460154241645,
|
|
"grad_norm": 0.5365253336459935,
|
|
"learning_rate": 3.0850610990422745e-08,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06827065348625183,
|
|
"step": 8040,
|
|
"valid_targets_mean": 6299.4,
|
|
"valid_targets_min": 3425
|
|
},
|
|
{
|
|
"epoch": 6.893744644387318,
|
|
"grad_norm": 0.5234285669194776,
|
|
"learning_rate": 2.8523677468286216e-08,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051526255905628204,
|
|
"step": 8045,
|
|
"valid_targets_mean": 5121.8,
|
|
"valid_targets_min": 3038
|
|
},
|
|
{
|
|
"epoch": 6.89802913453299,
|
|
"grad_norm": 0.6361279161031459,
|
|
"learning_rate": 2.6287911176643422e-08,
|
|
"loss": 0.1321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07987803220748901,
|
|
"step": 8050,
|
|
"valid_targets_mean": 4748.1,
|
|
"valid_targets_min": 1969
|
|
},
|
|
{
|
|
"epoch": 6.902313624678663,
|
|
"grad_norm": 0.5639544639534764,
|
|
"learning_rate": 2.414332232148375e-08,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056079618632793427,
|
|
"step": 8055,
|
|
"valid_targets_mean": 3328.0,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 6.906598114824336,
|
|
"grad_norm": 0.5270526838763586,
|
|
"learning_rate": 2.2089920692578427e-08,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062460094690322876,
|
|
"step": 8060,
|
|
"valid_targets_mean": 6072.0,
|
|
"valid_targets_min": 3614
|
|
},
|
|
{
|
|
"epoch": 6.910882604970008,
|
|
"grad_norm": 0.5623331773457231,
|
|
"learning_rate": 2.0127715663442737e-08,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04031980037689209,
|
|
"step": 8065,
|
|
"valid_targets_mean": 4893.9,
|
|
"valid_targets_min": 1076
|
|
},
|
|
{
|
|
"epoch": 6.915167095115681,
|
|
"grad_norm": 0.5722152732120999,
|
|
"learning_rate": 1.8256716191293876e-08,
|
|
"loss": 0.1319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07477788627147675,
|
|
"step": 8070,
|
|
"valid_targets_mean": 4832.0,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 6.919451585261354,
|
|
"grad_norm": 0.6020483180818025,
|
|
"learning_rate": 1.647693081700208e-08,
|
|
"loss": 0.1211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04321936517953873,
|
|
"step": 8075,
|
|
"valid_targets_mean": 4835.4,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 6.923736075407026,
|
|
"grad_norm": 0.5670456675857146,
|
|
"learning_rate": 1.4788367665061753e-08,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07685419917106628,
|
|
"step": 8080,
|
|
"valid_targets_mean": 5860.4,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 6.928020565552699,
|
|
"grad_norm": 0.4939827002011384,
|
|
"learning_rate": 1.3191034443544859e-08,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049323923885822296,
|
|
"step": 8085,
|
|
"valid_targets_mean": 4900.1,
|
|
"valid_targets_min": 3089
|
|
},
|
|
{
|
|
"epoch": 6.932305055698372,
|
|
"grad_norm": 0.5775169071638229,
|
|
"learning_rate": 1.1684938444074256e-08,
|
|
"loss": 0.1327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0588383711874485,
|
|
"step": 8090,
|
|
"valid_targets_mean": 4254.9,
|
|
"valid_targets_min": 2406
|
|
},
|
|
{
|
|
"epoch": 6.936589545844044,
|
|
"grad_norm": 0.5629445712235375,
|
|
"learning_rate": 1.0270086541785961e-08,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0664314329624176,
|
|
"step": 8095,
|
|
"valid_targets_mean": 4874.4,
|
|
"valid_targets_min": 1914
|
|
},
|
|
{
|
|
"epoch": 6.940874035989717,
|
|
"grad_norm": 0.5038614919165475,
|
|
"learning_rate": 8.946485195295839e-09,
|
|
"loss": 0.1226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05850938707590103,
|
|
"step": 8100,
|
|
"valid_targets_mean": 6217.1,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 6.94515852613539,
|
|
"grad_norm": 0.5841112429555259,
|
|
"learning_rate": 7.714140446677399e-09,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06938346475362778,
|
|
"step": 8105,
|
|
"valid_targets_mean": 5006.5,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 6.949443016281062,
|
|
"grad_norm": 0.5950489578435784,
|
|
"learning_rate": 6.573057921421821e-09,
|
|
"loss": 0.1265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07542908191680908,
|
|
"step": 8110,
|
|
"valid_targets_mean": 4854.5,
|
|
"valid_targets_min": 2653
|
|
},
|
|
{
|
|
"epoch": 6.953727506426735,
|
|
"grad_norm": 0.5433367260843878,
|
|
"learning_rate": 5.523242828429087e-09,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05837767571210861,
|
|
"step": 8115,
|
|
"valid_targets_mean": 5498.1,
|
|
"valid_targets_min": 3257
|
|
},
|
|
{
|
|
"epoch": 6.958011996572408,
|
|
"grad_norm": 0.5579267091216227,
|
|
"learning_rate": 4.5646999599657795e-09,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047898173332214355,
|
|
"step": 8120,
|
|
"valid_targets_mean": 3744.8,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 6.96229648671808,
|
|
"grad_norm": 0.5502801957973467,
|
|
"learning_rate": 3.697433691662866e-09,
|
|
"loss": 0.1208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04671556502580643,
|
|
"step": 8125,
|
|
"valid_targets_mean": 5065.5,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 6.966580976863753,
|
|
"grad_norm": 0.6715401391059547,
|
|
"learning_rate": 2.9214479824757336e-09,
|
|
"loss": 0.1338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0737813264131546,
|
|
"step": 8130,
|
|
"valid_targets_mean": 3978.1,
|
|
"valid_targets_min": 2252
|
|
},
|
|
{
|
|
"epoch": 6.970865467009426,
|
|
"grad_norm": 0.5572764767170948,
|
|
"learning_rate": 2.236746374681964e-09,
|
|
"loss": 0.1351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06324169039726257,
|
|
"step": 8135,
|
|
"valid_targets_mean": 4121.8,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 6.975149957155098,
|
|
"grad_norm": 0.6214094452567607,
|
|
"learning_rate": 1.6433319938569115e-09,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07531341910362244,
|
|
"step": 8140,
|
|
"valid_targets_mean": 4792.2,
|
|
"valid_targets_min": 2175
|
|
},
|
|
{
|
|
"epoch": 6.979434447300771,
|
|
"grad_norm": 0.5631333769607573,
|
|
"learning_rate": 1.1412075488581587e-09,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06048138439655304,
|
|
"step": 8145,
|
|
"valid_targets_mean": 4735.9,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 6.983718937446444,
|
|
"grad_norm": 0.5522652498654058,
|
|
"learning_rate": 7.303753318232964e-10,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06047441065311432,
|
|
"step": 8150,
|
|
"valid_targets_mean": 4334.2,
|
|
"valid_targets_min": 2076
|
|
},
|
|
{
|
|
"epoch": 6.988003427592116,
|
|
"grad_norm": 0.5623147954093044,
|
|
"learning_rate": 4.1083721814549893e-10,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07241150736808777,
|
|
"step": 8155,
|
|
"valid_targets_mean": 5543.4,
|
|
"valid_targets_min": 2466
|
|
},
|
|
{
|
|
"epoch": 6.992287917737789,
|
|
"grad_norm": 0.5989116648786725,
|
|
"learning_rate": 1.8259466647574386e-10,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07918166369199753,
|
|
"step": 8160,
|
|
"valid_targets_mean": 4510.2,
|
|
"valid_targets_min": 2247
|
|
},
|
|
{
|
|
"epoch": 6.996572407883462,
|
|
"grad_norm": 0.5320669172153315,
|
|
"learning_rate": 4.564871871393095e-11,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056516826152801514,
|
|
"step": 8165,
|
|
"valid_targets_mean": 4857.2,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07527446001768112,
|
|
"step": 8169,
|
|
"total_flos": 4.1481371263550095e+18,
|
|
"train_loss": 0.012115523323082897,
|
|
"train_runtime": 26384.5271,
|
|
"train_samples_per_second": 4.953,
|
|
"train_steps_per_second": 0.31,
|
|
"valid_targets_mean": 4146.9,
|
|
"valid_targets_min": 1674
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 8169,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4.1481371263550095e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|