13959 lines
389 KiB
JSON
13959 lines
389 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 6328,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.005534034311012728,
|
|
"grad_norm": 22.573869688184732,
|
|
"learning_rate": 2.527646129541864e-07,
|
|
"loss": 0.8221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3871062695980072,
|
|
"step": 5,
|
|
"valid_targets_mean": 6579.2,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 0.011068068622025456,
|
|
"grad_norm": 19.583965059989275,
|
|
"learning_rate": 5.687203791469194e-07,
|
|
"loss": 0.818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38075920939445496,
|
|
"step": 10,
|
|
"valid_targets_mean": 6844.8,
|
|
"valid_targets_min": 2928
|
|
},
|
|
{
|
|
"epoch": 0.016602102933038185,
|
|
"grad_norm": 10.39723815630763,
|
|
"learning_rate": 8.846761453396525e-07,
|
|
"loss": 0.7289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36340269446372986,
|
|
"step": 15,
|
|
"valid_targets_mean": 6970.2,
|
|
"valid_targets_min": 1816
|
|
},
|
|
{
|
|
"epoch": 0.02213613724405091,
|
|
"grad_norm": 3.0485900232610743,
|
|
"learning_rate": 1.2006319115323856e-06,
|
|
"loss": 0.6323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3266253173351288,
|
|
"step": 20,
|
|
"valid_targets_mean": 7589.0,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 0.02767017155506364,
|
|
"grad_norm": 1.535701101825782,
|
|
"learning_rate": 1.5165876777251187e-06,
|
|
"loss": 0.5703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2894801199436188,
|
|
"step": 25,
|
|
"valid_targets_mean": 7366.7,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 0.03320420586607637,
|
|
"grad_norm": 1.0808344775071332,
|
|
"learning_rate": 1.8325434439178516e-06,
|
|
"loss": 0.5208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2558213472366333,
|
|
"step": 30,
|
|
"valid_targets_mean": 6763.6,
|
|
"valid_targets_min": 2844
|
|
},
|
|
{
|
|
"epoch": 0.0387382401770891,
|
|
"grad_norm": 0.7808300398981991,
|
|
"learning_rate": 2.148499210110585e-06,
|
|
"loss": 0.508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2513916790485382,
|
|
"step": 35,
|
|
"valid_targets_mean": 6678.9,
|
|
"valid_targets_min": 2465
|
|
},
|
|
{
|
|
"epoch": 0.04427227448810182,
|
|
"grad_norm": 0.4538559560104941,
|
|
"learning_rate": 2.4644549763033174e-06,
|
|
"loss": 0.4735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22647960484027863,
|
|
"step": 40,
|
|
"valid_targets_mean": 7128.3,
|
|
"valid_targets_min": 2482
|
|
},
|
|
{
|
|
"epoch": 0.04980630879911455,
|
|
"grad_norm": 0.44569451067957555,
|
|
"learning_rate": 2.7804107424960508e-06,
|
|
"loss": 0.4509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21913795173168182,
|
|
"step": 45,
|
|
"valid_targets_mean": 6365.0,
|
|
"valid_targets_min": 1594
|
|
},
|
|
{
|
|
"epoch": 0.05534034311012728,
|
|
"grad_norm": 0.37638104680350243,
|
|
"learning_rate": 3.096366508688784e-06,
|
|
"loss": 0.4326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21242153644561768,
|
|
"step": 50,
|
|
"valid_targets_mean": 6929.4,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 0.06087437742114001,
|
|
"grad_norm": 0.340887327503334,
|
|
"learning_rate": 3.412322274881517e-06,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17877615988254547,
|
|
"step": 55,
|
|
"valid_targets_mean": 6168.6,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 0.06640841173215274,
|
|
"grad_norm": 0.23882053545013524,
|
|
"learning_rate": 3.72827804107425e-06,
|
|
"loss": 0.4048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17955751717090607,
|
|
"step": 60,
|
|
"valid_targets_mean": 6729.7,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 0.07194244604316546,
|
|
"grad_norm": 0.24725932657940633,
|
|
"learning_rate": 4.044233807266983e-06,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18898995220661163,
|
|
"step": 65,
|
|
"valid_targets_mean": 6594.8,
|
|
"valid_targets_min": 1941
|
|
},
|
|
{
|
|
"epoch": 0.0774764803541782,
|
|
"grad_norm": 0.22636276011564896,
|
|
"learning_rate": 4.360189573459716e-06,
|
|
"loss": 0.3861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18377459049224854,
|
|
"step": 70,
|
|
"valid_targets_mean": 6777.4,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 0.08301051466519092,
|
|
"grad_norm": 0.21545729413371376,
|
|
"learning_rate": 4.676145339652449e-06,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18341968953609467,
|
|
"step": 75,
|
|
"valid_targets_mean": 6707.7,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 0.08854454897620365,
|
|
"grad_norm": 0.20558446498893868,
|
|
"learning_rate": 4.9921011058451815e-06,
|
|
"loss": 0.3739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1936556100845337,
|
|
"step": 80,
|
|
"valid_targets_mean": 7161.9,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 0.09407858328721638,
|
|
"grad_norm": 0.19767479431879417,
|
|
"learning_rate": 5.308056872037915e-06,
|
|
"loss": 0.3719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18940621614456177,
|
|
"step": 85,
|
|
"valid_targets_mean": 7025.5,
|
|
"valid_targets_min": 3109
|
|
},
|
|
{
|
|
"epoch": 0.0996126175982291,
|
|
"grad_norm": 0.19295545512857692,
|
|
"learning_rate": 5.624012638230648e-06,
|
|
"loss": 0.3668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1677761822938919,
|
|
"step": 90,
|
|
"valid_targets_mean": 6895.4,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 0.10514665190924184,
|
|
"grad_norm": 0.2020435352377361,
|
|
"learning_rate": 5.939968404423381e-06,
|
|
"loss": 0.352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1823124885559082,
|
|
"step": 95,
|
|
"valid_targets_mean": 6505.2,
|
|
"valid_targets_min": 2623
|
|
},
|
|
{
|
|
"epoch": 0.11068068622025456,
|
|
"grad_norm": 0.19088643214652315,
|
|
"learning_rate": 6.255924170616115e-06,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17667806148529053,
|
|
"step": 100,
|
|
"valid_targets_mean": 6831.8,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 0.11621472053126729,
|
|
"grad_norm": 0.1990091606136358,
|
|
"learning_rate": 6.571879936808847e-06,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1750369668006897,
|
|
"step": 105,
|
|
"valid_targets_mean": 6923.1,
|
|
"valid_targets_min": 3116
|
|
},
|
|
{
|
|
"epoch": 0.12174875484228002,
|
|
"grad_norm": 0.218232857308847,
|
|
"learning_rate": 6.8878357030015805e-06,
|
|
"loss": 0.3481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15672846138477325,
|
|
"step": 110,
|
|
"valid_targets_mean": 6147.5,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 0.12728278915329275,
|
|
"grad_norm": 0.19960252131354672,
|
|
"learning_rate": 7.203791469194313e-06,
|
|
"loss": 0.3354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17363061010837555,
|
|
"step": 115,
|
|
"valid_targets_mean": 6990.4,
|
|
"valid_targets_min": 2412
|
|
},
|
|
{
|
|
"epoch": 0.13281682346430548,
|
|
"grad_norm": 0.22454500068576372,
|
|
"learning_rate": 7.519747235387046e-06,
|
|
"loss": 0.3275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1685630828142166,
|
|
"step": 120,
|
|
"valid_targets_mean": 6798.5,
|
|
"valid_targets_min": 2824
|
|
},
|
|
{
|
|
"epoch": 0.13835085777531822,
|
|
"grad_norm": 0.21833094391730726,
|
|
"learning_rate": 7.83570300157978e-06,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15475989878177643,
|
|
"step": 125,
|
|
"valid_targets_mean": 7045.3,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 0.14388489208633093,
|
|
"grad_norm": 0.20638162571713792,
|
|
"learning_rate": 8.151658767772512e-06,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1473006308078766,
|
|
"step": 130,
|
|
"valid_targets_mean": 6538.4,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 0.14941892639734367,
|
|
"grad_norm": 0.22358362084070507,
|
|
"learning_rate": 8.467614533965247e-06,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15598450601100922,
|
|
"step": 135,
|
|
"valid_targets_mean": 6699.4,
|
|
"valid_targets_min": 2155
|
|
},
|
|
{
|
|
"epoch": 0.1549529607083564,
|
|
"grad_norm": 0.21017837829312694,
|
|
"learning_rate": 8.783570300157978e-06,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14871534705162048,
|
|
"step": 140,
|
|
"valid_targets_mean": 6332.6,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 0.1604869950193691,
|
|
"grad_norm": 0.21622396498243057,
|
|
"learning_rate": 9.09952606635071e-06,
|
|
"loss": 0.326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16364316642284393,
|
|
"step": 145,
|
|
"valid_targets_mean": 6915.4,
|
|
"valid_targets_min": 1863
|
|
},
|
|
{
|
|
"epoch": 0.16602102933038185,
|
|
"grad_norm": 0.21695105023925934,
|
|
"learning_rate": 9.415481832543445e-06,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1467968374490738,
|
|
"step": 150,
|
|
"valid_targets_mean": 6859.3,
|
|
"valid_targets_min": 1842
|
|
},
|
|
{
|
|
"epoch": 0.17155506364139458,
|
|
"grad_norm": 0.28985331992579194,
|
|
"learning_rate": 9.731437598736178e-06,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15782998502254486,
|
|
"step": 155,
|
|
"valid_targets_mean": 6832.0,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 0.1770890979524073,
|
|
"grad_norm": 0.22934552913714118,
|
|
"learning_rate": 1.004739336492891e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16041989624500275,
|
|
"step": 160,
|
|
"valid_targets_mean": 6902.6,
|
|
"valid_targets_min": 2882
|
|
},
|
|
{
|
|
"epoch": 0.18262313226342003,
|
|
"grad_norm": 0.23762401673014116,
|
|
"learning_rate": 1.0363349131121642e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13668395578861237,
|
|
"step": 165,
|
|
"valid_targets_mean": 6325.6,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 0.18815716657443277,
|
|
"grad_norm": 0.23400047830438106,
|
|
"learning_rate": 1.0679304897314377e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1621910184621811,
|
|
"step": 170,
|
|
"valid_targets_mean": 7014.8,
|
|
"valid_targets_min": 2827
|
|
},
|
|
{
|
|
"epoch": 0.1936912008854455,
|
|
"grad_norm": 0.24418156895720325,
|
|
"learning_rate": 1.099526066350711e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15957500040531158,
|
|
"step": 175,
|
|
"valid_targets_mean": 6979.4,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 0.1992252351964582,
|
|
"grad_norm": 0.21969595390343435,
|
|
"learning_rate": 1.1311216429699843e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14138895273208618,
|
|
"step": 180,
|
|
"valid_targets_mean": 6767.5,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 0.20475926950747095,
|
|
"grad_norm": 0.23779818602920866,
|
|
"learning_rate": 1.1627172195892576e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15995411574840546,
|
|
"step": 185,
|
|
"valid_targets_mean": 6852.5,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 0.21029330381848368,
|
|
"grad_norm": 0.2299655072817694,
|
|
"learning_rate": 1.1943127962085309e-05,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14684586226940155,
|
|
"step": 190,
|
|
"valid_targets_mean": 6578.0,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 0.2158273381294964,
|
|
"grad_norm": 0.2242595706921999,
|
|
"learning_rate": 1.2259083728278043e-05,
|
|
"loss": 0.3091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14662371575832367,
|
|
"step": 195,
|
|
"valid_targets_mean": 6188.8,
|
|
"valid_targets_min": 2385
|
|
},
|
|
{
|
|
"epoch": 0.22136137244050913,
|
|
"grad_norm": 0.22118820017723173,
|
|
"learning_rate": 1.2575039494470776e-05,
|
|
"loss": 0.2985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13862204551696777,
|
|
"step": 200,
|
|
"valid_targets_mean": 6247.0,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 0.22689540675152187,
|
|
"grad_norm": 0.586152646493132,
|
|
"learning_rate": 1.2890995260663507e-05,
|
|
"loss": 0.4144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24356555938720703,
|
|
"step": 205,
|
|
"valid_targets_mean": 3846.9,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 0.23242944106253458,
|
|
"grad_norm": 0.40113982026614686,
|
|
"learning_rate": 1.320695102685624e-05,
|
|
"loss": 0.4594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24281583726406097,
|
|
"step": 210,
|
|
"valid_targets_mean": 4104.0,
|
|
"valid_targets_min": 1728
|
|
},
|
|
{
|
|
"epoch": 0.2379634753735473,
|
|
"grad_norm": 0.3587578081191217,
|
|
"learning_rate": 1.3522906793048973e-05,
|
|
"loss": 0.4383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2290240079164505,
|
|
"step": 215,
|
|
"valid_targets_mean": 5437.0,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 0.24349750968456005,
|
|
"grad_norm": 0.3001740341699114,
|
|
"learning_rate": 1.3838862559241708e-05,
|
|
"loss": 0.4277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19526934623718262,
|
|
"step": 220,
|
|
"valid_targets_mean": 4097.2,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 0.24903154399557278,
|
|
"grad_norm": 0.31824892620034584,
|
|
"learning_rate": 1.415481832543444e-05,
|
|
"loss": 0.4195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2064758539199829,
|
|
"step": 225,
|
|
"valid_targets_mean": 4265.5,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 0.2545655783065855,
|
|
"grad_norm": 0.3003772739169376,
|
|
"learning_rate": 1.4470774091627173e-05,
|
|
"loss": 0.4264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18863408267498016,
|
|
"step": 230,
|
|
"valid_targets_mean": 4447.0,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 0.26009961261759823,
|
|
"grad_norm": 0.2664070522457513,
|
|
"learning_rate": 1.4786729857819906e-05,
|
|
"loss": 0.434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19851483404636383,
|
|
"step": 235,
|
|
"valid_targets_mean": 4379.6,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 0.26563364692861097,
|
|
"grad_norm": 0.2896341807388695,
|
|
"learning_rate": 1.510268562401264e-05,
|
|
"loss": 0.4292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23082368075847626,
|
|
"step": 240,
|
|
"valid_targets_mean": 5404.9,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 0.2711676812396237,
|
|
"grad_norm": 0.27047837917898676,
|
|
"learning_rate": 1.5418641390205372e-05,
|
|
"loss": 0.4292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18603259325027466,
|
|
"step": 245,
|
|
"valid_targets_mean": 4231.8,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 0.27670171555063644,
|
|
"grad_norm": 0.27449329042796916,
|
|
"learning_rate": 1.5734597156398107e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1991693377494812,
|
|
"step": 250,
|
|
"valid_targets_mean": 4803.9,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 0.2822357498616491,
|
|
"grad_norm": 3.0042063043416634,
|
|
"learning_rate": 1.6050552922590838e-05,
|
|
"loss": 0.4093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18418695032596588,
|
|
"step": 255,
|
|
"valid_targets_mean": 3728.0,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 0.28776978417266186,
|
|
"grad_norm": 0.31439617437478307,
|
|
"learning_rate": 1.6366508688783572e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23006130754947662,
|
|
"step": 260,
|
|
"valid_targets_mean": 5108.4,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 0.2933038184836746,
|
|
"grad_norm": 0.31360285945290356,
|
|
"learning_rate": 1.6682464454976304e-05,
|
|
"loss": 0.3914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17801325023174286,
|
|
"step": 265,
|
|
"valid_targets_mean": 4185.0,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 0.29883785279468733,
|
|
"grad_norm": 0.30802890709568975,
|
|
"learning_rate": 1.6998420221169038e-05,
|
|
"loss": 0.3936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19540362060070038,
|
|
"step": 270,
|
|
"valid_targets_mean": 4341.5,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 0.30437188710570007,
|
|
"grad_norm": 0.30033819428446645,
|
|
"learning_rate": 1.731437598736177e-05,
|
|
"loss": 0.3955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21151547133922577,
|
|
"step": 275,
|
|
"valid_targets_mean": 4570.1,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 0.3099059214167128,
|
|
"grad_norm": 0.29709854755432863,
|
|
"learning_rate": 1.7630331753554504e-05,
|
|
"loss": 0.395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19487600028514862,
|
|
"step": 280,
|
|
"valid_targets_mean": 4621.4,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 0.3154399557277255,
|
|
"grad_norm": 0.31577946727287876,
|
|
"learning_rate": 1.7946287519747235e-05,
|
|
"loss": 0.4025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2140292376279831,
|
|
"step": 285,
|
|
"valid_targets_mean": 4701.1,
|
|
"valid_targets_min": 1892
|
|
},
|
|
{
|
|
"epoch": 0.3209739900387382,
|
|
"grad_norm": 0.2771669398573801,
|
|
"learning_rate": 1.826224328593997e-05,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17965345084667206,
|
|
"step": 290,
|
|
"valid_targets_mean": 4210.5,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 0.32650802434975096,
|
|
"grad_norm": 0.28721368651483475,
|
|
"learning_rate": 1.8578199052132704e-05,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18116088211536407,
|
|
"step": 295,
|
|
"valid_targets_mean": 4092.7,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 0.3320420586607637,
|
|
"grad_norm": 0.2831149564492413,
|
|
"learning_rate": 1.8894154818325436e-05,
|
|
"loss": 0.3946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16176189482212067,
|
|
"step": 300,
|
|
"valid_targets_mean": 3938.2,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 0.33757609297177643,
|
|
"grad_norm": 0.32797058470167983,
|
|
"learning_rate": 1.921011058451817e-05,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17827928066253662,
|
|
"step": 305,
|
|
"valid_targets_mean": 3866.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 0.34311012728278917,
|
|
"grad_norm": 0.2925512265275017,
|
|
"learning_rate": 1.95260663507109e-05,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20264865458011627,
|
|
"step": 310,
|
|
"valid_targets_mean": 4178.5,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 0.3486441615938019,
|
|
"grad_norm": 0.28525946139415165,
|
|
"learning_rate": 1.9842022116903633e-05,
|
|
"loss": 0.3916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18759731948375702,
|
|
"step": 315,
|
|
"valid_targets_mean": 4422.5,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 0.3541781959048146,
|
|
"grad_norm": 0.3336622602016608,
|
|
"learning_rate": 2.015797788309637e-05,
|
|
"loss": 0.3934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20357847213745117,
|
|
"step": 320,
|
|
"valid_targets_mean": 4715.8,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 0.3597122302158273,
|
|
"grad_norm": 0.27871603743410805,
|
|
"learning_rate": 2.04739336492891e-05,
|
|
"loss": 0.3897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18295319378376007,
|
|
"step": 325,
|
|
"valid_targets_mean": 4308.6,
|
|
"valid_targets_min": 1797
|
|
},
|
|
{
|
|
"epoch": 0.36524626452684006,
|
|
"grad_norm": 0.26962841694038514,
|
|
"learning_rate": 2.0789889415481833e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1815447360277176,
|
|
"step": 330,
|
|
"valid_targets_mean": 4429.0,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 0.3707802988378528,
|
|
"grad_norm": 0.3088487589522481,
|
|
"learning_rate": 2.1105845181674568e-05,
|
|
"loss": 0.3883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22907239198684692,
|
|
"step": 335,
|
|
"valid_targets_mean": 4832.3,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 0.37631433314886553,
|
|
"grad_norm": 0.30545972833166846,
|
|
"learning_rate": 2.14218009478673e-05,
|
|
"loss": 0.3813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19862891733646393,
|
|
"step": 340,
|
|
"valid_targets_mean": 4335.4,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 0.38184836745987827,
|
|
"grad_norm": 0.28528331140566465,
|
|
"learning_rate": 2.1737756714060033e-05,
|
|
"loss": 0.3819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17305584251880646,
|
|
"step": 345,
|
|
"valid_targets_mean": 3609.3,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 0.387382401770891,
|
|
"grad_norm": 0.2698228535419003,
|
|
"learning_rate": 2.2053712480252765e-05,
|
|
"loss": 0.3856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19168169796466827,
|
|
"step": 350,
|
|
"valid_targets_mean": 4319.0,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 0.3929164360819037,
|
|
"grad_norm": 0.30836810816091687,
|
|
"learning_rate": 2.23696682464455e-05,
|
|
"loss": 0.3936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20140843093395233,
|
|
"step": 355,
|
|
"valid_targets_mean": 4539.0,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 0.3984504703929164,
|
|
"grad_norm": 0.29938242402686027,
|
|
"learning_rate": 2.2685624012638234e-05,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20533442497253418,
|
|
"step": 360,
|
|
"valid_targets_mean": 4650.9,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 0.40398450470392916,
|
|
"grad_norm": 0.29159514782037715,
|
|
"learning_rate": 2.3001579778830965e-05,
|
|
"loss": 0.3772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17482727766036987,
|
|
"step": 365,
|
|
"valid_targets_mean": 3957.8,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 0.4095185390149419,
|
|
"grad_norm": 0.2960042857765756,
|
|
"learning_rate": 2.33175355450237e-05,
|
|
"loss": 0.3792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18393875658512115,
|
|
"step": 370,
|
|
"valid_targets_mean": 4258.2,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 0.41505257332595463,
|
|
"grad_norm": 0.258271336122156,
|
|
"learning_rate": 2.363349131121643e-05,
|
|
"loss": 0.382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18117231130599976,
|
|
"step": 375,
|
|
"valid_targets_mean": 4293.1,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 0.42058660763696737,
|
|
"grad_norm": 0.2857141427353967,
|
|
"learning_rate": 2.3949447077409165e-05,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17623044550418854,
|
|
"step": 380,
|
|
"valid_targets_mean": 4114.5,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 0.42612064194798005,
|
|
"grad_norm": 0.28334088488972164,
|
|
"learning_rate": 2.42654028436019e-05,
|
|
"loss": 0.3655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18481989204883575,
|
|
"step": 385,
|
|
"valid_targets_mean": 4430.8,
|
|
"valid_targets_min": 2314
|
|
},
|
|
{
|
|
"epoch": 0.4316546762589928,
|
|
"grad_norm": 0.31103800283731187,
|
|
"learning_rate": 2.458135860979463e-05,
|
|
"loss": 0.3823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20391111075878143,
|
|
"step": 390,
|
|
"valid_targets_mean": 4497.4,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 0.4371887105700055,
|
|
"grad_norm": 0.3007656212158998,
|
|
"learning_rate": 2.4897314375987366e-05,
|
|
"loss": 0.3667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20604145526885986,
|
|
"step": 395,
|
|
"valid_targets_mean": 4274.5,
|
|
"valid_targets_min": 1618
|
|
},
|
|
{
|
|
"epoch": 0.44272274488101826,
|
|
"grad_norm": 0.26889513414602545,
|
|
"learning_rate": 2.5213270142180094e-05,
|
|
"loss": 0.3719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16915635764598846,
|
|
"step": 400,
|
|
"valid_targets_mean": 4277.4,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 0.448256779192031,
|
|
"grad_norm": 0.284968480654406,
|
|
"learning_rate": 2.552922590837283e-05,
|
|
"loss": 0.3769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1831769496202469,
|
|
"step": 405,
|
|
"valid_targets_mean": 4469.7,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 0.45379081350304373,
|
|
"grad_norm": 0.26901256292445136,
|
|
"learning_rate": 2.5845181674565566e-05,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19426560401916504,
|
|
"step": 410,
|
|
"valid_targets_mean": 4527.5,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 0.45932484781405647,
|
|
"grad_norm": 0.27323699028442777,
|
|
"learning_rate": 2.6161137440758294e-05,
|
|
"loss": 0.3815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17527484893798828,
|
|
"step": 415,
|
|
"valid_targets_mean": 4689.2,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 0.46485888212506915,
|
|
"grad_norm": 0.26749382303257646,
|
|
"learning_rate": 2.647709320695103e-05,
|
|
"loss": 0.3783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17316754162311554,
|
|
"step": 420,
|
|
"valid_targets_mean": 4317.8,
|
|
"valid_targets_min": 1812
|
|
},
|
|
{
|
|
"epoch": 0.4703929164360819,
|
|
"grad_norm": 0.26836824185876,
|
|
"learning_rate": 2.679304897314376e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12835562229156494,
|
|
"step": 425,
|
|
"valid_targets_mean": 5641.1,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 0.4759269507470946,
|
|
"grad_norm": 0.24793671818721102,
|
|
"learning_rate": 2.7109004739336494e-05,
|
|
"loss": 0.2676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12962763011455536,
|
|
"step": 430,
|
|
"valid_targets_mean": 5349.0,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 0.48146098505810736,
|
|
"grad_norm": 0.21415649307966725,
|
|
"learning_rate": 2.742496050552923e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1357322484254837,
|
|
"step": 435,
|
|
"valid_targets_mean": 5783.2,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 0.4869950193691201,
|
|
"grad_norm": 0.2449205704557651,
|
|
"learning_rate": 2.774091627172196e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14369916915893555,
|
|
"step": 440,
|
|
"valid_targets_mean": 5767.9,
|
|
"valid_targets_min": 2653
|
|
},
|
|
{
|
|
"epoch": 0.49252905368013283,
|
|
"grad_norm": 0.2059316309784506,
|
|
"learning_rate": 2.8056872037914695e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13689398765563965,
|
|
"step": 445,
|
|
"valid_targets_mean": 5619.5,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 0.49806308799114557,
|
|
"grad_norm": 0.21338401994743855,
|
|
"learning_rate": 2.8372827804107426e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.127161905169487,
|
|
"step": 450,
|
|
"valid_targets_mean": 5253.8,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 0.5035971223021583,
|
|
"grad_norm": 0.21754017832580447,
|
|
"learning_rate": 2.868878357030016e-05,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1304529309272766,
|
|
"step": 455,
|
|
"valid_targets_mean": 5506.8,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 0.509131156613171,
|
|
"grad_norm": 0.2294942844909931,
|
|
"learning_rate": 2.9004739336492895e-05,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11473497003316879,
|
|
"step": 460,
|
|
"valid_targets_mean": 5309.8,
|
|
"valid_targets_min": 2443
|
|
},
|
|
{
|
|
"epoch": 0.5146651909241837,
|
|
"grad_norm": 0.2196751904342027,
|
|
"learning_rate": 2.9320695102685626e-05,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15007314085960388,
|
|
"step": 465,
|
|
"valid_targets_mean": 5868.3,
|
|
"valid_targets_min": 2497
|
|
},
|
|
{
|
|
"epoch": 0.5201992252351965,
|
|
"grad_norm": 0.18738709883609975,
|
|
"learning_rate": 2.963665086887836e-05,
|
|
"loss": 0.241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10643309354782104,
|
|
"step": 470,
|
|
"valid_targets_mean": 5761.6,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 0.5257332595462092,
|
|
"grad_norm": 0.2175539021849619,
|
|
"learning_rate": 2.9952606635071092e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12912330031394958,
|
|
"step": 475,
|
|
"valid_targets_mean": 5816.0,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 0.5312672938572219,
|
|
"grad_norm": 0.1970812604349457,
|
|
"learning_rate": 3.0268562401263827e-05,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13159504532814026,
|
|
"step": 480,
|
|
"valid_targets_mean": 5984.8,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 0.5368013281682347,
|
|
"grad_norm": 0.1798179806094743,
|
|
"learning_rate": 3.058451816745656e-05,
|
|
"loss": 0.2343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13087119162082672,
|
|
"step": 485,
|
|
"valid_targets_mean": 5711.8,
|
|
"valid_targets_min": 2394
|
|
},
|
|
{
|
|
"epoch": 0.5423353624792474,
|
|
"grad_norm": 0.25288413800579274,
|
|
"learning_rate": 3.090047393364929e-05,
|
|
"loss": 0.2339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1277281939983368,
|
|
"step": 490,
|
|
"valid_targets_mean": 5680.3,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 0.5478693967902601,
|
|
"grad_norm": 0.22610447769390982,
|
|
"learning_rate": 3.121642969984203e-05,
|
|
"loss": 0.2296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11709853261709213,
|
|
"step": 495,
|
|
"valid_targets_mean": 5869.9,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 0.5534034311012729,
|
|
"grad_norm": 0.47954836253427174,
|
|
"learning_rate": 3.153238546603476e-05,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11786717921495438,
|
|
"step": 500,
|
|
"valid_targets_mean": 5479.9,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 0.5589374654122855,
|
|
"grad_norm": 0.23140780959919824,
|
|
"learning_rate": 3.184834123222749e-05,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12131712585687637,
|
|
"step": 505,
|
|
"valid_targets_mean": 5297.5,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 0.5644714997232982,
|
|
"grad_norm": 0.20491642615826702,
|
|
"learning_rate": 3.216429699842023e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11049050092697144,
|
|
"step": 510,
|
|
"valid_targets_mean": 5783.3,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 0.570005534034311,
|
|
"grad_norm": 0.20016014346656516,
|
|
"learning_rate": 3.248025276461296e-05,
|
|
"loss": 0.2288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10679373145103455,
|
|
"step": 515,
|
|
"valid_targets_mean": 5570.2,
|
|
"valid_targets_min": 2844
|
|
},
|
|
{
|
|
"epoch": 0.5755395683453237,
|
|
"grad_norm": 0.23789726284137908,
|
|
"learning_rate": 3.279620853080569e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11191616207361221,
|
|
"step": 520,
|
|
"valid_targets_mean": 5753.7,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 0.5810736026563365,
|
|
"grad_norm": 0.22722793613585027,
|
|
"learning_rate": 3.311216429699842e-05,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1198091134428978,
|
|
"step": 525,
|
|
"valid_targets_mean": 5353.3,
|
|
"valid_targets_min": 2316
|
|
},
|
|
{
|
|
"epoch": 0.5866076369673492,
|
|
"grad_norm": 0.21981373105431526,
|
|
"learning_rate": 3.342812006319116e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11608002334833145,
|
|
"step": 530,
|
|
"valid_targets_mean": 5618.7,
|
|
"valid_targets_min": 2144
|
|
},
|
|
{
|
|
"epoch": 0.5921416712783619,
|
|
"grad_norm": 0.21244635954118507,
|
|
"learning_rate": 3.374407582938389e-05,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11392959952354431,
|
|
"step": 535,
|
|
"valid_targets_mean": 5338.1,
|
|
"valid_targets_min": 1731
|
|
},
|
|
{
|
|
"epoch": 0.5976757055893747,
|
|
"grad_norm": 0.19933294250504463,
|
|
"learning_rate": 3.406003159557662e-05,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12038113921880722,
|
|
"step": 540,
|
|
"valid_targets_mean": 5883.8,
|
|
"valid_targets_min": 2583
|
|
},
|
|
{
|
|
"epoch": 0.6032097399003874,
|
|
"grad_norm": 0.22758141250939715,
|
|
"learning_rate": 3.437598736176936e-05,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11625228077173233,
|
|
"step": 545,
|
|
"valid_targets_mean": 5300.8,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 0.6087437742114001,
|
|
"grad_norm": 0.21233039772834064,
|
|
"learning_rate": 3.4691943127962084e-05,
|
|
"loss": 0.2295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12226393818855286,
|
|
"step": 550,
|
|
"valid_targets_mean": 5928.1,
|
|
"valid_targets_min": 3227
|
|
},
|
|
{
|
|
"epoch": 0.6142778085224129,
|
|
"grad_norm": 0.21065540622054063,
|
|
"learning_rate": 3.500789889415482e-05,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11683835834264755,
|
|
"step": 555,
|
|
"valid_targets_mean": 5923.1,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 0.6198118428334256,
|
|
"grad_norm": 0.21163909369812556,
|
|
"learning_rate": 3.532385466034755e-05,
|
|
"loss": 0.2228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10224387794733047,
|
|
"step": 560,
|
|
"valid_targets_mean": 5607.5,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 0.6253458771444383,
|
|
"grad_norm": 0.18564813482011772,
|
|
"learning_rate": 3.5639810426540284e-05,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13485325872898102,
|
|
"step": 565,
|
|
"valid_targets_mean": 5891.4,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 0.630879911455451,
|
|
"grad_norm": 0.18394726290312965,
|
|
"learning_rate": 3.595576619273302e-05,
|
|
"loss": 0.2258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11927980184555054,
|
|
"step": 570,
|
|
"valid_targets_mean": 5810.3,
|
|
"valid_targets_min": 2797
|
|
},
|
|
{
|
|
"epoch": 0.6364139457664637,
|
|
"grad_norm": 0.2233018855151671,
|
|
"learning_rate": 3.6271721958925753e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1191728413105011,
|
|
"step": 575,
|
|
"valid_targets_mean": 5845.2,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 0.6419479800774764,
|
|
"grad_norm": 0.18891122067095578,
|
|
"learning_rate": 3.6587677725118485e-05,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10877739638090134,
|
|
"step": 580,
|
|
"valid_targets_mean": 5777.4,
|
|
"valid_targets_min": 2375
|
|
},
|
|
{
|
|
"epoch": 0.6474820143884892,
|
|
"grad_norm": 0.22782608500286497,
|
|
"learning_rate": 3.690363349131122e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11073682457208633,
|
|
"step": 585,
|
|
"valid_targets_mean": 5355.5,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 0.6530160486995019,
|
|
"grad_norm": 0.18715533043352361,
|
|
"learning_rate": 3.7219589257503954e-05,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10883685946464539,
|
|
"step": 590,
|
|
"valid_targets_mean": 5619.6,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 0.6585500830105147,
|
|
"grad_norm": 0.1807423599008659,
|
|
"learning_rate": 3.7535545023696685e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11599501967430115,
|
|
"step": 595,
|
|
"valid_targets_mean": 5980.5,
|
|
"valid_targets_min": 2300
|
|
},
|
|
{
|
|
"epoch": 0.6640841173215274,
|
|
"grad_norm": 0.18248038573986183,
|
|
"learning_rate": 3.7851500789889416e-05,
|
|
"loss": 0.2236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11335260421037674,
|
|
"step": 600,
|
|
"valid_targets_mean": 5847.6,
|
|
"valid_targets_min": 3518
|
|
},
|
|
{
|
|
"epoch": 0.6696181516325401,
|
|
"grad_norm": 0.2003961315282654,
|
|
"learning_rate": 3.8167456556082154e-05,
|
|
"loss": 0.2192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11822617799043655,
|
|
"step": 605,
|
|
"valid_targets_mean": 5279.1,
|
|
"valid_targets_min": 2145
|
|
},
|
|
{
|
|
"epoch": 0.6751521859435529,
|
|
"grad_norm": 0.18542784290777958,
|
|
"learning_rate": 3.8483412322274885e-05,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11544450372457504,
|
|
"step": 610,
|
|
"valid_targets_mean": 5963.4,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 0.6806862202545656,
|
|
"grad_norm": 0.19641417454190124,
|
|
"learning_rate": 3.8799368088467617e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12316196411848068,
|
|
"step": 615,
|
|
"valid_targets_mean": 5615.0,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 0.6862202545655783,
|
|
"grad_norm": 0.1874576999126586,
|
|
"learning_rate": 3.9115323854660355e-05,
|
|
"loss": 0.2255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10751807689666748,
|
|
"step": 620,
|
|
"valid_targets_mean": 5902.0,
|
|
"valid_targets_min": 2804
|
|
},
|
|
{
|
|
"epoch": 0.6917542888765911,
|
|
"grad_norm": 0.24128158732751992,
|
|
"learning_rate": 3.943127962085308e-05,
|
|
"loss": 0.215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10644346475601196,
|
|
"step": 625,
|
|
"valid_targets_mean": 5319.6,
|
|
"valid_targets_min": 2528
|
|
},
|
|
{
|
|
"epoch": 0.6972883231876038,
|
|
"grad_norm": 0.18947226772815468,
|
|
"learning_rate": 3.974723538704582e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10888642817735672,
|
|
"step": 630,
|
|
"valid_targets_mean": 5529.9,
|
|
"valid_targets_min": 2824
|
|
},
|
|
{
|
|
"epoch": 0.7028223574986165,
|
|
"grad_norm": 0.19843731199616285,
|
|
"learning_rate": 3.999999695692766e-05,
|
|
"loss": 0.2117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10342171043157578,
|
|
"step": 635,
|
|
"valid_targets_mean": 5210.4,
|
|
"valid_targets_min": 2273
|
|
},
|
|
{
|
|
"epoch": 0.7083563918096292,
|
|
"grad_norm": 0.18980116649020176,
|
|
"learning_rate": 3.999989044949274e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1191055178642273,
|
|
"step": 640,
|
|
"valid_targets_mean": 5708.2,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 0.7138904261206419,
|
|
"grad_norm": 0.20411494778223988,
|
|
"learning_rate": 3.999963178936651e-05,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10782010108232498,
|
|
"step": 645,
|
|
"valid_targets_mean": 5535.9,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 0.7194244604316546,
|
|
"grad_norm": 0.17255710493878107,
|
|
"learning_rate": 3.999922097851675e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11195490509271622,
|
|
"step": 650,
|
|
"valid_targets_mean": 5587.0,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 0.7249584947426674,
|
|
"grad_norm": 0.18555854481909534,
|
|
"learning_rate": 3.9998658020068784e-05,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10752513259649277,
|
|
"step": 655,
|
|
"valid_targets_mean": 5726.0,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 0.7304925290536801,
|
|
"grad_norm": 0.18167748433444353,
|
|
"learning_rate": 3.999794291830542e-05,
|
|
"loss": 0.2149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11595481634140015,
|
|
"step": 660,
|
|
"valid_targets_mean": 5276.6,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 0.7360265633646929,
|
|
"grad_norm": 0.18817894948873296,
|
|
"learning_rate": 3.9997075678666916e-05,
|
|
"loss": 0.2201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10897151380777359,
|
|
"step": 665,
|
|
"valid_targets_mean": 4917.2,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 0.7415605976757056,
|
|
"grad_norm": 0.18637142332630313,
|
|
"learning_rate": 3.999605630775096e-05,
|
|
"loss": 0.2144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10294731706380844,
|
|
"step": 670,
|
|
"valid_targets_mean": 5920.3,
|
|
"valid_targets_min": 2692
|
|
},
|
|
{
|
|
"epoch": 0.7470946319867183,
|
|
"grad_norm": 0.1785392311851408,
|
|
"learning_rate": 3.999488481331258e-05,
|
|
"loss": 0.2144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09978693723678589,
|
|
"step": 675,
|
|
"valid_targets_mean": 5935.9,
|
|
"valid_targets_min": 2414
|
|
},
|
|
{
|
|
"epoch": 0.7526286662977311,
|
|
"grad_norm": 0.20179880948163537,
|
|
"learning_rate": 3.9993561204264146e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10697966814041138,
|
|
"step": 680,
|
|
"valid_targets_mean": 5286.9,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 0.7581627006087438,
|
|
"grad_norm": 0.36865020584556263,
|
|
"learning_rate": 3.9992085490675234e-05,
|
|
"loss": 0.3649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20275266468524933,
|
|
"step": 685,
|
|
"valid_targets_mean": 4064.5,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 0.7636967349197565,
|
|
"grad_norm": 0.3368021068093887,
|
|
"learning_rate": 3.99904576837726e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19558501243591309,
|
|
"step": 690,
|
|
"valid_targets_mean": 4034.0,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 0.7692307692307693,
|
|
"grad_norm": 0.30594385458337714,
|
|
"learning_rate": 3.998867779594006e-05,
|
|
"loss": 0.3743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19384127855300903,
|
|
"step": 695,
|
|
"valid_targets_mean": 4315.0,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 0.774764803541782,
|
|
"grad_norm": 0.2690179495002744,
|
|
"learning_rate": 3.9986745840718436e-05,
|
|
"loss": 0.3582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1699591875076294,
|
|
"step": 700,
|
|
"valid_targets_mean": 4799.0,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 0.7802988378527946,
|
|
"grad_norm": 0.23845997029468932,
|
|
"learning_rate": 3.998466183280542e-05,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17642243206501007,
|
|
"step": 705,
|
|
"valid_targets_mean": 4921.9,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 0.7858328721638074,
|
|
"grad_norm": 0.23728225244280673,
|
|
"learning_rate": 3.9982425788055466e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17517788708209991,
|
|
"step": 710,
|
|
"valid_targets_mean": 4232.0,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 0.7913669064748201,
|
|
"grad_norm": 0.2404569963674739,
|
|
"learning_rate": 3.998003772347968e-05,
|
|
"loss": 0.3365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17175722122192383,
|
|
"step": 715,
|
|
"valid_targets_mean": 7718.0,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 0.7969009407858328,
|
|
"grad_norm": 0.23498357970697878,
|
|
"learning_rate": 3.9977497657245674e-05,
|
|
"loss": 0.3235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17249642312526703,
|
|
"step": 720,
|
|
"valid_targets_mean": 7411.9,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 0.8024349750968456,
|
|
"grad_norm": 0.21293551752089374,
|
|
"learning_rate": 3.9974805608677465e-05,
|
|
"loss": 0.3035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14262016117572784,
|
|
"step": 725,
|
|
"valid_targets_mean": 6794.7,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 0.8079690094078583,
|
|
"grad_norm": 0.35505785754047214,
|
|
"learning_rate": 3.9971961598255274e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1443863958120346,
|
|
"step": 730,
|
|
"valid_targets_mean": 7931.0,
|
|
"valid_targets_min": 2635
|
|
},
|
|
{
|
|
"epoch": 0.813503043718871,
|
|
"grad_norm": 0.18843536910159747,
|
|
"learning_rate": 3.9968965647615425e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1430182307958603,
|
|
"step": 735,
|
|
"valid_targets_mean": 7836.5,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 0.8190370780298838,
|
|
"grad_norm": 0.20338764208992186,
|
|
"learning_rate": 3.9965817779550126e-05,
|
|
"loss": 0.2866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13604462146759033,
|
|
"step": 740,
|
|
"valid_targets_mean": 8004.7,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 0.8245711123408965,
|
|
"grad_norm": 0.1532663067966529,
|
|
"learning_rate": 3.996251801800734e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12938176095485687,
|
|
"step": 745,
|
|
"valid_targets_mean": 14621.4,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 0.8301051466519093,
|
|
"grad_norm": 0.14561220287672735,
|
|
"learning_rate": 3.9959066388090597e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12480156868696213,
|
|
"step": 750,
|
|
"valid_targets_mean": 13730.6,
|
|
"valid_targets_min": 5627
|
|
},
|
|
{
|
|
"epoch": 0.835639180962922,
|
|
"grad_norm": 0.1407702815224483,
|
|
"learning_rate": 3.995546291605877e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12473263591527939,
|
|
"step": 755,
|
|
"valid_targets_mean": 13851.5,
|
|
"valid_targets_min": 2420
|
|
},
|
|
{
|
|
"epoch": 0.8411732152739347,
|
|
"grad_norm": 0.14372957684020582,
|
|
"learning_rate": 3.995170762932591e-05,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11691296845674515,
|
|
"step": 760,
|
|
"valid_targets_mean": 10187.0,
|
|
"valid_targets_min": 4526
|
|
},
|
|
{
|
|
"epoch": 0.8467072495849475,
|
|
"grad_norm": 0.2323270872047143,
|
|
"learning_rate": 3.994780055646102e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13866698741912842,
|
|
"step": 765,
|
|
"valid_targets_mean": 7247.6,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 0.8522412838959601,
|
|
"grad_norm": 0.22739619288922386,
|
|
"learning_rate": 3.994374172718785e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12408073991537094,
|
|
"step": 770,
|
|
"valid_targets_mean": 7225.9,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 0.8577753182069728,
|
|
"grad_norm": 0.39006362577079406,
|
|
"learning_rate": 3.993953117238466e-05,
|
|
"loss": 0.2878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16530466079711914,
|
|
"step": 775,
|
|
"valid_targets_mean": 4795.3,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 0.8633093525179856,
|
|
"grad_norm": 0.3242688535462117,
|
|
"learning_rate": 3.993516892408398e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1452576369047165,
|
|
"step": 780,
|
|
"valid_targets_mean": 4640.0,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 0.8688433868289983,
|
|
"grad_norm": 0.25941774708005494,
|
|
"learning_rate": 3.993065501547238e-05,
|
|
"loss": 0.3072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14877530932426453,
|
|
"step": 785,
|
|
"valid_targets_mean": 4969.9,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 0.874377421140011,
|
|
"grad_norm": 0.2304050970573324,
|
|
"learning_rate": 3.992598948089024e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1475018709897995,
|
|
"step": 790,
|
|
"valid_targets_mean": 4724.9,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 0.8799114554510238,
|
|
"grad_norm": 0.260910096283691,
|
|
"learning_rate": 3.99211723558314e-05,
|
|
"loss": 0.3167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1795371174812317,
|
|
"step": 795,
|
|
"valid_targets_mean": 3955.6,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 0.8854454897620365,
|
|
"grad_norm": 0.22463559577059178,
|
|
"learning_rate": 3.9916203676943016e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1876264065504074,
|
|
"step": 800,
|
|
"valid_targets_mean": 5335.9,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 0.8909795240730493,
|
|
"grad_norm": 0.21762357743673363,
|
|
"learning_rate": 3.991108348202518e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19074296951293945,
|
|
"step": 805,
|
|
"valid_targets_mean": 5668.2,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 0.896513558384062,
|
|
"grad_norm": 0.22305146954992916,
|
|
"learning_rate": 3.9905811810030676e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15585224330425262,
|
|
"step": 810,
|
|
"valid_targets_mean": 5241.9,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 0.9020475926950747,
|
|
"grad_norm": 0.25423401454979944,
|
|
"learning_rate": 3.9900388701064676e-05,
|
|
"loss": 0.3442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1668250560760498,
|
|
"step": 815,
|
|
"valid_targets_mean": 4501.1,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 0.9075816270060875,
|
|
"grad_norm": 0.22576903339912976,
|
|
"learning_rate": 3.989481419638444e-05,
|
|
"loss": 0.3375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16919755935668945,
|
|
"step": 820,
|
|
"valid_targets_mean": 5344.1,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 0.9131156613171002,
|
|
"grad_norm": 0.25973565946214083,
|
|
"learning_rate": 3.988908833839899e-05,
|
|
"loss": 0.3401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16168487071990967,
|
|
"step": 825,
|
|
"valid_targets_mean": 4667.0,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 0.9186496956281129,
|
|
"grad_norm": 0.24009224852265348,
|
|
"learning_rate": 3.988321117066881e-05,
|
|
"loss": 0.3335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17461256682872772,
|
|
"step": 830,
|
|
"valid_targets_mean": 4774.7,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 0.9241837299391257,
|
|
"grad_norm": 0.24743671307697393,
|
|
"learning_rate": 3.987718273790548e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16680864989757538,
|
|
"step": 835,
|
|
"valid_targets_mean": 5840.8,
|
|
"valid_targets_min": 2338
|
|
},
|
|
{
|
|
"epoch": 0.9297177642501383,
|
|
"grad_norm": 0.26168458507079656,
|
|
"learning_rate": 3.9871003085971364e-05,
|
|
"loss": 0.3395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17075331509113312,
|
|
"step": 840,
|
|
"valid_targets_mean": 4816.4,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 0.935251798561151,
|
|
"grad_norm": 0.23232107381013864,
|
|
"learning_rate": 3.986467226187926e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1656631976366043,
|
|
"step": 845,
|
|
"valid_targets_mean": 5732.7,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 0.9407858328721638,
|
|
"grad_norm": 0.27341206012899355,
|
|
"learning_rate": 3.985819031379203e-05,
|
|
"loss": 0.3265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14391395449638367,
|
|
"step": 850,
|
|
"valid_targets_mean": 4115.7,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 0.9463198671831765,
|
|
"grad_norm": 0.22767348437154553,
|
|
"learning_rate": 3.9851557291022224e-05,
|
|
"loss": 0.3333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15771305561065674,
|
|
"step": 855,
|
|
"valid_targets_mean": 5378.9,
|
|
"valid_targets_min": 2469
|
|
},
|
|
{
|
|
"epoch": 0.9518539014941892,
|
|
"grad_norm": 0.228541958830484,
|
|
"learning_rate": 3.984477324403174e-05,
|
|
"loss": 0.3227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1492053121328354,
|
|
"step": 860,
|
|
"valid_targets_mean": 4137.5,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 0.957387935805202,
|
|
"grad_norm": 0.20561509877155584,
|
|
"learning_rate": 3.983783822443141e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15702982246875763,
|
|
"step": 865,
|
|
"valid_targets_mean": 4230.2,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 0.9629219701162147,
|
|
"grad_norm": 0.22705250814177763,
|
|
"learning_rate": 3.9830752284980624e-05,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16598166525363922,
|
|
"step": 870,
|
|
"valid_targets_mean": 4060.4,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 0.9684560044272275,
|
|
"grad_norm": 0.213751893760191,
|
|
"learning_rate": 3.982351547958691e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1574791520833969,
|
|
"step": 875,
|
|
"valid_targets_mean": 4318.9,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 0.9739900387382402,
|
|
"grad_norm": 0.2299802209897319,
|
|
"learning_rate": 3.9816127863305535e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15395411849021912,
|
|
"step": 880,
|
|
"valid_targets_mean": 4196.6,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 0.9795240730492529,
|
|
"grad_norm": 0.22056896946644247,
|
|
"learning_rate": 3.9808589492339093e-05,
|
|
"loss": 0.3195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15959514677524567,
|
|
"step": 885,
|
|
"valid_targets_mean": 4083.7,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 0.9850581073602657,
|
|
"grad_norm": 0.212685778198781,
|
|
"learning_rate": 3.9800900424037074e-05,
|
|
"loss": 0.3183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1645464152097702,
|
|
"step": 890,
|
|
"valid_targets_mean": 4253.7,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 0.9905921416712784,
|
|
"grad_norm": 0.1917607058601535,
|
|
"learning_rate": 3.979306071689542e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14884722232818604,
|
|
"step": 895,
|
|
"valid_targets_mean": 4333.8,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 0.9961261759822911,
|
|
"grad_norm": 0.2288504359536138,
|
|
"learning_rate": 3.978507043055609e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15858103334903717,
|
|
"step": 900,
|
|
"valid_targets_mean": 4365.4,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 1.0011068068622024,
|
|
"grad_norm": 0.28075043239026515,
|
|
"learning_rate": 3.977692962580658e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15253998339176178,
|
|
"step": 905,
|
|
"valid_targets_mean": 7176.8,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 1.0066408411732153,
|
|
"grad_norm": 0.25456043767832615,
|
|
"learning_rate": 3.976863836457951e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15757876634597778,
|
|
"step": 910,
|
|
"valid_targets_mean": 7152.2,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 1.012174875484228,
|
|
"grad_norm": 0.23203080605619728,
|
|
"learning_rate": 3.9760196709952105e-05,
|
|
"loss": 0.3031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390572041273117,
|
|
"step": 915,
|
|
"valid_targets_mean": 6413.1,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 1.0177089097952408,
|
|
"grad_norm": 0.19501821276663242,
|
|
"learning_rate": 3.975160472614575e-05,
|
|
"loss": 0.2928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14973397552967072,
|
|
"step": 920,
|
|
"valid_targets_mean": 6742.4,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 1.0232429441062534,
|
|
"grad_norm": 0.19217358370469367,
|
|
"learning_rate": 3.974286247852547e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13378290832042694,
|
|
"step": 925,
|
|
"valid_targets_mean": 6891.3,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 1.0287769784172662,
|
|
"grad_norm": 0.1958278238627834,
|
|
"learning_rate": 3.973397003359944e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1444612592458725,
|
|
"step": 930,
|
|
"valid_targets_mean": 6714.7,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 1.0343110127282789,
|
|
"grad_norm": 0.17503375002909113,
|
|
"learning_rate": 3.972492745901853e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14504456520080566,
|
|
"step": 935,
|
|
"valid_targets_mean": 7105.4,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 1.0398450470392917,
|
|
"grad_norm": 0.19689281067552836,
|
|
"learning_rate": 3.9715734823575694e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1314559280872345,
|
|
"step": 940,
|
|
"valid_targets_mean": 6475.6,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 1.0453790813503043,
|
|
"grad_norm": 0.17574426136805996,
|
|
"learning_rate": 3.970639219720554e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14803387224674225,
|
|
"step": 945,
|
|
"valid_targets_mean": 6744.1,
|
|
"valid_targets_min": 2793
|
|
},
|
|
{
|
|
"epoch": 1.0509131156613172,
|
|
"grad_norm": 0.1785812212021472,
|
|
"learning_rate": 3.969689965098375e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12867371737957,
|
|
"step": 950,
|
|
"valid_targets_mean": 6725.4,
|
|
"valid_targets_min": 2520
|
|
},
|
|
{
|
|
"epoch": 1.0564471499723298,
|
|
"grad_norm": 0.2020877145542382,
|
|
"learning_rate": 3.968725725712652e-05,
|
|
"loss": 0.2812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1310427039861679,
|
|
"step": 955,
|
|
"valid_targets_mean": 6286.6,
|
|
"valid_targets_min": 2643
|
|
},
|
|
{
|
|
"epoch": 1.0619811842833426,
|
|
"grad_norm": 0.17480637632142337,
|
|
"learning_rate": 3.9677465088990086e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14058233797550201,
|
|
"step": 960,
|
|
"valid_targets_mean": 6647.1,
|
|
"valid_targets_min": 2218
|
|
},
|
|
{
|
|
"epoch": 1.0675152185943553,
|
|
"grad_norm": 0.18176363686246289,
|
|
"learning_rate": 3.966752322107008e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14324791729450226,
|
|
"step": 965,
|
|
"valid_targets_mean": 6991.6,
|
|
"valid_targets_min": 3271
|
|
},
|
|
{
|
|
"epoch": 1.073049252905368,
|
|
"grad_norm": 0.18316339501001513,
|
|
"learning_rate": 3.965743172900101e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1419498324394226,
|
|
"step": 970,
|
|
"valid_targets_mean": 7175.3,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 1.0785832872163807,
|
|
"grad_norm": 0.17368634878804703,
|
|
"learning_rate": 3.964719068955569e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12790296971797943,
|
|
"step": 975,
|
|
"valid_targets_mean": 6327.0,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 1.0841173215273934,
|
|
"grad_norm": 0.18090420263161394,
|
|
"learning_rate": 3.963680018064462e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16885483264923096,
|
|
"step": 980,
|
|
"valid_targets_mean": 7451.6,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 1.0896513558384062,
|
|
"grad_norm": 0.1697452461712111,
|
|
"learning_rate": 3.962626028131545e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15592683851718903,
|
|
"step": 985,
|
|
"valid_targets_mean": 7721.9,
|
|
"valid_targets_min": 3662
|
|
},
|
|
{
|
|
"epoch": 1.0951853901494188,
|
|
"grad_norm": 0.17748411877088877,
|
|
"learning_rate": 3.9615571071752305e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14733010530471802,
|
|
"step": 990,
|
|
"valid_targets_mean": 7039.7,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 1.1007194244604317,
|
|
"grad_norm": 0.17111740120076824,
|
|
"learning_rate": 3.960473263327523e-05,
|
|
"loss": 0.288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15300166606903076,
|
|
"step": 995,
|
|
"valid_targets_mean": 7115.9,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 1.1062534587714443,
|
|
"grad_norm": 0.16650248420319466,
|
|
"learning_rate": 3.9593745048339564e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12374699115753174,
|
|
"step": 1000,
|
|
"valid_targets_mean": 6713.1,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 1.1117874930824572,
|
|
"grad_norm": 0.1678513032654032,
|
|
"learning_rate": 3.958260840053528e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383708268404007,
|
|
"step": 1005,
|
|
"valid_targets_mean": 6488.6,
|
|
"valid_targets_min": 2556
|
|
},
|
|
{
|
|
"epoch": 1.1173215273934698,
|
|
"grad_norm": 0.18025520979543855,
|
|
"learning_rate": 3.957132277458642e-05,
|
|
"loss": 0.2777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14708085358142853,
|
|
"step": 1010,
|
|
"valid_targets_mean": 7216.3,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 1.1228555617044826,
|
|
"grad_norm": 0.19456611664282664,
|
|
"learning_rate": 3.9559888256350335e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1300152689218521,
|
|
"step": 1015,
|
|
"valid_targets_mean": 6421.9,
|
|
"valid_targets_min": 2444
|
|
},
|
|
{
|
|
"epoch": 1.1283895960154953,
|
|
"grad_norm": 0.16137239816538576,
|
|
"learning_rate": 3.9548304932817164e-05,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1205127164721489,
|
|
"step": 1020,
|
|
"valid_targets_mean": 6405.9,
|
|
"valid_targets_min": 2545
|
|
},
|
|
{
|
|
"epoch": 1.1339236303265081,
|
|
"grad_norm": 0.17348272652206792,
|
|
"learning_rate": 3.9536572892109066e-05,
|
|
"loss": 0.2678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12615039944648743,
|
|
"step": 1025,
|
|
"valid_targets_mean": 6837.7,
|
|
"valid_targets_min": 2151
|
|
},
|
|
{
|
|
"epoch": 1.1394576646375207,
|
|
"grad_norm": 0.18148333043463813,
|
|
"learning_rate": 3.9524692223479624e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15118034183979034,
|
|
"step": 1030,
|
|
"valid_targets_mean": 7361.1,
|
|
"valid_targets_min": 2378
|
|
},
|
|
{
|
|
"epoch": 1.1449916989485334,
|
|
"grad_norm": 0.18131447382404747,
|
|
"learning_rate": 3.951266301731311e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340409368276596,
|
|
"step": 1035,
|
|
"valid_targets_mean": 6319.2,
|
|
"valid_targets_min": 2356
|
|
},
|
|
{
|
|
"epoch": 1.1505257332595462,
|
|
"grad_norm": 0.163951472238804,
|
|
"learning_rate": 3.950048536512383e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11924827098846436,
|
|
"step": 1040,
|
|
"valid_targets_mean": 6630.8,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 1.156059767570559,
|
|
"grad_norm": 0.17678653243995013,
|
|
"learning_rate": 3.948815935955542e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.141445592045784,
|
|
"step": 1045,
|
|
"valid_targets_mean": 6972.9,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 1.1615938018815717,
|
|
"grad_norm": 0.16588165744271002,
|
|
"learning_rate": 3.947568509438013e-05,
|
|
"loss": 0.2737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13333432376384735,
|
|
"step": 1050,
|
|
"valid_targets_mean": 6707.8,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 1.1671278361925843,
|
|
"grad_norm": 0.17341045683221493,
|
|
"learning_rate": 3.946306266449814e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12358522415161133,
|
|
"step": 1055,
|
|
"valid_targets_mean": 6739.1,
|
|
"valid_targets_min": 2150
|
|
},
|
|
{
|
|
"epoch": 1.1726618705035972,
|
|
"grad_norm": 0.17207704447771158,
|
|
"learning_rate": 3.945029216593682e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13025973737239838,
|
|
"step": 1060,
|
|
"valid_targets_mean": 6861.2,
|
|
"valid_targets_min": 2514
|
|
},
|
|
{
|
|
"epoch": 1.1781959048146098,
|
|
"grad_norm": 0.16244557894426362,
|
|
"learning_rate": 3.943737369584997e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1395055502653122,
|
|
"step": 1065,
|
|
"valid_targets_mean": 7224.6,
|
|
"valid_targets_min": 2698
|
|
},
|
|
{
|
|
"epoch": 1.1837299391256226,
|
|
"grad_norm": 0.16903966101182416,
|
|
"learning_rate": 3.942430735251714e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13284917175769806,
|
|
"step": 1070,
|
|
"valid_targets_mean": 6807.6,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 1.1892639734366353,
|
|
"grad_norm": 0.5838918039734643,
|
|
"learning_rate": 3.941109323534284e-05,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13204120099544525,
|
|
"step": 1075,
|
|
"valid_targets_mean": 6986.2,
|
|
"valid_targets_min": 2280
|
|
},
|
|
{
|
|
"epoch": 1.194798007747648,
|
|
"grad_norm": 0.1619581813660958,
|
|
"learning_rate": 3.9397731444855784e-05,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1205381453037262,
|
|
"step": 1080,
|
|
"valid_targets_mean": 6668.9,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 1.2003320420586607,
|
|
"grad_norm": 0.17972197349829508,
|
|
"learning_rate": 3.9384222082708154e-05,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14235249161720276,
|
|
"step": 1085,
|
|
"valid_targets_mean": 7348.7,
|
|
"valid_targets_min": 2266
|
|
},
|
|
{
|
|
"epoch": 1.2058660763696736,
|
|
"grad_norm": 0.17975739708819172,
|
|
"learning_rate": 3.93705652516748e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13401982188224792,
|
|
"step": 1090,
|
|
"valid_targets_mean": 6718.5,
|
|
"valid_targets_min": 1852
|
|
},
|
|
{
|
|
"epoch": 1.2114001106806862,
|
|
"grad_norm": 0.16450821294263088,
|
|
"learning_rate": 3.935676105565249e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11176055669784546,
|
|
"step": 1095,
|
|
"valid_targets_mean": 6317.0,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 1.2169341449916988,
|
|
"grad_norm": 0.18586118472743715,
|
|
"learning_rate": 3.934280959965905e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12307751178741455,
|
|
"step": 1100,
|
|
"valid_targets_mean": 6423.0,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 1.2224681793027117,
|
|
"grad_norm": 0.16354727134618027,
|
|
"learning_rate": 3.932871098983266e-05,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13544310629367828,
|
|
"step": 1105,
|
|
"valid_targets_mean": 6961.5,
|
|
"valid_targets_min": 2011
|
|
},
|
|
{
|
|
"epoch": 1.2280022136137245,
|
|
"grad_norm": 0.26445155244963164,
|
|
"learning_rate": 3.931446533343097e-05,
|
|
"loss": 0.3569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16654008626937866,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4264.4,
|
|
"valid_targets_min": 1664
|
|
},
|
|
{
|
|
"epoch": 1.2335362479247372,
|
|
"grad_norm": 0.259149059080008,
|
|
"learning_rate": 3.9300072738830326e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17894653975963593,
|
|
"step": 1115,
|
|
"valid_targets_mean": 4403.8,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 1.2390702822357498,
|
|
"grad_norm": 0.2532394954077379,
|
|
"learning_rate": 3.928553331552493e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17868827283382416,
|
|
"step": 1120,
|
|
"valid_targets_mean": 4392.9,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 1.2446043165467626,
|
|
"grad_norm": 0.2355989102476764,
|
|
"learning_rate": 3.9270847174126e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16397854685783386,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4179.3,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 1.2501383508577752,
|
|
"grad_norm": 0.23545544996639933,
|
|
"learning_rate": 3.925601442636095e-05,
|
|
"loss": 0.3505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17215688526630402,
|
|
"step": 1130,
|
|
"valid_targets_mean": 4410.1,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 1.255672385168788,
|
|
"grad_norm": 0.23534951612683017,
|
|
"learning_rate": 3.924103518507251e-05,
|
|
"loss": 0.3587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1638919711112976,
|
|
"step": 1135,
|
|
"valid_targets_mean": 4081.9,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 1.2612064194798007,
|
|
"grad_norm": 0.23489838611021904,
|
|
"learning_rate": 3.922590956421792e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18799881637096405,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4430.3,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 1.2667404537908136,
|
|
"grad_norm": 0.231684281912992,
|
|
"learning_rate": 3.921063767886799e-05,
|
|
"loss": 0.3596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15110845863819122,
|
|
"step": 1145,
|
|
"valid_targets_mean": 3639.0,
|
|
"valid_targets_min": 1609
|
|
},
|
|
{
|
|
"epoch": 1.2722744881018262,
|
|
"grad_norm": 0.22883238121740831,
|
|
"learning_rate": 3.9195219645206286e-05,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17991988360881805,
|
|
"step": 1150,
|
|
"valid_targets_mean": 4169.2,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 1.277808522412839,
|
|
"grad_norm": 0.23839713321871983,
|
|
"learning_rate": 3.917965558052822e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18226949870586395,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4387.5,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 1.2833425567238517,
|
|
"grad_norm": 0.22438517731822086,
|
|
"learning_rate": 3.9163945603240144e-05,
|
|
"loss": 0.3532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18129350244998932,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4586.4,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 1.2888765910348643,
|
|
"grad_norm": 0.2470833669018919,
|
|
"learning_rate": 3.914808983285848e-05,
|
|
"loss": 0.3448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15314365923404694,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4150.2,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 1.2944106253458771,
|
|
"grad_norm": 0.22268292074124693,
|
|
"learning_rate": 3.913208839000882e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1547563523054123,
|
|
"step": 1170,
|
|
"valid_targets_mean": 4058.3,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 1.29994465965689,
|
|
"grad_norm": 0.23898654391290106,
|
|
"learning_rate": 3.911594139642493e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17526842653751373,
|
|
"step": 1175,
|
|
"valid_targets_mean": 4309.4,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 1.3054786939679026,
|
|
"grad_norm": 0.21380823907483926,
|
|
"learning_rate": 3.909964897494793e-05,
|
|
"loss": 0.3351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16898113489151,
|
|
"step": 1180,
|
|
"valid_targets_mean": 4254.8,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 1.3110127282789152,
|
|
"grad_norm": 0.26882185157487465,
|
|
"learning_rate": 3.9083211249525277e-05,
|
|
"loss": 0.3429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16856753826141357,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3897.5,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 1.316546762589928,
|
|
"grad_norm": 0.2309951770435585,
|
|
"learning_rate": 3.906662834520987e-05,
|
|
"loss": 0.345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1852109432220459,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4873.8,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 1.3220807969009407,
|
|
"grad_norm": 0.21792217488268217,
|
|
"learning_rate": 3.904990038815907e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1790103316307068,
|
|
"step": 1195,
|
|
"valid_targets_mean": 4314.8,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 1.3276148312119536,
|
|
"grad_norm": 0.23123270794811962,
|
|
"learning_rate": 3.903302750563376e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14537380635738373,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3668.5,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 1.3331488655229662,
|
|
"grad_norm": 0.2693499746386558,
|
|
"learning_rate": 3.901600982599737e-05,
|
|
"loss": 0.3463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15684859454631805,
|
|
"step": 1205,
|
|
"valid_targets_mean": 4198.7,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 1.338682899833979,
|
|
"grad_norm": 0.21173376275796202,
|
|
"learning_rate": 3.89988474787149e-05,
|
|
"loss": 0.3347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16322259604930878,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4426.8,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 1.3442169341449917,
|
|
"grad_norm": 0.24726409366171126,
|
|
"learning_rate": 3.898154059435192e-05,
|
|
"loss": 0.3403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15184156596660614,
|
|
"step": 1215,
|
|
"valid_targets_mean": 3844.5,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 1.3497509684560045,
|
|
"grad_norm": 0.21934453532998735,
|
|
"learning_rate": 3.896408930457361e-05,
|
|
"loss": 0.3386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16550517082214355,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4607.4,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 1.3552850027670171,
|
|
"grad_norm": 0.22442116015233177,
|
|
"learning_rate": 3.8946493742143744e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17187415063381195,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4620.0,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 1.3608190370780298,
|
|
"grad_norm": 0.20627885478063304,
|
|
"learning_rate": 3.8928754040923645e-05,
|
|
"loss": 0.329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1436220407485962,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4062.0,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 1.3663530713890426,
|
|
"grad_norm": 0.2185699711014017,
|
|
"learning_rate": 3.891087033587122e-05,
|
|
"loss": 0.3308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1684235781431198,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4552.9,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 1.3718871057000555,
|
|
"grad_norm": 0.21370744728285102,
|
|
"learning_rate": 3.889284276303992e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14738354086875916,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4288.0,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 1.377421140011068,
|
|
"grad_norm": 0.22076208540817957,
|
|
"learning_rate": 3.887467145957768e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16767971217632294,
|
|
"step": 1245,
|
|
"valid_targets_mean": 4494.9,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 1.3829551743220807,
|
|
"grad_norm": 0.2258390472368909,
|
|
"learning_rate": 3.8856356563725874e-05,
|
|
"loss": 0.3273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17086593806743622,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4560.7,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 1.3884892086330936,
|
|
"grad_norm": 0.23613538896669242,
|
|
"learning_rate": 3.883789821481832e-05,
|
|
"loss": 0.3303,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17400549352169037,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4525.4,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 1.3940232429441062,
|
|
"grad_norm": 0.23064138194881575,
|
|
"learning_rate": 3.881929655328016e-05,
|
|
"loss": 0.3311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1510716825723648,
|
|
"step": 1260,
|
|
"valid_targets_mean": 4337.6,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 1.399557277255119,
|
|
"grad_norm": 0.2400505261171503,
|
|
"learning_rate": 3.8800551720626794e-05,
|
|
"loss": 0.331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14990423619747162,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4064.8,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 1.4050913115661317,
|
|
"grad_norm": 0.24599218182439966,
|
|
"learning_rate": 3.878166385946286e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16412757337093353,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4271.5,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 1.4106253458771445,
|
|
"grad_norm": 0.21566388081545748,
|
|
"learning_rate": 3.876263311348108e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1625698059797287,
|
|
"step": 1275,
|
|
"valid_targets_mean": 4530.3,
|
|
"valid_targets_min": 1812
|
|
},
|
|
{
|
|
"epoch": 1.4161593801881571,
|
|
"grad_norm": 0.20195854254771664,
|
|
"learning_rate": 3.874345962746121e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16930294036865234,
|
|
"step": 1280,
|
|
"valid_targets_mean": 5182.0,
|
|
"valid_targets_min": 2171
|
|
},
|
|
{
|
|
"epoch": 1.42169341449917,
|
|
"grad_norm": 0.21607264069691856,
|
|
"learning_rate": 3.8724143547268924e-05,
|
|
"loss": 0.3215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.145011767745018,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4341.3,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 1.4272274488101826,
|
|
"grad_norm": 0.2237693296652014,
|
|
"learning_rate": 3.870468501985471e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15807199478149414,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4484.4,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 1.4327614831211952,
|
|
"grad_norm": 0.22529943616165538,
|
|
"learning_rate": 3.8685084193252745e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15684156119823456,
|
|
"step": 1295,
|
|
"valid_targets_mean": 4192.0,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 1.438295517432208,
|
|
"grad_norm": 0.24576986248520252,
|
|
"learning_rate": 3.866534121657977e-05,
|
|
"loss": 0.3152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1745026856660843,
|
|
"step": 1300,
|
|
"valid_targets_mean": 4869.7,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 1.443829551743221,
|
|
"grad_norm": 0.21766701909656544,
|
|
"learning_rate": 3.8645456240033946e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14245377480983734,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3970.9,
|
|
"valid_targets_min": 1643
|
|
},
|
|
{
|
|
"epoch": 1.4493635860542335,
|
|
"grad_norm": 0.21818942722818285,
|
|
"learning_rate": 3.8625429414893763e-05,
|
|
"loss": 0.3247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14716601371765137,
|
|
"step": 1310,
|
|
"valid_targets_mean": 4239.2,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 1.4548976203652462,
|
|
"grad_norm": 0.21251289322826,
|
|
"learning_rate": 3.860526089351679e-05,
|
|
"loss": 0.3214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16402918100357056,
|
|
"step": 1315,
|
|
"valid_targets_mean": 4571.5,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 1.460431654676259,
|
|
"grad_norm": 0.2336086082990606,
|
|
"learning_rate": 3.858495082933864e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.163013756275177,
|
|
"step": 1320,
|
|
"valid_targets_mean": 4433.2,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 1.4659656889872716,
|
|
"grad_norm": 0.2396576832714152,
|
|
"learning_rate": 3.8564499376871694e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15132392942905426,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4008.7,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 1.4714997232982845,
|
|
"grad_norm": 0.18098797137211153,
|
|
"learning_rate": 3.854390669170397e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10818668454885483,
|
|
"step": 1330,
|
|
"valid_targets_mean": 5755.9,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 1.4770337576092971,
|
|
"grad_norm": 0.1787066797313889,
|
|
"learning_rate": 3.852317293049796e-05,
|
|
"loss": 0.2039,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0983661413192749,
|
|
"step": 1335,
|
|
"valid_targets_mean": 5835.6,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 1.48256779192031,
|
|
"grad_norm": 0.17085975919111254,
|
|
"learning_rate": 3.8502298250989415e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09475555270910263,
|
|
"step": 1340,
|
|
"valid_targets_mean": 5367.4,
|
|
"valid_targets_min": 2272
|
|
},
|
|
{
|
|
"epoch": 1.4881018262313226,
|
|
"grad_norm": 0.1681837566049417,
|
|
"learning_rate": 3.8481282811986117e-05,
|
|
"loss": 0.1916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08345619589090347,
|
|
"step": 1345,
|
|
"valid_targets_mean": 5459.6,
|
|
"valid_targets_min": 1924
|
|
},
|
|
{
|
|
"epoch": 1.4936358605423354,
|
|
"grad_norm": 0.15219321287099127,
|
|
"learning_rate": 3.8460126773366735e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09989086538553238,
|
|
"step": 1350,
|
|
"valid_targets_mean": 5662.9,
|
|
"valid_targets_min": 2081
|
|
},
|
|
{
|
|
"epoch": 1.499169894853348,
|
|
"grad_norm": 0.15555993992068823,
|
|
"learning_rate": 3.843883029607956e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09627845138311386,
|
|
"step": 1355,
|
|
"valid_targets_mean": 5408.0,
|
|
"valid_targets_min": 1663
|
|
},
|
|
{
|
|
"epoch": 1.5047039291643607,
|
|
"grad_norm": 0.1575554029684579,
|
|
"learning_rate": 3.841739354214129e-05,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10124339908361435,
|
|
"step": 1360,
|
|
"valid_targets_mean": 6090.0,
|
|
"valid_targets_min": 3233
|
|
},
|
|
{
|
|
"epoch": 1.5102379634753735,
|
|
"grad_norm": 0.17073776730783605,
|
|
"learning_rate": 3.839581667463582e-05,
|
|
"loss": 0.1938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08516725152730942,
|
|
"step": 1365,
|
|
"valid_targets_mean": 5283.3,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 1.5157719977863864,
|
|
"grad_norm": 0.1610335812807677,
|
|
"learning_rate": 3.837409985771297e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10468452423810959,
|
|
"step": 1370,
|
|
"valid_targets_mean": 5841.9,
|
|
"valid_targets_min": 3074
|
|
},
|
|
{
|
|
"epoch": 1.521306032097399,
|
|
"grad_norm": 0.15022862826788239,
|
|
"learning_rate": 3.835224325658724e-05,
|
|
"loss": 0.1872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09101595729589462,
|
|
"step": 1375,
|
|
"valid_targets_mean": 5844.2,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 1.5268400664084116,
|
|
"grad_norm": 0.15473274276046864,
|
|
"learning_rate": 3.8330247037536587e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09456873685121536,
|
|
"step": 1380,
|
|
"valid_targets_mean": 5735.6,
|
|
"valid_targets_min": 2746
|
|
},
|
|
{
|
|
"epoch": 1.5323741007194245,
|
|
"grad_norm": 0.16114760241509796,
|
|
"learning_rate": 3.830811136790112e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09572803229093552,
|
|
"step": 1385,
|
|
"valid_targets_mean": 5778.7,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 1.5379081350304373,
|
|
"grad_norm": 0.16418451727918168,
|
|
"learning_rate": 3.8285836416081846e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08751516789197922,
|
|
"step": 1390,
|
|
"valid_targets_mean": 5638.1,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 1.54344216934145,
|
|
"grad_norm": 0.15617353071730314,
|
|
"learning_rate": 3.826342235153939e-05,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0809503123164177,
|
|
"step": 1395,
|
|
"valid_targets_mean": 5649.6,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 1.5489762036524626,
|
|
"grad_norm": 0.1687895202850322,
|
|
"learning_rate": 3.82408693447927e-05,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09529554843902588,
|
|
"step": 1400,
|
|
"valid_targets_mean": 5870.3,
|
|
"valid_targets_min": 3122
|
|
},
|
|
{
|
|
"epoch": 1.5545102379634754,
|
|
"grad_norm": 0.15097236666408503,
|
|
"learning_rate": 3.821817756741774e-05,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08635803312063217,
|
|
"step": 1405,
|
|
"valid_targets_mean": 5898.3,
|
|
"valid_targets_min": 2722
|
|
},
|
|
{
|
|
"epoch": 1.560044272274488,
|
|
"grad_norm": 0.17377232093918724,
|
|
"learning_rate": 3.819534719204621e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0999286100268364,
|
|
"step": 1410,
|
|
"valid_targets_mean": 5430.9,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 1.565578306585501,
|
|
"grad_norm": 0.15515421991156828,
|
|
"learning_rate": 3.8172378392364215e-05,
|
|
"loss": 0.1846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09400574117898941,
|
|
"step": 1415,
|
|
"valid_targets_mean": 5711.7,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 1.5711123408965135,
|
|
"grad_norm": 0.18249126639634058,
|
|
"learning_rate": 3.814927134311095e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08851730823516846,
|
|
"step": 1420,
|
|
"valid_targets_mean": 5817.8,
|
|
"valid_targets_min": 3349
|
|
},
|
|
{
|
|
"epoch": 1.5766463752075262,
|
|
"grad_norm": 0.17463154504582576,
|
|
"learning_rate": 3.812602622007736e-05,
|
|
"loss": 0.1873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10670870542526245,
|
|
"step": 1425,
|
|
"valid_targets_mean": 6213.9,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 1.582180409518539,
|
|
"grad_norm": 0.15520876012616955,
|
|
"learning_rate": 3.810264320010482e-05,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10215985029935837,
|
|
"step": 1430,
|
|
"valid_targets_mean": 5960.8,
|
|
"valid_targets_min": 1760
|
|
},
|
|
{
|
|
"epoch": 1.5877144438295518,
|
|
"grad_norm": 0.15302405983671816,
|
|
"learning_rate": 3.807912246108377e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08349398523569107,
|
|
"step": 1435,
|
|
"valid_targets_mean": 5431.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 1.5932484781405645,
|
|
"grad_norm": 0.1638805599904952,
|
|
"learning_rate": 3.805546418195237e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09607791155576706,
|
|
"step": 1440,
|
|
"valid_targets_mean": 5619.5,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 1.598782512451577,
|
|
"grad_norm": 0.16184784396834395,
|
|
"learning_rate": 3.8031668542695164e-05,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09894770383834839,
|
|
"step": 1445,
|
|
"valid_targets_mean": 5685.0,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 1.60431654676259,
|
|
"grad_norm": 0.14470877256991516,
|
|
"learning_rate": 3.800773572434166e-05,
|
|
"loss": 0.1828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08206525444984436,
|
|
"step": 1450,
|
|
"valid_targets_mean": 5896.3,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 1.6098505810736028,
|
|
"grad_norm": 0.1616847234283941,
|
|
"learning_rate": 3.7983665908965006e-05,
|
|
"loss": 0.1882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08751017600297928,
|
|
"step": 1455,
|
|
"valid_targets_mean": 5286.9,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 1.6153846153846154,
|
|
"grad_norm": 0.16780432753063682,
|
|
"learning_rate": 3.795945927968055e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09756562858819962,
|
|
"step": 1460,
|
|
"valid_targets_mean": 5907.0,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 1.620918649695628,
|
|
"grad_norm": 0.15430952235574313,
|
|
"learning_rate": 3.7935116020644496e-05,
|
|
"loss": 0.1736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0840432420372963,
|
|
"step": 1465,
|
|
"valid_targets_mean": 5677.7,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 1.626452684006641,
|
|
"grad_norm": 0.17774260453796328,
|
|
"learning_rate": 3.791063631705247e-05,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08377804607152939,
|
|
"step": 1470,
|
|
"valid_targets_mean": 5690.5,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 1.6319867183176535,
|
|
"grad_norm": 0.15507806329693358,
|
|
"learning_rate": 3.7886020355138144e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09811588376760483,
|
|
"step": 1475,
|
|
"valid_targets_mean": 5799.1,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 1.6375207526286664,
|
|
"grad_norm": 0.16053247401611923,
|
|
"learning_rate": 3.7861268322171775e-05,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1035495176911354,
|
|
"step": 1480,
|
|
"valid_targets_mean": 5970.1,
|
|
"valid_targets_min": 3079
|
|
},
|
|
{
|
|
"epoch": 1.643054786939679,
|
|
"grad_norm": 0.14849765401498888,
|
|
"learning_rate": 3.7836380406458824e-05,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08759353309869766,
|
|
"step": 1485,
|
|
"valid_targets_mean": 5799.0,
|
|
"valid_targets_min": 2123
|
|
},
|
|
{
|
|
"epoch": 1.6485888212506916,
|
|
"grad_norm": 0.1605302752895466,
|
|
"learning_rate": 3.781135679733849e-05,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07913342863321304,
|
|
"step": 1490,
|
|
"valid_targets_mean": 5474.9,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 1.6541228555617045,
|
|
"grad_norm": 0.15361664307520298,
|
|
"learning_rate": 3.77861976851823e-05,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07888994365930557,
|
|
"step": 1495,
|
|
"valid_targets_mean": 5696.5,
|
|
"valid_targets_min": 2150
|
|
},
|
|
{
|
|
"epoch": 1.6596568898727173,
|
|
"grad_norm": 0.16091667110596974,
|
|
"learning_rate": 3.7760903261392614e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08942961692810059,
|
|
"step": 1500,
|
|
"valid_targets_mean": 5482.0,
|
|
"valid_targets_min": 1925
|
|
},
|
|
{
|
|
"epoch": 1.66519092418373,
|
|
"grad_norm": 0.15842859658005418,
|
|
"learning_rate": 3.773547371840124e-05,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08590523153543472,
|
|
"step": 1505,
|
|
"valid_targets_mean": 5700.8,
|
|
"valid_targets_min": 2524
|
|
},
|
|
{
|
|
"epoch": 1.6707249584947426,
|
|
"grad_norm": 0.15930986743197398,
|
|
"learning_rate": 3.77099092496679e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09003861993551254,
|
|
"step": 1510,
|
|
"valid_targets_mean": 5443.2,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 1.6762589928057554,
|
|
"grad_norm": 0.15717236223857892,
|
|
"learning_rate": 3.768421004967879e-05,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08390811085700989,
|
|
"step": 1515,
|
|
"valid_targets_mean": 5506.9,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 1.6817930271167683,
|
|
"grad_norm": 0.16274698844769872,
|
|
"learning_rate": 3.7658376313945104e-05,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08415307849645615,
|
|
"step": 1520,
|
|
"valid_targets_mean": 5554.7,
|
|
"valid_targets_min": 2289
|
|
},
|
|
{
|
|
"epoch": 1.6873270614277809,
|
|
"grad_norm": 0.15826237180855693,
|
|
"learning_rate": 3.763240823900155e-05,
|
|
"loss": 0.1771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08058290928602219,
|
|
"step": 1525,
|
|
"valid_targets_mean": 5283.6,
|
|
"valid_targets_min": 2511
|
|
},
|
|
{
|
|
"epoch": 1.6928610957387935,
|
|
"grad_norm": 0.15034453834066436,
|
|
"learning_rate": 3.7606306022404824e-05,
|
|
"loss": 0.172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08194555342197418,
|
|
"step": 1530,
|
|
"valid_targets_mean": 5367.8,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 1.6983951300498064,
|
|
"grad_norm": 0.1583222022360076,
|
|
"learning_rate": 3.7580069862732145e-05,
|
|
"loss": 0.1757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08407051116228104,
|
|
"step": 1535,
|
|
"valid_targets_mean": 5643.5,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 1.703929164360819,
|
|
"grad_norm": 0.1714358773650662,
|
|
"learning_rate": 3.7553699959579716e-05,
|
|
"loss": 0.1692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08538398146629333,
|
|
"step": 1540,
|
|
"valid_targets_mean": 5522.8,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 1.7094631986718318,
|
|
"grad_norm": 0.17664480726337453,
|
|
"learning_rate": 3.7527196513561224e-05,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08488353341817856,
|
|
"step": 1545,
|
|
"valid_targets_mean": 5634.4,
|
|
"valid_targets_min": 3397
|
|
},
|
|
{
|
|
"epoch": 1.7149972329828445,
|
|
"grad_norm": 0.16551892245239816,
|
|
"learning_rate": 3.750055972630631e-05,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09638077020645142,
|
|
"step": 1550,
|
|
"valid_targets_mean": 5882.0,
|
|
"valid_targets_min": 1620
|
|
},
|
|
{
|
|
"epoch": 1.720531267293857,
|
|
"grad_norm": 0.17200205226469634,
|
|
"learning_rate": 3.747378980045902e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08725154399871826,
|
|
"step": 1555,
|
|
"valid_targets_mean": 5484.1,
|
|
"valid_targets_min": 1983
|
|
},
|
|
{
|
|
"epoch": 1.72606530160487,
|
|
"grad_norm": 0.1542667371346797,
|
|
"learning_rate": 3.744688693967629e-05,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09500366449356079,
|
|
"step": 1560,
|
|
"valid_targets_mean": 6050.4,
|
|
"valid_targets_min": 2705
|
|
},
|
|
{
|
|
"epoch": 1.7315993359158828,
|
|
"grad_norm": 0.14814420685720778,
|
|
"learning_rate": 3.741985134862638e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08133411407470703,
|
|
"step": 1565,
|
|
"valid_targets_mean": 5951.2,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 1.7371333702268954,
|
|
"grad_norm": 0.15781816560926434,
|
|
"learning_rate": 3.7392683232987294e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08072637766599655,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5333.5,
|
|
"valid_targets_min": 2761
|
|
},
|
|
{
|
|
"epoch": 1.742667404537908,
|
|
"grad_norm": 0.1635678128038785,
|
|
"learning_rate": 3.736538279944527e-05,
|
|
"loss": 0.1722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10370779037475586,
|
|
"step": 1575,
|
|
"valid_targets_mean": 5898.5,
|
|
"valid_targets_min": 1918
|
|
},
|
|
{
|
|
"epoch": 1.7482014388489209,
|
|
"grad_norm": 0.14261265693671554,
|
|
"learning_rate": 3.7337950255693176e-05,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08859887719154358,
|
|
"step": 1580,
|
|
"valid_targets_mean": 5690.7,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 1.7537354731599337,
|
|
"grad_norm": 0.152831569549247,
|
|
"learning_rate": 3.731038581042891e-05,
|
|
"loss": 0.1852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10772746801376343,
|
|
"step": 1585,
|
|
"valid_targets_mean": 5855.6,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 1.7592695074709463,
|
|
"grad_norm": 0.2647356527516756,
|
|
"learning_rate": 3.728268967335386e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1324308067560196,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4478.1,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 1.764803541781959,
|
|
"grad_norm": 0.2424881601505193,
|
|
"learning_rate": 3.7254862055171265e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12917517125606537,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4434.4,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 1.7703375760929718,
|
|
"grad_norm": 0.26389469655832054,
|
|
"learning_rate": 3.7226903167584624e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14717340469360352,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4586.5,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 1.7758716104039847,
|
|
"grad_norm": 0.29187032871634677,
|
|
"learning_rate": 3.7198813223296116e-05,
|
|
"loss": 0.2727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13220685720443726,
|
|
"step": 1605,
|
|
"valid_targets_mean": 4198.7,
|
|
"valid_targets_min": 1898
|
|
},
|
|
{
|
|
"epoch": 1.7814056447149973,
|
|
"grad_norm": 0.22776258261432347,
|
|
"learning_rate": 3.7170592436004935e-05,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1366225928068161,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4499.3,
|
|
"valid_targets_min": 2282
|
|
},
|
|
{
|
|
"epoch": 1.78693967902601,
|
|
"grad_norm": 0.22817137768793902,
|
|
"learning_rate": 3.714224102040569e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12853863835334778,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4354.3,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 1.7924737133370225,
|
|
"grad_norm": 0.2229030717728699,
|
|
"learning_rate": 3.7113759192186787e-05,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1050797700881958,
|
|
"step": 1620,
|
|
"valid_targets_mean": 5987.4,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 1.7980077476480354,
|
|
"grad_norm": 0.21115591425185332,
|
|
"learning_rate": 3.708514716802874e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11311844736337662,
|
|
"step": 1625,
|
|
"valid_targets_mean": 7339.2,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 1.8035417819590482,
|
|
"grad_norm": 0.2174003005062392,
|
|
"learning_rate": 3.705640516560257e-05,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12683409452438354,
|
|
"step": 1630,
|
|
"valid_targets_mean": 7609.3,
|
|
"valid_targets_min": 2489
|
|
},
|
|
{
|
|
"epoch": 1.8090758162700609,
|
|
"grad_norm": 0.177464000416812,
|
|
"learning_rate": 3.702753340356813e-05,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11056222766637802,
|
|
"step": 1635,
|
|
"valid_targets_mean": 8182.5,
|
|
"valid_targets_min": 2323
|
|
},
|
|
{
|
|
"epoch": 1.8146098505810735,
|
|
"grad_norm": 0.1870460713862642,
|
|
"learning_rate": 3.699853210157243e-05,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11322402209043503,
|
|
"step": 1640,
|
|
"valid_targets_mean": 7945.0,
|
|
"valid_targets_min": 2386
|
|
},
|
|
{
|
|
"epoch": 1.8201438848920863,
|
|
"grad_norm": 0.18526910832781884,
|
|
"learning_rate": 3.6969401480247984e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10831591486930847,
|
|
"step": 1645,
|
|
"valid_targets_mean": 7628.4,
|
|
"valid_targets_min": 2019
|
|
},
|
|
{
|
|
"epoch": 1.8256779192030992,
|
|
"grad_norm": 0.13803254078026828,
|
|
"learning_rate": 3.694014176121114e-05,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.096345454454422,
|
|
"step": 1650,
|
|
"valid_targets_mean": 13487.5,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 1.8312119535141118,
|
|
"grad_norm": 0.1323739468997748,
|
|
"learning_rate": 3.6910753167060366e-05,
|
|
"loss": 0.2102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10764390230178833,
|
|
"step": 1655,
|
|
"valid_targets_mean": 14044.6,
|
|
"valid_targets_min": 2837
|
|
},
|
|
{
|
|
"epoch": 1.8367459878251244,
|
|
"grad_norm": 0.12739906419458874,
|
|
"learning_rate": 3.688123592137455e-05,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09776881337165833,
|
|
"step": 1660,
|
|
"valid_targets_mean": 12808.5,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 1.8422800221361373,
|
|
"grad_norm": 0.18472517875224204,
|
|
"learning_rate": 3.685159024871137e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10981454700231552,
|
|
"step": 1665,
|
|
"valid_targets_mean": 7773.3,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 1.8478140564471501,
|
|
"grad_norm": 0.18058654658948134,
|
|
"learning_rate": 3.682181637460549e-05,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1116521880030632,
|
|
"step": 1670,
|
|
"valid_targets_mean": 8022.5,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 1.8533480907581628,
|
|
"grad_norm": 0.18724230912218406,
|
|
"learning_rate": 3.67919145255669e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10184332728385925,
|
|
"step": 1675,
|
|
"valid_targets_mean": 7332.0,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 1.8588821250691754,
|
|
"grad_norm": 0.284846970975493,
|
|
"learning_rate": 3.67618849290792e-05,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1234837993979454,
|
|
"step": 1680,
|
|
"valid_targets_mean": 4677.9,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 1.864416159380188,
|
|
"grad_norm": 0.23265322355069754,
|
|
"learning_rate": 3.6731727813597826e-05,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12468057125806808,
|
|
"step": 1685,
|
|
"valid_targets_mean": 5047.0,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 1.8699501936912009,
|
|
"grad_norm": 0.24901026147462169,
|
|
"learning_rate": 3.6701443408548356e-05,
|
|
"loss": 0.2477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1232062578201294,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4560.3,
|
|
"valid_targets_min": 2177
|
|
},
|
|
{
|
|
"epoch": 1.8754842280022137,
|
|
"grad_norm": 0.24845493572710523,
|
|
"learning_rate": 3.667103194432473e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12273026257753372,
|
|
"step": 1695,
|
|
"valid_targets_mean": 4750.5,
|
|
"valid_targets_min": 2094
|
|
},
|
|
{
|
|
"epoch": 1.8810182623132263,
|
|
"grad_norm": 0.21352332489113723,
|
|
"learning_rate": 3.664049365228753e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13580043613910675,
|
|
"step": 1700,
|
|
"valid_targets_mean": 4360.2,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 1.886552296624239,
|
|
"grad_norm": 0.24187118495071433,
|
|
"learning_rate": 3.66098287647622e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1355639547109604,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4350.1,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 1.8920863309352518,
|
|
"grad_norm": 0.2221319087254357,
|
|
"learning_rate": 3.657903751503726e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1442292481660843,
|
|
"step": 1710,
|
|
"valid_targets_mean": 5759.9,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 1.8976203652462647,
|
|
"grad_norm": 0.22014101686720144,
|
|
"learning_rate": 3.6548120137362586e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13664335012435913,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4665.8,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 1.9031543995572773,
|
|
"grad_norm": 0.19549340134656434,
|
|
"learning_rate": 3.6517076866947557e-05,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1436709612607956,
|
|
"step": 1720,
|
|
"valid_targets_mean": 5136.1,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 1.90868843386829,
|
|
"grad_norm": 0.18788086589483333,
|
|
"learning_rate": 3.648590793995935e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1428404301404953,
|
|
"step": 1725,
|
|
"valid_targets_mean": 5201.3,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 1.9142224681793027,
|
|
"grad_norm": 0.19335780199762595,
|
|
"learning_rate": 3.645461359352104e-05,
|
|
"loss": 0.2781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1349288523197174,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5300.8,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 1.9197565024903156,
|
|
"grad_norm": 0.21764207689379206,
|
|
"learning_rate": 3.6423194065709906e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11965199559926987,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4597.2,
|
|
"valid_targets_min": 1963
|
|
},
|
|
{
|
|
"epoch": 1.9252905368013282,
|
|
"grad_norm": 0.20259374288682375,
|
|
"learning_rate": 3.639164959555553e-05,
|
|
"loss": 0.2735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12947358191013336,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4532.3,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 1.9308245711123408,
|
|
"grad_norm": 0.2011284156115527,
|
|
"learning_rate": 3.635998042303804e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12850068509578705,
|
|
"step": 1745,
|
|
"valid_targets_mean": 5127.4,
|
|
"valid_targets_min": 2148
|
|
},
|
|
{
|
|
"epoch": 1.9363586054233535,
|
|
"grad_norm": 0.23379586205063374,
|
|
"learning_rate": 3.632818678908624e-05,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14635328948497772,
|
|
"step": 1750,
|
|
"valid_targets_mean": 5093.4,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 1.9418926397343663,
|
|
"grad_norm": 0.24506410198251863,
|
|
"learning_rate": 3.629626893557581e-05,
|
|
"loss": 0.2704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13486354053020477,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4493.6,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 1.9474266740453792,
|
|
"grad_norm": 0.20344712413332988,
|
|
"learning_rate": 3.626422710532743e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13823886215686798,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5985.7,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 1.9529607083563918,
|
|
"grad_norm": 0.23951804276675492,
|
|
"learning_rate": 3.6232061542104984e-05,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12460750341415405,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4282.6,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 1.9584947426674044,
|
|
"grad_norm": 0.21081778587343794,
|
|
"learning_rate": 3.619977249061366e-05,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12043123692274094,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4043.1,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 1.9640287769784173,
|
|
"grad_norm": 0.2000956213548545,
|
|
"learning_rate": 3.6167360196498104e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12249448150396347,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3962.6,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 1.9695628112894301,
|
|
"grad_norm": 0.2063885651633084,
|
|
"learning_rate": 3.6134824906340565e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12181480973958969,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4089.9,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 1.9750968456004427,
|
|
"grad_norm": 0.1963326019016443,
|
|
"learning_rate": 3.610216686765899e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13744494318962097,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4647.0,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 1.9806308799114554,
|
|
"grad_norm": 0.20396733516424023,
|
|
"learning_rate": 3.6069386328905156e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12744426727294922,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 1.9861649142224682,
|
|
"grad_norm": 0.21173248608648465,
|
|
"learning_rate": 3.603648353946281e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12109271436929703,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4065.2,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 1.991698948533481,
|
|
"grad_norm": 0.19300670821745727,
|
|
"learning_rate": 3.60034587496457e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12375432252883911,
|
|
"step": 1800,
|
|
"valid_targets_mean": 4378.8,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 1.9972329828444937,
|
|
"grad_norm": 0.20611442328641857,
|
|
"learning_rate": 3.597031221069575e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1273588091135025,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4250.2,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 2.002213613724405,
|
|
"grad_norm": 0.25460945090750153,
|
|
"learning_rate": 3.593704417478108e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12178600579500198,
|
|
"step": 1810,
|
|
"valid_targets_mean": 6556.2,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 2.007747648035418,
|
|
"grad_norm": 0.196181622903712,
|
|
"learning_rate": 3.5903654894994156e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12736719846725464,
|
|
"step": 1815,
|
|
"valid_targets_mean": 6745.2,
|
|
"valid_targets_min": 2595
|
|
},
|
|
{
|
|
"epoch": 2.0132816823464306,
|
|
"grad_norm": 0.18163396282759997,
|
|
"learning_rate": 3.5870144625349796e-05,
|
|
"loss": 0.2453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12206553667783737,
|
|
"step": 1820,
|
|
"valid_targets_mean": 6414.2,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 2.018815716657443,
|
|
"grad_norm": 0.17498500763921046,
|
|
"learning_rate": 3.583651362078329e-05,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1313360035419464,
|
|
"step": 1825,
|
|
"valid_targets_mean": 7147.8,
|
|
"valid_targets_min": 2654
|
|
},
|
|
{
|
|
"epoch": 2.024349750968456,
|
|
"grad_norm": 0.15874180378712185,
|
|
"learning_rate": 3.580276213714842e-05,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12748394906520844,
|
|
"step": 1830,
|
|
"valid_targets_mean": 7391.1,
|
|
"valid_targets_min": 2506
|
|
},
|
|
{
|
|
"epoch": 2.029883785279469,
|
|
"grad_norm": 0.16967369735811572,
|
|
"learning_rate": 3.576889043121556e-05,
|
|
"loss": 0.2395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09263142198324203,
|
|
"step": 1835,
|
|
"valid_targets_mean": 5653.9,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 2.0354178195904815,
|
|
"grad_norm": 0.15502565084525127,
|
|
"learning_rate": 3.573489876066967e-05,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11003027111291885,
|
|
"step": 1840,
|
|
"valid_targets_mean": 6577.6,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 2.040951853901494,
|
|
"grad_norm": 0.17240419472611698,
|
|
"learning_rate": 3.570078738410838e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1300700157880783,
|
|
"step": 1845,
|
|
"valid_targets_mean": 7243.6,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 2.0464858882125068,
|
|
"grad_norm": 0.15041215438948005,
|
|
"learning_rate": 3.566655656104e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09950295835733414,
|
|
"step": 1850,
|
|
"valid_targets_mean": 5979.6,
|
|
"valid_targets_min": 2261
|
|
},
|
|
{
|
|
"epoch": 2.05201992252352,
|
|
"grad_norm": 0.16398165353835098,
|
|
"learning_rate": 3.563220655188155e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11267545819282532,
|
|
"step": 1855,
|
|
"valid_targets_mean": 6526.0,
|
|
"valid_targets_min": 2889
|
|
},
|
|
{
|
|
"epoch": 2.0575539568345325,
|
|
"grad_norm": 0.1644283942720737,
|
|
"learning_rate": 3.559773761795677e-05,
|
|
"loss": 0.2407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11775601655244827,
|
|
"step": 1860,
|
|
"valid_targets_mean": 6637.4,
|
|
"valid_targets_min": 2198
|
|
},
|
|
{
|
|
"epoch": 2.063087991145545,
|
|
"grad_norm": 0.18894361057192038,
|
|
"learning_rate": 3.556315002149417e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11832115054130554,
|
|
"step": 1865,
|
|
"valid_targets_mean": 6630.0,
|
|
"valid_targets_min": 2447
|
|
},
|
|
{
|
|
"epoch": 2.0686220254565577,
|
|
"grad_norm": 0.15871754158886173,
|
|
"learning_rate": 3.552844402562496e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11108693480491638,
|
|
"step": 1870,
|
|
"valid_targets_mean": 6784.3,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 2.074156059767571,
|
|
"grad_norm": 0.1666703709693314,
|
|
"learning_rate": 3.5493619894381134e-05,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10879040509462357,
|
|
"step": 1875,
|
|
"valid_targets_mean": 6594.1,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 2.0796900940785834,
|
|
"grad_norm": 0.16471460484089057,
|
|
"learning_rate": 3.5458677892693414e-05,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10758719593286514,
|
|
"step": 1880,
|
|
"valid_targets_mean": 6608.3,
|
|
"valid_targets_min": 2294
|
|
},
|
|
{
|
|
"epoch": 2.085224128389596,
|
|
"grad_norm": 0.15983077692158537,
|
|
"learning_rate": 3.542361828638922e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11144182831048965,
|
|
"step": 1885,
|
|
"valid_targets_mean": 6807.7,
|
|
"valid_targets_min": 2459
|
|
},
|
|
{
|
|
"epoch": 2.0907581627006087,
|
|
"grad_norm": 0.16073131019754824,
|
|
"learning_rate": 3.538844134219069e-05,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13222560286521912,
|
|
"step": 1890,
|
|
"valid_targets_mean": 7171.7,
|
|
"valid_targets_min": 2663
|
|
},
|
|
{
|
|
"epoch": 2.0962921970116213,
|
|
"grad_norm": 0.16666788192831658,
|
|
"learning_rate": 3.5353147327712637e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1460612565279007,
|
|
"step": 1895,
|
|
"valid_targets_mean": 7488.9,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 2.1018262313226344,
|
|
"grad_norm": 0.16460431294876285,
|
|
"learning_rate": 3.531773651146049e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10749489814043045,
|
|
"step": 1900,
|
|
"valid_targets_mean": 6740.6,
|
|
"valid_targets_min": 3073
|
|
},
|
|
{
|
|
"epoch": 2.107360265633647,
|
|
"grad_norm": 0.17328310563867794,
|
|
"learning_rate": 3.5282209162828264e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11726633459329605,
|
|
"step": 1905,
|
|
"valid_targets_mean": 7155.1,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 2.1128942999446596,
|
|
"grad_norm": 0.1708861905255187,
|
|
"learning_rate": 3.5246565552096536e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11879414319992065,
|
|
"step": 1910,
|
|
"valid_targets_mean": 6865.1,
|
|
"valid_targets_min": 3211
|
|
},
|
|
{
|
|
"epoch": 2.1184283342556722,
|
|
"grad_norm": 0.1628843755311141,
|
|
"learning_rate": 3.5210805950430366e-05,
|
|
"loss": 0.2345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10074803978204727,
|
|
"step": 1915,
|
|
"valid_targets_mean": 6133.8,
|
|
"valid_targets_min": 3408
|
|
},
|
|
{
|
|
"epoch": 2.1239623685666853,
|
|
"grad_norm": 0.1712030155859964,
|
|
"learning_rate": 3.51749306298772e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11956235021352768,
|
|
"step": 1920,
|
|
"valid_targets_mean": 7243.4,
|
|
"valid_targets_min": 2661
|
|
},
|
|
{
|
|
"epoch": 2.129496402877698,
|
|
"grad_norm": 0.15855319282312255,
|
|
"learning_rate": 3.5138939863364884e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10077456384897232,
|
|
"step": 1925,
|
|
"valid_targets_mean": 6463.6,
|
|
"valid_targets_min": 2198
|
|
},
|
|
{
|
|
"epoch": 2.1350304371887106,
|
|
"grad_norm": 0.1731689581044804,
|
|
"learning_rate": 3.5102833924699515e-05,
|
|
"loss": 0.2294,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12133423238992691,
|
|
"step": 1930,
|
|
"valid_targets_mean": 7358.3,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 2.140564471499723,
|
|
"grad_norm": 0.16938213670980715,
|
|
"learning_rate": 3.5066613088563376e-05,
|
|
"loss": 0.2284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12302404642105103,
|
|
"step": 1935,
|
|
"valid_targets_mean": 7186.7,
|
|
"valid_targets_min": 3067
|
|
},
|
|
{
|
|
"epoch": 2.146098505810736,
|
|
"grad_norm": 0.1602878817587104,
|
|
"learning_rate": 3.503027763051286e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09725018590688705,
|
|
"step": 1940,
|
|
"valid_targets_mean": 6213.1,
|
|
"valid_targets_min": 2512
|
|
},
|
|
{
|
|
"epoch": 2.151632540121749,
|
|
"grad_norm": 0.17073852982024915,
|
|
"learning_rate": 3.499382782697638e-05,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10988564044237137,
|
|
"step": 1945,
|
|
"valid_targets_mean": 6629.0,
|
|
"valid_targets_min": 2721
|
|
},
|
|
{
|
|
"epoch": 2.1571665744327615,
|
|
"grad_norm": 0.16508704276971933,
|
|
"learning_rate": 3.495726395525222e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11532676219940186,
|
|
"step": 1950,
|
|
"valid_targets_mean": 6951.3,
|
|
"valid_targets_min": 1410
|
|
},
|
|
{
|
|
"epoch": 2.162700608743774,
|
|
"grad_norm": 0.16850115690665773,
|
|
"learning_rate": 3.4920586293506505e-05,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11975447088479996,
|
|
"step": 1955,
|
|
"valid_targets_mean": 6716.0,
|
|
"valid_targets_min": 2015
|
|
},
|
|
{
|
|
"epoch": 2.1682346430547867,
|
|
"grad_norm": 0.18027122835951337,
|
|
"learning_rate": 3.488379512077099e-05,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1077960804104805,
|
|
"step": 1960,
|
|
"valid_targets_mean": 6754.2,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 2.1737686773658,
|
|
"grad_norm": 0.1694207004089083,
|
|
"learning_rate": 3.4846890716941005e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12165435403585434,
|
|
"step": 1965,
|
|
"valid_targets_mean": 7055.3,
|
|
"valid_targets_min": 1824
|
|
},
|
|
{
|
|
"epoch": 2.1793027116768124,
|
|
"grad_norm": 0.16984422107945202,
|
|
"learning_rate": 3.480987336277332e-05,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1043839231133461,
|
|
"step": 1970,
|
|
"valid_targets_mean": 6240.7,
|
|
"valid_targets_min": 2520
|
|
},
|
|
{
|
|
"epoch": 2.184836745987825,
|
|
"grad_norm": 0.15928989504119623,
|
|
"learning_rate": 3.4772743339883975e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11364192515611649,
|
|
"step": 1975,
|
|
"valid_targets_mean": 7048.4,
|
|
"valid_targets_min": 2534
|
|
},
|
|
{
|
|
"epoch": 2.1903707802988377,
|
|
"grad_norm": 0.17238125314734223,
|
|
"learning_rate": 3.4735500930746155e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11842123419046402,
|
|
"step": 1980,
|
|
"valid_targets_mean": 7279.1,
|
|
"valid_targets_min": 2874
|
|
},
|
|
{
|
|
"epoch": 2.1959048146098508,
|
|
"grad_norm": 0.16521197109465582,
|
|
"learning_rate": 3.469814641868806e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10197652131319046,
|
|
"step": 1985,
|
|
"valid_targets_mean": 6879.3,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 2.2014388489208634,
|
|
"grad_norm": 0.19198517275447463,
|
|
"learning_rate": 3.4660680087890724e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12141253799200058,
|
|
"step": 1990,
|
|
"valid_targets_mean": 7067.9,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 2.206972883231876,
|
|
"grad_norm": 0.16151705568785352,
|
|
"learning_rate": 3.4623102223385854e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12976032495498657,
|
|
"step": 1995,
|
|
"valid_targets_mean": 7770.0,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 2.2125069175428886,
|
|
"grad_norm": 0.18893779239098643,
|
|
"learning_rate": 3.458541311105369e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1286742091178894,
|
|
"step": 2000,
|
|
"valid_targets_mean": 6690.8,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 2.2180409518539017,
|
|
"grad_norm": 0.16505910458823952,
|
|
"learning_rate": 3.454761303762078e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10115599632263184,
|
|
"step": 2005,
|
|
"valid_targets_mean": 6521.1,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 2.2235749861649143,
|
|
"grad_norm": 0.16847553045939181,
|
|
"learning_rate": 3.4509702290657856e-05,
|
|
"loss": 0.2313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13247986137866974,
|
|
"step": 2010,
|
|
"valid_targets_mean": 7180.3,
|
|
"valid_targets_min": 2546
|
|
},
|
|
{
|
|
"epoch": 2.229109020475927,
|
|
"grad_norm": 0.32498045972365375,
|
|
"learning_rate": 3.4471681158577604e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15396571159362793,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4645.0,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 2.2346430547869396,
|
|
"grad_norm": 0.3199891571560827,
|
|
"learning_rate": 3.443354993063248e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1466095894575119,
|
|
"step": 2020,
|
|
"valid_targets_mean": 5546.4,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 2.240177089097952,
|
|
"grad_norm": 0.26512644951778497,
|
|
"learning_rate": 3.439530889691254e-05,
|
|
"loss": 0.2729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1405329555273056,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4328.5,
|
|
"valid_targets_min": 1980
|
|
},
|
|
{
|
|
"epoch": 2.2457111234089653,
|
|
"grad_norm": 0.2869325070126373,
|
|
"learning_rate": 3.4356958348343155e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12140665203332901,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3992.0,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 2.251245157719978,
|
|
"grad_norm": 0.2589383495052475,
|
|
"learning_rate": 3.431849857668292e-05,
|
|
"loss": 0.268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13259261846542358,
|
|
"step": 2035,
|
|
"valid_targets_mean": 4146.0,
|
|
"valid_targets_min": 1570
|
|
},
|
|
{
|
|
"epoch": 2.2567791920309905,
|
|
"grad_norm": 0.24535043204958112,
|
|
"learning_rate": 3.42799298745213e-05,
|
|
"loss": 0.2804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14231093227863312,
|
|
"step": 2040,
|
|
"valid_targets_mean": 4619.1,
|
|
"valid_targets_min": 1887
|
|
},
|
|
{
|
|
"epoch": 2.262313226342003,
|
|
"grad_norm": 0.256468789178841,
|
|
"learning_rate": 3.42412525352765e-05,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13515257835388184,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4458.5,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 2.2678472606530162,
|
|
"grad_norm": 0.2524559056796063,
|
|
"learning_rate": 3.42024668531932e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1405826061964035,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4055.2,
|
|
"valid_targets_min": 1504
|
|
},
|
|
{
|
|
"epoch": 2.273381294964029,
|
|
"grad_norm": 0.2273920463882431,
|
|
"learning_rate": 3.41635731233403e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11907500773668289,
|
|
"step": 2055,
|
|
"valid_targets_mean": 4350.9,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 2.2789153292750415,
|
|
"grad_norm": 0.2449799495311102,
|
|
"learning_rate": 3.412457164160872e-05,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11413963884115219,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3810.2,
|
|
"valid_targets_min": 1857
|
|
},
|
|
{
|
|
"epoch": 2.284449363586054,
|
|
"grad_norm": 0.24708374303357555,
|
|
"learning_rate": 3.408546270470909e-05,
|
|
"loss": 0.2821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14712080359458923,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4675.1,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 2.2899833978970667,
|
|
"grad_norm": 0.2367581615241945,
|
|
"learning_rate": 3.404624661016956e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13800793886184692,
|
|
"step": 2070,
|
|
"valid_targets_mean": 4311.9,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 2.29551743220808,
|
|
"grad_norm": 0.2417319220816092,
|
|
"learning_rate": 3.400692365633346e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14577551186084747,
|
|
"step": 2075,
|
|
"valid_targets_mean": 4801.3,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 2.3010514665190924,
|
|
"grad_norm": 0.25106235822407685,
|
|
"learning_rate": 3.39674941423571e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1416432410478592,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4260.2,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 2.306585500830105,
|
|
"grad_norm": 0.22436144371235098,
|
|
"learning_rate": 3.392795836820746e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11476701498031616,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4150.7,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 2.312119535141118,
|
|
"grad_norm": 0.2535443617210647,
|
|
"learning_rate": 3.38883166346599e-05,
|
|
"loss": 0.272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14522124826908112,
|
|
"step": 2090,
|
|
"valid_targets_mean": 4290.6,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 2.3176535694521307,
|
|
"grad_norm": 0.21858072979627968,
|
|
"learning_rate": 3.384856924329592e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11835676431655884,
|
|
"step": 2095,
|
|
"valid_targets_mean": 4075.0,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 2.3231876037631434,
|
|
"grad_norm": 0.21954086058104552,
|
|
"learning_rate": 3.380871649650077e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12017607688903809,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4359.4,
|
|
"valid_targets_min": 1655
|
|
},
|
|
{
|
|
"epoch": 2.328721638074156,
|
|
"grad_norm": 0.2542703005591101,
|
|
"learning_rate": 3.376875869746126e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12094666808843613,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3815.9,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 2.3342556723851686,
|
|
"grad_norm": 0.251126910414918,
|
|
"learning_rate": 3.372869615016339e-05,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12940432131290436,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4242.0,
|
|
"valid_targets_min": 1487
|
|
},
|
|
{
|
|
"epoch": 2.3397897066961817,
|
|
"grad_norm": 0.2643944650713139,
|
|
"learning_rate": 3.3688529159390046e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12784574925899506,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4385.7,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 2.3453237410071943,
|
|
"grad_norm": 0.2461152263037893,
|
|
"learning_rate": 3.364825803071869e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11380674690008163,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3871.0,
|
|
"valid_targets_min": 1449
|
|
},
|
|
{
|
|
"epoch": 2.350857775318207,
|
|
"grad_norm": 0.2621383638045265,
|
|
"learning_rate": 3.360788307051904e-05,
|
|
"loss": 0.2696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12078753858804703,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3865.1,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 2.3563918096292196,
|
|
"grad_norm": 0.24641782553806202,
|
|
"learning_rate": 3.35674045859507e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13783816993236542,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4496.6,
|
|
"valid_targets_min": 2112
|
|
},
|
|
{
|
|
"epoch": 2.3619258439402326,
|
|
"grad_norm": 0.24085148737669346,
|
|
"learning_rate": 3.35268228849609e-05,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14685018360614777,
|
|
"step": 2135,
|
|
"valid_targets_mean": 4774.4,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 2.3674598782512453,
|
|
"grad_norm": 0.2220386797959251,
|
|
"learning_rate": 3.348613827628206e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1352035254240036,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4803.5,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 2.372993912562258,
|
|
"grad_norm": 0.240284130200847,
|
|
"learning_rate": 3.3445351069429525e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14021100103855133,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4359.3,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 2.3785279468732705,
|
|
"grad_norm": 0.2428089987954204,
|
|
"learning_rate": 3.340446157469916e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12046308070421219,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4695.8,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 2.384061981184283,
|
|
"grad_norm": 0.26202820127755705,
|
|
"learning_rate": 3.3363470103164986e-05,
|
|
"loss": 0.2532,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11755248159170151,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3991.0,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 2.389596015495296,
|
|
"grad_norm": 0.25794323418326875,
|
|
"learning_rate": 3.3322376966676865e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14617593586444855,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4203.9,
|
|
"valid_targets_min": 1391
|
|
},
|
|
{
|
|
"epoch": 2.395130049806309,
|
|
"grad_norm": 0.24069832733527413,
|
|
"learning_rate": 3.328118247785807e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13045255839824677,
|
|
"step": 2165,
|
|
"valid_targets_mean": 4457.6,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 2.4006640841173215,
|
|
"grad_norm": 0.24464630121892514,
|
|
"learning_rate": 3.3239886950102933e-05,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13744525611400604,
|
|
"step": 2170,
|
|
"valid_targets_mean": 4402.4,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 2.406198118428334,
|
|
"grad_norm": 0.245525174323216,
|
|
"learning_rate": 3.319849069757446e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11348900943994522,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4009.4,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 2.411732152739347,
|
|
"grad_norm": 0.25531675363724365,
|
|
"learning_rate": 3.315699403520193e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12031114846467972,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4591.5,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 2.41726618705036,
|
|
"grad_norm": 0.2723623550882369,
|
|
"learning_rate": 3.3115397278678525e-05,
|
|
"loss": 0.2507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12081772089004517,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3853.5,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 2.4228002213613724,
|
|
"grad_norm": 0.2759578981507214,
|
|
"learning_rate": 3.3073700744458905e-05,
|
|
"loss": 0.2425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10981065779924393,
|
|
"step": 2190,
|
|
"valid_targets_mean": 3809.5,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 2.428334255672385,
|
|
"grad_norm": 0.2708688860694367,
|
|
"learning_rate": 3.303190474975679e-05,
|
|
"loss": 0.2401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11741551011800766,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4231.2,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 2.4338682899833977,
|
|
"grad_norm": 0.26191735840776026,
|
|
"learning_rate": 3.299000961254259e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15129616856575012,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4823.4,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 2.4394023242944107,
|
|
"grad_norm": 0.24684680082578841,
|
|
"learning_rate": 3.2948015651540926e-05,
|
|
"loss": 0.2392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11781984567642212,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4126.4,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 2.4449363586054234,
|
|
"grad_norm": 0.2571977772945324,
|
|
"learning_rate": 3.290592318622827e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11264637857675552,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3882.1,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 2.450470392916436,
|
|
"grad_norm": 0.25102524109760466,
|
|
"learning_rate": 3.286373253683048e-05,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11968725174665451,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4487.3,
|
|
"valid_targets_min": 1825
|
|
},
|
|
{
|
|
"epoch": 2.456004427227449,
|
|
"grad_norm": 0.2743124438502291,
|
|
"learning_rate": 3.2821444024320326e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13843697309494019,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4427.6,
|
|
"valid_targets_min": 1596
|
|
},
|
|
{
|
|
"epoch": 2.4615384615384617,
|
|
"grad_norm": 0.25338666208737304,
|
|
"learning_rate": 3.277905797041514e-05,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14188285171985626,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4976.6,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 2.4670724958494743,
|
|
"grad_norm": 0.25251346662449287,
|
|
"learning_rate": 3.273657469757431e-05,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13103936612606049,
|
|
"step": 2230,
|
|
"valid_targets_mean": 5125.1,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 2.472606530160487,
|
|
"grad_norm": 0.18675770996860236,
|
|
"learning_rate": 3.26939945289968e-05,
|
|
"loss": 0.1731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08323365449905396,
|
|
"step": 2235,
|
|
"valid_targets_mean": 5392.4,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 2.4781405644714996,
|
|
"grad_norm": 0.18709599083859213,
|
|
"learning_rate": 3.265131778861876e-05,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08022157102823257,
|
|
"step": 2240,
|
|
"valid_targets_mean": 5752.5,
|
|
"valid_targets_min": 2418
|
|
},
|
|
{
|
|
"epoch": 2.4836745987825126,
|
|
"grad_norm": 0.1581221414370123,
|
|
"learning_rate": 3.2608544801111e-05,
|
|
"loss": 0.1616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07407370954751968,
|
|
"step": 2245,
|
|
"valid_targets_mean": 5096.6,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 2.4892086330935252,
|
|
"grad_norm": 0.15996766971390297,
|
|
"learning_rate": 3.256567589187657e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08508744835853577,
|
|
"step": 2250,
|
|
"valid_targets_mean": 5641.0,
|
|
"valid_targets_min": 2134
|
|
},
|
|
{
|
|
"epoch": 2.494742667404538,
|
|
"grad_norm": 0.1503407078520133,
|
|
"learning_rate": 3.2522711387048246e-05,
|
|
"loss": 0.1657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0826578214764595,
|
|
"step": 2255,
|
|
"valid_targets_mean": 5802.1,
|
|
"valid_targets_min": 3269
|
|
},
|
|
{
|
|
"epoch": 2.5002767017155505,
|
|
"grad_norm": 0.15113352160318952,
|
|
"learning_rate": 3.2479651613486075e-05,
|
|
"loss": 0.1599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08101323246955872,
|
|
"step": 2260,
|
|
"valid_targets_mean": 5432.3,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 2.5058107360265636,
|
|
"grad_norm": 0.15169501414305464,
|
|
"learning_rate": 3.243649689877487e-05,
|
|
"loss": 0.1537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07829853147268295,
|
|
"step": 2265,
|
|
"valid_targets_mean": 5743.6,
|
|
"valid_targets_min": 2175
|
|
},
|
|
{
|
|
"epoch": 2.511344770337576,
|
|
"grad_norm": 0.1471582265896324,
|
|
"learning_rate": 3.239324757122174e-05,
|
|
"loss": 0.1507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07005364447832108,
|
|
"step": 2270,
|
|
"valid_targets_mean": 5761.5,
|
|
"valid_targets_min": 3064
|
|
},
|
|
{
|
|
"epoch": 2.516878804648589,
|
|
"grad_norm": 0.15319141052788057,
|
|
"learning_rate": 3.2349903959853557e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0837135836482048,
|
|
"step": 2275,
|
|
"valid_targets_mean": 5588.5,
|
|
"valid_targets_min": 2372
|
|
},
|
|
{
|
|
"epoch": 2.5224128389596014,
|
|
"grad_norm": 0.15171378597396376,
|
|
"learning_rate": 3.230646639441449e-05,
|
|
"loss": 0.1492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07668746262788773,
|
|
"step": 2280,
|
|
"valid_targets_mean": 5415.7,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 2.527946873270614,
|
|
"grad_norm": 0.15186774768441974,
|
|
"learning_rate": 3.226293520536345e-05,
|
|
"loss": 0.1582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0768275186419487,
|
|
"step": 2285,
|
|
"valid_targets_mean": 5562.1,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 2.533480907581627,
|
|
"grad_norm": 0.17699707125016628,
|
|
"learning_rate": 3.221931072387165e-05,
|
|
"loss": 0.1497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06258273124694824,
|
|
"step": 2290,
|
|
"valid_targets_mean": 5284.1,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 2.5390149418926398,
|
|
"grad_norm": 0.16796400444970988,
|
|
"learning_rate": 3.217559328182003e-05,
|
|
"loss": 0.1485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07881813496351242,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5722.6,
|
|
"valid_targets_min": 2512
|
|
},
|
|
{
|
|
"epoch": 2.5445489762036524,
|
|
"grad_norm": 0.16922323148374335,
|
|
"learning_rate": 3.2131783211796716e-05,
|
|
"loss": 0.1478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0652880072593689,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4999.8,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 2.5500830105146655,
|
|
"grad_norm": 0.16119863560776443,
|
|
"learning_rate": 3.208788084709455e-05,
|
|
"loss": 0.1487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07294315099716187,
|
|
"step": 2305,
|
|
"valid_targets_mean": 5641.6,
|
|
"valid_targets_min": 2140
|
|
},
|
|
{
|
|
"epoch": 2.555617044825678,
|
|
"grad_norm": 0.16848133595850323,
|
|
"learning_rate": 3.20438865217085e-05,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07951802015304565,
|
|
"step": 2310,
|
|
"valid_targets_mean": 5478.7,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 2.5611510791366907,
|
|
"grad_norm": 0.15915104081782677,
|
|
"learning_rate": 3.1999800570333146e-05,
|
|
"loss": 0.1541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07250721007585526,
|
|
"step": 2315,
|
|
"valid_targets_mean": 5777.6,
|
|
"valid_targets_min": 2255
|
|
},
|
|
{
|
|
"epoch": 2.5666851134477033,
|
|
"grad_norm": 0.16108384255341257,
|
|
"learning_rate": 3.195562332836015e-05,
|
|
"loss": 0.1471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07589638233184814,
|
|
"step": 2320,
|
|
"valid_targets_mean": 5722.6,
|
|
"valid_targets_min": 1753
|
|
},
|
|
{
|
|
"epoch": 2.572219147758716,
|
|
"grad_norm": 0.1755066395817842,
|
|
"learning_rate": 3.191135513187564e-05,
|
|
"loss": 0.1449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07764853537082672,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5676.9,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 2.5777531820697286,
|
|
"grad_norm": 0.17430824924843394,
|
|
"learning_rate": 3.186699631765775e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07290103286504745,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5766.0,
|
|
"valid_targets_min": 2729
|
|
},
|
|
{
|
|
"epoch": 2.5832872163807417,
|
|
"grad_norm": 0.17057163905522507,
|
|
"learning_rate": 3.182254722317396e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07022867351770401,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5821.4,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 2.5888212506917543,
|
|
"grad_norm": 0.15791232305329486,
|
|
"learning_rate": 3.177800818657859e-05,
|
|
"loss": 0.1505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0739402249455452,
|
|
"step": 2340,
|
|
"valid_targets_mean": 5754.9,
|
|
"valid_targets_min": 3355
|
|
},
|
|
{
|
|
"epoch": 2.594355285002767,
|
|
"grad_norm": 0.17193888788181674,
|
|
"learning_rate": 3.1733379546710226e-05,
|
|
"loss": 0.144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06612033396959305,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5748.6,
|
|
"valid_targets_min": 2067
|
|
},
|
|
{
|
|
"epoch": 2.59988931931378,
|
|
"grad_norm": 0.15255677334789028,
|
|
"learning_rate": 3.168866164308909e-05,
|
|
"loss": 0.1474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07562779635190964,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5688.9,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 2.6054233536247926,
|
|
"grad_norm": 0.16904413269578494,
|
|
"learning_rate": 3.164385481591453e-05,
|
|
"loss": 0.1427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07645299285650253,
|
|
"step": 2355,
|
|
"valid_targets_mean": 5857.9,
|
|
"valid_targets_min": 2260
|
|
},
|
|
{
|
|
"epoch": 2.6109573879358052,
|
|
"grad_norm": 0.16744205276671484,
|
|
"learning_rate": 3.159895940606237e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08270134031772614,
|
|
"step": 2360,
|
|
"valid_targets_mean": 5516.6,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 2.616491422246818,
|
|
"grad_norm": 0.16642339253554195,
|
|
"learning_rate": 3.1553975755082366e-05,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07753140479326248,
|
|
"step": 2365,
|
|
"valid_targets_mean": 5969.0,
|
|
"valid_targets_min": 2448
|
|
},
|
|
{
|
|
"epoch": 2.6220254565578305,
|
|
"grad_norm": 0.16838695928479055,
|
|
"learning_rate": 3.150890420519555e-05,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06355169415473938,
|
|
"step": 2370,
|
|
"valid_targets_mean": 5274.3,
|
|
"valid_targets_min": 1542
|
|
},
|
|
{
|
|
"epoch": 2.6275594908688436,
|
|
"grad_norm": 0.17243268355338107,
|
|
"learning_rate": 3.14637450992917e-05,
|
|
"loss": 0.1442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07621452957391739,
|
|
"step": 2375,
|
|
"valid_targets_mean": 5350.8,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 2.633093525179856,
|
|
"grad_norm": 0.17277521809452379,
|
|
"learning_rate": 3.1418498780926666e-05,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061686377972364426,
|
|
"step": 2380,
|
|
"valid_targets_mean": 5759.4,
|
|
"valid_targets_min": 3046
|
|
},
|
|
{
|
|
"epoch": 2.638627559490869,
|
|
"grad_norm": 0.16747105085539926,
|
|
"learning_rate": 3.1373165594319784e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06588593870401382,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5577.3,
|
|
"valid_targets_min": 2091
|
|
},
|
|
{
|
|
"epoch": 2.6441615938018814,
|
|
"grad_norm": 0.17286141433563584,
|
|
"learning_rate": 3.132774588435128e-05,
|
|
"loss": 0.1416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07492373883724213,
|
|
"step": 2390,
|
|
"valid_targets_mean": 5280.2,
|
|
"valid_targets_min": 2551
|
|
},
|
|
{
|
|
"epoch": 2.6496956281128945,
|
|
"grad_norm": 0.17616906402572663,
|
|
"learning_rate": 3.1282239996559575e-05,
|
|
"loss": 0.1407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07154124230146408,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5727.3,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 2.655229662423907,
|
|
"grad_norm": 0.17794751292359126,
|
|
"learning_rate": 3.123664827713875e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06947065144777298,
|
|
"step": 2400,
|
|
"valid_targets_mean": 5774.6,
|
|
"valid_targets_min": 1202
|
|
},
|
|
{
|
|
"epoch": 2.6607636967349197,
|
|
"grad_norm": 0.17260761975203412,
|
|
"learning_rate": 3.119097107293583e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06682231277227402,
|
|
"step": 2405,
|
|
"valid_targets_mean": 5240.7,
|
|
"valid_targets_min": 1928
|
|
},
|
|
{
|
|
"epoch": 2.6662977310459324,
|
|
"grad_norm": 0.165118957251718,
|
|
"learning_rate": 3.114520873144821e-05,
|
|
"loss": 0.1393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06677956134080887,
|
|
"step": 2410,
|
|
"valid_targets_mean": 5512.1,
|
|
"valid_targets_min": 2321
|
|
},
|
|
{
|
|
"epoch": 2.671831765356945,
|
|
"grad_norm": 0.15925324907477573,
|
|
"learning_rate": 3.109936160082096e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06604073196649551,
|
|
"step": 2415,
|
|
"valid_targets_mean": 5468.1,
|
|
"valid_targets_min": 2393
|
|
},
|
|
{
|
|
"epoch": 2.677365799667958,
|
|
"grad_norm": 0.1740316680183045,
|
|
"learning_rate": 3.1053430029844215e-05,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0712900385260582,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5260.7,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 2.6828998339789707,
|
|
"grad_norm": 0.19632240056281522,
|
|
"learning_rate": 3.1007414367950484e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0738878846168518,
|
|
"step": 2425,
|
|
"valid_targets_mean": 5720.1,
|
|
"valid_targets_min": 2402
|
|
},
|
|
{
|
|
"epoch": 2.6884338682899833,
|
|
"grad_norm": 0.18012558106553866,
|
|
"learning_rate": 3.096131496521203e-05,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07787241786718369,
|
|
"step": 2430,
|
|
"valid_targets_mean": 5954.4,
|
|
"valid_targets_min": 2397
|
|
},
|
|
{
|
|
"epoch": 2.6939679026009964,
|
|
"grad_norm": 0.17581719862803866,
|
|
"learning_rate": 3.091513217233819e-05,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07507916539907455,
|
|
"step": 2435,
|
|
"valid_targets_mean": 5727.8,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 2.699501936912009,
|
|
"grad_norm": 0.17347837639945535,
|
|
"learning_rate": 3.086886634067269e-05,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06442199647426605,
|
|
"step": 2440,
|
|
"valid_targets_mean": 5569.0,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 2.7050359712230216,
|
|
"grad_norm": 0.16489782674925513,
|
|
"learning_rate": 3.0822517822191e-05,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06415524333715439,
|
|
"step": 2445,
|
|
"valid_targets_mean": 5661.8,
|
|
"valid_targets_min": 2471
|
|
},
|
|
{
|
|
"epoch": 2.7105700055340343,
|
|
"grad_norm": 0.16368842284606797,
|
|
"learning_rate": 3.077608696949765e-05,
|
|
"loss": 0.1356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06764273345470428,
|
|
"step": 2450,
|
|
"valid_targets_mean": 5622.1,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 2.716104039845047,
|
|
"grad_norm": 0.18330837065767697,
|
|
"learning_rate": 3.072957413582353e-05,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07820945233106613,
|
|
"step": 2455,
|
|
"valid_targets_mean": 5615.0,
|
|
"valid_targets_min": 1872
|
|
},
|
|
{
|
|
"epoch": 2.7216380741560595,
|
|
"grad_norm": 0.18111367512510274,
|
|
"learning_rate": 3.068297967502321e-05,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06971076875925064,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5620.1,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 2.7271721084670726,
|
|
"grad_norm": 0.181017231669242,
|
|
"learning_rate": 3.063630394157228e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06892272084951401,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5791.0,
|
|
"valid_targets_min": 2652
|
|
},
|
|
{
|
|
"epoch": 2.732706142778085,
|
|
"grad_norm": 0.16873712291586646,
|
|
"learning_rate": 3.05895472905646e-05,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06001288816332817,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5198.1,
|
|
"valid_targets_min": 2440
|
|
},
|
|
{
|
|
"epoch": 2.738240177089098,
|
|
"grad_norm": 0.1868490278778886,
|
|
"learning_rate": 3.054271007770963e-05,
|
|
"loss": 0.1425,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06651145964860916,
|
|
"step": 2475,
|
|
"valid_targets_mean": 5649.1,
|
|
"valid_targets_min": 2077
|
|
},
|
|
{
|
|
"epoch": 2.743774211400111,
|
|
"grad_norm": 0.17000751161197922,
|
|
"learning_rate": 3.0495792659329723e-05,
|
|
"loss": 0.1369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.071280837059021,
|
|
"step": 2480,
|
|
"valid_targets_mean": 5574.3,
|
|
"valid_targets_min": 2057
|
|
},
|
|
{
|
|
"epoch": 2.7493082457111235,
|
|
"grad_norm": 0.1771028695622922,
|
|
"learning_rate": 3.0448795392357406e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07716403156518936,
|
|
"step": 2485,
|
|
"valid_targets_mean": 5628.9,
|
|
"valid_targets_min": 2814
|
|
},
|
|
{
|
|
"epoch": 2.754842280022136,
|
|
"grad_norm": 0.28237982686418645,
|
|
"learning_rate": 3.0401718634332674e-05,
|
|
"loss": 0.1574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10520079731941223,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4304.0,
|
|
"valid_targets_min": 1999
|
|
},
|
|
{
|
|
"epoch": 2.760376314333149,
|
|
"grad_norm": 0.3327083760173073,
|
|
"learning_rate": 3.0354562743400264e-05,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10733775049448013,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4515.2,
|
|
"valid_targets_min": 2164
|
|
},
|
|
{
|
|
"epoch": 2.7659103486441614,
|
|
"grad_norm": 0.31049768057667776,
|
|
"learning_rate": 3.0307328078306916e-05,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11256422847509384,
|
|
"step": 2500,
|
|
"valid_targets_mean": 4470.0,
|
|
"valid_targets_min": 1842
|
|
},
|
|
{
|
|
"epoch": 2.7714443829551745,
|
|
"grad_norm": 0.2809847389542365,
|
|
"learning_rate": 3.026001499839868e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10991676896810532,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4624.8,
|
|
"valid_targets_min": 2330
|
|
},
|
|
{
|
|
"epoch": 2.776978417266187,
|
|
"grad_norm": 0.2670407115330399,
|
|
"learning_rate": 3.021262386361814e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10309511423110962,
|
|
"step": 2510,
|
|
"valid_targets_mean": 4104.8,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 2.7825124515771997,
|
|
"grad_norm": 0.2487675603371622,
|
|
"learning_rate": 3.0165155034501716e-05,
|
|
"loss": 0.2045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10134413838386536,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4168.2,
|
|
"valid_targets_min": 2144
|
|
},
|
|
{
|
|
"epoch": 2.7880464858882124,
|
|
"grad_norm": 0.385750753122307,
|
|
"learning_rate": 3.0117608872176886e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08835925906896591,
|
|
"step": 2520,
|
|
"valid_targets_mean": 7307.1,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 2.7935805201992254,
|
|
"grad_norm": 0.2972093207639648,
|
|
"learning_rate": 3.0069985738359454e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09143689274787903,
|
|
"step": 2525,
|
|
"valid_targets_mean": 6482.4,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 2.799114554510238,
|
|
"grad_norm": 0.29782874213402083,
|
|
"learning_rate": 3.002228599535081e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08202945441007614,
|
|
"step": 2530,
|
|
"valid_targets_mean": 6422.1,
|
|
"valid_targets_min": 2269
|
|
},
|
|
{
|
|
"epoch": 2.8046485888212507,
|
|
"grad_norm": 0.25631859635668064,
|
|
"learning_rate": 2.997451000603516e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09196306020021439,
|
|
"step": 2535,
|
|
"valid_targets_mean": 8032.0,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 2.8101826231322633,
|
|
"grad_norm": 0.22517445589923313,
|
|
"learning_rate": 2.9926658133876753e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09404587745666504,
|
|
"step": 2540,
|
|
"valid_targets_mean": 7753.7,
|
|
"valid_targets_min": 997
|
|
},
|
|
{
|
|
"epoch": 2.815716657443276,
|
|
"grad_norm": 0.21994484776162698,
|
|
"learning_rate": 2.987873074291715e-05,
|
|
"loss": 0.1823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08639196306467056,
|
|
"step": 2545,
|
|
"valid_targets_mean": 7592.0,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 2.821250691754289,
|
|
"grad_norm": 0.24575217873515323,
|
|
"learning_rate": 2.983072819777241e-05,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09172558784484863,
|
|
"step": 2550,
|
|
"valid_targets_mean": 12918.2,
|
|
"valid_targets_min": 1948
|
|
},
|
|
{
|
|
"epoch": 2.8267847260653016,
|
|
"grad_norm": 0.15527163383639894,
|
|
"learning_rate": 2.9782650863630363e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09034281969070435,
|
|
"step": 2555,
|
|
"valid_targets_mean": 13792.4,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 2.8323187603763142,
|
|
"grad_norm": 0.13533382276779396,
|
|
"learning_rate": 2.9734499106247807e-05,
|
|
"loss": 0.185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08931824564933777,
|
|
"step": 2560,
|
|
"valid_targets_mean": 13438.8,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 2.8378527946873273,
|
|
"grad_norm": 0.13404710125492317,
|
|
"learning_rate": 2.968627329194771e-05,
|
|
"loss": 0.1805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09555711597204208,
|
|
"step": 2565,
|
|
"valid_targets_mean": 13485.9,
|
|
"valid_targets_min": 3165
|
|
},
|
|
{
|
|
"epoch": 2.84338682899834,
|
|
"grad_norm": 0.20197899539890302,
|
|
"learning_rate": 2.963797378761647e-05,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08223999291658401,
|
|
"step": 2570,
|
|
"valid_targets_mean": 6748.4,
|
|
"valid_targets_min": 1326
|
|
},
|
|
{
|
|
"epoch": 2.8489208633093526,
|
|
"grad_norm": 0.22908324980878794,
|
|
"learning_rate": 2.958960096070106e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08211114257574081,
|
|
"step": 2575,
|
|
"valid_targets_mean": 7122.6,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 2.854454897620365,
|
|
"grad_norm": 0.21069092400808864,
|
|
"learning_rate": 2.95411551792063e-05,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08094976097345352,
|
|
"step": 2580,
|
|
"valid_targets_mean": 7025.2,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 2.859988931931378,
|
|
"grad_norm": 0.24247812593799903,
|
|
"learning_rate": 2.9492636811692007e-05,
|
|
"loss": 0.1873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09617865830659866,
|
|
"step": 2585,
|
|
"valid_targets_mean": 4722.1,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 2.8655229662423904,
|
|
"grad_norm": 0.3284985892901276,
|
|
"learning_rate": 2.944404622727021e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1026540994644165,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4635.6,
|
|
"valid_targets_min": 2233
|
|
},
|
|
{
|
|
"epoch": 2.8710570005534035,
|
|
"grad_norm": 0.26228909838556336,
|
|
"learning_rate": 2.9395383795602343e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08885417133569717,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4973.8,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 2.876591034864416,
|
|
"grad_norm": 0.26117458416833533,
|
|
"learning_rate": 2.934664988689643e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10508755594491959,
|
|
"step": 2600,
|
|
"valid_targets_mean": 5083.4,
|
|
"valid_targets_min": 1921
|
|
},
|
|
{
|
|
"epoch": 2.8821250691754288,
|
|
"grad_norm": 0.24051633479555667,
|
|
"learning_rate": 2.9297844871904274e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09834790229797363,
|
|
"step": 2605,
|
|
"valid_targets_mean": 4291.8,
|
|
"valid_targets_min": 1566
|
|
},
|
|
{
|
|
"epoch": 2.887659103486442,
|
|
"grad_norm": 0.22965236694884916,
|
|
"learning_rate": 2.9248969121918632e-05,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10472244024276733,
|
|
"step": 2610,
|
|
"valid_targets_mean": 4530.1,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 2.8931931377974545,
|
|
"grad_norm": 0.21364671524876022,
|
|
"learning_rate": 2.9200023008770373e-05,
|
|
"loss": 0.2227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1135094165802002,
|
|
"step": 2615,
|
|
"valid_targets_mean": 5348.9,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 2.898727172108467,
|
|
"grad_norm": 0.2209531134133939,
|
|
"learning_rate": 2.915100690482569e-05,
|
|
"loss": 0.222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12207873910665512,
|
|
"step": 2620,
|
|
"valid_targets_mean": 5068.9,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 2.9042612064194797,
|
|
"grad_norm": 0.2211608236960134,
|
|
"learning_rate": 2.9101921182983227e-05,
|
|
"loss": 0.2239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09815773367881775,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4750.7,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 2.9097952407304923,
|
|
"grad_norm": 0.2499134010821729,
|
|
"learning_rate": 2.905276621667127e-05,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10744103044271469,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4307.7,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 2.9153292750415054,
|
|
"grad_norm": 0.23090348616357698,
|
|
"learning_rate": 2.9003542379844884e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11299965530633926,
|
|
"step": 2635,
|
|
"valid_targets_mean": 5096.7,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 2.920863309352518,
|
|
"grad_norm": 0.2320420684470363,
|
|
"learning_rate": 2.8954250046983073e-05,
|
|
"loss": 0.2207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10773200541734695,
|
|
"step": 2640,
|
|
"valid_targets_mean": 4752.5,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 2.9263973436635307,
|
|
"grad_norm": 0.22083962294312315,
|
|
"learning_rate": 2.890488959308596e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1107417568564415,
|
|
"step": 2645,
|
|
"valid_targets_mean": 4916.1,
|
|
"valid_targets_min": 1577
|
|
},
|
|
{
|
|
"epoch": 2.9319313779745433,
|
|
"grad_norm": 0.22091590603380928,
|
|
"learning_rate": 2.8855461393671896e-05,
|
|
"loss": 0.2218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10437620431184769,
|
|
"step": 2650,
|
|
"valid_targets_mean": 5456.0,
|
|
"valid_targets_min": 1907
|
|
},
|
|
{
|
|
"epoch": 2.9374654122855564,
|
|
"grad_norm": 0.24532483951310813,
|
|
"learning_rate": 2.8805965824774615e-05,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10593270510435104,
|
|
"step": 2655,
|
|
"valid_targets_mean": 5207.6,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 2.942999446596569,
|
|
"grad_norm": 0.27364048448227346,
|
|
"learning_rate": 2.8756403262940375e-05,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10775301605463028,
|
|
"step": 2660,
|
|
"valid_targets_mean": 5106.3,
|
|
"valid_targets_min": 2259
|
|
},
|
|
{
|
|
"epoch": 2.9485334809075816,
|
|
"grad_norm": 0.2920537004724869,
|
|
"learning_rate": 2.8706774085225114e-05,
|
|
"loss": 0.2152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09780740737915039,
|
|
"step": 2665,
|
|
"valid_targets_mean": 4084.7,
|
|
"valid_targets_min": 1191
|
|
},
|
|
{
|
|
"epoch": 2.9540675152185942,
|
|
"grad_norm": 0.27117239277014693,
|
|
"learning_rate": 2.865707866919154e-05,
|
|
"loss": 0.1994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09460770338773727,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4309.1,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 2.959601549529607,
|
|
"grad_norm": 0.2533093955054154,
|
|
"learning_rate": 2.8607317392906285e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09425908327102661,
|
|
"step": 2675,
|
|
"valid_targets_mean": 4199.7,
|
|
"valid_targets_min": 1743
|
|
},
|
|
{
|
|
"epoch": 2.96513558384062,
|
|
"grad_norm": 0.2544597366637726,
|
|
"learning_rate": 2.8557490634937027e-05,
|
|
"loss": 0.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09826552867889404,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3982.9,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 2.9706696181516326,
|
|
"grad_norm": 0.2270008550810053,
|
|
"learning_rate": 2.850759877434962e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08753231912851334,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4210.3,
|
|
"valid_targets_min": 2144
|
|
},
|
|
{
|
|
"epoch": 2.976203652462645,
|
|
"grad_norm": 0.23023731050285826,
|
|
"learning_rate": 2.845764219070518e-05,
|
|
"loss": 0.1992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09693797677755356,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4198.8,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 2.9817376867736582,
|
|
"grad_norm": 0.24052515348826364,
|
|
"learning_rate": 2.8407621264057222e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09210985898971558,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4010.5,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.987271721084671,
|
|
"grad_norm": 0.22041660826317713,
|
|
"learning_rate": 2.8357536374948752e-05,
|
|
"loss": 0.2023,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10018914192914963,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4123.1,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 2.9928057553956835,
|
|
"grad_norm": 0.22308832266909892,
|
|
"learning_rate": 2.8307387904409408e-05,
|
|
"loss": 0.1996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1004939153790474,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4315.6,
|
|
"valid_targets_min": 2203
|
|
},
|
|
{
|
|
"epoch": 2.998339789706696,
|
|
"grad_norm": 0.2267894178719489,
|
|
"learning_rate": 2.8257176233952508e-05,
|
|
"loss": 0.1991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09376098960638046,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4404.2,
|
|
"valid_targets_min": 1768
|
|
},
|
|
{
|
|
"epoch": 3.0033204205866078,
|
|
"grad_norm": 0.3260552849333209,
|
|
"learning_rate": 2.8206901745572182e-05,
|
|
"loss": 0.1877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12010964751243591,
|
|
"step": 2715,
|
|
"valid_targets_mean": 7708.5,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 3.0088544548976204,
|
|
"grad_norm": 0.2488611435270132,
|
|
"learning_rate": 2.8156564821740464e-05,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11786027997732162,
|
|
"step": 2720,
|
|
"valid_targets_mean": 7049.1,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 3.014388489208633,
|
|
"grad_norm": 0.1921083844286432,
|
|
"learning_rate": 2.8106165845404367e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10925697535276413,
|
|
"step": 2725,
|
|
"valid_targets_mean": 6744.9,
|
|
"valid_targets_min": 2415
|
|
},
|
|
{
|
|
"epoch": 3.0199225235196456,
|
|
"grad_norm": 0.1821419754608182,
|
|
"learning_rate": 2.8055705199982995e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11892497539520264,
|
|
"step": 2730,
|
|
"valid_targets_mean": 7223.4,
|
|
"valid_targets_min": 1917
|
|
},
|
|
{
|
|
"epoch": 3.0254565578306587,
|
|
"grad_norm": 0.15693484811819064,
|
|
"learning_rate": 2.8005183269364603e-05,
|
|
"loss": 0.2119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10714951902627945,
|
|
"step": 2735,
|
|
"valid_targets_mean": 6793.6,
|
|
"valid_targets_min": 2699
|
|
},
|
|
{
|
|
"epoch": 3.0309905921416713,
|
|
"grad_norm": 0.16709176637637066,
|
|
"learning_rate": 2.795460043790367e-05,
|
|
"loss": 0.1978,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08674667030572891,
|
|
"step": 2740,
|
|
"valid_targets_mean": 6319.4,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 3.036524626452684,
|
|
"grad_norm": 0.16398990842948868,
|
|
"learning_rate": 2.7903957090418007e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09815031290054321,
|
|
"step": 2745,
|
|
"valid_targets_mean": 6464.3,
|
|
"valid_targets_min": 2301
|
|
},
|
|
{
|
|
"epoch": 3.0420586607636966,
|
|
"grad_norm": 0.1738792299814764,
|
|
"learning_rate": 2.785325361218581e-05,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08038411289453506,
|
|
"step": 2750,
|
|
"valid_targets_mean": 5905.9,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 3.0475926950747096,
|
|
"grad_norm": 0.1804894694966061,
|
|
"learning_rate": 2.780249038894272e-05,
|
|
"loss": 0.1976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0968136116862297,
|
|
"step": 2755,
|
|
"valid_targets_mean": 6846.8,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 3.0531267293857223,
|
|
"grad_norm": 0.16073218176284065,
|
|
"learning_rate": 2.77516678068789e-05,
|
|
"loss": 0.1931,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10354902595281601,
|
|
"step": 2760,
|
|
"valid_targets_mean": 6919.4,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 3.058660763696735,
|
|
"grad_norm": 0.18868948921708006,
|
|
"learning_rate": 2.7700786252636107e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10971930623054504,
|
|
"step": 2765,
|
|
"valid_targets_mean": 7691.2,
|
|
"valid_targets_min": 2510
|
|
},
|
|
{
|
|
"epoch": 3.0641947980077475,
|
|
"grad_norm": 0.21268751522461696,
|
|
"learning_rate": 2.7649846113304738e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1004100814461708,
|
|
"step": 2770,
|
|
"valid_targets_mean": 6916.7,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 3.06972883231876,
|
|
"grad_norm": 0.18504147125623674,
|
|
"learning_rate": 2.7598847776420865e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10682997852563858,
|
|
"step": 2775,
|
|
"valid_targets_mean": 6807.6,
|
|
"valid_targets_min": 2726
|
|
},
|
|
{
|
|
"epoch": 3.075262866629773,
|
|
"grad_norm": 0.17716671792405636,
|
|
"learning_rate": 2.7547791629963332e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09369424730539322,
|
|
"step": 2780,
|
|
"valid_targets_mean": 7006.3,
|
|
"valid_targets_min": 2770
|
|
},
|
|
{
|
|
"epoch": 3.080796900940786,
|
|
"grad_norm": 0.17303029321105318,
|
|
"learning_rate": 2.7496678062350773e-05,
|
|
"loss": 0.1955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10050015896558762,
|
|
"step": 2785,
|
|
"valid_targets_mean": 6757.8,
|
|
"valid_targets_min": 2587
|
|
},
|
|
{
|
|
"epoch": 3.0863309352517985,
|
|
"grad_norm": 0.21149449511005555,
|
|
"learning_rate": 2.744550746243866e-05,
|
|
"loss": 0.1944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09726202487945557,
|
|
"step": 2790,
|
|
"valid_targets_mean": 6627.8,
|
|
"valid_targets_min": 2615
|
|
},
|
|
{
|
|
"epoch": 3.091864969562811,
|
|
"grad_norm": 0.1734481214507415,
|
|
"learning_rate": 2.7394280219516342e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08735307306051254,
|
|
"step": 2795,
|
|
"valid_targets_mean": 6780.6,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 3.097399003873824,
|
|
"grad_norm": 0.1689887983129223,
|
|
"learning_rate": 2.7342996723304112e-05,
|
|
"loss": 0.1974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1004621684551239,
|
|
"step": 2800,
|
|
"valid_targets_mean": 6800.9,
|
|
"valid_targets_min": 2579
|
|
},
|
|
{
|
|
"epoch": 3.102933038184837,
|
|
"grad_norm": 0.17087147304863046,
|
|
"learning_rate": 2.72916573639502e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10274376720190048,
|
|
"step": 2805,
|
|
"valid_targets_mean": 7386.6,
|
|
"valid_targets_min": 2617
|
|
},
|
|
{
|
|
"epoch": 3.1084670724958494,
|
|
"grad_norm": 0.18170372433423485,
|
|
"learning_rate": 2.7240262532027825e-05,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09600893408060074,
|
|
"step": 2810,
|
|
"valid_targets_mean": 7072.9,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 3.114001106806862,
|
|
"grad_norm": 0.1902362690377946,
|
|
"learning_rate": 2.7188812618532236e-05,
|
|
"loss": 0.1918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09322325140237808,
|
|
"step": 2815,
|
|
"valid_targets_mean": 6639.1,
|
|
"valid_targets_min": 3065
|
|
},
|
|
{
|
|
"epoch": 3.119535141117875,
|
|
"grad_norm": 0.20059988733734505,
|
|
"learning_rate": 2.713730801487771e-05,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09941449016332626,
|
|
"step": 2820,
|
|
"valid_targets_mean": 6858.6,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 3.1250691754288877,
|
|
"grad_norm": 0.1851527145531092,
|
|
"learning_rate": 2.7085749112894596e-05,
|
|
"loss": 0.1867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09204109758138657,
|
|
"step": 2825,
|
|
"valid_targets_mean": 7061.5,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 3.1306032097399004,
|
|
"grad_norm": 0.19876799779267673,
|
|
"learning_rate": 2.7034136304826342e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08583220094442368,
|
|
"step": 2830,
|
|
"valid_targets_mean": 6095.5,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 3.136137244050913,
|
|
"grad_norm": 0.21456728555996507,
|
|
"learning_rate": 2.6982469983326464e-05,
|
|
"loss": 0.1892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08667775988578796,
|
|
"step": 2835,
|
|
"valid_targets_mean": 6642.1,
|
|
"valid_targets_min": 3179
|
|
},
|
|
{
|
|
"epoch": 3.141671278361926,
|
|
"grad_norm": 0.1852695398214736,
|
|
"learning_rate": 2.6930750541455616e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09901637583971024,
|
|
"step": 2840,
|
|
"valid_targets_mean": 6965.1,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 3.1472053126729387,
|
|
"grad_norm": 0.17489563656293733,
|
|
"learning_rate": 2.6878978372678567e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08868839591741562,
|
|
"step": 2845,
|
|
"valid_targets_mean": 7017.0,
|
|
"valid_targets_min": 2142
|
|
},
|
|
{
|
|
"epoch": 3.1527393469839513,
|
|
"grad_norm": 0.20139088817043452,
|
|
"learning_rate": 2.682715387086121e-05,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0875069797039032,
|
|
"step": 2850,
|
|
"valid_targets_mean": 6845.5,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 3.158273381294964,
|
|
"grad_norm": 0.19119018997986806,
|
|
"learning_rate": 2.677527743026759e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09193676710128784,
|
|
"step": 2855,
|
|
"valid_targets_mean": 6665.5,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 3.1638074156059766,
|
|
"grad_norm": 0.20580010529432702,
|
|
"learning_rate": 2.6723349445556857e-05,
|
|
"loss": 0.1938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10696002840995789,
|
|
"step": 2860,
|
|
"valid_targets_mean": 7223.8,
|
|
"valid_targets_min": 2785
|
|
},
|
|
{
|
|
"epoch": 3.1693414499169896,
|
|
"grad_norm": 0.1946678998474377,
|
|
"learning_rate": 2.6671370311780303e-05,
|
|
"loss": 0.1801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09646815061569214,
|
|
"step": 2865,
|
|
"valid_targets_mean": 7461.0,
|
|
"valid_targets_min": 2445
|
|
},
|
|
{
|
|
"epoch": 3.1748754842280023,
|
|
"grad_norm": 0.19551830245272903,
|
|
"learning_rate": 2.6619340424378367e-05,
|
|
"loss": 0.1887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09138414263725281,
|
|
"step": 2870,
|
|
"valid_targets_mean": 6816.0,
|
|
"valid_targets_min": 2527
|
|
},
|
|
{
|
|
"epoch": 3.180409518539015,
|
|
"grad_norm": 0.23049576923409093,
|
|
"learning_rate": 2.656726017917757e-05,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09756263345479965,
|
|
"step": 2875,
|
|
"valid_targets_mean": 7153.1,
|
|
"valid_targets_min": 2039
|
|
},
|
|
{
|
|
"epoch": 3.1859435528500275,
|
|
"grad_norm": 0.20343859617581705,
|
|
"learning_rate": 2.651512997238757e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10609716176986694,
|
|
"step": 2880,
|
|
"valid_targets_mean": 7173.5,
|
|
"valid_targets_min": 3054
|
|
},
|
|
{
|
|
"epoch": 3.1914775871610406,
|
|
"grad_norm": 0.20071464261230162,
|
|
"learning_rate": 2.6462950200598094e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09010434150695801,
|
|
"step": 2885,
|
|
"valid_targets_mean": 6941.1,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 3.197011621472053,
|
|
"grad_norm": 0.20500500621992115,
|
|
"learning_rate": 2.641072126077595e-05,
|
|
"loss": 0.1826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08181817829608917,
|
|
"step": 2890,
|
|
"valid_targets_mean": 6240.0,
|
|
"valid_targets_min": 2230
|
|
},
|
|
{
|
|
"epoch": 3.202545655783066,
|
|
"grad_norm": 0.19535535133451618,
|
|
"learning_rate": 2.635844355026201e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08865565806627274,
|
|
"step": 2895,
|
|
"valid_targets_mean": 7364.3,
|
|
"valid_targets_min": 2192
|
|
},
|
|
{
|
|
"epoch": 3.2080796900940785,
|
|
"grad_norm": 0.1897151141024659,
|
|
"learning_rate": 2.6306117466768167e-05,
|
|
"loss": 0.1891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08853700757026672,
|
|
"step": 2900,
|
|
"valid_targets_mean": 6757.6,
|
|
"valid_targets_min": 1881
|
|
},
|
|
{
|
|
"epoch": 3.213613724405091,
|
|
"grad_norm": 0.20943692032905442,
|
|
"learning_rate": 2.6253743408374313e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10196039825677872,
|
|
"step": 2905,
|
|
"valid_targets_mean": 7209.8,
|
|
"valid_targets_min": 1944
|
|
},
|
|
{
|
|
"epoch": 3.219147758716104,
|
|
"grad_norm": 0.1935204265289269,
|
|
"learning_rate": 2.6201321773525338e-05,
|
|
"loss": 0.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08931133151054382,
|
|
"step": 2910,
|
|
"valid_targets_mean": 6967.8,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 3.2246817930271168,
|
|
"grad_norm": 0.286822677674402,
|
|
"learning_rate": 2.6148852961028066e-05,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08451773971319199,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3892.3,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 3.2302158273381294,
|
|
"grad_norm": 0.3861579584361899,
|
|
"learning_rate": 2.609633737004824e-05,
|
|
"loss": 0.1913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09060275554656982,
|
|
"step": 2920,
|
|
"valid_targets_mean": 4990.3,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 3.235749861649142,
|
|
"grad_norm": 0.3831401215740749,
|
|
"learning_rate": 2.604377540010746e-05,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09715189784765244,
|
|
"step": 2925,
|
|
"valid_targets_mean": 4408.2,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 3.241283895960155,
|
|
"grad_norm": 0.3632796786868025,
|
|
"learning_rate": 2.5991167451080195e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08995544910430908,
|
|
"step": 2930,
|
|
"valid_targets_mean": 4236.2,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 3.2468179302711677,
|
|
"grad_norm": 0.3654482389278134,
|
|
"learning_rate": 2.593851392319067e-05,
|
|
"loss": 0.1918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09162074327468872,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3938.6,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 3.2523519645821803,
|
|
"grad_norm": 0.331181711978515,
|
|
"learning_rate": 2.588581521700989e-05,
|
|
"loss": 0.201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1132919192314148,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5005.6,
|
|
"valid_targets_min": 2114
|
|
},
|
|
{
|
|
"epoch": 3.257885998893193,
|
|
"grad_norm": 0.2984211657827162,
|
|
"learning_rate": 2.5833071733452543e-05,
|
|
"loss": 0.2079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08910011500120163,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3783.9,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 3.263420033204206,
|
|
"grad_norm": 0.3057568807284397,
|
|
"learning_rate": 2.5780283873773975e-05,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11115298420190811,
|
|
"step": 2950,
|
|
"valid_targets_mean": 4429.8,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 3.2689540675152187,
|
|
"grad_norm": 0.3334752774018342,
|
|
"learning_rate": 2.5727452039567148e-05,
|
|
"loss": 0.21,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09694870561361313,
|
|
"step": 2955,
|
|
"valid_targets_mean": 4083.3,
|
|
"valid_targets_min": 1931
|
|
},
|
|
{
|
|
"epoch": 3.2744881018262313,
|
|
"grad_norm": 0.2987416030856599,
|
|
"learning_rate": 2.5674576632759528e-05,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10165077447891235,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4112.9,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 3.280022136137244,
|
|
"grad_norm": 0.2820928535868255,
|
|
"learning_rate": 2.5621658055610102e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10538693517446518,
|
|
"step": 2965,
|
|
"valid_targets_mean": 4394.1,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 3.285556170448257,
|
|
"grad_norm": 0.2928257056247332,
|
|
"learning_rate": 2.5568696710706275e-05,
|
|
"loss": 0.2099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10181987285614014,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4841.0,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 3.2910902047592696,
|
|
"grad_norm": 0.2702920625567065,
|
|
"learning_rate": 2.5515693000960804e-05,
|
|
"loss": 0.2032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.088201604783535,
|
|
"step": 2975,
|
|
"valid_targets_mean": 3999.7,
|
|
"valid_targets_min": 1273
|
|
},
|
|
{
|
|
"epoch": 3.2966242390702822,
|
|
"grad_norm": 0.2787562271224257,
|
|
"learning_rate": 2.546264732960876e-05,
|
|
"loss": 0.1997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10615724325180054,
|
|
"step": 2980,
|
|
"valid_targets_mean": 4681.2,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 3.302158273381295,
|
|
"grad_norm": 0.2910523675114332,
|
|
"learning_rate": 2.540956010020443e-05,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10740578174591064,
|
|
"step": 2985,
|
|
"valid_targets_mean": 4388.4,
|
|
"valid_targets_min": 1227
|
|
},
|
|
{
|
|
"epoch": 3.3076923076923075,
|
|
"grad_norm": 0.2842663902466955,
|
|
"learning_rate": 2.535643171661827e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08907082676887512,
|
|
"step": 2990,
|
|
"valid_targets_mean": 3892.8,
|
|
"valid_targets_min": 1207
|
|
},
|
|
{
|
|
"epoch": 3.3132263420033206,
|
|
"grad_norm": 0.2617368302587257,
|
|
"learning_rate": 2.530326258303383e-05,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1079702377319336,
|
|
"step": 2995,
|
|
"valid_targets_mean": 4398.5,
|
|
"valid_targets_min": 2229
|
|
},
|
|
{
|
|
"epoch": 3.318760376314333,
|
|
"grad_norm": 0.2786891448668176,
|
|
"learning_rate": 2.5250053103944653e-05,
|
|
"loss": 0.1956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09582444280385971,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4250.3,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 3.324294410625346,
|
|
"grad_norm": 0.2941018152542811,
|
|
"learning_rate": 2.5196803684151223e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08968652039766312,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4232.0,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 3.3298284449363584,
|
|
"grad_norm": 0.28117186217774265,
|
|
"learning_rate": 2.5143514728757904e-05,
|
|
"loss": 0.1954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11329822987318039,
|
|
"step": 3010,
|
|
"valid_targets_mean": 5043.6,
|
|
"valid_targets_min": 1841
|
|
},
|
|
{
|
|
"epoch": 3.3353624792473715,
|
|
"grad_norm": 0.2802942381724536,
|
|
"learning_rate": 2.5090186643169804e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09427406638860703,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3982.5,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 3.340896513558384,
|
|
"grad_norm": 0.32475933644846067,
|
|
"learning_rate": 2.5036819833089734e-05,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1276949793100357,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4740.7,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 3.3464305478693968,
|
|
"grad_norm": 0.32006737602777374,
|
|
"learning_rate": 2.4983414704515104e-05,
|
|
"loss": 0.1898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09326666593551636,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4246.9,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 3.3519645821804094,
|
|
"grad_norm": 0.27630109677520476,
|
|
"learning_rate": 2.492997166373485e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09285572916269302,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4407.0,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 3.357498616491422,
|
|
"grad_norm": 0.28240313121848504,
|
|
"learning_rate": 2.4876491117326315e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09093517065048218,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3929.4,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 3.363032650802435,
|
|
"grad_norm": 0.31520104902216356,
|
|
"learning_rate": 2.482297347215219e-05,
|
|
"loss": 0.1938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10813164710998535,
|
|
"step": 3040,
|
|
"valid_targets_mean": 4758.0,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 3.3685666851134477,
|
|
"grad_norm": 0.290618673534816,
|
|
"learning_rate": 2.476941913535738e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08990909904241562,
|
|
"step": 3045,
|
|
"valid_targets_mean": 4235.8,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 3.3741007194244603,
|
|
"grad_norm": 0.275550505368689,
|
|
"learning_rate": 2.4715828514365953e-05,
|
|
"loss": 0.1961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09693226963281631,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4851.0,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 3.3796347537354734,
|
|
"grad_norm": 0.2963432180668484,
|
|
"learning_rate": 2.4662202016878e-05,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08788219839334488,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3943.5,
|
|
"valid_targets_min": 1444
|
|
},
|
|
{
|
|
"epoch": 3.385168788046486,
|
|
"grad_norm": 0.29499952415638026,
|
|
"learning_rate": 2.4608540050866544e-05,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09814119338989258,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4412.8,
|
|
"valid_targets_min": 1966
|
|
},
|
|
{
|
|
"epoch": 3.3907028223574986,
|
|
"grad_norm": 0.3042118001971243,
|
|
"learning_rate": 2.4554843024574454e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09488651901483536,
|
|
"step": 3065,
|
|
"valid_targets_mean": 4550.5,
|
|
"valid_targets_min": 1194
|
|
},
|
|
{
|
|
"epoch": 3.3962368566685113,
|
|
"grad_norm": 0.2939692802430235,
|
|
"learning_rate": 2.450111134651132e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10834595561027527,
|
|
"step": 3070,
|
|
"valid_targets_mean": 4816.4,
|
|
"valid_targets_min": 1586
|
|
},
|
|
{
|
|
"epoch": 3.401770890979524,
|
|
"grad_norm": 0.2857820932330975,
|
|
"learning_rate": 2.4447345425450353e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09045431762933731,
|
|
"step": 3075,
|
|
"valid_targets_mean": 4299.6,
|
|
"valid_targets_min": 1866
|
|
},
|
|
{
|
|
"epoch": 3.407304925290537,
|
|
"grad_norm": 0.3019933684662385,
|
|
"learning_rate": 2.4393545670425276e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0811840146780014,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3712.6,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 3.4128389596015496,
|
|
"grad_norm": 0.30329649427969957,
|
|
"learning_rate": 2.43397124907272e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0953529104590416,
|
|
"step": 3085,
|
|
"valid_targets_mean": 4404.2,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 3.418372993912562,
|
|
"grad_norm": 0.3100850324511087,
|
|
"learning_rate": 2.4285846295901537e-05,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09615548700094223,
|
|
"step": 3090,
|
|
"valid_targets_mean": 4216.7,
|
|
"valid_targets_min": 1385
|
|
},
|
|
{
|
|
"epoch": 3.423907028223575,
|
|
"grad_norm": 0.306563388540784,
|
|
"learning_rate": 2.423194749574485e-05,
|
|
"loss": 0.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08508458733558655,
|
|
"step": 3095,
|
|
"valid_targets_mean": 4448.0,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 3.429441062534588,
|
|
"grad_norm": 0.3297129530561979,
|
|
"learning_rate": 2.4178016500301757e-05,
|
|
"loss": 0.1761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09123352915048599,
|
|
"step": 3100,
|
|
"valid_targets_mean": 4399.5,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 3.4349750968456005,
|
|
"grad_norm": 0.2956509424732023,
|
|
"learning_rate": 2.412405371986182e-05,
|
|
"loss": 0.1905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09261216968297958,
|
|
"step": 3105,
|
|
"valid_targets_mean": 5115.6,
|
|
"valid_targets_min": 1868
|
|
},
|
|
{
|
|
"epoch": 3.440509131156613,
|
|
"grad_norm": 0.32424268893973734,
|
|
"learning_rate": 2.4070059564956393e-05,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09287405014038086,
|
|
"step": 3110,
|
|
"valid_targets_mean": 4610.4,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 3.446043165467626,
|
|
"grad_norm": 0.3177883403955838,
|
|
"learning_rate": 2.4016034446355533e-05,
|
|
"loss": 0.1833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08633139729499817,
|
|
"step": 3115,
|
|
"valid_targets_mean": 4042.8,
|
|
"valid_targets_min": 1613
|
|
},
|
|
{
|
|
"epoch": 3.4515771997786384,
|
|
"grad_norm": 0.2943065081811316,
|
|
"learning_rate": 2.396197877506484e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09375669807195663,
|
|
"step": 3120,
|
|
"valid_targets_mean": 4377.2,
|
|
"valid_targets_min": 1484
|
|
},
|
|
{
|
|
"epoch": 3.4571112340896515,
|
|
"grad_norm": 0.2642397180732357,
|
|
"learning_rate": 2.390789296232237e-05,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11398092657327652,
|
|
"step": 3125,
|
|
"valid_targets_mean": 5438.3,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 3.462645268400664,
|
|
"grad_norm": 0.30534761153482814,
|
|
"learning_rate": 2.3853777419595476e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09357643872499466,
|
|
"step": 3130,
|
|
"valid_targets_mean": 4372.1,
|
|
"valid_targets_min": 2038
|
|
},
|
|
{
|
|
"epoch": 3.4681793027116767,
|
|
"grad_norm": 0.27270808124872176,
|
|
"learning_rate": 2.3799632558577675e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07442276924848557,
|
|
"step": 3135,
|
|
"valid_targets_mean": 5855.8,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 3.4737133370226894,
|
|
"grad_norm": 0.24153688662366368,
|
|
"learning_rate": 2.3745458791185554e-05,
|
|
"loss": 0.1375,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07261274009943008,
|
|
"step": 3140,
|
|
"valid_targets_mean": 5574.1,
|
|
"valid_targets_min": 2410
|
|
},
|
|
{
|
|
"epoch": 3.4792473713337024,
|
|
"grad_norm": 0.17721777678165035,
|
|
"learning_rate": 2.3691256529555587e-05,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06744299083948135,
|
|
"step": 3145,
|
|
"valid_targets_mean": 5714.9,
|
|
"valid_targets_min": 1867
|
|
},
|
|
{
|
|
"epoch": 3.484781405644715,
|
|
"grad_norm": 0.17939390430450763,
|
|
"learning_rate": 2.3637026186041028e-05,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06565037369728088,
|
|
"step": 3150,
|
|
"valid_targets_mean": 5655.9,
|
|
"valid_targets_min": 2408
|
|
},
|
|
{
|
|
"epoch": 3.4903154399557277,
|
|
"grad_norm": 0.16869028724210566,
|
|
"learning_rate": 2.3582768173208788e-05,
|
|
"loss": 0.1318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05937578156590462,
|
|
"step": 3155,
|
|
"valid_targets_mean": 5437.7,
|
|
"valid_targets_min": 2380
|
|
},
|
|
{
|
|
"epoch": 3.4958494742667403,
|
|
"grad_norm": 0.17528165871270113,
|
|
"learning_rate": 2.3528482903836248e-05,
|
|
"loss": 0.134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06441015750169754,
|
|
"step": 3160,
|
|
"valid_targets_mean": 5273.7,
|
|
"valid_targets_min": 2383
|
|
},
|
|
{
|
|
"epoch": 3.501383508577753,
|
|
"grad_norm": 0.16460285910916572,
|
|
"learning_rate": 2.3474170790908166e-05,
|
|
"loss": 0.1277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06017139554023743,
|
|
"step": 3165,
|
|
"valid_targets_mean": 5861.0,
|
|
"valid_targets_min": 2743
|
|
},
|
|
{
|
|
"epoch": 3.506917542888766,
|
|
"grad_norm": 0.17600501488293324,
|
|
"learning_rate": 2.3419832247613514e-05,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06484571099281311,
|
|
"step": 3170,
|
|
"valid_targets_mean": 5160.7,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 3.5124515771997786,
|
|
"grad_norm": 0.17434239041445998,
|
|
"learning_rate": 2.3365467687342336e-05,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05803552269935608,
|
|
"step": 3175,
|
|
"valid_targets_mean": 5590.1,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 3.5179856115107913,
|
|
"grad_norm": 0.17499098929635343,
|
|
"learning_rate": 2.33110775236826e-05,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06218947470188141,
|
|
"step": 3180,
|
|
"valid_targets_mean": 6409.6,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 3.5235196458218043,
|
|
"grad_norm": 0.16318966508533975,
|
|
"learning_rate": 2.3256662170417076e-05,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05842486396431923,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5695.5,
|
|
"valid_targets_min": 1750
|
|
},
|
|
{
|
|
"epoch": 3.529053680132817,
|
|
"grad_norm": 0.1708102667677403,
|
|
"learning_rate": 2.3202222041520135e-05,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05957861244678497,
|
|
"step": 3190,
|
|
"valid_targets_mean": 5893.7,
|
|
"valid_targets_min": 1777
|
|
},
|
|
{
|
|
"epoch": 3.5345877144438296,
|
|
"grad_norm": 0.1948736754459281,
|
|
"learning_rate": 2.3147757551154667e-05,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0627470389008522,
|
|
"step": 3195,
|
|
"valid_targets_mean": 5945.1,
|
|
"valid_targets_min": 2180
|
|
},
|
|
{
|
|
"epoch": 3.540121748754842,
|
|
"grad_norm": 0.17992534289584564,
|
|
"learning_rate": 2.309326911366888e-05,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05356718227267265,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5163.2,
|
|
"valid_targets_min": 2070
|
|
},
|
|
{
|
|
"epoch": 3.545655783065855,
|
|
"grad_norm": 0.1896234788874136,
|
|
"learning_rate": 2.3038757143593155e-05,
|
|
"loss": 0.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05686204507946968,
|
|
"step": 3205,
|
|
"valid_targets_mean": 5939.5,
|
|
"valid_targets_min": 2847
|
|
},
|
|
{
|
|
"epoch": 3.551189817376868,
|
|
"grad_norm": 0.1934068787610231,
|
|
"learning_rate": 2.298422205563692e-05,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05834668502211571,
|
|
"step": 3210,
|
|
"valid_targets_mean": 5371.9,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 3.5567238516878805,
|
|
"grad_norm": 0.20265828021309631,
|
|
"learning_rate": 2.2929664264685463e-05,
|
|
"loss": 0.1135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055999208241701126,
|
|
"step": 3215,
|
|
"valid_targets_mean": 5458.5,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 3.562257885998893,
|
|
"grad_norm": 0.18832600204742164,
|
|
"learning_rate": 2.2875084185796796e-05,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05172300711274147,
|
|
"step": 3220,
|
|
"valid_targets_mean": 5541.6,
|
|
"valid_targets_min": 2876
|
|
},
|
|
{
|
|
"epoch": 3.5677919203099058,
|
|
"grad_norm": 0.19124983926618677,
|
|
"learning_rate": 2.282048223419849e-05,
|
|
"loss": 0.1149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05770617723464966,
|
|
"step": 3225,
|
|
"valid_targets_mean": 5546.1,
|
|
"valid_targets_min": 1917
|
|
},
|
|
{
|
|
"epoch": 3.573325954620919,
|
|
"grad_norm": 0.1815657905894023,
|
|
"learning_rate": 2.2765858825284512e-05,
|
|
"loss": 0.1104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05621454119682312,
|
|
"step": 3230,
|
|
"valid_targets_mean": 5396.4,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 3.5788599889319315,
|
|
"grad_norm": 0.18039022969585103,
|
|
"learning_rate": 2.2711214374612073e-05,
|
|
"loss": 0.1164,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05948448181152344,
|
|
"step": 3235,
|
|
"valid_targets_mean": 5903.2,
|
|
"valid_targets_min": 2781
|
|
},
|
|
{
|
|
"epoch": 3.584394023242944,
|
|
"grad_norm": 0.18527243267165797,
|
|
"learning_rate": 2.265654929789846e-05,
|
|
"loss": 0.1121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05525258556008339,
|
|
"step": 3240,
|
|
"valid_targets_mean": 5584.1,
|
|
"valid_targets_min": 2118
|
|
},
|
|
{
|
|
"epoch": 3.5899280575539567,
|
|
"grad_norm": 0.18795759761112604,
|
|
"learning_rate": 2.260186401101788e-05,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05691872909665108,
|
|
"step": 3245,
|
|
"valid_targets_mean": 5704.3,
|
|
"valid_targets_min": 2934
|
|
},
|
|
{
|
|
"epoch": 3.5954620918649693,
|
|
"grad_norm": 0.18431748353924032,
|
|
"learning_rate": 2.2547158929998295e-05,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050647616386413574,
|
|
"step": 3250,
|
|
"valid_targets_mean": 5652.3,
|
|
"valid_targets_min": 2882
|
|
},
|
|
{
|
|
"epoch": 3.6009961261759824,
|
|
"grad_norm": 0.1836061155823106,
|
|
"learning_rate": 2.2492434471018242e-05,
|
|
"loss": 0.1118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046802204102277756,
|
|
"step": 3255,
|
|
"valid_targets_mean": 5334.6,
|
|
"valid_targets_min": 2788
|
|
},
|
|
{
|
|
"epoch": 3.606530160486995,
|
|
"grad_norm": 0.17861875194505744,
|
|
"learning_rate": 2.243769105040369e-05,
|
|
"loss": 0.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05294260382652283,
|
|
"step": 3260,
|
|
"valid_targets_mean": 5215.5,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 3.6120641947980077,
|
|
"grad_norm": 0.18093941310905542,
|
|
"learning_rate": 2.2382929084624858e-05,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05248071625828743,
|
|
"step": 3265,
|
|
"valid_targets_mean": 5419.0,
|
|
"valid_targets_min": 2222
|
|
},
|
|
{
|
|
"epoch": 3.6175982291090207,
|
|
"grad_norm": 0.18748518763677194,
|
|
"learning_rate": 2.232814899029305e-05,
|
|
"loss": 0.1137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047866091132164,
|
|
"step": 3270,
|
|
"valid_targets_mean": 6090.2,
|
|
"valid_targets_min": 2674
|
|
},
|
|
{
|
|
"epoch": 3.6231322634200334,
|
|
"grad_norm": 0.19665222290693413,
|
|
"learning_rate": 2.2273351184157504e-05,
|
|
"loss": 0.1016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055568646639585495,
|
|
"step": 3275,
|
|
"valid_targets_mean": 5320.1,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 3.628666297731046,
|
|
"grad_norm": 0.22082547667814492,
|
|
"learning_rate": 2.2218536083102168e-05,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05981751158833504,
|
|
"step": 3280,
|
|
"valid_targets_mean": 5572.3,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 3.6342003320420586,
|
|
"grad_norm": 0.19055385755567586,
|
|
"learning_rate": 2.2163704104142586e-05,
|
|
"loss": 0.1062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04928545281291008,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5389.2,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 3.6397343663530712,
|
|
"grad_norm": 0.19954978228249304,
|
|
"learning_rate": 2.210885566442271e-05,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05369424819946289,
|
|
"step": 3290,
|
|
"valid_targets_mean": 5636.7,
|
|
"valid_targets_min": 2583
|
|
},
|
|
{
|
|
"epoch": 3.645268400664084,
|
|
"grad_norm": 0.19931274918591688,
|
|
"learning_rate": 2.2053991181211706e-05,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04861845448613167,
|
|
"step": 3295,
|
|
"valid_targets_mean": 5504.9,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 3.650802434975097,
|
|
"grad_norm": 0.20336889771034933,
|
|
"learning_rate": 2.1999111071900806e-05,
|
|
"loss": 0.1035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05100370571017265,
|
|
"step": 3300,
|
|
"valid_targets_mean": 5358.7,
|
|
"valid_targets_min": 2485
|
|
},
|
|
{
|
|
"epoch": 3.6563364692861096,
|
|
"grad_norm": 0.20145951306806356,
|
|
"learning_rate": 2.1944215754000107e-05,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061765749007463455,
|
|
"step": 3305,
|
|
"valid_targets_mean": 5603.7,
|
|
"valid_targets_min": 2272
|
|
},
|
|
{
|
|
"epoch": 3.661870503597122,
|
|
"grad_norm": 0.4966725091201629,
|
|
"learning_rate": 2.188930564513541e-05,
|
|
"loss": 0.1069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05129636451601982,
|
|
"step": 3310,
|
|
"valid_targets_mean": 5764.9,
|
|
"valid_targets_min": 2253
|
|
},
|
|
{
|
|
"epoch": 3.6674045379081353,
|
|
"grad_norm": 0.1958144887330287,
|
|
"learning_rate": 2.183438116304506e-05,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05419323965907097,
|
|
"step": 3315,
|
|
"valid_targets_mean": 5999.5,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 3.672938572219148,
|
|
"grad_norm": 0.20186968025351232,
|
|
"learning_rate": 2.1779442725576722e-05,
|
|
"loss": 0.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04640122130513191,
|
|
"step": 3320,
|
|
"valid_targets_mean": 5618.7,
|
|
"valid_targets_min": 2151
|
|
},
|
|
{
|
|
"epoch": 3.6784726065301605,
|
|
"grad_norm": 0.19428287401499916,
|
|
"learning_rate": 2.1724490750684238e-05,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049658600240945816,
|
|
"step": 3325,
|
|
"valid_targets_mean": 5737.9,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 3.684006640841173,
|
|
"grad_norm": 0.20853568450404253,
|
|
"learning_rate": 2.1669525656424454e-05,
|
|
"loss": 0.1083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05687330290675163,
|
|
"step": 3330,
|
|
"valid_targets_mean": 5562.4,
|
|
"valid_targets_min": 2524
|
|
},
|
|
{
|
|
"epoch": 3.6895406751521858,
|
|
"grad_norm": 0.20138033879262535,
|
|
"learning_rate": 2.1614547860954005e-05,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04973490163683891,
|
|
"step": 3335,
|
|
"valid_targets_mean": 5549.1,
|
|
"valid_targets_min": 2082
|
|
},
|
|
{
|
|
"epoch": 3.695074709463199,
|
|
"grad_norm": 0.20307972370942748,
|
|
"learning_rate": 2.1559557782526162e-05,
|
|
"loss": 0.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06023570895195007,
|
|
"step": 3340,
|
|
"valid_targets_mean": 6159.9,
|
|
"valid_targets_min": 2331
|
|
},
|
|
{
|
|
"epoch": 3.7006087437742115,
|
|
"grad_norm": 0.1925172789049576,
|
|
"learning_rate": 2.1504555839487636e-05,
|
|
"loss": 0.1005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05248604714870453,
|
|
"step": 3345,
|
|
"valid_targets_mean": 5593.5,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 3.706142778085224,
|
|
"grad_norm": 0.20737964844426446,
|
|
"learning_rate": 2.14495424502754e-05,
|
|
"loss": 0.0991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05406155064702034,
|
|
"step": 3350,
|
|
"valid_targets_mean": 5608.8,
|
|
"valid_targets_min": 2612
|
|
},
|
|
{
|
|
"epoch": 3.7116768123962367,
|
|
"grad_norm": 0.21321632992915002,
|
|
"learning_rate": 2.139451803341351e-05,
|
|
"loss": 0.1006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05330200493335724,
|
|
"step": 3355,
|
|
"valid_targets_mean": 5524.9,
|
|
"valid_targets_min": 2217
|
|
},
|
|
{
|
|
"epoch": 3.7172108467072498,
|
|
"grad_norm": 0.20866461352352217,
|
|
"learning_rate": 2.133948300750992e-05,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05255679413676262,
|
|
"step": 3360,
|
|
"valid_targets_mean": 5398.8,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 3.7227448810182624,
|
|
"grad_norm": 0.21615500098606666,
|
|
"learning_rate": 2.1284437791253273e-05,
|
|
"loss": 0.1062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05293337628245354,
|
|
"step": 3365,
|
|
"valid_targets_mean": 5673.8,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 3.728278915329275,
|
|
"grad_norm": 0.18285632131411686,
|
|
"learning_rate": 2.122938280340977e-05,
|
|
"loss": 0.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052879661321640015,
|
|
"step": 3370,
|
|
"valid_targets_mean": 5651.7,
|
|
"valid_targets_min": 3032
|
|
},
|
|
{
|
|
"epoch": 3.7338129496402876,
|
|
"grad_norm": 0.20603578255440028,
|
|
"learning_rate": 2.1174318462819925e-05,
|
|
"loss": 0.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05127779021859169,
|
|
"step": 3375,
|
|
"valid_targets_mean": 5350.3,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 3.7393469839513003,
|
|
"grad_norm": 0.19738589050719968,
|
|
"learning_rate": 2.1119245188395414e-05,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052101802080869675,
|
|
"step": 3380,
|
|
"valid_targets_mean": 6064.2,
|
|
"valid_targets_min": 2801
|
|
},
|
|
{
|
|
"epoch": 3.7448810182623133,
|
|
"grad_norm": 0.17640985034996595,
|
|
"learning_rate": 2.1064163399115883e-05,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051063280552625656,
|
|
"step": 3385,
|
|
"valid_targets_mean": 5364.2,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 3.750415052573326,
|
|
"grad_norm": 0.20762261921662648,
|
|
"learning_rate": 2.1009073514025743e-05,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049620259553194046,
|
|
"step": 3390,
|
|
"valid_targets_mean": 5511.1,
|
|
"valid_targets_min": 2194
|
|
},
|
|
{
|
|
"epoch": 3.7559490868843386,
|
|
"grad_norm": 0.2818458833816632,
|
|
"learning_rate": 2.0953975952231015e-05,
|
|
"loss": 0.1264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07185864448547363,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4122.2,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 3.7614831211953517,
|
|
"grad_norm": 0.3703796923782842,
|
|
"learning_rate": 2.08988711328961e-05,
|
|
"loss": 0.1556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08274849504232407,
|
|
"step": 3400,
|
|
"valid_targets_mean": 4674.5,
|
|
"valid_targets_min": 1999
|
|
},
|
|
{
|
|
"epoch": 3.7670171555063643,
|
|
"grad_norm": 0.35697897890053726,
|
|
"learning_rate": 2.0843759475240625e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07798462361097336,
|
|
"step": 3405,
|
|
"valid_targets_mean": 4400.6,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 3.772551189817377,
|
|
"grad_norm": 0.34355115258038904,
|
|
"learning_rate": 2.0788641398536237e-05,
|
|
"loss": 0.1534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08108644932508469,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4527.1,
|
|
"valid_targets_min": 1917
|
|
},
|
|
{
|
|
"epoch": 3.7780852241283895,
|
|
"grad_norm": 0.2802880828112715,
|
|
"learning_rate": 2.073351732210343e-05,
|
|
"loss": 0.1558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07276499271392822,
|
|
"step": 3415,
|
|
"valid_targets_mean": 4372.0,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 3.783619258439402,
|
|
"grad_norm": 0.2873614036864067,
|
|
"learning_rate": 2.067838766530832e-05,
|
|
"loss": 0.1518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07237691432237625,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4184.1,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 3.789153292750415,
|
|
"grad_norm": 0.3112753635064296,
|
|
"learning_rate": 2.062325284755949e-05,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07607384026050568,
|
|
"step": 3425,
|
|
"valid_targets_mean": 7490.5,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 3.794687327061428,
|
|
"grad_norm": 0.2938322497063729,
|
|
"learning_rate": 2.0568113288304773e-05,
|
|
"loss": 0.142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07387111335992813,
|
|
"step": 3430,
|
|
"valid_targets_mean": 7260.8,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 3.8002213613724405,
|
|
"grad_norm": 0.2916769580095921,
|
|
"learning_rate": 2.0512969407028103e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06837702542543411,
|
|
"step": 3435,
|
|
"valid_targets_mean": 7299.8,
|
|
"valid_targets_min": 996
|
|
},
|
|
{
|
|
"epoch": 3.805755395683453,
|
|
"grad_norm": 0.31919923173242226,
|
|
"learning_rate": 2.0457821623246263e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07120344787836075,
|
|
"step": 3440,
|
|
"valid_targets_mean": 7641.9,
|
|
"valid_targets_min": 1230
|
|
},
|
|
{
|
|
"epoch": 3.811289429994466,
|
|
"grad_norm": 0.24702206519128803,
|
|
"learning_rate": 2.0402670356505734e-05,
|
|
"loss": 0.1468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060001153498888016,
|
|
"step": 3445,
|
|
"valid_targets_mean": 6535.7,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 3.816823464305479,
|
|
"grad_norm": 0.25088171572413825,
|
|
"learning_rate": 2.0347516026379498e-05,
|
|
"loss": 0.1439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06905766576528549,
|
|
"step": 3450,
|
|
"valid_targets_mean": 8137.3,
|
|
"valid_targets_min": 2136
|
|
},
|
|
{
|
|
"epoch": 3.8223574986164914,
|
|
"grad_norm": 0.26797108470524955,
|
|
"learning_rate": 2.0292359052463856e-05,
|
|
"loss": 0.1491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08341628313064575,
|
|
"step": 3455,
|
|
"valid_targets_mean": 14691.3,
|
|
"valid_targets_min": 4833
|
|
},
|
|
{
|
|
"epoch": 3.827891532927504,
|
|
"grad_norm": 0.18658299888065677,
|
|
"learning_rate": 2.0237199854375195e-05,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08647984266281128,
|
|
"step": 3460,
|
|
"valid_targets_mean": 13659.3,
|
|
"valid_targets_min": 4559
|
|
},
|
|
{
|
|
"epoch": 3.8334255672385167,
|
|
"grad_norm": 0.15668057439242344,
|
|
"learning_rate": 2.0182038851746836e-05,
|
|
"loss": 0.1622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06748253852128983,
|
|
"step": 3465,
|
|
"valid_targets_mean": 11821.1,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 3.8389596015495298,
|
|
"grad_norm": 0.1441504264141319,
|
|
"learning_rate": 2.0126876464225832e-05,
|
|
"loss": 0.1585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07420114427804947,
|
|
"step": 3470,
|
|
"valid_targets_mean": 13088.8,
|
|
"valid_targets_min": 1103
|
|
},
|
|
{
|
|
"epoch": 3.8444936358605424,
|
|
"grad_norm": 0.19666886204621933,
|
|
"learning_rate": 2.0071713111469768e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.064952552318573,
|
|
"step": 3475,
|
|
"valid_targets_mean": 7919.6,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 3.850027670171555,
|
|
"grad_norm": 0.26433296077389334,
|
|
"learning_rate": 2.0016549213143567e-05,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05874726548790932,
|
|
"step": 3480,
|
|
"valid_targets_mean": 7816.1,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 3.8555617044825676,
|
|
"grad_norm": 0.30906600677339086,
|
|
"learning_rate": 1.996138518891631e-05,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053055714815855026,
|
|
"step": 3485,
|
|
"valid_targets_mean": 6050.3,
|
|
"valid_targets_min": 1080
|
|
},
|
|
{
|
|
"epoch": 3.8610957387935807,
|
|
"grad_norm": 0.27246669254577127,
|
|
"learning_rate": 1.9906221458458036e-05,
|
|
"loss": 0.1542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08226754516363144,
|
|
"step": 3490,
|
|
"valid_targets_mean": 5350.3,
|
|
"valid_targets_min": 2411
|
|
},
|
|
{
|
|
"epoch": 3.8666297731045933,
|
|
"grad_norm": 0.3377397171769842,
|
|
"learning_rate": 1.985105844143654e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06281077116727829,
|
|
"step": 3495,
|
|
"valid_targets_mean": 4708.8,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 3.872163807415606,
|
|
"grad_norm": 0.2929815010490038,
|
|
"learning_rate": 1.979589655751421e-05,
|
|
"loss": 0.1482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06699756532907486,
|
|
"step": 3500,
|
|
"valid_targets_mean": 4942.9,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 3.8776978417266186,
|
|
"grad_norm": 0.2567591919624809,
|
|
"learning_rate": 1.9740736226344775e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07040539383888245,
|
|
"step": 3505,
|
|
"valid_targets_mean": 5004.8,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 3.883231876037631,
|
|
"grad_norm": 0.26801555266148386,
|
|
"learning_rate": 1.9685577867570197e-05,
|
|
"loss": 0.1503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0682801902294159,
|
|
"step": 3510,
|
|
"valid_targets_mean": 4034.9,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 3.8887659103486443,
|
|
"grad_norm": 0.3086796660354632,
|
|
"learning_rate": 1.9630421900817407e-05,
|
|
"loss": 0.1641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07974547892808914,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4940.1,
|
|
"valid_targets_min": 2012
|
|
},
|
|
{
|
|
"epoch": 3.894299944659657,
|
|
"grad_norm": 0.2811177468129799,
|
|
"learning_rate": 1.9575268745695133e-05,
|
|
"loss": 0.1709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0761692151427269,
|
|
"step": 3520,
|
|
"valid_targets_mean": 4119.7,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 3.8998339789706695,
|
|
"grad_norm": 0.23447014214849063,
|
|
"learning_rate": 1.952011882179074e-05,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08349272608757019,
|
|
"step": 3525,
|
|
"valid_targets_mean": 5118.2,
|
|
"valid_targets_min": 2161
|
|
},
|
|
{
|
|
"epoch": 3.9053680132816826,
|
|
"grad_norm": 0.2765881507094179,
|
|
"learning_rate": 1.946497254866697e-05,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09142681211233139,
|
|
"step": 3530,
|
|
"valid_targets_mean": 5002.8,
|
|
"valid_targets_min": 1723
|
|
},
|
|
{
|
|
"epoch": 3.910902047592695,
|
|
"grad_norm": 0.23418257070583998,
|
|
"learning_rate": 1.9409830345858843e-05,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08624914288520813,
|
|
"step": 3535,
|
|
"valid_targets_mean": 5169.8,
|
|
"valid_targets_min": 1906
|
|
},
|
|
{
|
|
"epoch": 3.916436081903708,
|
|
"grad_norm": 0.2413458604168126,
|
|
"learning_rate": 1.9354692632870358e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08076553791761398,
|
|
"step": 3540,
|
|
"valid_targets_mean": 5038.4,
|
|
"valid_targets_min": 1650
|
|
},
|
|
{
|
|
"epoch": 3.9219701162147205,
|
|
"grad_norm": 0.24352589831960533,
|
|
"learning_rate": 1.9299559829171396e-05,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07839047908782959,
|
|
"step": 3545,
|
|
"valid_targets_mean": 4684.4,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 3.927504150525733,
|
|
"grad_norm": 0.2638840372227269,
|
|
"learning_rate": 1.9244432354194483e-05,
|
|
"loss": 0.1687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08922380208969116,
|
|
"step": 3550,
|
|
"valid_targets_mean": 4838.9,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 3.9330381848367457,
|
|
"grad_norm": 0.24960441994375437,
|
|
"learning_rate": 1.918931062733159e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08381988853216171,
|
|
"step": 3555,
|
|
"valid_targets_mean": 5133.2,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 3.938572219147759,
|
|
"grad_norm": 0.2442163896469321,
|
|
"learning_rate": 1.9134195067930985e-05,
|
|
"loss": 0.1685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08304736018180847,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5072.2,
|
|
"valid_targets_min": 2124
|
|
},
|
|
{
|
|
"epoch": 3.9441062534587714,
|
|
"grad_norm": 0.28311527205978554,
|
|
"learning_rate": 1.9079086095293978e-05,
|
|
"loss": 0.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09676555544137955,
|
|
"step": 3565,
|
|
"valid_targets_mean": 5254.5,
|
|
"valid_targets_min": 1760
|
|
},
|
|
{
|
|
"epoch": 3.949640287769784,
|
|
"grad_norm": 0.2775742290309911,
|
|
"learning_rate": 1.90239841286718e-05,
|
|
"loss": 0.1579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06859570741653442,
|
|
"step": 3570,
|
|
"valid_targets_mean": 4087.4,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 3.955174322080797,
|
|
"grad_norm": 0.30165383534771095,
|
|
"learning_rate": 1.8968889587262373e-05,
|
|
"loss": 0.1435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0765879675745964,
|
|
"step": 3575,
|
|
"valid_targets_mean": 4363.6,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 3.9607083563918097,
|
|
"grad_norm": 0.3177687011272404,
|
|
"learning_rate": 1.8913802890207132e-05,
|
|
"loss": 0.1409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07781877368688583,
|
|
"step": 3580,
|
|
"valid_targets_mean": 4320.0,
|
|
"valid_targets_min": 2526
|
|
},
|
|
{
|
|
"epoch": 3.9662423907028224,
|
|
"grad_norm": 0.2626966192151495,
|
|
"learning_rate": 1.885872445658784e-05,
|
|
"loss": 0.1426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07548317313194275,
|
|
"step": 3585,
|
|
"valid_targets_mean": 4363.6,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 3.971776425013835,
|
|
"grad_norm": 0.2800247562347386,
|
|
"learning_rate": 1.8803654705423372e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06895517557859421,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4099.5,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 3.9773104593248476,
|
|
"grad_norm": 0.27891467086583555,
|
|
"learning_rate": 1.8748594055666577e-05,
|
|
"loss": 0.1458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07581552118062973,
|
|
"step": 3595,
|
|
"valid_targets_mean": 4430.0,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 3.9828444936358607,
|
|
"grad_norm": 0.24997155449664565,
|
|
"learning_rate": 1.8693542926201042e-05,
|
|
"loss": 0.148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07906609028577805,
|
|
"step": 3600,
|
|
"valid_targets_mean": 4739.8,
|
|
"valid_targets_min": 2082
|
|
},
|
|
{
|
|
"epoch": 3.9883785279468733,
|
|
"grad_norm": 0.2609218020015554,
|
|
"learning_rate": 1.8638501735837956e-05,
|
|
"loss": 0.1483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0702708438038826,
|
|
"step": 3605,
|
|
"valid_targets_mean": 4067.0,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 3.993912562257886,
|
|
"grad_norm": 0.24947274856100093,
|
|
"learning_rate": 1.8583470903312842e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07230882346630096,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4236.1,
|
|
"valid_targets_min": 2259
|
|
},
|
|
{
|
|
"epoch": 3.999446596568899,
|
|
"grad_norm": 0.2604568289333355,
|
|
"learning_rate": 1.8528450847282494e-05,
|
|
"loss": 0.1437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06669026613235474,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3957.5,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 4.00442722744881,
|
|
"grad_norm": 0.3615296486902075,
|
|
"learning_rate": 1.8473441986321658e-05,
|
|
"loss": 0.1546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1087537333369255,
|
|
"step": 3620,
|
|
"valid_targets_mean": 7661.5,
|
|
"valid_targets_min": 2439
|
|
},
|
|
{
|
|
"epoch": 4.009961261759823,
|
|
"grad_norm": 0.24874350087256913,
|
|
"learning_rate": 1.841844473891995e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09617044776678085,
|
|
"step": 3625,
|
|
"valid_targets_mean": 6907.5,
|
|
"valid_targets_min": 2738
|
|
},
|
|
{
|
|
"epoch": 4.015495296070836,
|
|
"grad_norm": 0.21774237548987765,
|
|
"learning_rate": 1.836345952347862e-05,
|
|
"loss": 0.18,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08362749963998795,
|
|
"step": 3630,
|
|
"valid_targets_mean": 6410.7,
|
|
"valid_targets_min": 2445
|
|
},
|
|
{
|
|
"epoch": 4.0210293303818485,
|
|
"grad_norm": 0.21921994848890536,
|
|
"learning_rate": 1.8308486758307382e-05,
|
|
"loss": 0.1803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08051495999097824,
|
|
"step": 3635,
|
|
"valid_targets_mean": 6399.4,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 4.026563364692861,
|
|
"grad_norm": 0.20926541818543987,
|
|
"learning_rate": 1.825352686162125e-05,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07576586306095123,
|
|
"step": 3640,
|
|
"valid_targets_mean": 6164.7,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 4.032097399003874,
|
|
"grad_norm": 0.18768126266546767,
|
|
"learning_rate": 1.81985802515373e-05,
|
|
"loss": 0.1629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07731462270021439,
|
|
"step": 3645,
|
|
"valid_targets_mean": 6921.8,
|
|
"valid_targets_min": 2809
|
|
},
|
|
{
|
|
"epoch": 4.037631433314886,
|
|
"grad_norm": 0.18836188233033177,
|
|
"learning_rate": 1.8143647346071566e-05,
|
|
"loss": 0.1711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0950491726398468,
|
|
"step": 3650,
|
|
"valid_targets_mean": 6955.7,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 4.043165467625899,
|
|
"grad_norm": 0.19727093822121985,
|
|
"learning_rate": 1.8088728563135806e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08911281824111938,
|
|
"step": 3655,
|
|
"valid_targets_mean": 7009.1,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 4.048699501936912,
|
|
"grad_norm": 0.20294571729859454,
|
|
"learning_rate": 1.8033824320534336e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07729238271713257,
|
|
"step": 3660,
|
|
"valid_targets_mean": 6647.1,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 4.054233536247925,
|
|
"grad_norm": 0.21478276080125386,
|
|
"learning_rate": 1.7978935035960873e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09047490358352661,
|
|
"step": 3665,
|
|
"valid_targets_mean": 7243.6,
|
|
"valid_targets_min": 2074
|
|
},
|
|
{
|
|
"epoch": 4.059767570558938,
|
|
"grad_norm": 0.22279499864290855,
|
|
"learning_rate": 1.79240611269953e-05,
|
|
"loss": 0.166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09274063259363174,
|
|
"step": 3670,
|
|
"valid_targets_mean": 7255.7,
|
|
"valid_targets_min": 1549
|
|
},
|
|
{
|
|
"epoch": 4.06530160486995,
|
|
"grad_norm": 0.24519964467416966,
|
|
"learning_rate": 1.7869203011100566e-05,
|
|
"loss": 0.1643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0784914568066597,
|
|
"step": 3675,
|
|
"valid_targets_mean": 7046.5,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 4.070835639180963,
|
|
"grad_norm": 0.20156601817139844,
|
|
"learning_rate": 1.781436110561945e-05,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0783810019493103,
|
|
"step": 3680,
|
|
"valid_targets_mean": 6997.1,
|
|
"valid_targets_min": 1617
|
|
},
|
|
{
|
|
"epoch": 4.076369673491976,
|
|
"grad_norm": 0.21464079402718325,
|
|
"learning_rate": 1.7759535827771422e-05,
|
|
"loss": 0.1628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07057405263185501,
|
|
"step": 3685,
|
|
"valid_targets_mean": 6658.6,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 4.081903707802988,
|
|
"grad_norm": 0.21650675445364018,
|
|
"learning_rate": 1.7704727594649422e-05,
|
|
"loss": 0.1576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07326150685548782,
|
|
"step": 3690,
|
|
"valid_targets_mean": 6380.2,
|
|
"valid_targets_min": 1852
|
|
},
|
|
{
|
|
"epoch": 4.087437742114001,
|
|
"grad_norm": 0.21109180543842868,
|
|
"learning_rate": 1.7649936823216763e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08527389913797379,
|
|
"step": 3695,
|
|
"valid_targets_mean": 7227.7,
|
|
"valid_targets_min": 2572
|
|
},
|
|
{
|
|
"epoch": 4.0929717764250135,
|
|
"grad_norm": 0.21597274911137776,
|
|
"learning_rate": 1.7595163930303902e-05,
|
|
"loss": 0.1663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0764990746974945,
|
|
"step": 3700,
|
|
"valid_targets_mean": 6691.6,
|
|
"valid_targets_min": 2260
|
|
},
|
|
{
|
|
"epoch": 4.098505810736026,
|
|
"grad_norm": 0.21074152697566187,
|
|
"learning_rate": 1.7540409332605254e-05,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07818029820919037,
|
|
"step": 3705,
|
|
"valid_targets_mean": 6555.6,
|
|
"valid_targets_min": 2516
|
|
},
|
|
{
|
|
"epoch": 4.10403984504704,
|
|
"grad_norm": 0.2096894213490095,
|
|
"learning_rate": 1.7485673446676083e-05,
|
|
"loss": 0.1577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07488299906253815,
|
|
"step": 3710,
|
|
"valid_targets_mean": 6841.9,
|
|
"valid_targets_min": 2355
|
|
},
|
|
{
|
|
"epoch": 4.109573879358052,
|
|
"grad_norm": 0.22354028109558502,
|
|
"learning_rate": 1.7430956688929283e-05,
|
|
"loss": 0.152,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0797095000743866,
|
|
"step": 3715,
|
|
"valid_targets_mean": 7266.3,
|
|
"valid_targets_min": 2994
|
|
},
|
|
{
|
|
"epoch": 4.115107913669065,
|
|
"grad_norm": 0.23105711899293185,
|
|
"learning_rate": 1.737625947563224e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07823490351438522,
|
|
"step": 3720,
|
|
"valid_targets_mean": 6713.0,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 4.1206419479800775,
|
|
"grad_norm": 0.2376544784068995,
|
|
"learning_rate": 1.7321582222903633e-05,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06807952374219894,
|
|
"step": 3725,
|
|
"valid_targets_mean": 5598.9,
|
|
"valid_targets_min": 1999
|
|
},
|
|
{
|
|
"epoch": 4.12617598229109,
|
|
"grad_norm": 0.24742699875160848,
|
|
"learning_rate": 1.7266925346710298e-05,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0800933688879013,
|
|
"step": 3730,
|
|
"valid_targets_mean": 7036.1,
|
|
"valid_targets_min": 2562
|
|
},
|
|
{
|
|
"epoch": 4.131710016602103,
|
|
"grad_norm": 0.22579775043356334,
|
|
"learning_rate": 1.7212289262864062e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06760784983634949,
|
|
"step": 3735,
|
|
"valid_targets_mean": 6320.9,
|
|
"valid_targets_min": 2368
|
|
},
|
|
{
|
|
"epoch": 4.137244050913115,
|
|
"grad_norm": 0.23946866129603425,
|
|
"learning_rate": 1.7157674387018558e-05,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05896640196442604,
|
|
"step": 3740,
|
|
"valid_targets_mean": 6202.8,
|
|
"valid_targets_min": 2211
|
|
},
|
|
{
|
|
"epoch": 4.142778085224128,
|
|
"grad_norm": 0.24631315838170026,
|
|
"learning_rate": 1.7103081134666082e-05,
|
|
"loss": 0.1589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07588019222021103,
|
|
"step": 3745,
|
|
"valid_targets_mean": 6766.1,
|
|
"valid_targets_min": 2573
|
|
},
|
|
{
|
|
"epoch": 4.148312119535142,
|
|
"grad_norm": 0.239737773218279,
|
|
"learning_rate": 1.7048509921134414e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07164987921714783,
|
|
"step": 3750,
|
|
"valid_targets_mean": 6909.6,
|
|
"valid_targets_min": 1794
|
|
},
|
|
{
|
|
"epoch": 4.153846153846154,
|
|
"grad_norm": 0.23206720441459175,
|
|
"learning_rate": 1.6993961161583688e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07039956748485565,
|
|
"step": 3755,
|
|
"valid_targets_mean": 6667.0,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 4.159380188157167,
|
|
"grad_norm": 0.28050518211988124,
|
|
"learning_rate": 1.69394352710032e-05,
|
|
"loss": 0.1476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07837789505720139,
|
|
"step": 3760,
|
|
"valid_targets_mean": 7530.7,
|
|
"valid_targets_min": 2368
|
|
},
|
|
{
|
|
"epoch": 4.164914222468179,
|
|
"grad_norm": 0.2430050074307155,
|
|
"learning_rate": 1.6884932664208278e-05,
|
|
"loss": 0.1522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07588884234428406,
|
|
"step": 3765,
|
|
"valid_targets_mean": 7045.4,
|
|
"valid_targets_min": 2157
|
|
},
|
|
{
|
|
"epoch": 4.170448256779192,
|
|
"grad_norm": 0.2712507508374963,
|
|
"learning_rate": 1.6830453755837097e-05,
|
|
"loss": 0.1461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08221418410539627,
|
|
"step": 3770,
|
|
"valid_targets_mean": 7343.0,
|
|
"valid_targets_min": 2030
|
|
},
|
|
{
|
|
"epoch": 4.175982291090205,
|
|
"grad_norm": 0.23336410380253853,
|
|
"learning_rate": 1.677599896034756e-05,
|
|
"loss": 0.1512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08233041316270828,
|
|
"step": 3775,
|
|
"valid_targets_mean": 7534.6,
|
|
"valid_targets_min": 2929
|
|
},
|
|
{
|
|
"epoch": 4.181516325401217,
|
|
"grad_norm": 0.2407133203249527,
|
|
"learning_rate": 1.6721568692014132e-05,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08575388789176941,
|
|
"step": 3780,
|
|
"valid_targets_mean": 7523.0,
|
|
"valid_targets_min": 2726
|
|
},
|
|
{
|
|
"epoch": 4.18705035971223,
|
|
"grad_norm": 0.27008357658440313,
|
|
"learning_rate": 1.666716336492465e-05,
|
|
"loss": 0.1529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08472446352243423,
|
|
"step": 3785,
|
|
"valid_targets_mean": 7667.5,
|
|
"valid_targets_min": 2094
|
|
},
|
|
{
|
|
"epoch": 4.192584394023243,
|
|
"grad_norm": 0.25094042506394354,
|
|
"learning_rate": 1.661278339297725e-05,
|
|
"loss": 0.1524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06790081411600113,
|
|
"step": 3790,
|
|
"valid_targets_mean": 6404.8,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 4.198118428334256,
|
|
"grad_norm": 0.24960985494169227,
|
|
"learning_rate": 1.6558429189877136e-05,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08814112097024918,
|
|
"step": 3795,
|
|
"valid_targets_mean": 7508.5,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 4.203652462645269,
|
|
"grad_norm": 0.23685380443519255,
|
|
"learning_rate": 1.6504101169133502e-05,
|
|
"loss": 0.1511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07432366907596588,
|
|
"step": 3800,
|
|
"valid_targets_mean": 6691.5,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 4.209186496956281,
|
|
"grad_norm": 0.2233873764002913,
|
|
"learning_rate": 1.6449799744056326e-05,
|
|
"loss": 0.1494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07543178647756577,
|
|
"step": 3805,
|
|
"valid_targets_mean": 7022.8,
|
|
"valid_targets_min": 2791
|
|
},
|
|
{
|
|
"epoch": 4.214720531267294,
|
|
"grad_norm": 0.25189444245731557,
|
|
"learning_rate": 1.639552532775327e-05,
|
|
"loss": 0.1528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07702386379241943,
|
|
"step": 3810,
|
|
"valid_targets_mean": 6730.0,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 4.220254565578307,
|
|
"grad_norm": 0.25534754639468726,
|
|
"learning_rate": 1.6341278333126537e-05,
|
|
"loss": 0.147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06664300709962845,
|
|
"step": 3815,
|
|
"valid_targets_mean": 6620.6,
|
|
"valid_targets_min": 2477
|
|
},
|
|
{
|
|
"epoch": 4.225788599889319,
|
|
"grad_norm": 0.2974132005135256,
|
|
"learning_rate": 1.628705917286968e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06445831805467606,
|
|
"step": 3820,
|
|
"valid_targets_mean": 4577.6,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 4.231322634200332,
|
|
"grad_norm": 0.44015971722572467,
|
|
"learning_rate": 1.6232868259464533e-05,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051866840571165085,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3941.8,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 4.2368566685113445,
|
|
"grad_norm": 0.42445133585718986,
|
|
"learning_rate": 1.6178706005178005e-05,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06825078278779984,
|
|
"step": 3830,
|
|
"valid_targets_mean": 4732.0,
|
|
"valid_targets_min": 2113
|
|
},
|
|
{
|
|
"epoch": 4.242390702822357,
|
|
"grad_norm": 0.3906783833792015,
|
|
"learning_rate": 1.6124572822059002e-05,
|
|
"loss": 0.1472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0659128725528717,
|
|
"step": 3835,
|
|
"valid_targets_mean": 4162.7,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 4.247924737133371,
|
|
"grad_norm": 0.34059660618028414,
|
|
"learning_rate": 1.6070469121935244e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05694308876991272,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4168.2,
|
|
"valid_targets_min": 1580
|
|
},
|
|
{
|
|
"epoch": 4.253458771444383,
|
|
"grad_norm": 0.3878126160306128,
|
|
"learning_rate": 1.601639531641017e-05,
|
|
"loss": 0.1454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07526243478059769,
|
|
"step": 3845,
|
|
"valid_targets_mean": 4418.8,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 4.258992805755396,
|
|
"grad_norm": 0.3222207333090876,
|
|
"learning_rate": 1.59623518168598e-05,
|
|
"loss": 0.1561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07659698277711868,
|
|
"step": 3850,
|
|
"valid_targets_mean": 4298.7,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 4.2645268400664085,
|
|
"grad_norm": 0.3405375114224534,
|
|
"learning_rate": 1.5908339034429553e-05,
|
|
"loss": 0.1463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06387145072221756,
|
|
"step": 3855,
|
|
"valid_targets_mean": 4263.8,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 4.270060874377421,
|
|
"grad_norm": 0.35138119803410545,
|
|
"learning_rate": 1.585435738003122e-05,
|
|
"loss": 0.1506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07715055346488953,
|
|
"step": 3860,
|
|
"valid_targets_mean": 4593.6,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 4.275594908688434,
|
|
"grad_norm": 0.33785285840583545,
|
|
"learning_rate": 1.5800407264339723e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06908231228590012,
|
|
"step": 3865,
|
|
"valid_targets_mean": 4257.0,
|
|
"valid_targets_min": 2012
|
|
},
|
|
{
|
|
"epoch": 4.281128942999446,
|
|
"grad_norm": 0.3310320736650623,
|
|
"learning_rate": 1.5746489097790095e-05,
|
|
"loss": 0.1433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06672009080648422,
|
|
"step": 3870,
|
|
"valid_targets_mean": 3774.0,
|
|
"valid_targets_min": 1808
|
|
},
|
|
{
|
|
"epoch": 4.286662977310459,
|
|
"grad_norm": 0.311463607839016,
|
|
"learning_rate": 1.569260329057428e-05,
|
|
"loss": 0.149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07763797044754028,
|
|
"step": 3875,
|
|
"valid_targets_mean": 4852.8,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 4.292197011621472,
|
|
"grad_norm": 0.3118208083512682,
|
|
"learning_rate": 1.5638750252638063e-05,
|
|
"loss": 0.15,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07648203521966934,
|
|
"step": 3880,
|
|
"valid_targets_mean": 4255.5,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 4.297731045932485,
|
|
"grad_norm": 0.3243321560248647,
|
|
"learning_rate": 1.5584930393677908e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07285168766975403,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3931.0,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 4.303265080243498,
|
|
"grad_norm": 0.31813386126179766,
|
|
"learning_rate": 1.5531144123137884e-05,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06385066360235214,
|
|
"step": 3890,
|
|
"valid_targets_mean": 4537.8,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 4.30879911455451,
|
|
"grad_norm": 0.33780246281634424,
|
|
"learning_rate": 1.5477391850206526e-05,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08562406152486801,
|
|
"step": 3895,
|
|
"valid_targets_mean": 4946.9,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 4.314333148865523,
|
|
"grad_norm": 0.3090801042995307,
|
|
"learning_rate": 1.5423673983813714e-05,
|
|
"loss": 0.1462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06944170594215393,
|
|
"step": 3900,
|
|
"valid_targets_mean": 4475.8,
|
|
"valid_targets_min": 1986
|
|
},
|
|
{
|
|
"epoch": 4.319867183176536,
|
|
"grad_norm": 0.2998739282759218,
|
|
"learning_rate": 1.53699909326276e-05,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0686049684882164,
|
|
"step": 3905,
|
|
"valid_targets_mean": 4404.4,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 4.325401217487548,
|
|
"grad_norm": 0.31359183343090213,
|
|
"learning_rate": 1.531634310505144e-05,
|
|
"loss": 0.1395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07918906956911087,
|
|
"step": 3910,
|
|
"valid_targets_mean": 4628.4,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 4.330935251798561,
|
|
"grad_norm": 0.3021828405506255,
|
|
"learning_rate": 1.5262730909220545e-05,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06379434466362,
|
|
"step": 3915,
|
|
"valid_targets_mean": 4265.2,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 4.3364692861095735,
|
|
"grad_norm": 0.31493550037750695,
|
|
"learning_rate": 1.5209154752999142e-05,
|
|
"loss": 0.1508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060155678540468216,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3913.0,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 4.342003320420587,
|
|
"grad_norm": 0.2979376162118054,
|
|
"learning_rate": 1.515561504397728e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06678381562232971,
|
|
"step": 3925,
|
|
"valid_targets_mean": 4541.2,
|
|
"valid_targets_min": 2006
|
|
},
|
|
{
|
|
"epoch": 4.3475373547316,
|
|
"grad_norm": 0.34626094261505586,
|
|
"learning_rate": 1.5102112189467742e-05,
|
|
"loss": 0.1392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08782351016998291,
|
|
"step": 3930,
|
|
"valid_targets_mean": 4729.2,
|
|
"valid_targets_min": 1459
|
|
},
|
|
{
|
|
"epoch": 4.353071389042612,
|
|
"grad_norm": 0.350296048861095,
|
|
"learning_rate": 1.50486465965029e-05,
|
|
"loss": 0.1421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07288840413093567,
|
|
"step": 3935,
|
|
"valid_targets_mean": 4464.4,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 4.358605423353625,
|
|
"grad_norm": 0.31987640666361744,
|
|
"learning_rate": 1.4995218671831705e-05,
|
|
"loss": 0.1394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06294938921928406,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3874.0,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 4.3641394576646375,
|
|
"grad_norm": 0.30819667327862005,
|
|
"learning_rate": 1.494182882191648e-05,
|
|
"loss": 0.1386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06814851611852646,
|
|
"step": 3945,
|
|
"valid_targets_mean": 4333.2,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 4.36967349197565,
|
|
"grad_norm": 0.2924284374500159,
|
|
"learning_rate": 1.4888477452929939e-05,
|
|
"loss": 0.1387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0692083016037941,
|
|
"step": 3950,
|
|
"valid_targets_mean": 4336.7,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 4.375207526286663,
|
|
"grad_norm": 0.2980347648638733,
|
|
"learning_rate": 1.4835164970752009e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06465265899896622,
|
|
"step": 3955,
|
|
"valid_targets_mean": 4379.2,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 4.380741560597675,
|
|
"grad_norm": 0.33585729515337576,
|
|
"learning_rate": 1.4781891780966807e-05,
|
|
"loss": 0.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07466866075992584,
|
|
"step": 3960,
|
|
"valid_targets_mean": 4534.8,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 4.386275594908689,
|
|
"grad_norm": 0.33954531909020547,
|
|
"learning_rate": 1.4728658288859513e-05,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06735555827617645,
|
|
"step": 3965,
|
|
"valid_targets_mean": 4447.5,
|
|
"valid_targets_min": 1564
|
|
},
|
|
{
|
|
"epoch": 4.3918096292197015,
|
|
"grad_norm": 0.3253826621039958,
|
|
"learning_rate": 1.4675464899413284e-05,
|
|
"loss": 0.1349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062315523624420166,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3862.5,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 4.397343663530714,
|
|
"grad_norm": 0.3159085492524021,
|
|
"learning_rate": 1.462231201730622e-05,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06629395484924316,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3827.4,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 4.402877697841727,
|
|
"grad_norm": 0.32534308965146447,
|
|
"learning_rate": 1.456920004690823e-05,
|
|
"loss": 0.1389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07012656331062317,
|
|
"step": 3980,
|
|
"valid_targets_mean": 4606.9,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 4.408411732152739,
|
|
"grad_norm": 0.3216555715303935,
|
|
"learning_rate": 1.4516129392278003e-05,
|
|
"loss": 0.1268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05857391655445099,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3911.3,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 4.413945766463752,
|
|
"grad_norm": 0.3282521488414122,
|
|
"learning_rate": 1.4463100457159877e-05,
|
|
"loss": 0.1377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06805693358182907,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4191.2,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 4.419479800774765,
|
|
"grad_norm": 0.32474929052230217,
|
|
"learning_rate": 1.4410113644980839e-05,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06785105913877487,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4723.3,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 4.425013835085777,
|
|
"grad_norm": 0.3284907213593056,
|
|
"learning_rate": 1.4357169358847389e-05,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05492676421999931,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4008.1,
|
|
"valid_targets_min": 1914
|
|
},
|
|
{
|
|
"epoch": 4.43054786939679,
|
|
"grad_norm": 0.35574101579330153,
|
|
"learning_rate": 1.4304268001542519e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06498607993125916,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4685.6,
|
|
"valid_targets_min": 2247
|
|
},
|
|
{
|
|
"epoch": 4.436081903707803,
|
|
"grad_norm": 0.33321809273036523,
|
|
"learning_rate": 1.4251409975522636e-05,
|
|
"loss": 0.1326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061576616019010544,
|
|
"step": 4010,
|
|
"valid_targets_mean": 4355.3,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 4.441615938018816,
|
|
"grad_norm": 0.33404638633231243,
|
|
"learning_rate": 1.419859568291447e-05,
|
|
"loss": 0.1283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07737898081541061,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4712.5,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 4.447149972329829,
|
|
"grad_norm": 0.3497397035228793,
|
|
"learning_rate": 1.4145825525512083e-05,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06712006777524948,
|
|
"step": 4020,
|
|
"valid_targets_mean": 4363.5,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 4.452684006640841,
|
|
"grad_norm": 0.36013635115483095,
|
|
"learning_rate": 1.4093099904773726e-05,
|
|
"loss": 0.1353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06490481644868851,
|
|
"step": 4025,
|
|
"valid_targets_mean": 4250.2,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 4.458218040951854,
|
|
"grad_norm": 0.32488215075011995,
|
|
"learning_rate": 1.4040419221818868e-05,
|
|
"loss": 0.1347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06323728710412979,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4371.3,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 4.4637520752628665,
|
|
"grad_norm": 0.3249210032879383,
|
|
"learning_rate": 1.3987783877425072e-05,
|
|
"loss": 0.1329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06592188030481339,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4365.3,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 4.469286109573879,
|
|
"grad_norm": 0.28724233199462046,
|
|
"learning_rate": 1.393519427202501e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05613340064883232,
|
|
"step": 4040,
|
|
"valid_targets_mean": 5889.4,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 4.474820143884892,
|
|
"grad_norm": 0.23711156534213082,
|
|
"learning_rate": 1.3882650805703377e-05,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051731567829847336,
|
|
"step": 4045,
|
|
"valid_targets_mean": 5894.5,
|
|
"valid_targets_min": 2159
|
|
},
|
|
{
|
|
"epoch": 4.480354178195904,
|
|
"grad_norm": 0.19306426908842927,
|
|
"learning_rate": 1.3830153878193834e-05,
|
|
"loss": 0.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0583718977868557,
|
|
"step": 4050,
|
|
"valid_targets_mean": 5530.8,
|
|
"valid_targets_min": 2066
|
|
},
|
|
{
|
|
"epoch": 4.485888212506918,
|
|
"grad_norm": 0.18149358463243234,
|
|
"learning_rate": 1.3777703888876028e-05,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04824841022491455,
|
|
"step": 4055,
|
|
"valid_targets_mean": 5229.4,
|
|
"valid_targets_min": 1790
|
|
},
|
|
{
|
|
"epoch": 4.491422246817931,
|
|
"grad_norm": 0.17464115907385364,
|
|
"learning_rate": 1.3725301236772483e-05,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05410352349281311,
|
|
"step": 4060,
|
|
"valid_targets_mean": 5318.1,
|
|
"valid_targets_min": 2531
|
|
},
|
|
{
|
|
"epoch": 4.496956281128943,
|
|
"grad_norm": 0.17874490281802502,
|
|
"learning_rate": 1.3672946320545628e-05,
|
|
"loss": 0.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05268557742238045,
|
|
"step": 4065,
|
|
"valid_targets_mean": 5814.4,
|
|
"valid_targets_min": 1901
|
|
},
|
|
{
|
|
"epoch": 4.502490315439956,
|
|
"grad_norm": 0.16795797581062424,
|
|
"learning_rate": 1.362063953849469e-05,
|
|
"loss": 0.0997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04696022346615791,
|
|
"step": 4070,
|
|
"valid_targets_mean": 5287.9,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 4.508024349750968,
|
|
"grad_norm": 0.17567280237182417,
|
|
"learning_rate": 1.3568381288552748e-05,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053378403186798096,
|
|
"step": 4075,
|
|
"valid_targets_mean": 5617.3,
|
|
"valid_targets_min": 1984
|
|
},
|
|
{
|
|
"epoch": 4.513558384061981,
|
|
"grad_norm": 0.17977586868192777,
|
|
"learning_rate": 1.3516171968283649e-05,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049263015389442444,
|
|
"step": 4080,
|
|
"valid_targets_mean": 5884.0,
|
|
"valid_targets_min": 2647
|
|
},
|
|
{
|
|
"epoch": 4.519092418372994,
|
|
"grad_norm": 0.19740781183830758,
|
|
"learning_rate": 1.3464011974878988e-05,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04521234333515167,
|
|
"step": 4085,
|
|
"valid_targets_mean": 5214.2,
|
|
"valid_targets_min": 2632
|
|
},
|
|
{
|
|
"epoch": 4.524626452684006,
|
|
"grad_norm": 0.20383505960047912,
|
|
"learning_rate": 1.3411901705155126e-05,
|
|
"loss": 0.0966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04395684972405434,
|
|
"step": 4090,
|
|
"valid_targets_mean": 5505.8,
|
|
"valid_targets_min": 2755
|
|
},
|
|
{
|
|
"epoch": 4.530160486995019,
|
|
"grad_norm": 0.19576017975585366,
|
|
"learning_rate": 1.3359841555550102e-05,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047161608934402466,
|
|
"step": 4095,
|
|
"valid_targets_mean": 5478.0,
|
|
"valid_targets_min": 1291
|
|
},
|
|
{
|
|
"epoch": 4.5356945213060325,
|
|
"grad_norm": 0.20223095318809983,
|
|
"learning_rate": 1.3307831922120691e-05,
|
|
"loss": 0.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04582715034484863,
|
|
"step": 4100,
|
|
"valid_targets_mean": 5744.8,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 4.541228555617045,
|
|
"grad_norm": 0.20193723656409343,
|
|
"learning_rate": 1.325587320053934e-05,
|
|
"loss": 0.0894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04994739219546318,
|
|
"step": 4105,
|
|
"valid_targets_mean": 5808.3,
|
|
"valid_targets_min": 1922
|
|
},
|
|
{
|
|
"epoch": 4.546762589928058,
|
|
"grad_norm": 0.21038735128119,
|
|
"learning_rate": 1.3203965786091187e-05,
|
|
"loss": 0.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04188760742545128,
|
|
"step": 4110,
|
|
"valid_targets_mean": 5499.9,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 4.55229662423907,
|
|
"grad_norm": 0.21118246301948174,
|
|
"learning_rate": 1.3152110073671027e-05,
|
|
"loss": 0.0881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040510065853595734,
|
|
"step": 4115,
|
|
"valid_targets_mean": 5453.8,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 4.557830658550083,
|
|
"grad_norm": 0.20307802961381277,
|
|
"learning_rate": 1.3100306457780339e-05,
|
|
"loss": 0.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048915524035692215,
|
|
"step": 4120,
|
|
"valid_targets_mean": 5544.6,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 4.563364692861096,
|
|
"grad_norm": 0.2178967026283615,
|
|
"learning_rate": 1.3048555332524263e-05,
|
|
"loss": 0.0874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04666067659854889,
|
|
"step": 4125,
|
|
"valid_targets_mean": 5964.0,
|
|
"valid_targets_min": 2088
|
|
},
|
|
{
|
|
"epoch": 4.568898727172108,
|
|
"grad_norm": 0.20195987432932186,
|
|
"learning_rate": 1.2996857091608595e-05,
|
|
"loss": 0.0853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037834491580724716,
|
|
"step": 4130,
|
|
"valid_targets_mean": 5607.6,
|
|
"valid_targets_min": 3237
|
|
},
|
|
{
|
|
"epoch": 4.574432761483121,
|
|
"grad_norm": 0.19860253079715776,
|
|
"learning_rate": 1.2945212128336819e-05,
|
|
"loss": 0.0849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04107757285237312,
|
|
"step": 4135,
|
|
"valid_targets_mean": 5680.2,
|
|
"valid_targets_min": 2183
|
|
},
|
|
{
|
|
"epoch": 4.5799667957941335,
|
|
"grad_norm": 0.2018354713018964,
|
|
"learning_rate": 1.2893620835607092e-05,
|
|
"loss": 0.0885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046085942536592484,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5323.5,
|
|
"valid_targets_min": 2257
|
|
},
|
|
{
|
|
"epoch": 4.585500830105147,
|
|
"grad_norm": 0.21606523713545506,
|
|
"learning_rate": 1.2842083605909276e-05,
|
|
"loss": 0.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04127819463610649,
|
|
"step": 4145,
|
|
"valid_targets_mean": 5460.1,
|
|
"valid_targets_min": 3076
|
|
},
|
|
{
|
|
"epoch": 4.59103486441616,
|
|
"grad_norm": 0.2101210681414386,
|
|
"learning_rate": 1.2790600831321913e-05,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04266387224197388,
|
|
"step": 4150,
|
|
"valid_targets_mean": 5721.5,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 4.596568898727172,
|
|
"grad_norm": 0.19303279577936738,
|
|
"learning_rate": 1.273917290350929e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04391302540898323,
|
|
"step": 4155,
|
|
"valid_targets_mean": 5423.7,
|
|
"valid_targets_min": 2850
|
|
},
|
|
{
|
|
"epoch": 4.602102933038185,
|
|
"grad_norm": 0.2196543955803934,
|
|
"learning_rate": 1.268780021371844e-05,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04037768021225929,
|
|
"step": 4160,
|
|
"valid_targets_mean": 5830.5,
|
|
"valid_targets_min": 2413
|
|
},
|
|
{
|
|
"epoch": 4.6076369673491975,
|
|
"grad_norm": 0.1984560903462667,
|
|
"learning_rate": 1.2636483152776143e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04546644166111946,
|
|
"step": 4165,
|
|
"valid_targets_mean": 5912.3,
|
|
"valid_targets_min": 2614
|
|
},
|
|
{
|
|
"epoch": 4.61317100166021,
|
|
"grad_norm": 0.22930911625183262,
|
|
"learning_rate": 1.2585222111085996e-05,
|
|
"loss": 0.0875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046509597450494766,
|
|
"step": 4170,
|
|
"valid_targets_mean": 5704.6,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 4.618705035971223,
|
|
"grad_norm": 0.21168966624145297,
|
|
"learning_rate": 1.2534017478625391e-05,
|
|
"loss": 0.0856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04334982857108116,
|
|
"step": 4175,
|
|
"valid_targets_mean": 5734.4,
|
|
"valid_targets_min": 2234
|
|
},
|
|
{
|
|
"epoch": 4.624239070282236,
|
|
"grad_norm": 0.2125073621973969,
|
|
"learning_rate": 1.2482869644942608e-05,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04163689911365509,
|
|
"step": 4180,
|
|
"valid_targets_mean": 5532.1,
|
|
"valid_targets_min": 2237
|
|
},
|
|
{
|
|
"epoch": 4.629773104593248,
|
|
"grad_norm": 0.21539042139380618,
|
|
"learning_rate": 1.2431778999153796e-05,
|
|
"loss": 0.0836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03761904314160347,
|
|
"step": 4185,
|
|
"valid_targets_mean": 4842.2,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 4.6353071389042615,
|
|
"grad_norm": 0.205450846924651,
|
|
"learning_rate": 1.2380745929940052e-05,
|
|
"loss": 0.0775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0382852703332901,
|
|
"step": 4190,
|
|
"valid_targets_mean": 5742.5,
|
|
"valid_targets_min": 2469
|
|
},
|
|
{
|
|
"epoch": 4.640841173215274,
|
|
"grad_norm": 0.19439066732416754,
|
|
"learning_rate": 1.232977082554445e-05,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03928928077220917,
|
|
"step": 4195,
|
|
"valid_targets_mean": 5476.2,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 4.646375207526287,
|
|
"grad_norm": 0.1853857456809193,
|
|
"learning_rate": 1.2278854073769066e-05,
|
|
"loss": 0.0804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037013471126556396,
|
|
"step": 4200,
|
|
"valid_targets_mean": 5632.0,
|
|
"valid_targets_min": 1288
|
|
},
|
|
{
|
|
"epoch": 4.651909241837299,
|
|
"grad_norm": 0.20631684561188882,
|
|
"learning_rate": 1.2227996061972083e-05,
|
|
"loss": 0.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03684552013874054,
|
|
"step": 4205,
|
|
"valid_targets_mean": 5595.0,
|
|
"valid_targets_min": 2228
|
|
},
|
|
{
|
|
"epoch": 4.657443276148312,
|
|
"grad_norm": 0.2192507106190281,
|
|
"learning_rate": 1.2177197177064768e-05,
|
|
"loss": 0.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04795651137828827,
|
|
"step": 4210,
|
|
"valid_targets_mean": 6140.2,
|
|
"valid_targets_min": 2896
|
|
},
|
|
{
|
|
"epoch": 4.662977310459325,
|
|
"grad_norm": 0.2121775921922545,
|
|
"learning_rate": 1.2126457805508603e-05,
|
|
"loss": 0.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03650488331913948,
|
|
"step": 4215,
|
|
"valid_targets_mean": 5670.7,
|
|
"valid_targets_min": 2615
|
|
},
|
|
{
|
|
"epoch": 4.668511344770337,
|
|
"grad_norm": 0.20449506169734838,
|
|
"learning_rate": 1.207577833331229e-05,
|
|
"loss": 0.08,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04212837293744087,
|
|
"step": 4220,
|
|
"valid_targets_mean": 6108.9,
|
|
"valid_targets_min": 3283
|
|
},
|
|
{
|
|
"epoch": 4.674045379081351,
|
|
"grad_norm": 0.19508106582699883,
|
|
"learning_rate": 1.202515914602886e-05,
|
|
"loss": 0.0777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03631249815225601,
|
|
"step": 4225,
|
|
"valid_targets_mean": 5478.3,
|
|
"valid_targets_min": 2154
|
|
},
|
|
{
|
|
"epoch": 4.679579413392363,
|
|
"grad_norm": 0.20810963639880978,
|
|
"learning_rate": 1.1974600628752681e-05,
|
|
"loss": 0.0784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03693137690424919,
|
|
"step": 4230,
|
|
"valid_targets_mean": 5802.5,
|
|
"valid_targets_min": 1887
|
|
},
|
|
{
|
|
"epoch": 4.685113447703376,
|
|
"grad_norm": 0.21279762134599406,
|
|
"learning_rate": 1.1924103166116598e-05,
|
|
"loss": 0.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038755886256694794,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5761.5,
|
|
"valid_targets_min": 1754
|
|
},
|
|
{
|
|
"epoch": 4.690647482014389,
|
|
"grad_norm": 0.20795635056360526,
|
|
"learning_rate": 1.1873667142288956e-05,
|
|
"loss": 0.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03495751693844795,
|
|
"step": 4240,
|
|
"valid_targets_mean": 5489.2,
|
|
"valid_targets_min": 2284
|
|
},
|
|
{
|
|
"epoch": 4.696181516325401,
|
|
"grad_norm": 0.20836060464310172,
|
|
"learning_rate": 1.1823292940970692e-05,
|
|
"loss": 0.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03726666420698166,
|
|
"step": 4245,
|
|
"valid_targets_mean": 5341.3,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 4.701715550636414,
|
|
"grad_norm": 0.20843861440410902,
|
|
"learning_rate": 1.1772980945392427e-05,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03751606121659279,
|
|
"step": 4250,
|
|
"valid_targets_mean": 5431.4,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 4.7072495849474265,
|
|
"grad_norm": 0.19972530412032494,
|
|
"learning_rate": 1.172273153831152e-05,
|
|
"loss": 0.072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03226105496287346,
|
|
"step": 4255,
|
|
"valid_targets_mean": 5290.1,
|
|
"valid_targets_min": 2681
|
|
},
|
|
{
|
|
"epoch": 4.712783619258439,
|
|
"grad_norm": 0.20865318170034022,
|
|
"learning_rate": 1.167254510200921e-05,
|
|
"loss": 0.074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04075950011610985,
|
|
"step": 4260,
|
|
"valid_targets_mean": 5485.9,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 4.718317653569452,
|
|
"grad_norm": 0.23813125977819116,
|
|
"learning_rate": 1.1622422018287635e-05,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03836166486144066,
|
|
"step": 4265,
|
|
"valid_targets_mean": 5596.4,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 4.723851687880465,
|
|
"grad_norm": 0.21498738596125336,
|
|
"learning_rate": 1.1572362668466995e-05,
|
|
"loss": 0.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04106292873620987,
|
|
"step": 4270,
|
|
"valid_targets_mean": 5956.1,
|
|
"valid_targets_min": 2766
|
|
},
|
|
{
|
|
"epoch": 4.729385722191478,
|
|
"grad_norm": 0.21114082725651798,
|
|
"learning_rate": 1.1522367433382633e-05,
|
|
"loss": 0.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039354342967271805,
|
|
"step": 4275,
|
|
"valid_targets_mean": 5119.2,
|
|
"valid_targets_min": 2043
|
|
},
|
|
{
|
|
"epoch": 4.7349197565024905,
|
|
"grad_norm": 0.2151908373056844,
|
|
"learning_rate": 1.147243669338209e-05,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037762101739645004,
|
|
"step": 4280,
|
|
"valid_targets_mean": 5718.6,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 4.740453790813503,
|
|
"grad_norm": 0.2099551489654757,
|
|
"learning_rate": 1.1422570828322285e-05,
|
|
"loss": 0.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039806973189115524,
|
|
"step": 4285,
|
|
"valid_targets_mean": 5818.4,
|
|
"valid_targets_min": 2425
|
|
},
|
|
{
|
|
"epoch": 4.745987825124516,
|
|
"grad_norm": 0.20429021711714365,
|
|
"learning_rate": 1.1372770217566562e-05,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03849330544471741,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5530.9,
|
|
"valid_targets_min": 1540
|
|
},
|
|
{
|
|
"epoch": 4.751521859435528,
|
|
"grad_norm": 0.2026865420434942,
|
|
"learning_rate": 1.1323035239981856e-05,
|
|
"loss": 0.0789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03567812219262123,
|
|
"step": 4295,
|
|
"valid_targets_mean": 5335.0,
|
|
"valid_targets_min": 2544
|
|
},
|
|
{
|
|
"epoch": 4.757055893746541,
|
|
"grad_norm": 0.29789586977612714,
|
|
"learning_rate": 1.1273366273935759e-05,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05724092945456505,
|
|
"step": 4300,
|
|
"valid_targets_mean": 4571.2,
|
|
"valid_targets_min": 2219
|
|
},
|
|
{
|
|
"epoch": 4.762589928057554,
|
|
"grad_norm": 0.3316362801818146,
|
|
"learning_rate": 1.1223763697293695e-05,
|
|
"loss": 0.1046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04876897856593132,
|
|
"step": 4305,
|
|
"valid_targets_mean": 3826.4,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 4.768123962368566,
|
|
"grad_norm": 0.33551860487043705,
|
|
"learning_rate": 1.1174227887416005e-05,
|
|
"loss": 0.103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05014974996447563,
|
|
"step": 4310,
|
|
"valid_targets_mean": 4318.0,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 4.77365799667958,
|
|
"grad_norm": 0.327002436914812,
|
|
"learning_rate": 1.1124759221155092e-05,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0552230030298233,
|
|
"step": 4315,
|
|
"valid_targets_mean": 4533.9,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 4.779192030990592,
|
|
"grad_norm": 0.28803494848222577,
|
|
"learning_rate": 1.107535807485256e-05,
|
|
"loss": 0.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05753651261329651,
|
|
"step": 4320,
|
|
"valid_targets_mean": 4452.8,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 4.784726065301605,
|
|
"grad_norm": 0.2686551481691491,
|
|
"learning_rate": 1.1026024824336323e-05,
|
|
"loss": 0.1076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051282186061143875,
|
|
"step": 4325,
|
|
"valid_targets_mean": 3980.8,
|
|
"valid_targets_min": 1100
|
|
},
|
|
{
|
|
"epoch": 4.790260099612618,
|
|
"grad_norm": 0.3433498093901168,
|
|
"learning_rate": 1.0976759844917795e-05,
|
|
"loss": 0.1003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04425298050045967,
|
|
"step": 4330,
|
|
"valid_targets_mean": 6227.9,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 4.79579413392363,
|
|
"grad_norm": 0.33910744534375425,
|
|
"learning_rate": 1.092756351138897e-05,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05586230754852295,
|
|
"step": 4335,
|
|
"valid_targets_mean": 7692.2,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 4.801328168234643,
|
|
"grad_norm": 0.3059364705184329,
|
|
"learning_rate": 1.0878436198019645e-05,
|
|
"loss": 0.1062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05682585760951042,
|
|
"step": 4340,
|
|
"valid_targets_mean": 8154.4,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 4.8068622025456555,
|
|
"grad_norm": 0.27601921800383866,
|
|
"learning_rate": 1.0829378278554493e-05,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04944746568799019,
|
|
"step": 4345,
|
|
"valid_targets_mean": 7351.3,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 4.812396236856668,
|
|
"grad_norm": 0.2769996942215238,
|
|
"learning_rate": 1.0780390126210288e-05,
|
|
"loss": 0.1115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.054811980575323105,
|
|
"step": 4350,
|
|
"valid_targets_mean": 7964.9,
|
|
"valid_targets_min": 2908
|
|
},
|
|
{
|
|
"epoch": 4.817930271167681,
|
|
"grad_norm": 0.22968483653271038,
|
|
"learning_rate": 1.0731472113673052e-05,
|
|
"loss": 0.1057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04972505569458008,
|
|
"step": 4355,
|
|
"valid_targets_mean": 7937.8,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 4.823464305478694,
|
|
"grad_norm": 0.2853677264211911,
|
|
"learning_rate": 1.0682624613095167e-05,
|
|
"loss": 0.125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06412405520677567,
|
|
"step": 4360,
|
|
"valid_targets_mean": 12586.8,
|
|
"valid_targets_min": 5456
|
|
},
|
|
{
|
|
"epoch": 4.828998339789707,
|
|
"grad_norm": 0.1943599246918971,
|
|
"learning_rate": 1.0633847996092614e-05,
|
|
"loss": 0.1438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0753672644495964,
|
|
"step": 4365,
|
|
"valid_targets_mean": 13651.0,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 4.83453237410072,
|
|
"grad_norm": 0.1775365824202433,
|
|
"learning_rate": 1.0585142633742092e-05,
|
|
"loss": 0.1417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07086262851953506,
|
|
"step": 4370,
|
|
"valid_targets_mean": 14026.5,
|
|
"valid_targets_min": 1038
|
|
},
|
|
{
|
|
"epoch": 4.840066408411732,
|
|
"grad_norm": 0.1549695067204721,
|
|
"learning_rate": 1.0536508896578242e-05,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06735699623823166,
|
|
"step": 4375,
|
|
"valid_targets_mean": 12705.8,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 4.845600442722745,
|
|
"grad_norm": 0.22373494573606925,
|
|
"learning_rate": 1.0487947154590772e-05,
|
|
"loss": 0.111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04747587442398071,
|
|
"step": 4380,
|
|
"valid_targets_mean": 7084.4,
|
|
"valid_targets_min": 1340
|
|
},
|
|
{
|
|
"epoch": 4.851134477033757,
|
|
"grad_norm": 0.24007175445827605,
|
|
"learning_rate": 1.0439457777221706e-05,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04307395592331886,
|
|
"step": 4385,
|
|
"valid_targets_mean": 7130.2,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 4.85666851134477,
|
|
"grad_norm": 0.29404821959647026,
|
|
"learning_rate": 1.0391041133362523e-05,
|
|
"loss": 0.0851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06584474444389343,
|
|
"step": 4390,
|
|
"valid_targets_mean": 5321.2,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 4.862202545655783,
|
|
"grad_norm": 0.26180868997040285,
|
|
"learning_rate": 1.0342697591351377e-05,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049729812890291214,
|
|
"step": 4395,
|
|
"valid_targets_mean": 4732.5,
|
|
"valid_targets_min": 1967
|
|
},
|
|
{
|
|
"epoch": 4.867736579966795,
|
|
"grad_norm": 0.29007400477711953,
|
|
"learning_rate": 1.0294427518970292e-05,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05817165970802307,
|
|
"step": 4400,
|
|
"valid_targets_mean": 5243.5,
|
|
"valid_targets_min": 1678
|
|
},
|
|
{
|
|
"epoch": 4.873270614277809,
|
|
"grad_norm": 0.3072483070061111,
|
|
"learning_rate": 1.0246231283442332e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05918870493769646,
|
|
"step": 4405,
|
|
"valid_targets_mean": 5038.7,
|
|
"valid_targets_min": 1770
|
|
},
|
|
{
|
|
"epoch": 4.8788046485888215,
|
|
"grad_norm": 0.28996219297193787,
|
|
"learning_rate": 1.0198109251428872e-05,
|
|
"loss": 0.1108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05295060947537422,
|
|
"step": 4410,
|
|
"valid_targets_mean": 4237.3,
|
|
"valid_targets_min": 1640
|
|
},
|
|
{
|
|
"epoch": 4.884338682899834,
|
|
"grad_norm": 0.27499077703198677,
|
|
"learning_rate": 1.0150061789026733e-05,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06034369766712189,
|
|
"step": 4415,
|
|
"valid_targets_mean": 4563.9,
|
|
"valid_targets_min": 1755
|
|
},
|
|
{
|
|
"epoch": 4.889872717210847,
|
|
"grad_norm": 0.31840419017600025,
|
|
"learning_rate": 1.0102089261765462e-05,
|
|
"loss": 0.1214,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06176462769508362,
|
|
"step": 4420,
|
|
"valid_targets_mean": 5055.2,
|
|
"valid_targets_min": 1858
|
|
},
|
|
{
|
|
"epoch": 4.895406751521859,
|
|
"grad_norm": 0.2689260733055054,
|
|
"learning_rate": 1.0054192034604522e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06580108404159546,
|
|
"step": 4425,
|
|
"valid_targets_mean": 4773.1,
|
|
"valid_targets_min": 1714
|
|
},
|
|
{
|
|
"epoch": 4.900940785832872,
|
|
"grad_norm": 0.2647577766370914,
|
|
"learning_rate": 1.000637047193049e-05,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0675923228263855,
|
|
"step": 4430,
|
|
"valid_targets_mean": 5269.0,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 4.906474820143885,
|
|
"grad_norm": 0.25175825106931504,
|
|
"learning_rate": 9.958624937554356e-06,
|
|
"loss": 0.1339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0740848183631897,
|
|
"step": 4435,
|
|
"valid_targets_mean": 5723.2,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 4.912008854454898,
|
|
"grad_norm": 0.2541669896283131,
|
|
"learning_rate": 9.910955794708668e-06,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06130128726363182,
|
|
"step": 4440,
|
|
"valid_targets_mean": 4649.5,
|
|
"valid_targets_min": 1923
|
|
},
|
|
{
|
|
"epoch": 4.917542888765911,
|
|
"grad_norm": 0.24388814289257738,
|
|
"learning_rate": 9.863363406044849e-06,
|
|
"loss": 0.127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06754420697689056,
|
|
"step": 4445,
|
|
"valid_targets_mean": 4893.2,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 4.923076923076923,
|
|
"grad_norm": 0.253006989245634,
|
|
"learning_rate": 9.815848133630364e-06,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0545407272875309,
|
|
"step": 4450,
|
|
"valid_targets_mean": 4655.0,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 4.928610957387936,
|
|
"grad_norm": 0.26114474356839684,
|
|
"learning_rate": 9.768410338946038e-06,
|
|
"loss": 0.1261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0732916072010994,
|
|
"step": 4455,
|
|
"valid_targets_mean": 5300.7,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 4.934144991698949,
|
|
"grad_norm": 0.2541083629983541,
|
|
"learning_rate": 9.721050382883242e-06,
|
|
"loss": 0.1239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0583202950656414,
|
|
"step": 4460,
|
|
"valid_targets_mean": 4678.7,
|
|
"valid_targets_min": 1933
|
|
},
|
|
{
|
|
"epoch": 4.939679026009961,
|
|
"grad_norm": 0.2414388793326451,
|
|
"learning_rate": 9.673768625741193e-06,
|
|
"loss": 0.1278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061433177441358566,
|
|
"step": 4465,
|
|
"valid_targets_mean": 5780.5,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 4.945213060320974,
|
|
"grad_norm": 0.26581185336094676,
|
|
"learning_rate": 9.626565427224192e-06,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0596851110458374,
|
|
"step": 4470,
|
|
"valid_targets_mean": 4821.9,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 4.9507470946319865,
|
|
"grad_norm": 0.2735582196598092,
|
|
"learning_rate": 9.57944114643889e-06,
|
|
"loss": 0.1114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05883411690592766,
|
|
"step": 4475,
|
|
"valid_targets_mean": 4274.3,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 4.956281128942999,
|
|
"grad_norm": 0.27818780258660997,
|
|
"learning_rate": 9.532396141891569e-06,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0458868183195591,
|
|
"step": 4480,
|
|
"valid_targets_mean": 4046.1,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 4.961815163254013,
|
|
"grad_norm": 0.3003596198137884,
|
|
"learning_rate": 9.485430771485373e-06,
|
|
"loss": 0.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04684314504265785,
|
|
"step": 4485,
|
|
"valid_targets_mean": 4062.4,
|
|
"valid_targets_min": 1853
|
|
},
|
|
{
|
|
"epoch": 4.967349197565025,
|
|
"grad_norm": 0.2868799722839849,
|
|
"learning_rate": 9.438545392517652e-06,
|
|
"loss": 0.0991,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04980555176734924,
|
|
"step": 4490,
|
|
"valid_targets_mean": 4217.3,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 4.972883231876038,
|
|
"grad_norm": 0.26196698800655627,
|
|
"learning_rate": 9.391740361677173e-06,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04342920705676079,
|
|
"step": 4495,
|
|
"valid_targets_mean": 4143.7,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 4.9784172661870505,
|
|
"grad_norm": 0.26604115606438405,
|
|
"learning_rate": 9.345016035041465e-06,
|
|
"loss": 0.1009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05186079069972038,
|
|
"step": 4500,
|
|
"valid_targets_mean": 4363.9,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 4.983951300498063,
|
|
"grad_norm": 0.278259737373658,
|
|
"learning_rate": 9.298372768074087e-06,
|
|
"loss": 0.1056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05233500525355339,
|
|
"step": 4505,
|
|
"valid_targets_mean": 4360.2,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 4.989485334809076,
|
|
"grad_norm": 0.25668363958803886,
|
|
"learning_rate": 9.251810915621886e-06,
|
|
"loss": 0.1026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05602368712425232,
|
|
"step": 4510,
|
|
"valid_targets_mean": 4662.1,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 4.995019369120088,
|
|
"grad_norm": 0.2768152114205245,
|
|
"learning_rate": 9.205330831912391e-06,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048191387206315994,
|
|
"step": 4515,
|
|
"valid_targets_mean": 4001.0,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.26913245830976207,
|
|
"learning_rate": 9.158932870551012e-06,
|
|
"loss": 0.0912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051119934767484665,
|
|
"step": 4520,
|
|
"valid_targets_mean": 6656.8,
|
|
"valid_targets_min": 2566
|
|
},
|
|
{
|
|
"epoch": 5.005534034311013,
|
|
"grad_norm": 0.3818780932790394,
|
|
"learning_rate": 9.11261738451843e-06,
|
|
"loss": 0.1399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06865507364273071,
|
|
"step": 4525,
|
|
"valid_targets_mean": 6579.2,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 5.011068068622025,
|
|
"grad_norm": 0.26736649847077115,
|
|
"learning_rate": 9.066384726167852e-06,
|
|
"loss": 0.1634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07701835036277771,
|
|
"step": 4530,
|
|
"valid_targets_mean": 6844.8,
|
|
"valid_targets_min": 2928
|
|
},
|
|
{
|
|
"epoch": 5.016602102933038,
|
|
"grad_norm": 0.24921269016703676,
|
|
"learning_rate": 9.02023524722238e-06,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0844825804233551,
|
|
"step": 4535,
|
|
"valid_targets_mean": 6970.2,
|
|
"valid_targets_min": 1816
|
|
},
|
|
{
|
|
"epoch": 5.0221361372440505,
|
|
"grad_norm": 0.23123025216852947,
|
|
"learning_rate": 8.97416929877232e-06,
|
|
"loss": 0.1569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08778110146522522,
|
|
"step": 4540,
|
|
"valid_targets_mean": 7589.0,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 5.027670171555064,
|
|
"grad_norm": 0.2183352157949055,
|
|
"learning_rate": 8.928187231272473e-06,
|
|
"loss": 0.1521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08101414889097214,
|
|
"step": 4545,
|
|
"valid_targets_mean": 7366.7,
|
|
"valid_targets_min": 3293
|
|
},
|
|
{
|
|
"epoch": 5.033204205866077,
|
|
"grad_norm": 0.22039800103534601,
|
|
"learning_rate": 8.882289394539535e-06,
|
|
"loss": 0.1346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06690150499343872,
|
|
"step": 4550,
|
|
"valid_targets_mean": 6763.6,
|
|
"valid_targets_min": 2844
|
|
},
|
|
{
|
|
"epoch": 5.038738240177089,
|
|
"grad_norm": 0.21931906805649626,
|
|
"learning_rate": 8.836476137749385e-06,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07438153773546219,
|
|
"step": 4555,
|
|
"valid_targets_mean": 6678.9,
|
|
"valid_targets_min": 2465
|
|
},
|
|
{
|
|
"epoch": 5.044272274488102,
|
|
"grad_norm": 0.20586807991514844,
|
|
"learning_rate": 8.790747809434455e-06,
|
|
"loss": 0.138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06708905845880508,
|
|
"step": 4560,
|
|
"valid_targets_mean": 7128.3,
|
|
"valid_targets_min": 2482
|
|
},
|
|
{
|
|
"epoch": 5.0498063087991145,
|
|
"grad_norm": 0.2325373502097837,
|
|
"learning_rate": 8.745104757481046e-06,
|
|
"loss": 0.1315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06491201370954514,
|
|
"step": 4565,
|
|
"valid_targets_mean": 6365.0,
|
|
"valid_targets_min": 1594
|
|
},
|
|
{
|
|
"epoch": 5.055340343110127,
|
|
"grad_norm": 0.28916764748195456,
|
|
"learning_rate": 8.699547329126736e-06,
|
|
"loss": 0.1331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06788638234138489,
|
|
"step": 4570,
|
|
"valid_targets_mean": 6929.4,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 5.06087437742114,
|
|
"grad_norm": 0.25615559212069233,
|
|
"learning_rate": 8.654075870957676e-06,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05642274022102356,
|
|
"step": 4575,
|
|
"valid_targets_mean": 6168.6,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 5.066408411732152,
|
|
"grad_norm": 0.2691284143249438,
|
|
"learning_rate": 8.608690728906004e-06,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06038684770464897,
|
|
"step": 4580,
|
|
"valid_targets_mean": 6729.7,
|
|
"valid_targets_min": 2910
|
|
},
|
|
{
|
|
"epoch": 5.071942446043165,
|
|
"grad_norm": 0.2751600219231519,
|
|
"learning_rate": 8.563392248247193e-06,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06257107108831406,
|
|
"step": 4585,
|
|
"valid_targets_mean": 6594.8,
|
|
"valid_targets_min": 1941
|
|
},
|
|
{
|
|
"epoch": 5.0774764803541785,
|
|
"grad_norm": 0.250400668036176,
|
|
"learning_rate": 8.518180773597406e-06,
|
|
"loss": 0.1345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062037717550992966,
|
|
"step": 4590,
|
|
"valid_targets_mean": 6777.4,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 5.083010514665191,
|
|
"grad_norm": 0.23213367037483715,
|
|
"learning_rate": 8.473056648910909e-06,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061457958072423935,
|
|
"step": 4595,
|
|
"valid_targets_mean": 6707.7,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 5.088544548976204,
|
|
"grad_norm": 0.24681629870719266,
|
|
"learning_rate": 8.428020217477435e-06,
|
|
"loss": 0.1269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06621939688920975,
|
|
"step": 4600,
|
|
"valid_targets_mean": 7161.9,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 5.094078583287216,
|
|
"grad_norm": 0.22011790924248184,
|
|
"learning_rate": 8.383071821919581e-06,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06517525762319565,
|
|
"step": 4605,
|
|
"valid_targets_mean": 7025.5,
|
|
"valid_targets_min": 3109
|
|
},
|
|
{
|
|
"epoch": 5.099612617598229,
|
|
"grad_norm": 0.2308272738760395,
|
|
"learning_rate": 8.338211804190171e-06,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06220779940485954,
|
|
"step": 4610,
|
|
"valid_targets_mean": 6895.4,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 5.105146651909242,
|
|
"grad_norm": 0.2519594431488911,
|
|
"learning_rate": 8.293440505569702e-06,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06370244175195694,
|
|
"step": 4615,
|
|
"valid_targets_mean": 6505.2,
|
|
"valid_targets_min": 2623
|
|
},
|
|
{
|
|
"epoch": 5.110680686220254,
|
|
"grad_norm": 0.23678060362713865,
|
|
"learning_rate": 8.248758266663732e-06,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06401697546243668,
|
|
"step": 4620,
|
|
"valid_targets_mean": 6831.8,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 5.116214720531267,
|
|
"grad_norm": 0.2512630049931684,
|
|
"learning_rate": 8.204165427400248e-06,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05943406745791435,
|
|
"step": 4625,
|
|
"valid_targets_mean": 6923.1,
|
|
"valid_targets_min": 3116
|
|
},
|
|
{
|
|
"epoch": 5.12174875484228,
|
|
"grad_norm": 0.262980975137919,
|
|
"learning_rate": 8.159662327027143e-06,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051256000995635986,
|
|
"step": 4630,
|
|
"valid_targets_mean": 6147.5,
|
|
"valid_targets_min": 2014
|
|
},
|
|
{
|
|
"epoch": 5.127282789153293,
|
|
"grad_norm": 0.25330742312270843,
|
|
"learning_rate": 8.115249304109596e-06,
|
|
"loss": 0.1201,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062148112803697586,
|
|
"step": 4635,
|
|
"valid_targets_mean": 6990.4,
|
|
"valid_targets_min": 2412
|
|
},
|
|
{
|
|
"epoch": 5.132816823464306,
|
|
"grad_norm": 0.23882554511618578,
|
|
"learning_rate": 8.070926696527512e-06,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058880433440208435,
|
|
"step": 4640,
|
|
"valid_targets_mean": 6798.5,
|
|
"valid_targets_min": 2824
|
|
},
|
|
{
|
|
"epoch": 5.138350857775318,
|
|
"grad_norm": 0.31616935487527625,
|
|
"learning_rate": 8.026694841472929e-06,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05718575045466423,
|
|
"step": 4645,
|
|
"valid_targets_mean": 7045.3,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 5.143884892086331,
|
|
"grad_norm": 0.2323324300764862,
|
|
"learning_rate": 7.982554075447482e-06,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05392269790172577,
|
|
"step": 4650,
|
|
"valid_targets_mean": 6538.4,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 5.1494189263973436,
|
|
"grad_norm": 0.26192731565345284,
|
|
"learning_rate": 7.938504734259836e-06,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0585092194378376,
|
|
"step": 4655,
|
|
"valid_targets_mean": 6699.4,
|
|
"valid_targets_min": 2155
|
|
},
|
|
{
|
|
"epoch": 5.154952960708356,
|
|
"grad_norm": 0.27065708663094357,
|
|
"learning_rate": 7.894547153023104e-06,
|
|
"loss": 0.1099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052169669419527054,
|
|
"step": 4660,
|
|
"valid_targets_mean": 6332.6,
|
|
"valid_targets_min": 2295
|
|
},
|
|
{
|
|
"epoch": 5.160486995019369,
|
|
"grad_norm": 0.2792905073585824,
|
|
"learning_rate": 7.850681666152348e-06,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06055386736989021,
|
|
"step": 4665,
|
|
"valid_targets_mean": 6915.4,
|
|
"valid_targets_min": 1863
|
|
},
|
|
{
|
|
"epoch": 5.166021029330381,
|
|
"grad_norm": 0.26818560060554336,
|
|
"learning_rate": 7.806908607361975e-06,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05585666000843048,
|
|
"step": 4670,
|
|
"valid_targets_mean": 6859.3,
|
|
"valid_targets_min": 1842
|
|
},
|
|
{
|
|
"epoch": 5.171555063641395,
|
|
"grad_norm": 0.24361678510373205,
|
|
"learning_rate": 7.763228309663249e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05813150480389595,
|
|
"step": 4675,
|
|
"valid_targets_mean": 6832.0,
|
|
"valid_targets_min": 2689
|
|
},
|
|
{
|
|
"epoch": 5.177089097952408,
|
|
"grad_norm": 0.25066633263052085,
|
|
"learning_rate": 7.719641105361734e-06,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05950310826301575,
|
|
"step": 4680,
|
|
"valid_targets_mean": 6902.6,
|
|
"valid_targets_min": 2882
|
|
},
|
|
{
|
|
"epoch": 5.18262313226342,
|
|
"grad_norm": 0.2379735265243224,
|
|
"learning_rate": 7.676147326054777e-06,
|
|
"loss": 0.1167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04992043972015381,
|
|
"step": 4685,
|
|
"valid_targets_mean": 6325.6,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 5.188157166574433,
|
|
"grad_norm": 0.23783821622074944,
|
|
"learning_rate": 7.632747302628955e-06,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05535200610756874,
|
|
"step": 4690,
|
|
"valid_targets_mean": 7014.8,
|
|
"valid_targets_min": 2827
|
|
},
|
|
{
|
|
"epoch": 5.1936912008854454,
|
|
"grad_norm": 0.2536124353503241,
|
|
"learning_rate": 7.589441365257602e-06,
|
|
"loss": 0.1175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059621792286634445,
|
|
"step": 4695,
|
|
"valid_targets_mean": 6979.4,
|
|
"valid_targets_min": 2777
|
|
},
|
|
{
|
|
"epoch": 5.199225235196458,
|
|
"grad_norm": 0.2624400473065704,
|
|
"learning_rate": 7.546229843398271e-06,
|
|
"loss": 0.114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05143032595515251,
|
|
"step": 4700,
|
|
"valid_targets_mean": 6767.5,
|
|
"valid_targets_min": 2923
|
|
},
|
|
{
|
|
"epoch": 5.204759269507471,
|
|
"grad_norm": 0.23777881827254418,
|
|
"learning_rate": 7.503113065790222e-06,
|
|
"loss": 0.1187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05865449085831642,
|
|
"step": 4705,
|
|
"valid_targets_mean": 6852.5,
|
|
"valid_targets_min": 2299
|
|
},
|
|
{
|
|
"epoch": 5.210293303818483,
|
|
"grad_norm": 0.2662602707599381,
|
|
"learning_rate": 7.4600913604519445e-06,
|
|
"loss": 0.1174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056671250611543655,
|
|
"step": 4710,
|
|
"valid_targets_mean": 6578.0,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 5.215827338129497,
|
|
"grad_norm": 0.25881459566492687,
|
|
"learning_rate": 7.417165054678643e-06,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05237901210784912,
|
|
"step": 4715,
|
|
"valid_targets_mean": 6188.8,
|
|
"valid_targets_min": 2385
|
|
},
|
|
{
|
|
"epoch": 5.2213613724405095,
|
|
"grad_norm": 0.2578321461673644,
|
|
"learning_rate": 7.374334475039762e-06,
|
|
"loss": 0.1161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05409706011414528,
|
|
"step": 4720,
|
|
"valid_targets_mean": 6247.0,
|
|
"valid_targets_min": 1759
|
|
},
|
|
{
|
|
"epoch": 5.226895406751522,
|
|
"grad_norm": 0.3264457151684806,
|
|
"learning_rate": 7.331599947376471e-06,
|
|
"loss": 0.1032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04035346210002899,
|
|
"step": 4725,
|
|
"valid_targets_mean": 3846.9,
|
|
"valid_targets_min": 1635
|
|
},
|
|
{
|
|
"epoch": 5.232429441062535,
|
|
"grad_norm": 0.40942657277044675,
|
|
"learning_rate": 7.2889617967992255e-06,
|
|
"loss": 0.084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04189052805304527,
|
|
"step": 4730,
|
|
"valid_targets_mean": 4104.0,
|
|
"valid_targets_min": 1728
|
|
},
|
|
{
|
|
"epoch": 5.237963475373547,
|
|
"grad_norm": 0.41042956707091544,
|
|
"learning_rate": 7.246420347685279e-06,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06203579530119896,
|
|
"step": 4735,
|
|
"valid_targets_mean": 5437.0,
|
|
"valid_targets_min": 2245
|
|
},
|
|
{
|
|
"epoch": 5.24349750968456,
|
|
"grad_norm": 0.3061378279333753,
|
|
"learning_rate": 7.203975923676187e-06,
|
|
"loss": 0.0966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044795963913202286,
|
|
"step": 4740,
|
|
"valid_targets_mean": 4097.2,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 5.249031543995573,
|
|
"grad_norm": 0.2914998837573403,
|
|
"learning_rate": 7.161628847675404e-06,
|
|
"loss": 0.0912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04384605959057808,
|
|
"step": 4745,
|
|
"valid_targets_mean": 4265.5,
|
|
"valid_targets_min": 1788
|
|
},
|
|
{
|
|
"epoch": 5.254565578306585,
|
|
"grad_norm": 0.30376807330939326,
|
|
"learning_rate": 7.119379441845755e-06,
|
|
"loss": 0.0981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04657572880387306,
|
|
"step": 4750,
|
|
"valid_targets_mean": 4447.0,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 5.260099612617598,
|
|
"grad_norm": 0.3306816287024313,
|
|
"learning_rate": 7.077228027607051e-06,
|
|
"loss": 0.1066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047103408724069595,
|
|
"step": 4755,
|
|
"valid_targets_mean": 4379.6,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 5.265633646928611,
|
|
"grad_norm": 0.3191925419611457,
|
|
"learning_rate": 7.035174925633601e-06,
|
|
"loss": 0.1048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06188632547855377,
|
|
"step": 4760,
|
|
"valid_targets_mean": 5404.9,
|
|
"valid_targets_min": 1590
|
|
},
|
|
{
|
|
"epoch": 5.271167681239624,
|
|
"grad_norm": 0.27568048786242777,
|
|
"learning_rate": 6.993220455851793e-06,
|
|
"loss": 0.0967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044619277119636536,
|
|
"step": 4765,
|
|
"valid_targets_mean": 4231.8,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 5.276701715550637,
|
|
"grad_norm": 0.2681799337705664,
|
|
"learning_rate": 6.9513649374376545e-06,
|
|
"loss": 0.094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05106706544756889,
|
|
"step": 4770,
|
|
"valid_targets_mean": 4803.9,
|
|
"valid_targets_min": 1700
|
|
},
|
|
{
|
|
"epoch": 5.282235749861649,
|
|
"grad_norm": 0.298973414328357,
|
|
"learning_rate": 6.909608688814406e-06,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.044636014848947525,
|
|
"step": 4775,
|
|
"valid_targets_mean": 3728.0,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 5.287769784172662,
|
|
"grad_norm": 0.28881994899588387,
|
|
"learning_rate": 6.867952027650082e-06,
|
|
"loss": 0.1071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06730066984891891,
|
|
"step": 4780,
|
|
"valid_targets_mean": 5108.4,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 5.2933038184836745,
|
|
"grad_norm": 0.30289762439677825,
|
|
"learning_rate": 6.82639527085506e-06,
|
|
"loss": 0.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04290211200714111,
|
|
"step": 4785,
|
|
"valid_targets_mean": 4185.0,
|
|
"valid_targets_min": 2178
|
|
},
|
|
{
|
|
"epoch": 5.298837852794687,
|
|
"grad_norm": 0.25043690090210174,
|
|
"learning_rate": 6.784938734579696e-06,
|
|
"loss": 0.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045655298978090286,
|
|
"step": 4790,
|
|
"valid_targets_mean": 4341.5,
|
|
"valid_targets_min": 1581
|
|
},
|
|
{
|
|
"epoch": 5.3043718871057,
|
|
"grad_norm": 0.305794362107919,
|
|
"learning_rate": 6.7435827342118975e-06,
|
|
"loss": 0.094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051216140389442444,
|
|
"step": 4795,
|
|
"valid_targets_mean": 4570.1,
|
|
"valid_targets_min": 1677
|
|
},
|
|
{
|
|
"epoch": 5.309905921416712,
|
|
"grad_norm": 0.3031531972220382,
|
|
"learning_rate": 6.702327584374733e-06,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05147046968340874,
|
|
"step": 4800,
|
|
"valid_targets_mean": 4621.4,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 5.315439955727726,
|
|
"grad_norm": 0.3162091598967438,
|
|
"learning_rate": 6.661173598924009e-06,
|
|
"loss": 0.0995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05728279426693916,
|
|
"step": 4805,
|
|
"valid_targets_mean": 4701.1,
|
|
"valid_targets_min": 1892
|
|
},
|
|
{
|
|
"epoch": 5.3209739900387385,
|
|
"grad_norm": 0.27126503376359495,
|
|
"learning_rate": 6.620121090945932e-06,
|
|
"loss": 0.0926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04581926390528679,
|
|
"step": 4810,
|
|
"valid_targets_mean": 4210.5,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 5.326508024349751,
|
|
"grad_norm": 0.2692516343928811,
|
|
"learning_rate": 6.5791703727547e-06,
|
|
"loss": 0.0936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0382157638669014,
|
|
"step": 4815,
|
|
"valid_targets_mean": 4092.7,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 5.332042058660764,
|
|
"grad_norm": 0.3396002792680986,
|
|
"learning_rate": 6.538321755890103e-06,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04399949684739113,
|
|
"step": 4820,
|
|
"valid_targets_mean": 3938.2,
|
|
"valid_targets_min": 1518
|
|
},
|
|
{
|
|
"epoch": 5.337576092971776,
|
|
"grad_norm": 0.29307725564948145,
|
|
"learning_rate": 6.497575551115205e-06,
|
|
"loss": 0.0998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04623451828956604,
|
|
"step": 4825,
|
|
"valid_targets_mean": 3866.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 5.343110127282789,
|
|
"grad_norm": 0.28264014387665437,
|
|
"learning_rate": 6.456932068413928e-06,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04547819495201111,
|
|
"step": 4830,
|
|
"valid_targets_mean": 4178.5,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 5.348644161593802,
|
|
"grad_norm": 0.3035715825727704,
|
|
"learning_rate": 6.416391616988735e-06,
|
|
"loss": 0.098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05045393109321594,
|
|
"step": 4835,
|
|
"valid_targets_mean": 4422.5,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 5.354178195904814,
|
|
"grad_norm": 0.2996643875725543,
|
|
"learning_rate": 6.3759545052582596e-06,
|
|
"loss": 0.0972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05186879634857178,
|
|
"step": 4840,
|
|
"valid_targets_mean": 4715.8,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 5.359712230215827,
|
|
"grad_norm": 0.27842839335332986,
|
|
"learning_rate": 6.335621040854949e-06,
|
|
"loss": 0.0953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04677329584956169,
|
|
"step": 4845,
|
|
"valid_targets_mean": 4308.6,
|
|
"valid_targets_min": 1797
|
|
},
|
|
{
|
|
"epoch": 5.36524626452684,
|
|
"grad_norm": 0.2728960533899608,
|
|
"learning_rate": 6.295391530622754e-06,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04513225331902504,
|
|
"step": 4850,
|
|
"valid_targets_mean": 4429.0,
|
|
"valid_targets_min": 1563
|
|
},
|
|
{
|
|
"epoch": 5.370780298837853,
|
|
"grad_norm": 0.2930308558837003,
|
|
"learning_rate": 6.255266280614747e-06,
|
|
"loss": 0.0987,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0623805969953537,
|
|
"step": 4855,
|
|
"valid_targets_mean": 4832.3,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 5.376314333148866,
|
|
"grad_norm": 0.2809054803196358,
|
|
"learning_rate": 6.215245596090853e-06,
|
|
"loss": 0.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04654247686266899,
|
|
"step": 4860,
|
|
"valid_targets_mean": 4335.4,
|
|
"valid_targets_min": 1729
|
|
},
|
|
{
|
|
"epoch": 5.381848367459878,
|
|
"grad_norm": 0.26481027178759486,
|
|
"learning_rate": 6.1753297815154716e-06,
|
|
"loss": 0.0886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03547029569745064,
|
|
"step": 4865,
|
|
"valid_targets_mean": 3609.3,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 5.387382401770891,
|
|
"grad_norm": 0.27745340499226884,
|
|
"learning_rate": 6.135519140555199e-06,
|
|
"loss": 0.0875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04294551536440849,
|
|
"step": 4870,
|
|
"valid_targets_mean": 4319.0,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 5.3929164360819035,
|
|
"grad_norm": 0.30288204365953497,
|
|
"learning_rate": 6.0958139760765074e-06,
|
|
"loss": 0.0921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04404045268893242,
|
|
"step": 4875,
|
|
"valid_targets_mean": 4539.0,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 5.398450470392916,
|
|
"grad_norm": 0.3005477264174116,
|
|
"learning_rate": 6.05621459014343e-06,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052519965916872025,
|
|
"step": 4880,
|
|
"valid_targets_mean": 4650.9,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 5.403984504703929,
|
|
"grad_norm": 0.2588205344702586,
|
|
"learning_rate": 6.016721284015281e-06,
|
|
"loss": 0.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041075099259614944,
|
|
"step": 4885,
|
|
"valid_targets_mean": 3957.8,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 5.409518539014942,
|
|
"grad_norm": 0.26904245923876685,
|
|
"learning_rate": 5.977334358144335e-06,
|
|
"loss": 0.0902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04499991610646248,
|
|
"step": 4890,
|
|
"valid_targets_mean": 4258.2,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 5.415052573325955,
|
|
"grad_norm": 0.27780104929993793,
|
|
"learning_rate": 5.938054112173585e-06,
|
|
"loss": 0.0937,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04547039791941643,
|
|
"step": 4895,
|
|
"valid_targets_mean": 4293.1,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 5.4205866076369675,
|
|
"grad_norm": 0.2835969799258284,
|
|
"learning_rate": 5.898880844934411e-06,
|
|
"loss": 0.0854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042660683393478394,
|
|
"step": 4900,
|
|
"valid_targets_mean": 4114.5,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 5.42612064194798,
|
|
"grad_norm": 0.26809629928124185,
|
|
"learning_rate": 5.859814854444361e-06,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04189951717853546,
|
|
"step": 4905,
|
|
"valid_targets_mean": 4430.8,
|
|
"valid_targets_min": 2314
|
|
},
|
|
{
|
|
"epoch": 5.431654676258993,
|
|
"grad_norm": 0.3033668506817014,
|
|
"learning_rate": 5.820856437904829e-06,
|
|
"loss": 0.0893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04731345176696777,
|
|
"step": 4910,
|
|
"valid_targets_mean": 4497.4,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 5.437188710570005,
|
|
"grad_norm": 0.27894467946557433,
|
|
"learning_rate": 5.782005891698843e-06,
|
|
"loss": 0.0889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04985889792442322,
|
|
"step": 4915,
|
|
"valid_targets_mean": 4274.5,
|
|
"valid_targets_min": 1618
|
|
},
|
|
{
|
|
"epoch": 5.442722744881018,
|
|
"grad_norm": 0.2610829057581517,
|
|
"learning_rate": 5.743263511388777e-06,
|
|
"loss": 0.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043534595519304276,
|
|
"step": 4920,
|
|
"valid_targets_mean": 4277.4,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 5.448256779192031,
|
|
"grad_norm": 0.2814480946363349,
|
|
"learning_rate": 5.704629591714119e-06,
|
|
"loss": 0.0845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04236781597137451,
|
|
"step": 4925,
|
|
"valid_targets_mean": 4469.7,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 5.453790813503044,
|
|
"grad_norm": 0.28196491095765974,
|
|
"learning_rate": 5.666104426589227e-06,
|
|
"loss": 0.0902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045051101595163345,
|
|
"step": 4930,
|
|
"valid_targets_mean": 4527.5,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 5.459324847814057,
|
|
"grad_norm": 0.2793798119184077,
|
|
"learning_rate": 5.627688309101074e-06,
|
|
"loss": 0.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04612480476498604,
|
|
"step": 4935,
|
|
"valid_targets_mean": 4689.2,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 5.464858882125069,
|
|
"grad_norm": 0.2527466996252714,
|
|
"learning_rate": 5.5893815315070545e-06,
|
|
"loss": 0.088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03624003753066063,
|
|
"step": 4940,
|
|
"valid_targets_mean": 4317.8,
|
|
"valid_targets_min": 1812
|
|
},
|
|
{
|
|
"epoch": 5.470392916436082,
|
|
"grad_norm": 0.2548369389008775,
|
|
"learning_rate": 5.551184385232717e-06,
|
|
"loss": 0.0842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03714959695935249,
|
|
"step": 4945,
|
|
"valid_targets_mean": 5641.1,
|
|
"valid_targets_min": 2296
|
|
},
|
|
{
|
|
"epoch": 5.475926950747095,
|
|
"grad_norm": 0.26918984483077724,
|
|
"learning_rate": 5.513097160869594e-06,
|
|
"loss": 0.0926,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04826346039772034,
|
|
"step": 4950,
|
|
"valid_targets_mean": 5349.0,
|
|
"valid_targets_min": 2050
|
|
},
|
|
{
|
|
"epoch": 5.481460985058107,
|
|
"grad_norm": 0.22898992837192203,
|
|
"learning_rate": 5.475120148172952e-06,
|
|
"loss": 0.1016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0547969788312912,
|
|
"step": 4955,
|
|
"valid_targets_mean": 5783.2,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 5.48699501936912,
|
|
"grad_norm": 0.20835361008156356,
|
|
"learning_rate": 5.4372536360596095e-06,
|
|
"loss": 0.0899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05061693117022514,
|
|
"step": 4960,
|
|
"valid_targets_mean": 5767.9,
|
|
"valid_targets_min": 2653
|
|
},
|
|
{
|
|
"epoch": 5.4925290536801326,
|
|
"grad_norm": 0.3263923310676471,
|
|
"learning_rate": 5.39949791260574e-06,
|
|
"loss": 0.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04884323105216026,
|
|
"step": 4965,
|
|
"valid_targets_mean": 5619.5,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 5.498063087991145,
|
|
"grad_norm": 0.18223012730725743,
|
|
"learning_rate": 5.36185326504465e-06,
|
|
"loss": 0.0859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043797388672828674,
|
|
"step": 4970,
|
|
"valid_targets_mean": 5253.8,
|
|
"valid_targets_min": 1239
|
|
},
|
|
{
|
|
"epoch": 5.503597122302159,
|
|
"grad_norm": 0.17208167859721404,
|
|
"learning_rate": 5.324319979764638e-06,
|
|
"loss": 0.079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041199374943971634,
|
|
"step": 4975,
|
|
"valid_targets_mean": 5506.8,
|
|
"valid_targets_min": 2598
|
|
},
|
|
{
|
|
"epoch": 5.509131156613171,
|
|
"grad_norm": 0.1742427957489206,
|
|
"learning_rate": 5.286898342306781e-06,
|
|
"loss": 0.0785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03271806240081787,
|
|
"step": 4980,
|
|
"valid_targets_mean": 5309.8,
|
|
"valid_targets_min": 2443
|
|
},
|
|
{
|
|
"epoch": 5.514665190924184,
|
|
"grad_norm": 0.19408572897818147,
|
|
"learning_rate": 5.24958863736279e-06,
|
|
"loss": 0.0746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05028039216995239,
|
|
"step": 4985,
|
|
"valid_targets_mean": 5868.3,
|
|
"valid_targets_min": 2497
|
|
},
|
|
{
|
|
"epoch": 5.520199225235197,
|
|
"grad_norm": 0.1883711835145652,
|
|
"learning_rate": 5.212391148772808e-06,
|
|
"loss": 0.0752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035246167331933975,
|
|
"step": 4990,
|
|
"valid_targets_mean": 5761.6,
|
|
"valid_targets_min": 2096
|
|
},
|
|
{
|
|
"epoch": 5.525733259546209,
|
|
"grad_norm": 0.20122322848305024,
|
|
"learning_rate": 5.175306159523292e-06,
|
|
"loss": 0.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04153045266866684,
|
|
"step": 4995,
|
|
"valid_targets_mean": 5816.0,
|
|
"valid_targets_min": 2258
|
|
},
|
|
{
|
|
"epoch": 5.531267293857222,
|
|
"grad_norm": 0.1877338847041848,
|
|
"learning_rate": 5.138333951744834e-06,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040626995265483856,
|
|
"step": 5000,
|
|
"valid_targets_mean": 5984.8,
|
|
"valid_targets_min": 2264
|
|
},
|
|
{
|
|
"epoch": 5.5368013281682344,
|
|
"grad_norm": 0.18575057786697535,
|
|
"learning_rate": 5.101474806710018e-06,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03857755288481712,
|
|
"step": 5005,
|
|
"valid_targets_mean": 5711.8,
|
|
"valid_targets_min": 2394
|
|
},
|
|
{
|
|
"epoch": 5.542335362479247,
|
|
"grad_norm": 0.1937292314638601,
|
|
"learning_rate": 5.064729004831297e-06,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0389263816177845,
|
|
"step": 5010,
|
|
"valid_targets_mean": 5680.3,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 5.54786939679026,
|
|
"grad_norm": 0.19161248239894565,
|
|
"learning_rate": 5.028096825658817e-06,
|
|
"loss": 0.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03286981210112572,
|
|
"step": 5015,
|
|
"valid_targets_mean": 5869.9,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 5.553403431101273,
|
|
"grad_norm": 0.1803407447825603,
|
|
"learning_rate": 4.991578547878353e-06,
|
|
"loss": 0.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03436180204153061,
|
|
"step": 5020,
|
|
"valid_targets_mean": 5479.9,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 5.558937465412286,
|
|
"grad_norm": 0.1926916620778669,
|
|
"learning_rate": 4.955174449309126e-06,
|
|
"loss": 0.068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03226090595126152,
|
|
"step": 5025,
|
|
"valid_targets_mean": 5297.5,
|
|
"valid_targets_min": 1932
|
|
},
|
|
{
|
|
"epoch": 5.5644714997232985,
|
|
"grad_norm": 0.17699772829970112,
|
|
"learning_rate": 4.918884806901736e-06,
|
|
"loss": 0.0661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02836117148399353,
|
|
"step": 5030,
|
|
"valid_targets_mean": 5783.3,
|
|
"valid_targets_min": 944
|
|
},
|
|
{
|
|
"epoch": 5.570005534034311,
|
|
"grad_norm": 0.18361638296491878,
|
|
"learning_rate": 4.882709896736035e-06,
|
|
"loss": 0.064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029991939663887024,
|
|
"step": 5035,
|
|
"valid_targets_mean": 5570.2,
|
|
"valid_targets_min": 2844
|
|
},
|
|
{
|
|
"epoch": 5.575539568345324,
|
|
"grad_norm": 0.19332970772301838,
|
|
"learning_rate": 4.846649994019028e-06,
|
|
"loss": 0.0646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030482301488518715,
|
|
"step": 5040,
|
|
"valid_targets_mean": 5753.7,
|
|
"valid_targets_min": 2045
|
|
},
|
|
{
|
|
"epoch": 5.581073602656336,
|
|
"grad_norm": 0.1802565765077648,
|
|
"learning_rate": 4.810705373082785e-06,
|
|
"loss": 0.0672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03281958028674126,
|
|
"step": 5045,
|
|
"valid_targets_mean": 5353.3,
|
|
"valid_targets_min": 2316
|
|
},
|
|
{
|
|
"epoch": 5.586607636967349,
|
|
"grad_norm": 0.18209674554957742,
|
|
"learning_rate": 4.77487630738233e-06,
|
|
"loss": 0.0659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03288013115525246,
|
|
"step": 5050,
|
|
"valid_targets_mean": 5618.7,
|
|
"valid_targets_min": 2144
|
|
},
|
|
{
|
|
"epoch": 5.592141671278362,
|
|
"grad_norm": 0.19378510164029844,
|
|
"learning_rate": 4.739163069493611e-06,
|
|
"loss": 0.064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03251936659216881,
|
|
"step": 5055,
|
|
"valid_targets_mean": 5338.1,
|
|
"valid_targets_min": 1731
|
|
},
|
|
{
|
|
"epoch": 5.597675705589374,
|
|
"grad_norm": 0.1904853135186227,
|
|
"learning_rate": 4.7035659311113665e-06,
|
|
"loss": 0.0621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035014282912015915,
|
|
"step": 5060,
|
|
"valid_targets_mean": 5883.8,
|
|
"valid_targets_min": 2583
|
|
},
|
|
{
|
|
"epoch": 5.603209739900388,
|
|
"grad_norm": 0.17577557424938922,
|
|
"learning_rate": 4.668085163047109e-06,
|
|
"loss": 0.0605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029335923492908478,
|
|
"step": 5065,
|
|
"valid_targets_mean": 5300.8,
|
|
"valid_targets_min": 1973
|
|
},
|
|
{
|
|
"epoch": 5.6087437742114,
|
|
"grad_norm": 0.1927824160022825,
|
|
"learning_rate": 4.632721035227028e-06,
|
|
"loss": 0.0633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03788955882191658,
|
|
"step": 5070,
|
|
"valid_targets_mean": 5928.1,
|
|
"valid_targets_min": 3227
|
|
},
|
|
{
|
|
"epoch": 5.614277808522413,
|
|
"grad_norm": 0.19769655833056504,
|
|
"learning_rate": 4.597473816689959e-06,
|
|
"loss": 0.0641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032567158341407776,
|
|
"step": 5075,
|
|
"valid_targets_mean": 5923.1,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 5.619811842833426,
|
|
"grad_norm": 0.17335162190475464,
|
|
"learning_rate": 4.562343775585334e-06,
|
|
"loss": 0.0609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026050040498375893,
|
|
"step": 5080,
|
|
"valid_targets_mean": 5607.5,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 5.625345877144438,
|
|
"grad_norm": 0.1927394890214593,
|
|
"learning_rate": 4.5273311791711265e-06,
|
|
"loss": 0.0576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033131640404462814,
|
|
"step": 5085,
|
|
"valid_targets_mean": 5891.4,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 5.630879911455451,
|
|
"grad_norm": 0.17771864942156337,
|
|
"learning_rate": 4.492436293811844e-06,
|
|
"loss": 0.0599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032288823276758194,
|
|
"step": 5090,
|
|
"valid_targets_mean": 5810.3,
|
|
"valid_targets_min": 2797
|
|
},
|
|
{
|
|
"epoch": 5.6364139457664635,
|
|
"grad_norm": 0.19154573618071724,
|
|
"learning_rate": 4.457659384976463e-06,
|
|
"loss": 0.0576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03311631456017494,
|
|
"step": 5095,
|
|
"valid_targets_mean": 5845.2,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 5.641947980077476,
|
|
"grad_norm": 0.17290736570454462,
|
|
"learning_rate": 4.423000717236458e-06,
|
|
"loss": 0.0602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026159053668379784,
|
|
"step": 5100,
|
|
"valid_targets_mean": 5777.4,
|
|
"valid_targets_min": 2375
|
|
},
|
|
{
|
|
"epoch": 5.647482014388489,
|
|
"grad_norm": 0.1682909452754771,
|
|
"learning_rate": 4.388460554263743e-06,
|
|
"loss": 0.0601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028254443779587746,
|
|
"step": 5105,
|
|
"valid_targets_mean": 5355.5,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 5.653016048699502,
|
|
"grad_norm": 0.16940272629745565,
|
|
"learning_rate": 4.354039158828698e-06,
|
|
"loss": 0.0536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0268237367272377,
|
|
"step": 5110,
|
|
"valid_targets_mean": 5619.6,
|
|
"valid_targets_min": 2248
|
|
},
|
|
{
|
|
"epoch": 5.658550083010515,
|
|
"grad_norm": 0.20684182065746543,
|
|
"learning_rate": 4.319736792798157e-06,
|
|
"loss": 0.0571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03059101104736328,
|
|
"step": 5115,
|
|
"valid_targets_mean": 5980.5,
|
|
"valid_targets_min": 2300
|
|
},
|
|
{
|
|
"epoch": 5.6640841173215275,
|
|
"grad_norm": 0.20355768534181248,
|
|
"learning_rate": 4.285553717133413e-06,
|
|
"loss": 0.0573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0313802994787693,
|
|
"step": 5120,
|
|
"valid_targets_mean": 5847.6,
|
|
"valid_targets_min": 3518
|
|
},
|
|
{
|
|
"epoch": 5.66961815163254,
|
|
"grad_norm": 0.17795362801982467,
|
|
"learning_rate": 4.251490191888246e-06,
|
|
"loss": 0.0554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025926783680915833,
|
|
"step": 5125,
|
|
"valid_targets_mean": 5279.1,
|
|
"valid_targets_min": 2145
|
|
},
|
|
{
|
|
"epoch": 5.675152185943553,
|
|
"grad_norm": 0.19984037968820786,
|
|
"learning_rate": 4.217546476206915e-06,
|
|
"loss": 0.0583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03138670697808266,
|
|
"step": 5130,
|
|
"valid_targets_mean": 5963.4,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 5.680686220254565,
|
|
"grad_norm": 0.2050104885169466,
|
|
"learning_rate": 4.183722828322232e-06,
|
|
"loss": 0.0575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03241662308573723,
|
|
"step": 5135,
|
|
"valid_targets_mean": 5615.0,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 5.686220254565578,
|
|
"grad_norm": 0.1790320457867811,
|
|
"learning_rate": 4.150019505553546e-06,
|
|
"loss": 0.0551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024099178612232208,
|
|
"step": 5140,
|
|
"valid_targets_mean": 5902.0,
|
|
"valid_targets_min": 2804
|
|
},
|
|
{
|
|
"epoch": 5.6917542888765915,
|
|
"grad_norm": 0.1748063463819483,
|
|
"learning_rate": 4.116436764304832e-06,
|
|
"loss": 0.0526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027424706146121025,
|
|
"step": 5145,
|
|
"valid_targets_mean": 5319.6,
|
|
"valid_targets_min": 2528
|
|
},
|
|
{
|
|
"epoch": 5.697288323187604,
|
|
"grad_norm": 0.1782024377321046,
|
|
"learning_rate": 4.082974860062718e-06,
|
|
"loss": 0.0556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026081182062625885,
|
|
"step": 5150,
|
|
"valid_targets_mean": 5529.9,
|
|
"valid_targets_min": 2824
|
|
},
|
|
{
|
|
"epoch": 5.702822357498617,
|
|
"grad_norm": 0.177274788200606,
|
|
"learning_rate": 4.0496340473945264e-06,
|
|
"loss": 0.0509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024925321340560913,
|
|
"step": 5155,
|
|
"valid_targets_mean": 5210.4,
|
|
"valid_targets_min": 2273
|
|
},
|
|
{
|
|
"epoch": 5.708356391809629,
|
|
"grad_norm": 0.19751307017113642,
|
|
"learning_rate": 4.0164145799463686e-06,
|
|
"loss": 0.0518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027725854888558388,
|
|
"step": 5160,
|
|
"valid_targets_mean": 5708.2,
|
|
"valid_targets_min": 2025
|
|
},
|
|
{
|
|
"epoch": 5.713890426120642,
|
|
"grad_norm": 0.1837401122379636,
|
|
"learning_rate": 3.983316710441203e-06,
|
|
"loss": 0.0518,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024004802107810974,
|
|
"step": 5165,
|
|
"valid_targets_mean": 5535.9,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 5.719424460431655,
|
|
"grad_norm": 0.190406571209415,
|
|
"learning_rate": 3.950340690676904e-06,
|
|
"loss": 0.0547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028329044580459595,
|
|
"step": 5170,
|
|
"valid_targets_mean": 5587.0,
|
|
"valid_targets_min": 2655
|
|
},
|
|
{
|
|
"epoch": 5.724958494742667,
|
|
"grad_norm": 0.1949683903785584,
|
|
"learning_rate": 3.917486771524348e-06,
|
|
"loss": 0.0538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02764093317091465,
|
|
"step": 5175,
|
|
"valid_targets_mean": 5726.0,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 5.73049252905368,
|
|
"grad_norm": 0.18363803990275362,
|
|
"learning_rate": 3.884755202925523e-06,
|
|
"loss": 0.0535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026779597625136375,
|
|
"step": 5180,
|
|
"valid_targets_mean": 5276.6,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 5.7360265633646925,
|
|
"grad_norm": 0.1923047283339151,
|
|
"learning_rate": 3.852146233891598e-06,
|
|
"loss": 0.0539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025118621066212654,
|
|
"step": 5185,
|
|
"valid_targets_mean": 4917.2,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 5.741560597675706,
|
|
"grad_norm": 0.18362331997004988,
|
|
"learning_rate": 3.819660112501053e-06,
|
|
"loss": 0.0523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025888383388519287,
|
|
"step": 5190,
|
|
"valid_targets_mean": 5920.3,
|
|
"valid_targets_min": 2692
|
|
},
|
|
{
|
|
"epoch": 5.747094631986719,
|
|
"grad_norm": 0.19299061576096796,
|
|
"learning_rate": 3.787297085897783e-06,
|
|
"loss": 0.0536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026169372722506523,
|
|
"step": 5195,
|
|
"valid_targets_mean": 5935.9,
|
|
"valid_targets_min": 2414
|
|
},
|
|
{
|
|
"epoch": 5.752628666297731,
|
|
"grad_norm": 0.1934208644361245,
|
|
"learning_rate": 3.7550574002892193e-06,
|
|
"loss": 0.057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02640332095324993,
|
|
"step": 5200,
|
|
"valid_targets_mean": 5286.9,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 5.758162700608744,
|
|
"grad_norm": 0.28244689860994376,
|
|
"learning_rate": 3.722941300944454e-06,
|
|
"loss": 0.0706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03444273769855499,
|
|
"step": 5205,
|
|
"valid_targets_mean": 4064.5,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 5.7636967349197565,
|
|
"grad_norm": 0.296330254238661,
|
|
"learning_rate": 3.6909490321923634e-06,
|
|
"loss": 0.073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03813193738460541,
|
|
"step": 5210,
|
|
"valid_targets_mean": 4034.0,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.3122974913435053,
|
|
"learning_rate": 3.6590808374197816e-06,
|
|
"loss": 0.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036831844598054886,
|
|
"step": 5215,
|
|
"valid_targets_mean": 4315.0,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 5.774764803541782,
|
|
"grad_norm": 0.2718515044346279,
|
|
"learning_rate": 3.627336959069605e-06,
|
|
"loss": 0.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04120616614818573,
|
|
"step": 5220,
|
|
"valid_targets_mean": 4799.0,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 5.780298837852794,
|
|
"grad_norm": 0.24395973721925102,
|
|
"learning_rate": 3.5957176386389893e-06,
|
|
"loss": 0.0775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04204801842570305,
|
|
"step": 5225,
|
|
"valid_targets_mean": 4921.9,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 5.785832872163807,
|
|
"grad_norm": 0.24764045579431937,
|
|
"learning_rate": 3.5642231166774897e-06,
|
|
"loss": 0.0749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039783138781785965,
|
|
"step": 5230,
|
|
"valid_targets_mean": 4232.0,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 5.7913669064748206,
|
|
"grad_norm": 0.3306174867366802,
|
|
"learning_rate": 3.532853632785227e-06,
|
|
"loss": 0.0721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0378490649163723,
|
|
"step": 5235,
|
|
"valid_targets_mean": 7718.0,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 5.796900940785833,
|
|
"grad_norm": 0.32124352248548405,
|
|
"learning_rate": 3.501609425611079e-06,
|
|
"loss": 0.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040089331567287445,
|
|
"step": 5240,
|
|
"valid_targets_mean": 7411.9,
|
|
"valid_targets_min": 1367
|
|
},
|
|
{
|
|
"epoch": 5.802434975096846,
|
|
"grad_norm": 0.2691272435692972,
|
|
"learning_rate": 3.4704907328508576e-06,
|
|
"loss": 0.0803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036173637956380844,
|
|
"step": 5245,
|
|
"valid_targets_mean": 6794.7,
|
|
"valid_targets_min": 1002
|
|
},
|
|
{
|
|
"epoch": 5.807969009407858,
|
|
"grad_norm": 0.2620356647940023,
|
|
"learning_rate": 3.439497791245512e-06,
|
|
"loss": 0.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0450415201485157,
|
|
"step": 5250,
|
|
"valid_targets_mean": 7931.0,
|
|
"valid_targets_min": 2635
|
|
},
|
|
{
|
|
"epoch": 5.813503043718871,
|
|
"grad_norm": 0.22557524289207584,
|
|
"learning_rate": 3.408630836579294e-06,
|
|
"loss": 0.0842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03923304006457329,
|
|
"step": 5255,
|
|
"valid_targets_mean": 7836.5,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 5.819037078029884,
|
|
"grad_norm": 0.2045914134636371,
|
|
"learning_rate": 3.3778901036780076e-06,
|
|
"loss": 0.0807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03792804852128029,
|
|
"step": 5260,
|
|
"valid_targets_mean": 8004.7,
|
|
"valid_targets_min": 2354
|
|
},
|
|
{
|
|
"epoch": 5.824571112340896,
|
|
"grad_norm": 0.3427586199909158,
|
|
"learning_rate": 3.347275826407199e-06,
|
|
"loss": 0.1138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06689509004354477,
|
|
"step": 5265,
|
|
"valid_targets_mean": 14621.4,
|
|
"valid_targets_min": 1121
|
|
},
|
|
{
|
|
"epoch": 5.830105146651909,
|
|
"grad_norm": 0.29603497949784,
|
|
"learning_rate": 3.3167882376703696e-06,
|
|
"loss": 0.1306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06736762076616287,
|
|
"step": 5270,
|
|
"valid_targets_mean": 13730.6,
|
|
"valid_targets_min": 5627
|
|
},
|
|
{
|
|
"epoch": 5.835639180962922,
|
|
"grad_norm": 0.20784944949332113,
|
|
"learning_rate": 3.286427569407229e-06,
|
|
"loss": 0.1285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06973903626203537,
|
|
"step": 5275,
|
|
"valid_targets_mean": 13851.5,
|
|
"valid_targets_min": 2420
|
|
},
|
|
{
|
|
"epoch": 5.841173215273935,
|
|
"grad_norm": 0.18870880205922533,
|
|
"learning_rate": 3.2561940525919124e-06,
|
|
"loss": 0.1244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05548988655209541,
|
|
"step": 5280,
|
|
"valid_targets_mean": 10187.0,
|
|
"valid_targets_min": 4526
|
|
},
|
|
{
|
|
"epoch": 5.846707249584948,
|
|
"grad_norm": 0.27156730295517534,
|
|
"learning_rate": 3.226087917231231e-06,
|
|
"loss": 0.0866,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03967982530593872,
|
|
"step": 5285,
|
|
"valid_targets_mean": 7247.6,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 5.85224128389596,
|
|
"grad_norm": 0.234889121335806,
|
|
"learning_rate": 3.1961093923629028e-06,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03272298723459244,
|
|
"step": 5290,
|
|
"valid_targets_mean": 7225.9,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 5.857775318206973,
|
|
"grad_norm": 0.2709109126040272,
|
|
"learning_rate": 3.166258706053855e-06,
|
|
"loss": 0.0777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05472330376505852,
|
|
"step": 5295,
|
|
"valid_targets_mean": 4795.3,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 5.863309352517986,
|
|
"grad_norm": 0.2507789435309829,
|
|
"learning_rate": 3.1365360853984294e-06,
|
|
"loss": 0.0965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04301333427429199,
|
|
"step": 5300,
|
|
"valid_targets_mean": 4640.0,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 5.868843386828998,
|
|
"grad_norm": 0.24171054349923565,
|
|
"learning_rate": 3.106941756516708e-06,
|
|
"loss": 0.0853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03779042512178421,
|
|
"step": 5305,
|
|
"valid_targets_mean": 4969.9,
|
|
"valid_targets_min": 1543
|
|
},
|
|
{
|
|
"epoch": 5.874377421140011,
|
|
"grad_norm": 0.23818204523791442,
|
|
"learning_rate": 3.077475944552768e-06,
|
|
"loss": 0.086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03676900640130043,
|
|
"step": 5310,
|
|
"valid_targets_mean": 4724.9,
|
|
"valid_targets_min": 1634
|
|
},
|
|
{
|
|
"epoch": 5.8799114554510234,
|
|
"grad_norm": 0.2745452909907268,
|
|
"learning_rate": 3.0481388736729566e-06,
|
|
"loss": 0.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040772464126348495,
|
|
"step": 5315,
|
|
"valid_targets_mean": 3955.6,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 5.885445489762036,
|
|
"grad_norm": 0.2716601097234908,
|
|
"learning_rate": 3.0189307670642186e-06,
|
|
"loss": 0.0844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05109298229217529,
|
|
"step": 5320,
|
|
"valid_targets_mean": 5335.9,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 5.89097952407305,
|
|
"grad_norm": 0.28363532790959217,
|
|
"learning_rate": 2.9898518469323677e-06,
|
|
"loss": 0.0947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059856902807950974,
|
|
"step": 5325,
|
|
"valid_targets_mean": 5668.2,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 5.896513558384062,
|
|
"grad_norm": 0.2885730075923772,
|
|
"learning_rate": 2.9609023345004217e-06,
|
|
"loss": 0.1012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055200595408678055,
|
|
"step": 5330,
|
|
"valid_targets_mean": 5241.9,
|
|
"valid_targets_min": 2052
|
|
},
|
|
{
|
|
"epoch": 5.902047592695075,
|
|
"grad_norm": 0.2418413992225755,
|
|
"learning_rate": 2.93208245000689e-06,
|
|
"loss": 0.1018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04855811595916748,
|
|
"step": 5335,
|
|
"valid_targets_mean": 4501.1,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 5.9075816270060875,
|
|
"grad_norm": 0.21570529848712508,
|
|
"learning_rate": 2.9033924127041224e-06,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04917898774147034,
|
|
"step": 5340,
|
|
"valid_targets_mean": 5344.1,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 5.9131156613171,
|
|
"grad_norm": 0.23318304045499405,
|
|
"learning_rate": 2.8748324408566454e-06,
|
|
"loss": 0.1001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047095414251089096,
|
|
"step": 5345,
|
|
"valid_targets_mean": 4667.0,
|
|
"valid_targets_min": 2131
|
|
},
|
|
{
|
|
"epoch": 5.918649695628113,
|
|
"grad_norm": 0.2213096477089317,
|
|
"learning_rate": 2.846402751739463e-06,
|
|
"loss": 0.0943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04613789916038513,
|
|
"step": 5350,
|
|
"valid_targets_mean": 4774.7,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 5.924183729939125,
|
|
"grad_norm": 0.2155036772624133,
|
|
"learning_rate": 2.8181035616364536e-06,
|
|
"loss": 0.0958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047325681895017624,
|
|
"step": 5355,
|
|
"valid_targets_mean": 5840.8,
|
|
"valid_targets_min": 2338
|
|
},
|
|
{
|
|
"epoch": 5.929717764250138,
|
|
"grad_norm": 0.23045482683818375,
|
|
"learning_rate": 2.789935085838693e-06,
|
|
"loss": 0.0981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04686537757515907,
|
|
"step": 5360,
|
|
"valid_targets_mean": 4816.4,
|
|
"valid_targets_min": 1674
|
|
},
|
|
{
|
|
"epoch": 5.935251798561151,
|
|
"grad_norm": 0.23606907939433505,
|
|
"learning_rate": 2.761897538642828e-06,
|
|
"loss": 0.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05562064051628113,
|
|
"step": 5365,
|
|
"valid_targets_mean": 5732.7,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 5.940785832872164,
|
|
"grad_norm": 0.21496323680378623,
|
|
"learning_rate": 2.733991133349434e-06,
|
|
"loss": 0.095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038572560995817184,
|
|
"step": 5370,
|
|
"valid_targets_mean": 4115.7,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 5.946319867183177,
|
|
"grad_norm": 0.22849226855351992,
|
|
"learning_rate": 2.706216082261408e-06,
|
|
"loss": 0.0954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04715319350361824,
|
|
"step": 5375,
|
|
"valid_targets_mean": 5378.9,
|
|
"valid_targets_min": 2469
|
|
},
|
|
{
|
|
"epoch": 5.951853901494189,
|
|
"grad_norm": 0.22534943956068945,
|
|
"learning_rate": 2.678572596682354e-06,
|
|
"loss": 0.0789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03633340075612068,
|
|
"step": 5380,
|
|
"valid_targets_mean": 4137.5,
|
|
"valid_targets_min": 1891
|
|
},
|
|
{
|
|
"epoch": 5.957387935805202,
|
|
"grad_norm": 0.22497875005678122,
|
|
"learning_rate": 2.651060886914949e-06,
|
|
"loss": 0.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036558959633111954,
|
|
"step": 5385,
|
|
"valid_targets_mean": 4230.2,
|
|
"valid_targets_min": 1879
|
|
},
|
|
{
|
|
"epoch": 5.962921970116215,
|
|
"grad_norm": 0.22473026813893437,
|
|
"learning_rate": 2.623681162259386e-06,
|
|
"loss": 0.0672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03210429102182388,
|
|
"step": 5390,
|
|
"valid_targets_mean": 4060.4,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 5.968456004427227,
|
|
"grad_norm": 0.2688832422929091,
|
|
"learning_rate": 2.596433631011732e-06,
|
|
"loss": 0.0694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03812266141176224,
|
|
"step": 5395,
|
|
"valid_targets_mean": 4318.9,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 5.97399003873824,
|
|
"grad_norm": 0.25749309111006674,
|
|
"learning_rate": 2.569318500462392e-06,
|
|
"loss": 0.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03529876843094826,
|
|
"step": 5400,
|
|
"valid_targets_mean": 4196.6,
|
|
"valid_targets_min": 2018
|
|
},
|
|
{
|
|
"epoch": 5.979524073049253,
|
|
"grad_norm": 0.23337191064636828,
|
|
"learning_rate": 2.5423359768944967e-06,
|
|
"loss": 0.072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0353553332388401,
|
|
"step": 5405,
|
|
"valid_targets_mean": 4083.7,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 5.985058107360266,
|
|
"grad_norm": 0.22862376038231164,
|
|
"learning_rate": 2.5154862655823563e-06,
|
|
"loss": 0.0735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036466386169195175,
|
|
"step": 5410,
|
|
"valid_targets_mean": 4253.7,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 5.990592141671279,
|
|
"grad_norm": 0.22955140120017462,
|
|
"learning_rate": 2.4887695707898728e-06,
|
|
"loss": 0.0713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03640773519873619,
|
|
"step": 5415,
|
|
"valid_targets_mean": 4333.8,
|
|
"valid_targets_min": 1885
|
|
},
|
|
{
|
|
"epoch": 5.996126175982291,
|
|
"grad_norm": 0.2125943251045193,
|
|
"learning_rate": 2.4621860957690123e-06,
|
|
"loss": 0.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03555760160088539,
|
|
"step": 5420,
|
|
"valid_targets_mean": 4365.4,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 6.001106806862203,
|
|
"grad_norm": 0.26368627735327654,
|
|
"learning_rate": 2.435736042758252e-06,
|
|
"loss": 0.0677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.059096869081258774,
|
|
"step": 5425,
|
|
"valid_targets_mean": 7176.8,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 6.0066408411732155,
|
|
"grad_norm": 0.47193555298918144,
|
|
"learning_rate": 2.409419612981023e-06,
|
|
"loss": 0.1281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07251014560461044,
|
|
"step": 5430,
|
|
"valid_targets_mean": 7152.2,
|
|
"valid_targets_min": 1817
|
|
},
|
|
{
|
|
"epoch": 6.012174875484228,
|
|
"grad_norm": 0.3653661869376702,
|
|
"learning_rate": 2.383237006644208e-06,
|
|
"loss": 0.1413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06588508933782578,
|
|
"step": 5435,
|
|
"valid_targets_mean": 6413.1,
|
|
"valid_targets_min": 2694
|
|
},
|
|
{
|
|
"epoch": 6.017708909795241,
|
|
"grad_norm": 0.2862731006886899,
|
|
"learning_rate": 2.3571884229365984e-06,
|
|
"loss": 0.1374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06910615414381027,
|
|
"step": 5440,
|
|
"valid_targets_mean": 6742.4,
|
|
"valid_targets_min": 2529
|
|
},
|
|
{
|
|
"epoch": 6.023242944106253,
|
|
"grad_norm": 0.23404931027479792,
|
|
"learning_rate": 2.3312740600273888e-06,
|
|
"loss": 0.1378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06342857331037521,
|
|
"step": 5445,
|
|
"valid_targets_mean": 6891.3,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 6.028776978417266,
|
|
"grad_norm": 0.21433044642997082,
|
|
"learning_rate": 2.3054941150646525e-06,
|
|
"loss": 0.1313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06521899253129959,
|
|
"step": 5450,
|
|
"valid_targets_mean": 6714.7,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 6.034311012728279,
|
|
"grad_norm": 0.20433087046826062,
|
|
"learning_rate": 2.2798487841738727e-06,
|
|
"loss": 0.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0649661123752594,
|
|
"step": 5455,
|
|
"valid_targets_mean": 7105.4,
|
|
"valid_targets_min": 1845
|
|
},
|
|
{
|
|
"epoch": 6.039845047039291,
|
|
"grad_norm": 0.19088713467699442,
|
|
"learning_rate": 2.2543382624564235e-06,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05487219616770744,
|
|
"step": 5460,
|
|
"valid_targets_mean": 6475.6,
|
|
"valid_targets_min": 2297
|
|
},
|
|
{
|
|
"epoch": 6.045379081350305,
|
|
"grad_norm": 0.19942065796995198,
|
|
"learning_rate": 2.22896274398809e-06,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05961311236023903,
|
|
"step": 5465,
|
|
"valid_targets_mean": 6744.1,
|
|
"valid_targets_min": 2793
|
|
},
|
|
{
|
|
"epoch": 6.050913115661317,
|
|
"grad_norm": 0.19120702109483861,
|
|
"learning_rate": 2.2037224218176067e-06,
|
|
"loss": 0.1113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05088047310709953,
|
|
"step": 5470,
|
|
"valid_targets_mean": 6725.4,
|
|
"valid_targets_min": 2520
|
|
},
|
|
{
|
|
"epoch": 6.05644714997233,
|
|
"grad_norm": 0.20696475872471462,
|
|
"learning_rate": 2.1786174879651646e-06,
|
|
"loss": 0.1119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.050160735845565796,
|
|
"step": 5475,
|
|
"valid_targets_mean": 6286.6,
|
|
"valid_targets_min": 2643
|
|
},
|
|
{
|
|
"epoch": 6.061981184283343,
|
|
"grad_norm": 0.2838515581035275,
|
|
"learning_rate": 2.15364813342098e-06,
|
|
"loss": 0.115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055874865502119064,
|
|
"step": 5480,
|
|
"valid_targets_mean": 6647.1,
|
|
"valid_targets_min": 2218
|
|
},
|
|
{
|
|
"epoch": 6.067515218594355,
|
|
"grad_norm": 0.24429025940738258,
|
|
"learning_rate": 2.128814548143814e-06,
|
|
"loss": 0.1127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05764148011803627,
|
|
"step": 5485,
|
|
"valid_targets_mean": 6991.6,
|
|
"valid_targets_min": 3271
|
|
},
|
|
{
|
|
"epoch": 6.073049252905368,
|
|
"grad_norm": 0.22561127923089053,
|
|
"learning_rate": 2.1041169210595445e-06,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05936726927757263,
|
|
"step": 5490,
|
|
"valid_targets_mean": 7175.3,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 6.0785832872163805,
|
|
"grad_norm": 0.22212852022644294,
|
|
"learning_rate": 2.0795554400597286e-06,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04884013533592224,
|
|
"step": 5495,
|
|
"valid_targets_mean": 6327.0,
|
|
"valid_targets_min": 1224
|
|
},
|
|
{
|
|
"epoch": 6.084117321527393,
|
|
"grad_norm": 0.21393129624832247,
|
|
"learning_rate": 2.0551302920001493e-06,
|
|
"loss": 0.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06428065896034241,
|
|
"step": 5500,
|
|
"valid_targets_mean": 7451.6,
|
|
"valid_targets_min": 1864
|
|
},
|
|
{
|
|
"epoch": 6.089651355838406,
|
|
"grad_norm": 0.2087036431848382,
|
|
"learning_rate": 2.0308416626994364e-06,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06320202350616455,
|
|
"step": 5505,
|
|
"valid_targets_mean": 7721.9,
|
|
"valid_targets_min": 3662
|
|
},
|
|
{
|
|
"epoch": 6.095185390149419,
|
|
"grad_norm": 0.18708347512612522,
|
|
"learning_rate": 2.0066897369376102e-06,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.052226606756448746,
|
|
"step": 5510,
|
|
"valid_targets_mean": 7039.7,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 6.100719424460432,
|
|
"grad_norm": 0.20885772048522724,
|
|
"learning_rate": 1.982674698454703e-06,
|
|
"loss": 0.1101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.055415648967027664,
|
|
"step": 5515,
|
|
"valid_targets_mean": 7115.9,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 6.1062534587714445,
|
|
"grad_norm": 0.2033519261853173,
|
|
"learning_rate": 1.958796729949355e-06,
|
|
"loss": 0.101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04657214879989624,
|
|
"step": 5520,
|
|
"valid_targets_mean": 6713.1,
|
|
"valid_targets_min": 2543
|
|
},
|
|
{
|
|
"epoch": 6.111787493082457,
|
|
"grad_norm": 0.21636598733741638,
|
|
"learning_rate": 1.9350560130774234e-06,
|
|
"loss": 0.0997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.053439270704984665,
|
|
"step": 5525,
|
|
"valid_targets_mean": 6488.6,
|
|
"valid_targets_min": 2556
|
|
},
|
|
{
|
|
"epoch": 6.11732152739347,
|
|
"grad_norm": 0.212204397525806,
|
|
"learning_rate": 1.911452728450589e-06,
|
|
"loss": 0.0999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05276120826601982,
|
|
"step": 5530,
|
|
"valid_targets_mean": 7216.3,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 6.122855561704482,
|
|
"grad_norm": 0.20517538957361184,
|
|
"learning_rate": 1.887987055635001e-06,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046480581164360046,
|
|
"step": 5535,
|
|
"valid_targets_mean": 6421.9,
|
|
"valid_targets_min": 2444
|
|
},
|
|
{
|
|
"epoch": 6.128389596015495,
|
|
"grad_norm": 0.21771474389513235,
|
|
"learning_rate": 1.8646591731499053e-06,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04300164803862572,
|
|
"step": 5540,
|
|
"valid_targets_mean": 6405.9,
|
|
"valid_targets_min": 2545
|
|
},
|
|
{
|
|
"epoch": 6.133923630326508,
|
|
"grad_norm": 0.20624266728265683,
|
|
"learning_rate": 1.841469258466273e-06,
|
|
"loss": 0.0956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047111015766859055,
|
|
"step": 5545,
|
|
"valid_targets_mean": 6837.7,
|
|
"valid_targets_min": 2151
|
|
},
|
|
{
|
|
"epoch": 6.13945766463752,
|
|
"grad_norm": 0.1983984538297293,
|
|
"learning_rate": 1.8184174880054728e-06,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0536317341029644,
|
|
"step": 5550,
|
|
"valid_targets_mean": 7361.1,
|
|
"valid_targets_min": 2378
|
|
},
|
|
{
|
|
"epoch": 6.144991698948534,
|
|
"grad_norm": 0.21196934655380573,
|
|
"learning_rate": 1.7955040371379052e-06,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045640040189027786,
|
|
"step": 5555,
|
|
"valid_targets_mean": 6319.2,
|
|
"valid_targets_min": 2356
|
|
},
|
|
{
|
|
"epoch": 6.150525733259546,
|
|
"grad_norm": 0.21184599881715346,
|
|
"learning_rate": 1.7727290801816877e-06,
|
|
"loss": 0.0964,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041314657777547836,
|
|
"step": 5560,
|
|
"valid_targets_mean": 6630.8,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 6.156059767570559,
|
|
"grad_norm": 0.21228986821072401,
|
|
"learning_rate": 1.750092790401321e-06,
|
|
"loss": 0.0883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04752728343009949,
|
|
"step": 5565,
|
|
"valid_targets_mean": 6972.9,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 6.161593801881572,
|
|
"grad_norm": 0.2160322976500172,
|
|
"learning_rate": 1.7275953400063672e-06,
|
|
"loss": 0.096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046096403151750565,
|
|
"step": 5570,
|
|
"valid_targets_mean": 6707.8,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 6.167127836192584,
|
|
"grad_norm": 0.2008173954902481,
|
|
"learning_rate": 1.7052369001501489e-06,
|
|
"loss": 0.0919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043318573385477066,
|
|
"step": 5575,
|
|
"valid_targets_mean": 6739.1,
|
|
"valid_targets_min": 2150
|
|
},
|
|
{
|
|
"epoch": 6.172661870503597,
|
|
"grad_norm": 0.2002716284364717,
|
|
"learning_rate": 1.6830176409284327e-06,
|
|
"loss": 0.0934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.045532722026109695,
|
|
"step": 5580,
|
|
"valid_targets_mean": 6861.2,
|
|
"valid_targets_min": 2514
|
|
},
|
|
{
|
|
"epoch": 6.17819590481461,
|
|
"grad_norm": 0.19935046559834757,
|
|
"learning_rate": 1.6609377313781539e-06,
|
|
"loss": 0.0976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05056362226605415,
|
|
"step": 5585,
|
|
"valid_targets_mean": 7224.6,
|
|
"valid_targets_min": 2698
|
|
},
|
|
{
|
|
"epoch": 6.183729939125622,
|
|
"grad_norm": 0.20653748117791743,
|
|
"learning_rate": 1.6389973394761116e-06,
|
|
"loss": 0.094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04757365211844444,
|
|
"step": 5590,
|
|
"valid_targets_mean": 6807.6,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 6.189263973436636,
|
|
"grad_norm": 0.19818533807915628,
|
|
"learning_rate": 1.617196632137703e-06,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04539031162858009,
|
|
"step": 5595,
|
|
"valid_targets_mean": 6986.2,
|
|
"valid_targets_min": 2280
|
|
},
|
|
{
|
|
"epoch": 6.194798007747648,
|
|
"grad_norm": 0.20166258302871556,
|
|
"learning_rate": 1.595535775215653e-06,
|
|
"loss": 0.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04186883568763733,
|
|
"step": 5600,
|
|
"valid_targets_mean": 6668.9,
|
|
"valid_targets_min": 3573
|
|
},
|
|
{
|
|
"epoch": 6.200332042058661,
|
|
"grad_norm": 0.2403254196566056,
|
|
"learning_rate": 1.574014933498751e-06,
|
|
"loss": 0.093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05118389055132866,
|
|
"step": 5605,
|
|
"valid_targets_mean": 7348.7,
|
|
"valid_targets_min": 2266
|
|
},
|
|
{
|
|
"epoch": 6.205866076369674,
|
|
"grad_norm": 0.2343022776577193,
|
|
"learning_rate": 1.5526342707105912e-06,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0463598258793354,
|
|
"step": 5610,
|
|
"valid_targets_mean": 6718.5,
|
|
"valid_targets_min": 1852
|
|
},
|
|
{
|
|
"epoch": 6.211400110680686,
|
|
"grad_norm": 0.20474245395725305,
|
|
"learning_rate": 1.5313939495083329e-06,
|
|
"loss": 0.0936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03855031356215477,
|
|
"step": 5615,
|
|
"valid_targets_mean": 6317.0,
|
|
"valid_targets_min": 1890
|
|
},
|
|
{
|
|
"epoch": 6.216934144991699,
|
|
"grad_norm": 0.20389018507643655,
|
|
"learning_rate": 1.5102941314814645e-06,
|
|
"loss": 0.0941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04324701055884361,
|
|
"step": 5620,
|
|
"valid_targets_mean": 6423.0,
|
|
"valid_targets_min": 1961
|
|
},
|
|
{
|
|
"epoch": 6.2224681793027115,
|
|
"grad_norm": 0.22347203143863656,
|
|
"learning_rate": 1.489334977150567e-06,
|
|
"loss": 0.0943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.049007613211870193,
|
|
"step": 5625,
|
|
"valid_targets_mean": 6961.5,
|
|
"valid_targets_min": 2011
|
|
},
|
|
{
|
|
"epoch": 6.228002213613724,
|
|
"grad_norm": 0.29422745024398256,
|
|
"learning_rate": 1.468516645966107e-06,
|
|
"loss": 0.0749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033227451145648956,
|
|
"step": 5630,
|
|
"valid_targets_mean": 4264.4,
|
|
"valid_targets_min": 1664
|
|
},
|
|
{
|
|
"epoch": 6.233536247924737,
|
|
"grad_norm": 0.2453358322792234,
|
|
"learning_rate": 1.4478392963071985e-06,
|
|
"loss": 0.0601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02738841436803341,
|
|
"step": 5635,
|
|
"valid_targets_mean": 4403.8,
|
|
"valid_targets_min": 1896
|
|
},
|
|
{
|
|
"epoch": 6.23907028223575,
|
|
"grad_norm": 0.30540480705047374,
|
|
"learning_rate": 1.4273030854804292e-06,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03664984181523323,
|
|
"step": 5640,
|
|
"valid_targets_mean": 4392.9,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 6.244604316546763,
|
|
"grad_norm": 0.2717082701357272,
|
|
"learning_rate": 1.4069081697186415e-06,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03534964099526405,
|
|
"step": 5645,
|
|
"valid_targets_mean": 4179.3,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 6.2501383508577755,
|
|
"grad_norm": 0.2475574410090893,
|
|
"learning_rate": 1.386654704179753e-06,
|
|
"loss": 0.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.036007124930620193,
|
|
"step": 5650,
|
|
"valid_targets_mean": 4410.1,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 6.255672385168788,
|
|
"grad_norm": 0.22433925042604627,
|
|
"learning_rate": 1.3665428429455729e-06,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03184202313423157,
|
|
"step": 5655,
|
|
"valid_targets_mean": 4081.9,
|
|
"valid_targets_min": 1820
|
|
},
|
|
{
|
|
"epoch": 6.261206419479801,
|
|
"grad_norm": 0.24733371250383948,
|
|
"learning_rate": 1.346572739020624e-06,
|
|
"loss": 0.0803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040468450635671616,
|
|
"step": 5660,
|
|
"valid_targets_mean": 4430.3,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 6.266740453790813,
|
|
"grad_norm": 0.2194558634851793,
|
|
"learning_rate": 1.326744544331e-06,
|
|
"loss": 0.0735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02739051915705204,
|
|
"step": 5665,
|
|
"valid_targets_mean": 3639.0,
|
|
"valid_targets_min": 1609
|
|
},
|
|
{
|
|
"epoch": 6.272274488101826,
|
|
"grad_norm": 0.22573361756053215,
|
|
"learning_rate": 1.3070584097231764e-06,
|
|
"loss": 0.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03558327630162239,
|
|
"step": 5670,
|
|
"valid_targets_mean": 4169.2,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 6.277808522412839,
|
|
"grad_norm": 0.23202403327961435,
|
|
"learning_rate": 1.2875144849628973e-06,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.039374466985464096,
|
|
"step": 5675,
|
|
"valid_targets_mean": 4387.5,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 6.283342556723852,
|
|
"grad_norm": 0.23588444742883982,
|
|
"learning_rate": 1.2681129187340147e-06,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.041341233998537064,
|
|
"step": 5680,
|
|
"valid_targets_mean": 4586.4,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 6.288876591034865,
|
|
"grad_norm": 0.24261143005195893,
|
|
"learning_rate": 1.2488538586373645e-06,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035322047770023346,
|
|
"step": 5685,
|
|
"valid_targets_mean": 4150.2,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 6.294410625345877,
|
|
"grad_norm": 0.23626406873269493,
|
|
"learning_rate": 1.2297374511896387e-06,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03342689946293831,
|
|
"step": 5690,
|
|
"valid_targets_mean": 4058.3,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 6.29994465965689,
|
|
"grad_norm": 0.22893138639014132,
|
|
"learning_rate": 1.2107638418222733e-06,
|
|
"loss": 0.0739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03557369112968445,
|
|
"step": 5695,
|
|
"valid_targets_mean": 4309.4,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 6.305478693967903,
|
|
"grad_norm": 0.20742676858737275,
|
|
"learning_rate": 1.1919331748803531e-06,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032634858042001724,
|
|
"step": 5700,
|
|
"valid_targets_mean": 4254.8,
|
|
"valid_targets_min": 1915
|
|
},
|
|
{
|
|
"epoch": 6.311012728278915,
|
|
"grad_norm": 0.21245971613137082,
|
|
"learning_rate": 1.173245593621486e-06,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033117055892944336,
|
|
"step": 5705,
|
|
"valid_targets_mean": 3897.5,
|
|
"valid_targets_min": 1699
|
|
},
|
|
{
|
|
"epoch": 6.316546762589928,
|
|
"grad_norm": 0.2211524280699169,
|
|
"learning_rate": 1.1547012402147461e-06,
|
|
"loss": 0.0735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03834658861160278,
|
|
"step": 5710,
|
|
"valid_targets_mean": 4873.8,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 6.3220807969009405,
|
|
"grad_norm": 0.22494584076410562,
|
|
"learning_rate": 1.1363002557395663e-06,
|
|
"loss": 0.0696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03914666920900345,
|
|
"step": 5715,
|
|
"valid_targets_mean": 4314.8,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 6.327614831211953,
|
|
"grad_norm": 0.21502150042346616,
|
|
"learning_rate": 1.1180427801846827e-06,
|
|
"loss": 0.0691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028126897290349007,
|
|
"step": 5720,
|
|
"valid_targets_mean": 3668.5,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 6.333148865522967,
|
|
"grad_norm": 0.2415860232914858,
|
|
"learning_rate": 1.0999289524470537e-06,
|
|
"loss": 0.0792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0350409634411335,
|
|
"step": 5725,
|
|
"valid_targets_mean": 4198.7,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 6.338682899833979,
|
|
"grad_norm": 0.22985989637869847,
|
|
"learning_rate": 1.0819589103308204e-06,
|
|
"loss": 0.0697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030462943017482758,
|
|
"step": 5730,
|
|
"valid_targets_mean": 4426.8,
|
|
"valid_targets_min": 1624
|
|
},
|
|
{
|
|
"epoch": 6.344216934144992,
|
|
"grad_norm": 0.23139369403799398,
|
|
"learning_rate": 1.064132790546246e-06,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03156794235110283,
|
|
"step": 5735,
|
|
"valid_targets_mean": 3844.5,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 6.3497509684560045,
|
|
"grad_norm": 0.240471213429906,
|
|
"learning_rate": 1.0464507287086744e-06,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03767099231481552,
|
|
"step": 5740,
|
|
"valid_targets_mean": 4607.4,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 6.355285002767017,
|
|
"grad_norm": 0.22192997179412693,
|
|
"learning_rate": 1.0289128593375119e-06,
|
|
"loss": 0.073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04201770946383476,
|
|
"step": 5745,
|
|
"valid_targets_mean": 4620.0,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 6.36081903707803,
|
|
"grad_norm": 0.2047492893678803,
|
|
"learning_rate": 1.011519315855185e-06,
|
|
"loss": 0.0688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030818996950984,
|
|
"step": 5750,
|
|
"valid_targets_mean": 4062.0,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 6.366353071389042,
|
|
"grad_norm": 0.21823041506261645,
|
|
"learning_rate": 9.94270230586145e-07,
|
|
"loss": 0.0755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03921927139163017,
|
|
"step": 5755,
|
|
"valid_targets_mean": 4552.9,
|
|
"valid_targets_min": 1687
|
|
},
|
|
{
|
|
"epoch": 6.371887105700055,
|
|
"grad_norm": 0.2220340611729739,
|
|
"learning_rate": 9.771657347558428e-07,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031742434948682785,
|
|
"step": 5760,
|
|
"valid_targets_mean": 4288.0,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 6.377421140011068,
|
|
"grad_norm": 0.21745186872682112,
|
|
"learning_rate": 9.602059584897506e-07,
|
|
"loss": 0.0708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03511330857872963,
|
|
"step": 5765,
|
|
"valid_targets_mean": 4494.9,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 6.382955174322081,
|
|
"grad_norm": 0.21894433232405924,
|
|
"learning_rate": 9.433910308123572e-07,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03947247937321663,
|
|
"step": 5770,
|
|
"valid_targets_mean": 4560.7,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 6.388489208633094,
|
|
"grad_norm": 0.22995324546061463,
|
|
"learning_rate": 9.267210796461823e-07,
|
|
"loss": 0.0655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03699576109647751,
|
|
"step": 5775,
|
|
"valid_targets_mean": 4525.4,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 6.394023242944106,
|
|
"grad_norm": 0.22563307465188687,
|
|
"learning_rate": 9.101962318108226e-07,
|
|
"loss": 0.0678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03179671987891197,
|
|
"step": 5780,
|
|
"valid_targets_mean": 4337.6,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 6.399557277255119,
|
|
"grad_norm": 0.22470922312893973,
|
|
"learning_rate": 8.93816613021965e-07,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033924076706171036,
|
|
"step": 5785,
|
|
"valid_targets_mean": 4064.8,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 6.405091311566132,
|
|
"grad_norm": 0.2322852894397586,
|
|
"learning_rate": 8.775823478904488e-07,
|
|
"loss": 0.0698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03452476114034653,
|
|
"step": 5790,
|
|
"valid_targets_mean": 4271.5,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 6.410625345877144,
|
|
"grad_norm": 0.24177928417312386,
|
|
"learning_rate": 8.61493559921307e-07,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037243809551000595,
|
|
"step": 5795,
|
|
"valid_targets_mean": 4530.3,
|
|
"valid_targets_min": 1812
|
|
},
|
|
{
|
|
"epoch": 6.416159380188157,
|
|
"grad_norm": 0.2216906788415978,
|
|
"learning_rate": 8.455503715128266e-07,
|
|
"loss": 0.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.038190603256225586,
|
|
"step": 5800,
|
|
"valid_targets_mean": 5182.0,
|
|
"valid_targets_min": 2171
|
|
},
|
|
{
|
|
"epoch": 6.4216934144991695,
|
|
"grad_norm": 0.2120593332647113,
|
|
"learning_rate": 8.297529039556274e-07,
|
|
"loss": 0.0636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032920487225055695,
|
|
"step": 5805,
|
|
"valid_targets_mean": 4341.3,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 6.427227448810182,
|
|
"grad_norm": 0.21228521891675042,
|
|
"learning_rate": 8.141012774317269e-07,
|
|
"loss": 0.061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0351475290954113,
|
|
"step": 5810,
|
|
"valid_targets_mean": 4484.4,
|
|
"valid_targets_min": 1651
|
|
},
|
|
{
|
|
"epoch": 6.432761483121196,
|
|
"grad_norm": 0.19448488726474453,
|
|
"learning_rate": 7.98595611013635e-07,
|
|
"loss": 0.0635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02942313253879547,
|
|
"step": 5815,
|
|
"valid_targets_mean": 4192.0,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 6.438295517432208,
|
|
"grad_norm": 0.20833364189105583,
|
|
"learning_rate": 7.832360226634361e-07,
|
|
"loss": 0.0669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03339335694909096,
|
|
"step": 5820,
|
|
"valid_targets_mean": 4869.7,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 6.443829551743221,
|
|
"grad_norm": 0.19617265316913424,
|
|
"learning_rate": 7.680226292319082e-07,
|
|
"loss": 0.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02662007510662079,
|
|
"step": 5825,
|
|
"valid_targets_mean": 3970.9,
|
|
"valid_targets_min": 1643
|
|
},
|
|
{
|
|
"epoch": 6.4493635860542335,
|
|
"grad_norm": 0.2179831646410156,
|
|
"learning_rate": 7.52955546457621e-07,
|
|
"loss": 0.0669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.034840185195207596,
|
|
"step": 5830,
|
|
"valid_targets_mean": 4239.2,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 6.454897620365246,
|
|
"grad_norm": 0.21340091815248927,
|
|
"learning_rate": 7.380348889660661e-07,
|
|
"loss": 0.0637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033205918967723846,
|
|
"step": 5835,
|
|
"valid_targets_mean": 4571.5,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 6.460431654676259,
|
|
"grad_norm": 0.21409959567631628,
|
|
"learning_rate": 7.232607702687699e-07,
|
|
"loss": 0.0711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03569451719522476,
|
|
"step": 5840,
|
|
"valid_targets_mean": 4433.2,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 6.465965688987271,
|
|
"grad_norm": 0.20028490308433225,
|
|
"learning_rate": 7.086333027624493e-07,
|
|
"loss": 0.0625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028834521770477295,
|
|
"step": 5845,
|
|
"valid_targets_mean": 4008.7,
|
|
"valid_targets_min": 1371
|
|
},
|
|
{
|
|
"epoch": 6.471499723298284,
|
|
"grad_norm": 0.28020985868169035,
|
|
"learning_rate": 6.941525977281393e-07,
|
|
"loss": 0.0706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03831277787685394,
|
|
"step": 5850,
|
|
"valid_targets_mean": 5755.9,
|
|
"valid_targets_min": 1607
|
|
},
|
|
{
|
|
"epoch": 6.477033757609298,
|
|
"grad_norm": 0.32129073672421576,
|
|
"learning_rate": 6.798187653303534e-07,
|
|
"loss": 0.0826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042238298803567886,
|
|
"step": 5855,
|
|
"valid_targets_mean": 5835.6,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 6.48256779192031,
|
|
"grad_norm": 0.2736503696001417,
|
|
"learning_rate": 6.656319146162516e-07,
|
|
"loss": 0.0875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03857993707060814,
|
|
"step": 5860,
|
|
"valid_targets_mean": 5367.4,
|
|
"valid_targets_min": 2272
|
|
},
|
|
{
|
|
"epoch": 6.488101826231323,
|
|
"grad_norm": 0.23936583896726388,
|
|
"learning_rate": 6.515921535147974e-07,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03267192468047142,
|
|
"step": 5865,
|
|
"valid_targets_mean": 5459.6,
|
|
"valid_targets_min": 1924
|
|
},
|
|
{
|
|
"epoch": 6.493635860542335,
|
|
"grad_norm": 0.21675915459435122,
|
|
"learning_rate": 6.376995888359516e-07,
|
|
"loss": 0.0842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.040545906871557236,
|
|
"step": 5870,
|
|
"valid_targets_mean": 5662.9,
|
|
"valid_targets_min": 2081
|
|
},
|
|
{
|
|
"epoch": 6.499169894853348,
|
|
"grad_norm": 0.19691958549470645,
|
|
"learning_rate": 6.239543262698422e-07,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.035456351935863495,
|
|
"step": 5875,
|
|
"valid_targets_mean": 5408.0,
|
|
"valid_targets_min": 1663
|
|
},
|
|
{
|
|
"epoch": 6.504703929164361,
|
|
"grad_norm": 0.18219063079375922,
|
|
"learning_rate": 6.103564703859799e-07,
|
|
"loss": 0.0697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037758808583021164,
|
|
"step": 5880,
|
|
"valid_targets_mean": 6090.0,
|
|
"valid_targets_min": 3233
|
|
},
|
|
{
|
|
"epoch": 6.510237963475373,
|
|
"grad_norm": 0.1787082903748495,
|
|
"learning_rate": 5.969061246324525e-07,
|
|
"loss": 0.0682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030553584918379784,
|
|
"step": 5885,
|
|
"valid_targets_mean": 5283.3,
|
|
"valid_targets_min": 2542
|
|
},
|
|
{
|
|
"epoch": 6.515771997786386,
|
|
"grad_norm": 0.18264859649970874,
|
|
"learning_rate": 5.836033913351302e-07,
|
|
"loss": 0.0674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03753102943301201,
|
|
"step": 5890,
|
|
"valid_targets_mean": 5841.9,
|
|
"valid_targets_min": 3074
|
|
},
|
|
{
|
|
"epoch": 6.5213060320973995,
|
|
"grad_norm": 0.16662900561871258,
|
|
"learning_rate": 5.7044837169691e-07,
|
|
"loss": 0.0636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03211820498108864,
|
|
"step": 5895,
|
|
"valid_targets_mean": 5844.2,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 6.526840066408412,
|
|
"grad_norm": 0.16649539115731712,
|
|
"learning_rate": 5.574411657969125e-07,
|
|
"loss": 0.069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03245365247130394,
|
|
"step": 5900,
|
|
"valid_targets_mean": 5735.6,
|
|
"valid_targets_min": 2746
|
|
},
|
|
{
|
|
"epoch": 6.532374100719425,
|
|
"grad_norm": 0.16434361967617983,
|
|
"learning_rate": 5.445818725897534e-07,
|
|
"loss": 0.0645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031247785314917564,
|
|
"step": 5905,
|
|
"valid_targets_mean": 5778.7,
|
|
"valid_targets_min": 2829
|
|
},
|
|
{
|
|
"epoch": 6.537908135030437,
|
|
"grad_norm": 0.16181782660191443,
|
|
"learning_rate": 5.318705899047727e-07,
|
|
"loss": 0.0602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030019355937838554,
|
|
"step": 5910,
|
|
"valid_targets_mean": 5638.1,
|
|
"valid_targets_min": 1955
|
|
},
|
|
{
|
|
"epoch": 6.54344216934145,
|
|
"grad_norm": 0.15453935494744914,
|
|
"learning_rate": 5.193074144452892e-07,
|
|
"loss": 0.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02768268622457981,
|
|
"step": 5915,
|
|
"valid_targets_mean": 5649.6,
|
|
"valid_targets_min": 1657
|
|
},
|
|
{
|
|
"epoch": 6.548976203652463,
|
|
"grad_norm": 0.16303444707259854,
|
|
"learning_rate": 5.068924417878807e-07,
|
|
"loss": 0.0571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030390538275241852,
|
|
"step": 5920,
|
|
"valid_targets_mean": 5870.3,
|
|
"valid_targets_min": 3122
|
|
},
|
|
{
|
|
"epoch": 6.554510237963475,
|
|
"grad_norm": 0.162724661938449,
|
|
"learning_rate": 4.946257663816334e-07,
|
|
"loss": 0.057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02618623711168766,
|
|
"step": 5925,
|
|
"valid_targets_mean": 5898.3,
|
|
"valid_targets_min": 2722
|
|
},
|
|
{
|
|
"epoch": 6.560044272274488,
|
|
"grad_norm": 0.16815058862489182,
|
|
"learning_rate": 4.825074815474495e-07,
|
|
"loss": 0.0599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030746882781386375,
|
|
"step": 5930,
|
|
"valid_targets_mean": 5430.9,
|
|
"valid_targets_min": 1832
|
|
},
|
|
{
|
|
"epoch": 6.5655783065855005,
|
|
"grad_norm": 0.16268319766530676,
|
|
"learning_rate": 4.7053767947730976e-07,
|
|
"loss": 0.0562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02740519307553768,
|
|
"step": 5935,
|
|
"valid_targets_mean": 5711.7,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 6.571112340896514,
|
|
"grad_norm": 0.16185701199142863,
|
|
"learning_rate": 4.587164512335984e-07,
|
|
"loss": 0.0553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028320694342255592,
|
|
"step": 5940,
|
|
"valid_targets_mean": 5817.8,
|
|
"valid_targets_min": 3349
|
|
},
|
|
{
|
|
"epoch": 6.576646375207527,
|
|
"grad_norm": 0.17315465874497984,
|
|
"learning_rate": 4.4704388674838836e-07,
|
|
"loss": 0.0568,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0337056927382946,
|
|
"step": 5945,
|
|
"valid_targets_mean": 6213.9,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 6.582180409518539,
|
|
"grad_norm": 0.16874657796605771,
|
|
"learning_rate": 4.355200748227728e-07,
|
|
"loss": 0.0566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027117937803268433,
|
|
"step": 5950,
|
|
"valid_targets_mean": 5960.8,
|
|
"valid_targets_min": 1760
|
|
},
|
|
{
|
|
"epoch": 6.587714443829552,
|
|
"grad_norm": 0.16953088257827853,
|
|
"learning_rate": 4.241451031261812e-07,
|
|
"loss": 0.0571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02523176372051239,
|
|
"step": 5955,
|
|
"valid_targets_mean": 5431.6,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 6.5932484781405645,
|
|
"grad_norm": 0.17046845562368365,
|
|
"learning_rate": 4.129190581957154e-07,
|
|
"loss": 0.0556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028271449729800224,
|
|
"step": 5960,
|
|
"valid_targets_mean": 5619.5,
|
|
"valid_targets_min": 1402
|
|
},
|
|
{
|
|
"epoch": 6.598782512451577,
|
|
"grad_norm": 0.1658430114537753,
|
|
"learning_rate": 4.0184202543549266e-07,
|
|
"loss": 0.0535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029601609334349632,
|
|
"step": 5965,
|
|
"valid_targets_mean": 5685.0,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 6.60431654676259,
|
|
"grad_norm": 0.16261583595041115,
|
|
"learning_rate": 3.9091408911599016e-07,
|
|
"loss": 0.0512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02375098504126072,
|
|
"step": 5970,
|
|
"valid_targets_mean": 5896.3,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 6.609850581073602,
|
|
"grad_norm": 0.17113988788796894,
|
|
"learning_rate": 3.8013533237341026e-07,
|
|
"loss": 0.0551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024803929030895233,
|
|
"step": 5975,
|
|
"valid_targets_mean": 5286.9,
|
|
"valid_targets_min": 2419
|
|
},
|
|
{
|
|
"epoch": 6.615384615384615,
|
|
"grad_norm": 0.16227669472509565,
|
|
"learning_rate": 3.695058372090432e-07,
|
|
"loss": 0.055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031464751809835434,
|
|
"step": 5980,
|
|
"valid_targets_mean": 5907.0,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 6.6209186496956285,
|
|
"grad_norm": 0.15230008226364655,
|
|
"learning_rate": 3.590256844886475e-07,
|
|
"loss": 0.0491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02285112999379635,
|
|
"step": 5985,
|
|
"valid_targets_mean": 5677.7,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 6.626452684006641,
|
|
"grad_norm": 0.15507053735181311,
|
|
"learning_rate": 3.486949539418327e-07,
|
|
"loss": 0.0496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022178545594215393,
|
|
"step": 5990,
|
|
"valid_targets_mean": 5690.5,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 6.631986718317654,
|
|
"grad_norm": 0.15406847130900264,
|
|
"learning_rate": 3.385137241614489e-07,
|
|
"loss": 0.0507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025633057579398155,
|
|
"step": 5995,
|
|
"valid_targets_mean": 5799.1,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 6.637520752628666,
|
|
"grad_norm": 0.16701465983686922,
|
|
"learning_rate": 3.284820726030025e-07,
|
|
"loss": 0.0502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02921949326992035,
|
|
"step": 6000,
|
|
"valid_targets_mean": 5970.1,
|
|
"valid_targets_min": 3079
|
|
},
|
|
{
|
|
"epoch": 6.643054786939679,
|
|
"grad_norm": 0.1491935829405977,
|
|
"learning_rate": 3.1860007558404125e-07,
|
|
"loss": 0.0489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023413682356476784,
|
|
"step": 6005,
|
|
"valid_targets_mean": 5799.0,
|
|
"valid_targets_min": 2123
|
|
},
|
|
{
|
|
"epoch": 6.648588821250692,
|
|
"grad_norm": 0.150245854586345,
|
|
"learning_rate": 3.088678082836083e-07,
|
|
"loss": 0.0499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019299032166600227,
|
|
"step": 6010,
|
|
"valid_targets_mean": 5474.9,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 6.654122855561704,
|
|
"grad_norm": 0.14307532650864027,
|
|
"learning_rate": 2.992853447416377e-07,
|
|
"loss": 0.0452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019033873453736305,
|
|
"step": 6015,
|
|
"valid_targets_mean": 5696.5,
|
|
"valid_targets_min": 2150
|
|
},
|
|
{
|
|
"epoch": 6.659656889872717,
|
|
"grad_norm": 0.15702109651212837,
|
|
"learning_rate": 2.8985275785841094e-07,
|
|
"loss": 0.0494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02460307441651821,
|
|
"step": 6020,
|
|
"valid_targets_mean": 5482.0,
|
|
"valid_targets_min": 1925
|
|
},
|
|
{
|
|
"epoch": 6.6651909241837295,
|
|
"grad_norm": 0.16295466209332987,
|
|
"learning_rate": 2.805701193939947e-07,
|
|
"loss": 0.0479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.023025652393698692,
|
|
"step": 6025,
|
|
"valid_targets_mean": 5700.8,
|
|
"valid_targets_min": 2524
|
|
},
|
|
{
|
|
"epoch": 6.670724958494743,
|
|
"grad_norm": 0.17077714740007088,
|
|
"learning_rate": 2.71437499967695e-07,
|
|
"loss": 0.0475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02456340752542019,
|
|
"step": 6030,
|
|
"valid_targets_mean": 5443.2,
|
|
"valid_targets_min": 1992
|
|
},
|
|
{
|
|
"epoch": 6.676258992805756,
|
|
"grad_norm": 0.15583333265496774,
|
|
"learning_rate": 2.624549690575284e-07,
|
|
"loss": 0.0471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0222936999052763,
|
|
"step": 6035,
|
|
"valid_targets_mean": 5506.9,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 6.681793027116768,
|
|
"grad_norm": 0.14726673158584985,
|
|
"learning_rate": 2.5362259499967623e-07,
|
|
"loss": 0.0471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021626999601721764,
|
|
"step": 6040,
|
|
"valid_targets_mean": 5554.7,
|
|
"valid_targets_min": 2289
|
|
},
|
|
{
|
|
"epoch": 6.687327061427781,
|
|
"grad_norm": 0.1432897815929805,
|
|
"learning_rate": 2.449404449879844e-07,
|
|
"loss": 0.0456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021053863689303398,
|
|
"step": 6045,
|
|
"valid_targets_mean": 5283.6,
|
|
"valid_targets_min": 2511
|
|
},
|
|
{
|
|
"epoch": 6.6928610957387935,
|
|
"grad_norm": 0.1497531374262155,
|
|
"learning_rate": 2.3640858507343766e-07,
|
|
"loss": 0.0438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02070341445505619,
|
|
"step": 6050,
|
|
"valid_targets_mean": 5367.8,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 6.698395130049806,
|
|
"grad_norm": 0.1481851861278496,
|
|
"learning_rate": 2.2802708016366636e-07,
|
|
"loss": 0.0458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.022724062204360962,
|
|
"step": 6055,
|
|
"valid_targets_mean": 5643.5,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 6.703929164360819,
|
|
"grad_norm": 0.14848419552320255,
|
|
"learning_rate": 2.1979599402244256e-07,
|
|
"loss": 0.042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021349245682358742,
|
|
"step": 6060,
|
|
"valid_targets_mean": 5522.8,
|
|
"valid_targets_min": 2271
|
|
},
|
|
{
|
|
"epoch": 6.709463198671831,
|
|
"grad_norm": 0.1593119673142632,
|
|
"learning_rate": 2.1171538926920697e-07,
|
|
"loss": 0.0435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0206944290548563,
|
|
"step": 6065,
|
|
"valid_targets_mean": 5634.4,
|
|
"valid_targets_min": 3397
|
|
},
|
|
{
|
|
"epoch": 6.714997232982844,
|
|
"grad_norm": 0.16781758974714178,
|
|
"learning_rate": 2.0378532737858724e-07,
|
|
"loss": 0.0429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.024538887664675713,
|
|
"step": 6070,
|
|
"valid_targets_mean": 5882.0,
|
|
"valid_targets_min": 1620
|
|
},
|
|
{
|
|
"epoch": 6.7205312672938575,
|
|
"grad_norm": 0.15427347780497472,
|
|
"learning_rate": 1.9600586867992045e-07,
|
|
"loss": 0.0445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.02108161151409149,
|
|
"step": 6075,
|
|
"valid_targets_mean": 5484.1,
|
|
"valid_targets_min": 1983
|
|
},
|
|
{
|
|
"epoch": 6.72606530160487,
|
|
"grad_norm": 0.1590775756796442,
|
|
"learning_rate": 1.8837707235681347e-07,
|
|
"loss": 0.0448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.025591416284441948,
|
|
"step": 6080,
|
|
"valid_targets_mean": 6050.4,
|
|
"valid_targets_min": 2705
|
|
},
|
|
{
|
|
"epoch": 6.731599335915883,
|
|
"grad_norm": 0.14284190422291548,
|
|
"learning_rate": 1.8089899644667673e-07,
|
|
"loss": 0.0414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019970562309026718,
|
|
"step": 6085,
|
|
"valid_targets_mean": 5951.2,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 6.737133370226895,
|
|
"grad_norm": 0.15288214426850036,
|
|
"learning_rate": 1.7357169784029348e-07,
|
|
"loss": 0.0448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.021346738561987877,
|
|
"step": 6090,
|
|
"valid_targets_mean": 5333.5,
|
|
"valid_targets_min": 2761
|
|
},
|
|
{
|
|
"epoch": 6.742667404537908,
|
|
"grad_norm": 0.15933272800140702,
|
|
"learning_rate": 1.6639523228137778e-07,
|
|
"loss": 0.0435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026872599497437477,
|
|
"step": 6095,
|
|
"valid_targets_mean": 5898.5,
|
|
"valid_targets_min": 1918
|
|
},
|
|
{
|
|
"epoch": 6.748201438848921,
|
|
"grad_norm": 0.15210885857654943,
|
|
"learning_rate": 1.5936965436615492e-07,
|
|
"loss": 0.0424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.019810838624835014,
|
|
"step": 6100,
|
|
"valid_targets_mean": 5690.7,
|
|
"valid_targets_min": 2491
|
|
},
|
|
{
|
|
"epoch": 6.753735473159933,
|
|
"grad_norm": 0.16212466670735068,
|
|
"learning_rate": 1.524950175429507e-07,
|
|
"loss": 0.0473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026772432029247284,
|
|
"step": 6105,
|
|
"valid_targets_mean": 5855.6,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 6.759269507470947,
|
|
"grad_norm": 0.2781957807032019,
|
|
"learning_rate": 1.4577137411177166e-07,
|
|
"loss": 0.0601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.028757303953170776,
|
|
"step": 6110,
|
|
"valid_targets_mean": 4478.1,
|
|
"valid_targets_min": 1152
|
|
},
|
|
{
|
|
"epoch": 6.764803541781959,
|
|
"grad_norm": 0.285966123432216,
|
|
"learning_rate": 1.3919877522392322e-07,
|
|
"loss": 0.0616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.027906084433197975,
|
|
"step": 6115,
|
|
"valid_targets_mean": 4434.4,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 6.770337576092972,
|
|
"grad_norm": 0.2660206536747435,
|
|
"learning_rate": 1.3277727088160775e-07,
|
|
"loss": 0.0606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033058829605579376,
|
|
"step": 6120,
|
|
"valid_targets_mean": 4586.5,
|
|
"valid_targets_min": 1203
|
|
},
|
|
{
|
|
"epoch": 6.775871610403985,
|
|
"grad_norm": 0.2815578821576788,
|
|
"learning_rate": 1.2650690993755377e-07,
|
|
"loss": 0.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03213704749941826,
|
|
"step": 6125,
|
|
"valid_targets_mean": 4198.7,
|
|
"valid_targets_min": 1898
|
|
},
|
|
{
|
|
"epoch": 6.781405644714997,
|
|
"grad_norm": 0.27812158818342514,
|
|
"learning_rate": 1.2038774009463406e-07,
|
|
"loss": 0.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03444572538137436,
|
|
"step": 6130,
|
|
"valid_targets_mean": 4499.3,
|
|
"valid_targets_min": 2282
|
|
},
|
|
{
|
|
"epoch": 6.78693967902601,
|
|
"grad_norm": 0.2538775243898183,
|
|
"learning_rate": 1.1441980790551699e-07,
|
|
"loss": 0.0644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0322687067091465,
|
|
"step": 6135,
|
|
"valid_targets_mean": 4354.3,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 6.7924737133370225,
|
|
"grad_norm": 0.4139407017723169,
|
|
"learning_rate": 1.0860315877229133e-07,
|
|
"loss": 0.0656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0316312201321125,
|
|
"step": 6140,
|
|
"valid_targets_mean": 5987.4,
|
|
"valid_targets_min": 1045
|
|
},
|
|
{
|
|
"epoch": 6.798007747648035,
|
|
"grad_norm": 0.4219852983365904,
|
|
"learning_rate": 1.0293783694614645e-07,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.037688422948122025,
|
|
"step": 6145,
|
|
"valid_targets_mean": 7339.2,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 6.803541781959048,
|
|
"grad_norm": 0.4271759301135669,
|
|
"learning_rate": 9.742388552701266e-08,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0405937097966671,
|
|
"step": 6150,
|
|
"valid_targets_mean": 7609.3,
|
|
"valid_targets_min": 2489
|
|
},
|
|
{
|
|
"epoch": 6.809075816270061,
|
|
"grad_norm": 0.47550541018352915,
|
|
"learning_rate": 9.206134646325026e-08,
|
|
"loss": 0.0858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042033206671476364,
|
|
"step": 6155,
|
|
"valid_targets_mean": 8182.5,
|
|
"valid_targets_min": 2323
|
|
},
|
|
{
|
|
"epoch": 6.814609850581074,
|
|
"grad_norm": 0.4541435636889632,
|
|
"learning_rate": 8.685026055131662e-08,
|
|
"loss": 0.0852,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04260958358645439,
|
|
"step": 6160,
|
|
"valid_targets_mean": 7945.0,
|
|
"valid_targets_min": 2386
|
|
},
|
|
{
|
|
"epoch": 6.820143884892087,
|
|
"grad_norm": 0.4187211676260777,
|
|
"learning_rate": 8.179066743546848e-08,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04068029299378395,
|
|
"step": 6165,
|
|
"valid_targets_mean": 7628.4,
|
|
"valid_targets_min": 2019
|
|
},
|
|
{
|
|
"epoch": 6.825677919203099,
|
|
"grad_norm": 0.7188901584149283,
|
|
"learning_rate": 7.688260560745342e-08,
|
|
"loss": 0.1341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0671887993812561,
|
|
"step": 6170,
|
|
"valid_targets_mean": 13487.5,
|
|
"valid_targets_min": 1304
|
|
},
|
|
{
|
|
"epoch": 6.831211953514112,
|
|
"grad_norm": 0.8134072666671982,
|
|
"learning_rate": 7.212611240621448e-08,
|
|
"loss": 0.1536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08201751857995987,
|
|
"step": 6175,
|
|
"valid_targets_mean": 14044.6,
|
|
"valid_targets_min": 2837
|
|
},
|
|
{
|
|
"epoch": 6.836745987825124,
|
|
"grad_norm": 0.7728347504284517,
|
|
"learning_rate": 6.752122401761263e-08,
|
|
"loss": 0.1538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07637709379196167,
|
|
"step": 6180,
|
|
"valid_targets_mean": 12808.5,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 6.842280022136137,
|
|
"grad_norm": 0.4866135651542434,
|
|
"learning_rate": 6.306797547414923e-08,
|
|
"loss": 0.1383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04519471526145935,
|
|
"step": 6185,
|
|
"valid_targets_mean": 7773.3,
|
|
"valid_targets_min": 1300
|
|
},
|
|
{
|
|
"epoch": 6.84781405644715,
|
|
"grad_norm": 0.34132735867861963,
|
|
"learning_rate": 5.876640065469508e-08,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03255182504653931,
|
|
"step": 6190,
|
|
"valid_targets_mean": 8022.5,
|
|
"valid_targets_min": 1221
|
|
},
|
|
{
|
|
"epoch": 6.853348090758162,
|
|
"grad_norm": 0.24281825590975215,
|
|
"learning_rate": 5.4616532284239576e-08,
|
|
"loss": 0.0597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.026530781760811806,
|
|
"step": 6195,
|
|
"valid_targets_mean": 7332.0,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 6.858882125069176,
|
|
"grad_norm": 0.3378643276186545,
|
|
"learning_rate": 5.061840193363754e-08,
|
|
"loss": 0.0734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.043400898575782776,
|
|
"step": 6200,
|
|
"valid_targets_mean": 4677.9,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 6.8644161593801885,
|
|
"grad_norm": 0.2939484612048038,
|
|
"learning_rate": 4.677204001937163e-08,
|
|
"loss": 0.0809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04071999713778496,
|
|
"step": 6205,
|
|
"valid_targets_mean": 5047.0,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 6.869950193691201,
|
|
"grad_norm": 0.26447990416566697,
|
|
"learning_rate": 4.3077475803317006e-08,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03539319708943367,
|
|
"step": 6210,
|
|
"valid_targets_mean": 4560.3,
|
|
"valid_targets_min": 2177
|
|
},
|
|
{
|
|
"epoch": 6.875484228002214,
|
|
"grad_norm": 0.24448712356713942,
|
|
"learning_rate": 3.953473739252145e-08,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03483784198760986,
|
|
"step": 6215,
|
|
"valid_targets_mean": 4750.5,
|
|
"valid_targets_min": 2094
|
|
},
|
|
{
|
|
"epoch": 6.881018262313226,
|
|
"grad_norm": 0.24136576815497512,
|
|
"learning_rate": 3.6143851738992265e-08,
|
|
"loss": 0.0714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03553052619099617,
|
|
"step": 6220,
|
|
"valid_targets_mean": 4360.2,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 6.886552296624239,
|
|
"grad_norm": 0.2540992167336122,
|
|
"learning_rate": 3.290484463948973e-08,
|
|
"loss": 0.0737,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03783242031931877,
|
|
"step": 6225,
|
|
"valid_targets_mean": 4350.1,
|
|
"valid_targets_min": 1350
|
|
},
|
|
{
|
|
"epoch": 6.892086330935252,
|
|
"grad_norm": 0.26436404059943236,
|
|
"learning_rate": 2.981774073533172e-08,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.046336591243743896,
|
|
"step": 6230,
|
|
"valid_targets_mean": 5759.9,
|
|
"valid_targets_min": 2496
|
|
},
|
|
{
|
|
"epoch": 6.897620365246264,
|
|
"grad_norm": 0.28374694012095986,
|
|
"learning_rate": 2.6882563512204972e-08,
|
|
"loss": 0.0886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04141201451420784,
|
|
"step": 6235,
|
|
"valid_targets_mean": 4665.8,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 6.903154399557277,
|
|
"grad_norm": 0.27667918620344506,
|
|
"learning_rate": 2.4099335299987426e-08,
|
|
"loss": 0.0909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.047536592930555344,
|
|
"step": 6240,
|
|
"valid_targets_mean": 5136.1,
|
|
"valid_targets_min": 2075
|
|
},
|
|
{
|
|
"epoch": 6.90868843386829,
|
|
"grad_norm": 0.2801499093244407,
|
|
"learning_rate": 2.146807727257727e-08,
|
|
"loss": 0.0935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04789487645030022,
|
|
"step": 6245,
|
|
"valid_targets_mean": 5201.3,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 6.914222468179303,
|
|
"grad_norm": 0.25953453335013227,
|
|
"learning_rate": 1.898880944773307e-08,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04826788604259491,
|
|
"step": 6250,
|
|
"valid_targets_mean": 5300.8,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 6.919756502490316,
|
|
"grad_norm": 0.2529759224910664,
|
|
"learning_rate": 1.666155068692499e-08,
|
|
"loss": 0.0849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04037823900580406,
|
|
"step": 6255,
|
|
"valid_targets_mean": 4597.2,
|
|
"valid_targets_min": 1963
|
|
},
|
|
{
|
|
"epoch": 6.925290536801328,
|
|
"grad_norm": 0.23381027910263688,
|
|
"learning_rate": 1.4486318695181577e-08,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03696097061038017,
|
|
"step": 6260,
|
|
"valid_targets_mean": 4532.3,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 6.930824571112341,
|
|
"grad_norm": 0.2437542812149771,
|
|
"learning_rate": 1.246313002096544e-08,
|
|
"loss": 0.0904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.042215894907712936,
|
|
"step": 6265,
|
|
"valid_targets_mean": 5127.4,
|
|
"valid_targets_min": 2148
|
|
},
|
|
{
|
|
"epoch": 6.9363586054233535,
|
|
"grad_norm": 0.25411674940759044,
|
|
"learning_rate": 1.0592000056039997e-08,
|
|
"loss": 0.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.048433512449264526,
|
|
"step": 6270,
|
|
"valid_targets_mean": 5093.4,
|
|
"valid_targets_min": 2249
|
|
},
|
|
{
|
|
"epoch": 6.941892639734366,
|
|
"grad_norm": 0.2424493912852331,
|
|
"learning_rate": 8.87294303535402e-09,
|
|
"loss": 0.0845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04118829965591431,
|
|
"step": 6275,
|
|
"valid_targets_mean": 4493.6,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 6.947426674045379,
|
|
"grad_norm": 0.2320256567615275,
|
|
"learning_rate": 7.305972036941722e-09,
|
|
"loss": 0.0856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04444737359881401,
|
|
"step": 6280,
|
|
"valid_targets_mean": 5985.7,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 6.952960708356391,
|
|
"grad_norm": 0.22506455612538573,
|
|
"learning_rate": 5.891098981805065e-09,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.033273909240961075,
|
|
"step": 6285,
|
|
"valid_targets_mean": 4282.6,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 6.958494742667405,
|
|
"grad_norm": 0.22991189057710432,
|
|
"learning_rate": 4.628334633844933e-09,
|
|
"loss": 0.0625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030094504356384277,
|
|
"step": 6290,
|
|
"valid_targets_mean": 4043.1,
|
|
"valid_targets_min": 1453
|
|
},
|
|
{
|
|
"epoch": 6.9640287769784175,
|
|
"grad_norm": 0.23313445838803135,
|
|
"learning_rate": 3.5176885997634247e-09,
|
|
"loss": 0.0607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.030323075130581856,
|
|
"step": 6295,
|
|
"valid_targets_mean": 3962.6,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 6.96956281128943,
|
|
"grad_norm": 0.23322994906696576,
|
|
"learning_rate": 2.5591693289928055e-09,
|
|
"loss": 0.0626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03191163390874863,
|
|
"step": 6300,
|
|
"valid_targets_mean": 4089.9,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 6.975096845600443,
|
|
"grad_norm": 0.21610306998231182,
|
|
"learning_rate": 1.7527841136399937e-09,
|
|
"loss": 0.0618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.031901244074106216,
|
|
"step": 6305,
|
|
"valid_targets_mean": 4647.0,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 6.980630879911455,
|
|
"grad_norm": 0.22791524852331468,
|
|
"learning_rate": 1.098539088422168e-09,
|
|
"loss": 0.0635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03343750908970833,
|
|
"step": 6310,
|
|
"valid_targets_mean": 4220.0,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 6.986164914222468,
|
|
"grad_norm": 0.20503543897763343,
|
|
"learning_rate": 5.964392306223587e-10,
|
|
"loss": 0.063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.029046183452010155,
|
|
"step": 6315,
|
|
"valid_targets_mean": 4065.2,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 6.991698948533481,
|
|
"grad_norm": 0.21235691685325675,
|
|
"learning_rate": 2.464883600539203e-10,
|
|
"loss": 0.0626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.032713327556848526,
|
|
"step": 6320,
|
|
"valid_targets_mean": 4378.8,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 6.997232982844494,
|
|
"grad_norm": 0.21559969865151898,
|
|
"learning_rate": 4.8689139031665724e-11,
|
|
"loss": 0.0622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.03392450883984566,
|
|
"step": 6325,
|
|
"valid_targets_mean": 4250.2,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"step": 6328,
|
|
"total_flos": 1.7062153069854196e+19,
|
|
"train_loss": 0.0,
|
|
"train_runtime": 1.2427,
|
|
"train_samples_per_second": 488356.021,
|
|
"train_steps_per_second": 5092.029
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 6328,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 750,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.7062153069854196e+19,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|