7913 lines
219 KiB
JSON
7913 lines
219 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 3577,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.009784735812133072,
|
|
"grad_norm": 7.664685937206668,
|
|
"learning_rate": 4.46927374301676e-07,
|
|
"loss": 0.5047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40290629863739014,
|
|
"step": 5,
|
|
"valid_targets_mean": 3617.4,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 0.019569471624266144,
|
|
"grad_norm": 8.50433555316219,
|
|
"learning_rate": 1.005586592178771e-06,
|
|
"loss": 0.5159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4588755965232849,
|
|
"step": 10,
|
|
"valid_targets_mean": 3822.6,
|
|
"valid_targets_min": 914
|
|
},
|
|
{
|
|
"epoch": 0.029354207436399216,
|
|
"grad_norm": 8.480800377443527,
|
|
"learning_rate": 1.564245810055866e-06,
|
|
"loss": 0.5094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5068870782852173,
|
|
"step": 15,
|
|
"valid_targets_mean": 3282.5,
|
|
"valid_targets_min": 854
|
|
},
|
|
{
|
|
"epoch": 0.03913894324853229,
|
|
"grad_norm": 6.389055588909931,
|
|
"learning_rate": 2.1229050279329612e-06,
|
|
"loss": 0.4612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47990190982818604,
|
|
"step": 20,
|
|
"valid_targets_mean": 2669.2,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 0.04892367906066536,
|
|
"grad_norm": 4.830165379278388,
|
|
"learning_rate": 2.6815642458100562e-06,
|
|
"loss": 0.4547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47788089513778687,
|
|
"step": 25,
|
|
"valid_targets_mean": 2784.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 0.05870841487279843,
|
|
"grad_norm": 3.079180456646436,
|
|
"learning_rate": 3.240223463687151e-06,
|
|
"loss": 0.4163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3710671663284302,
|
|
"step": 30,
|
|
"valid_targets_mean": 2843.2,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.0684931506849315,
|
|
"grad_norm": 2.7785737491972933,
|
|
"learning_rate": 3.798882681564246e-06,
|
|
"loss": 0.3624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3115715980529785,
|
|
"step": 35,
|
|
"valid_targets_mean": 2966.5,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 0.07827788649706457,
|
|
"grad_norm": 1.5502383905218011,
|
|
"learning_rate": 4.357541899441341e-06,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26140934228897095,
|
|
"step": 40,
|
|
"valid_targets_mean": 3349.2,
|
|
"valid_targets_min": 606
|
|
},
|
|
{
|
|
"epoch": 0.08806262230919765,
|
|
"grad_norm": 1.209196728082173,
|
|
"learning_rate": 4.916201117318436e-06,
|
|
"loss": 0.2769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2740545868873596,
|
|
"step": 45,
|
|
"valid_targets_mean": 2365.2,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 0.09784735812133072,
|
|
"grad_norm": 1.028489362532582,
|
|
"learning_rate": 5.474860335195531e-06,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2979404330253601,
|
|
"step": 50,
|
|
"valid_targets_mean": 2470.4,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 0.10763209393346379,
|
|
"grad_norm": 0.9537486210298864,
|
|
"learning_rate": 6.033519553072626e-06,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2909826934337616,
|
|
"step": 55,
|
|
"valid_targets_mean": 2224.9,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 0.11741682974559686,
|
|
"grad_norm": 0.8658000347460151,
|
|
"learning_rate": 6.592178770949721e-06,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.291208416223526,
|
|
"step": 60,
|
|
"valid_targets_mean": 2375.1,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.12720156555772993,
|
|
"grad_norm": 0.6647224322577404,
|
|
"learning_rate": 7.150837988826816e-06,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24509967863559723,
|
|
"step": 65,
|
|
"valid_targets_mean": 2958.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 0.136986301369863,
|
|
"grad_norm": 0.6073269190290562,
|
|
"learning_rate": 7.709497206703911e-06,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20280693471431732,
|
|
"step": 70,
|
|
"valid_targets_mean": 2785.6,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 0.14677103718199608,
|
|
"grad_norm": 0.6870346313715996,
|
|
"learning_rate": 8.268156424581007e-06,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2646217346191406,
|
|
"step": 75,
|
|
"valid_targets_mean": 2854.3,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 0.15655577299412915,
|
|
"grad_norm": 0.5472437159642437,
|
|
"learning_rate": 8.826815642458101e-06,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19694247841835022,
|
|
"step": 80,
|
|
"valid_targets_mean": 3170.3,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 0.16634050880626222,
|
|
"grad_norm": 0.5389183745441031,
|
|
"learning_rate": 9.385474860335197e-06,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17740264534950256,
|
|
"step": 85,
|
|
"valid_targets_mean": 2783.5,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 0.1761252446183953,
|
|
"grad_norm": 0.5944677207861985,
|
|
"learning_rate": 9.944134078212291e-06,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22743840515613556,
|
|
"step": 90,
|
|
"valid_targets_mean": 2798.4,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 0.18590998043052837,
|
|
"grad_norm": 0.571414117444414,
|
|
"learning_rate": 1.0502793296089386e-05,
|
|
"loss": 0.1932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18952876329421997,
|
|
"step": 95,
|
|
"valid_targets_mean": 2777.1,
|
|
"valid_targets_min": 737
|
|
},
|
|
{
|
|
"epoch": 0.19569471624266144,
|
|
"grad_norm": 0.4604883444760045,
|
|
"learning_rate": 1.1061452513966481e-05,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15866714715957642,
|
|
"step": 100,
|
|
"valid_targets_mean": 3476.1,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 0.2054794520547945,
|
|
"grad_norm": 0.5755387019398245,
|
|
"learning_rate": 1.1620111731843577e-05,
|
|
"loss": 0.1781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17682120203971863,
|
|
"step": 105,
|
|
"valid_targets_mean": 2597.6,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 0.21526418786692758,
|
|
"grad_norm": 0.6252526071380757,
|
|
"learning_rate": 1.2178770949720671e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1682274341583252,
|
|
"step": 110,
|
|
"valid_targets_mean": 2008.2,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 0.22504892367906065,
|
|
"grad_norm": 0.7444119586394704,
|
|
"learning_rate": 1.2737430167597766e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19016528129577637,
|
|
"step": 115,
|
|
"valid_targets_mean": 2239.0,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 0.23483365949119372,
|
|
"grad_norm": 0.4890590982481748,
|
|
"learning_rate": 1.3296089385474861e-05,
|
|
"loss": 0.1804,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1630125641822815,
|
|
"step": 120,
|
|
"valid_targets_mean": 3072.8,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 0.2446183953033268,
|
|
"grad_norm": 0.465912335927827,
|
|
"learning_rate": 1.3854748603351957e-05,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15328490734100342,
|
|
"step": 125,
|
|
"valid_targets_mean": 3095.4,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 0.25440313111545987,
|
|
"grad_norm": 0.6891592359602547,
|
|
"learning_rate": 1.4413407821229052e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26953524351119995,
|
|
"step": 130,
|
|
"valid_targets_mean": 3000.7,
|
|
"valid_targets_min": 597
|
|
},
|
|
{
|
|
"epoch": 0.26418786692759294,
|
|
"grad_norm": 0.6641646030161167,
|
|
"learning_rate": 1.4972067039106146e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21545499563217163,
|
|
"step": 135,
|
|
"valid_targets_mean": 2525.5,
|
|
"valid_targets_min": 616
|
|
},
|
|
{
|
|
"epoch": 0.273972602739726,
|
|
"grad_norm": 0.5830725755570016,
|
|
"learning_rate": 1.553072625698324e-05,
|
|
"loss": 0.1915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.216715008020401,
|
|
"step": 140,
|
|
"valid_targets_mean": 2687.6,
|
|
"valid_targets_min": 893
|
|
},
|
|
{
|
|
"epoch": 0.2837573385518591,
|
|
"grad_norm": 0.5767631139839036,
|
|
"learning_rate": 1.6089385474860336e-05,
|
|
"loss": 0.1783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19670595228672028,
|
|
"step": 145,
|
|
"valid_targets_mean": 3301.4,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 0.29354207436399216,
|
|
"grad_norm": 0.49142360319674744,
|
|
"learning_rate": 1.664804469273743e-05,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14769160747528076,
|
|
"step": 150,
|
|
"valid_targets_mean": 3333.7,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 0.30332681017612523,
|
|
"grad_norm": 0.5716984289756593,
|
|
"learning_rate": 1.7206703910614527e-05,
|
|
"loss": 0.1758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17623479664325714,
|
|
"step": 155,
|
|
"valid_targets_mean": 2626.4,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 0.3131115459882583,
|
|
"grad_norm": 0.5084369227675787,
|
|
"learning_rate": 1.776536312849162e-05,
|
|
"loss": 0.1681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15722373127937317,
|
|
"step": 160,
|
|
"valid_targets_mean": 2934.6,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 0.32289628180039137,
|
|
"grad_norm": 0.5735742518385566,
|
|
"learning_rate": 1.8324022346368716e-05,
|
|
"loss": 0.1832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23492315411567688,
|
|
"step": 165,
|
|
"valid_targets_mean": 3038.0,
|
|
"valid_targets_min": 865
|
|
},
|
|
{
|
|
"epoch": 0.33268101761252444,
|
|
"grad_norm": 0.4693397977357504,
|
|
"learning_rate": 1.888268156424581e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15091291069984436,
|
|
"step": 170,
|
|
"valid_targets_mean": 3301.8,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 0.3424657534246575,
|
|
"grad_norm": 0.48675755956862893,
|
|
"learning_rate": 1.9441340782122907e-05,
|
|
"loss": 0.1772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15300670266151428,
|
|
"step": 175,
|
|
"valid_targets_mean": 3417.9,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 0.3522504892367906,
|
|
"grad_norm": 0.5102150154869952,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.17,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16056200861930847,
|
|
"step": 180,
|
|
"valid_targets_mean": 3057.6,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 0.36203522504892366,
|
|
"grad_norm": 0.5515832587824223,
|
|
"learning_rate": 2.0558659217877096e-05,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14652328193187714,
|
|
"step": 185,
|
|
"valid_targets_mean": 2680.7,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 0.37181996086105673,
|
|
"grad_norm": 0.5989282020525712,
|
|
"learning_rate": 2.1117318435754193e-05,
|
|
"loss": 0.1646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18726202845573425,
|
|
"step": 190,
|
|
"valid_targets_mean": 2799.1,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 0.3816046966731898,
|
|
"grad_norm": 0.4504168777552236,
|
|
"learning_rate": 2.1675977653631288e-05,
|
|
"loss": 0.157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12995876371860504,
|
|
"step": 195,
|
|
"valid_targets_mean": 3549.4,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 0.3913894324853229,
|
|
"grad_norm": 0.5818242440905317,
|
|
"learning_rate": 2.2234636871508385e-05,
|
|
"loss": 0.163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17200620472431183,
|
|
"step": 200,
|
|
"valid_targets_mean": 2801.2,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 0.40117416829745595,
|
|
"grad_norm": 0.5177140652573206,
|
|
"learning_rate": 2.2793296089385476e-05,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1553056240081787,
|
|
"step": 205,
|
|
"valid_targets_mean": 3184.4,
|
|
"valid_targets_min": 868
|
|
},
|
|
{
|
|
"epoch": 0.410958904109589,
|
|
"grad_norm": 0.490634701023724,
|
|
"learning_rate": 2.335195530726257e-05,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14875340461730957,
|
|
"step": 210,
|
|
"valid_targets_mean": 3164.2,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 0.4207436399217221,
|
|
"grad_norm": 0.5871331423256096,
|
|
"learning_rate": 2.3910614525139668e-05,
|
|
"loss": 0.1448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20259450376033783,
|
|
"step": 215,
|
|
"valid_targets_mean": 2785.0,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 0.43052837573385516,
|
|
"grad_norm": 0.5333267414306153,
|
|
"learning_rate": 2.4469273743016762e-05,
|
|
"loss": 0.155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16707800328731537,
|
|
"step": 220,
|
|
"valid_targets_mean": 3486.7,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 0.44031311154598823,
|
|
"grad_norm": 0.45969079039542965,
|
|
"learning_rate": 2.5027932960893856e-05,
|
|
"loss": 0.1362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14990051090717316,
|
|
"step": 225,
|
|
"valid_targets_mean": 3446.0,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 0.4500978473581213,
|
|
"grad_norm": 0.5556944949224483,
|
|
"learning_rate": 2.5586592178770953e-05,
|
|
"loss": 0.1595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16094809770584106,
|
|
"step": 230,
|
|
"valid_targets_mean": 3048.9,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 0.4598825831702544,
|
|
"grad_norm": 0.49961250529766793,
|
|
"learning_rate": 2.6145251396648048e-05,
|
|
"loss": 0.1287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1248483955860138,
|
|
"step": 235,
|
|
"valid_targets_mean": 2743.2,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 0.46966731898238745,
|
|
"grad_norm": 0.5630898453724997,
|
|
"learning_rate": 2.6703910614525145e-05,
|
|
"loss": 0.1547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13761404156684875,
|
|
"step": 240,
|
|
"valid_targets_mean": 2726.6,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 0.4794520547945205,
|
|
"grad_norm": 0.4731743705256023,
|
|
"learning_rate": 2.7262569832402236e-05,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13565003871917725,
|
|
"step": 245,
|
|
"valid_targets_mean": 3192.2,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 0.4892367906066536,
|
|
"grad_norm": 0.6416047138090571,
|
|
"learning_rate": 2.782122905027933e-05,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1502951830625534,
|
|
"step": 250,
|
|
"valid_targets_mean": 2625.6,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 0.49902152641878667,
|
|
"grad_norm": 0.5557971187389114,
|
|
"learning_rate": 2.8379888268156424e-05,
|
|
"loss": 0.1565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15362128615379333,
|
|
"step": 255,
|
|
"valid_targets_mean": 2840.2,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 0.5088062622309197,
|
|
"grad_norm": 0.4690894971847644,
|
|
"learning_rate": 2.8938547486033522e-05,
|
|
"loss": 0.1614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1330149620771408,
|
|
"step": 260,
|
|
"valid_targets_mean": 3050.8,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 0.5185909980430529,
|
|
"grad_norm": 0.6025829145137604,
|
|
"learning_rate": 2.9497206703910616e-05,
|
|
"loss": 0.14,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13668930530548096,
|
|
"step": 265,
|
|
"valid_targets_mean": 2562.8,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 0.5283757338551859,
|
|
"grad_norm": 0.4779769696150171,
|
|
"learning_rate": 3.0055865921787714e-05,
|
|
"loss": 0.154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1378680169582367,
|
|
"step": 270,
|
|
"valid_targets_mean": 3117.8,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 0.538160469667319,
|
|
"grad_norm": 0.5087206893144688,
|
|
"learning_rate": 3.061452513966481e-05,
|
|
"loss": 0.1431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15285688638687134,
|
|
"step": 275,
|
|
"valid_targets_mean": 3502.2,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 0.547945205479452,
|
|
"grad_norm": 0.46853867062532467,
|
|
"learning_rate": 3.11731843575419e-05,
|
|
"loss": 0.1481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13073411583900452,
|
|
"step": 280,
|
|
"valid_targets_mean": 3103.8,
|
|
"valid_targets_min": 851
|
|
},
|
|
{
|
|
"epoch": 0.5577299412915852,
|
|
"grad_norm": 0.5244592262420299,
|
|
"learning_rate": 3.1731843575418996e-05,
|
|
"loss": 0.1604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16360273957252502,
|
|
"step": 285,
|
|
"valid_targets_mean": 2776.2,
|
|
"valid_targets_min": 563
|
|
},
|
|
{
|
|
"epoch": 0.5675146771037182,
|
|
"grad_norm": 0.6304714586468612,
|
|
"learning_rate": 3.229050279329609e-05,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574073374271393,
|
|
"step": 290,
|
|
"valid_targets_mean": 2842.6,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 0.5772994129158513,
|
|
"grad_norm": 0.5060780163327306,
|
|
"learning_rate": 3.2849162011173184e-05,
|
|
"loss": 0.1398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12446580827236176,
|
|
"step": 295,
|
|
"valid_targets_mean": 3434.2,
|
|
"valid_targets_min": 925
|
|
},
|
|
{
|
|
"epoch": 0.5870841487279843,
|
|
"grad_norm": 0.5645619555774959,
|
|
"learning_rate": 3.340782122905028e-05,
|
|
"loss": 0.1502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14075535535812378,
|
|
"step": 300,
|
|
"valid_targets_mean": 2749.1,
|
|
"valid_targets_min": 765
|
|
},
|
|
{
|
|
"epoch": 0.5968688845401174,
|
|
"grad_norm": 0.5019563126770876,
|
|
"learning_rate": 3.396648044692738e-05,
|
|
"loss": 0.1499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1525002121925354,
|
|
"step": 305,
|
|
"valid_targets_mean": 2889.3,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 0.6066536203522505,
|
|
"grad_norm": 0.8633134325378282,
|
|
"learning_rate": 3.4525139664804474e-05,
|
|
"loss": 0.1455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15216107666492462,
|
|
"step": 310,
|
|
"valid_targets_mean": 2542.5,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 0.6164383561643836,
|
|
"grad_norm": 0.518661419343394,
|
|
"learning_rate": 3.508379888268157e-05,
|
|
"loss": 0.1358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14660073816776276,
|
|
"step": 315,
|
|
"valid_targets_mean": 3098.8,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 0.6262230919765166,
|
|
"grad_norm": 0.4288729432621478,
|
|
"learning_rate": 3.564245810055866e-05,
|
|
"loss": 0.1473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1281621754169464,
|
|
"step": 320,
|
|
"valid_targets_mean": 3366.9,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 0.6360078277886497,
|
|
"grad_norm": 0.5245999073963563,
|
|
"learning_rate": 3.6201117318435756e-05,
|
|
"loss": 0.1587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14279915392398834,
|
|
"step": 325,
|
|
"valid_targets_mean": 2867.5,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 0.6457925636007827,
|
|
"grad_norm": 0.5409938601242019,
|
|
"learning_rate": 3.675977653631285e-05,
|
|
"loss": 0.1354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1241241842508316,
|
|
"step": 330,
|
|
"valid_targets_mean": 2814.8,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 0.6555772994129159,
|
|
"grad_norm": 0.5367639238304938,
|
|
"learning_rate": 3.7318435754189944e-05,
|
|
"loss": 0.1754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13935647904872894,
|
|
"step": 335,
|
|
"valid_targets_mean": 2107.4,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 0.6653620352250489,
|
|
"grad_norm": 0.5190264673793914,
|
|
"learning_rate": 3.787709497206704e-05,
|
|
"loss": 0.1459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13536672294139862,
|
|
"step": 340,
|
|
"valid_targets_mean": 3303.1,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 0.675146771037182,
|
|
"grad_norm": 0.4810572486523759,
|
|
"learning_rate": 3.843575418994414e-05,
|
|
"loss": 0.1498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15841111540794373,
|
|
"step": 345,
|
|
"valid_targets_mean": 3270.0,
|
|
"valid_targets_min": 762
|
|
},
|
|
{
|
|
"epoch": 0.684931506849315,
|
|
"grad_norm": 0.45491328608286835,
|
|
"learning_rate": 3.8994413407821234e-05,
|
|
"loss": 0.1367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14639562368392944,
|
|
"step": 350,
|
|
"valid_targets_mean": 3250.1,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 0.6947162426614482,
|
|
"grad_norm": 0.4672379304521524,
|
|
"learning_rate": 3.955307262569833e-05,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13699516654014587,
|
|
"step": 355,
|
|
"valid_targets_mean": 3220.0,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 0.7045009784735812,
|
|
"grad_norm": 0.587847822651443,
|
|
"learning_rate": 3.99999904751585e-05,
|
|
"loss": 0.1592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21118012070655823,
|
|
"step": 360,
|
|
"valid_targets_mean": 2303.2,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 0.5160918575652919,
|
|
"learning_rate": 3.999965710665851e-05,
|
|
"loss": 0.151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14607413113117218,
|
|
"step": 365,
|
|
"valid_targets_mean": 2839.4,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.7240704500978473,
|
|
"grad_norm": 0.4341683819577898,
|
|
"learning_rate": 3.999884750515563e-05,
|
|
"loss": 0.1314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13683652877807617,
|
|
"step": 370,
|
|
"valid_targets_mean": 3752.6,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 0.7338551859099804,
|
|
"grad_norm": 0.5603714103623396,
|
|
"learning_rate": 3.999756168992814e-05,
|
|
"loss": 0.1446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17999452352523804,
|
|
"step": 375,
|
|
"valid_targets_mean": 2864.7,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 0.7436399217221135,
|
|
"grad_norm": 0.4578337150635251,
|
|
"learning_rate": 3.999579969159395e-05,
|
|
"loss": 0.1469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1431577503681183,
|
|
"step": 380,
|
|
"valid_targets_mean": 3439.2,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 0.7534246575342466,
|
|
"grad_norm": 0.48938549638567597,
|
|
"learning_rate": 3.999356155210986e-05,
|
|
"loss": 0.1527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19597536325454712,
|
|
"step": 385,
|
|
"valid_targets_mean": 3274.6,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 0.7632093933463796,
|
|
"grad_norm": 0.5734981062865676,
|
|
"learning_rate": 3.9990847324770584e-05,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13992062211036682,
|
|
"step": 390,
|
|
"valid_targets_mean": 2851.9,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 0.7729941291585127,
|
|
"grad_norm": 0.5219177121218723,
|
|
"learning_rate": 3.998765707420746e-05,
|
|
"loss": 0.1412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16220493614673615,
|
|
"step": 395,
|
|
"valid_targets_mean": 2681.6,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 0.7827788649706457,
|
|
"grad_norm": 0.5117389003410739,
|
|
"learning_rate": 3.998399087638692e-05,
|
|
"loss": 0.1515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13427501916885376,
|
|
"step": 400,
|
|
"valid_targets_mean": 2833.2,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 0.7925636007827789,
|
|
"grad_norm": 0.45630749060617215,
|
|
"learning_rate": 3.997984881860869e-05,
|
|
"loss": 0.1491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13904061913490295,
|
|
"step": 405,
|
|
"valid_targets_mean": 2951.7,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 0.8023483365949119,
|
|
"grad_norm": 0.532441802548241,
|
|
"learning_rate": 3.9975230999503674e-05,
|
|
"loss": 0.1424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18587175011634827,
|
|
"step": 410,
|
|
"valid_targets_mean": 2718.6,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.812133072407045,
|
|
"grad_norm": 0.5201313018088487,
|
|
"learning_rate": 3.997013752903166e-05,
|
|
"loss": 0.1526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17946481704711914,
|
|
"step": 415,
|
|
"valid_targets_mean": 2429.7,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 0.821917808219178,
|
|
"grad_norm": 0.4733086498760068,
|
|
"learning_rate": 3.996456852847867e-05,
|
|
"loss": 0.1361,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15358425676822662,
|
|
"step": 420,
|
|
"valid_targets_mean": 3413.8,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 0.8317025440313112,
|
|
"grad_norm": 0.43812773082093437,
|
|
"learning_rate": 3.995852413045406e-05,
|
|
"loss": 0.145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13964691758155823,
|
|
"step": 425,
|
|
"valid_targets_mean": 3069.9,
|
|
"valid_targets_min": 1049
|
|
},
|
|
{
|
|
"epoch": 0.8414872798434442,
|
|
"grad_norm": 0.41481899566210445,
|
|
"learning_rate": 3.995200447888739e-05,
|
|
"loss": 0.1284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11863437294960022,
|
|
"step": 430,
|
|
"valid_targets_mean": 3568.8,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 0.8512720156555773,
|
|
"grad_norm": 0.4498805859648879,
|
|
"learning_rate": 3.9945009729024984e-05,
|
|
"loss": 0.1586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16341759264469147,
|
|
"step": 435,
|
|
"valid_targets_mean": 2945.4,
|
|
"valid_targets_min": 731
|
|
},
|
|
{
|
|
"epoch": 0.8610567514677103,
|
|
"grad_norm": 0.49895994291075085,
|
|
"learning_rate": 3.993754004742625e-05,
|
|
"loss": 0.137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13261038064956665,
|
|
"step": 440,
|
|
"valid_targets_mean": 2722.4,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 0.8708414872798435,
|
|
"grad_norm": 0.47491888903670937,
|
|
"learning_rate": 3.992959561195965e-05,
|
|
"loss": 0.1501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20256978273391724,
|
|
"step": 445,
|
|
"valid_targets_mean": 3347.3,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 0.8806262230919765,
|
|
"grad_norm": 0.4455913646457985,
|
|
"learning_rate": 3.9921176611798577e-05,
|
|
"loss": 0.133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11824070662260056,
|
|
"step": 450,
|
|
"valid_targets_mean": 3272.7,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 0.8904109589041096,
|
|
"grad_norm": 0.5687543644243216,
|
|
"learning_rate": 3.9912283247416746e-05,
|
|
"loss": 0.147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13478372991085052,
|
|
"step": 455,
|
|
"valid_targets_mean": 3132.4,
|
|
"valid_targets_min": 742
|
|
},
|
|
{
|
|
"epoch": 0.9001956947162426,
|
|
"grad_norm": 0.37827545084266456,
|
|
"learning_rate": 3.990291573058345e-05,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10885334014892578,
|
|
"step": 460,
|
|
"valid_targets_mean": 3407.7,
|
|
"valid_targets_min": 943
|
|
},
|
|
{
|
|
"epoch": 0.9099804305283757,
|
|
"grad_norm": 0.4175485573365561,
|
|
"learning_rate": 3.989307428435858e-05,
|
|
"loss": 0.1368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1276848316192627,
|
|
"step": 465,
|
|
"valid_targets_mean": 3251.0,
|
|
"valid_targets_min": 867
|
|
},
|
|
{
|
|
"epoch": 0.9197651663405088,
|
|
"grad_norm": 0.49808980526853586,
|
|
"learning_rate": 3.9882759143087194e-05,
|
|
"loss": 0.1332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14108321070671082,
|
|
"step": 470,
|
|
"valid_targets_mean": 2904.2,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 0.9295499021526419,
|
|
"grad_norm": 0.40177200330690865,
|
|
"learning_rate": 3.9871970552394066e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13899895548820496,
|
|
"step": 475,
|
|
"valid_targets_mean": 3147.8,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 0.9393346379647749,
|
|
"grad_norm": 0.42481204661487465,
|
|
"learning_rate": 3.986070876917773e-05,
|
|
"loss": 0.1415,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16770583391189575,
|
|
"step": 480,
|
|
"valid_targets_mean": 3443.7,
|
|
"valid_targets_min": 691
|
|
},
|
|
{
|
|
"epoch": 0.949119373776908,
|
|
"grad_norm": 0.5444357960985896,
|
|
"learning_rate": 3.984897406160443e-05,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16325880587100983,
|
|
"step": 485,
|
|
"valid_targets_mean": 2540.9,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 0.958904109589041,
|
|
"grad_norm": 0.4632384623061562,
|
|
"learning_rate": 3.9836766709101714e-05,
|
|
"loss": 0.1496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17608124017715454,
|
|
"step": 490,
|
|
"valid_targets_mean": 3103.3,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 0.9686888454011742,
|
|
"grad_norm": 0.4187576842004876,
|
|
"learning_rate": 3.9824087002351765e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1268356442451477,
|
|
"step": 495,
|
|
"valid_targets_mean": 3034.8,
|
|
"valid_targets_min": 873
|
|
},
|
|
{
|
|
"epoch": 0.9784735812133072,
|
|
"grad_norm": 0.48280261413787057,
|
|
"learning_rate": 3.9810935243284496e-05,
|
|
"loss": 0.1292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12477391213178635,
|
|
"step": 500,
|
|
"valid_targets_mean": 3063.3,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 0.9882583170254403,
|
|
"grad_norm": 0.39168001410008785,
|
|
"learning_rate": 3.979731174507038e-05,
|
|
"loss": 0.1275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12986153364181519,
|
|
"step": 505,
|
|
"valid_targets_mean": 3226.4,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 0.9980430528375733,
|
|
"grad_norm": 0.46896658336753394,
|
|
"learning_rate": 3.978321683211294e-05,
|
|
"loss": 0.1441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15444520115852356,
|
|
"step": 510,
|
|
"valid_targets_mean": 2962.0,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 1.0078277886497065,
|
|
"grad_norm": 0.4298181968101464,
|
|
"learning_rate": 3.976865084004107e-05,
|
|
"loss": 0.1363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1293889433145523,
|
|
"step": 515,
|
|
"valid_targets_mean": 2834.4,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 1.0176125244618395,
|
|
"grad_norm": 0.4403428932485871,
|
|
"learning_rate": 3.975361411570101e-05,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1426129937171936,
|
|
"step": 520,
|
|
"valid_targets_mean": 3185.1,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 1.0273972602739727,
|
|
"grad_norm": 0.43223418895898846,
|
|
"learning_rate": 3.9738107017148145e-05,
|
|
"loss": 0.1305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11700769513845444,
|
|
"step": 525,
|
|
"valid_targets_mean": 2929.1,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.0371819960861057,
|
|
"grad_norm": 0.383816735814963,
|
|
"learning_rate": 3.972212991363839e-05,
|
|
"loss": 0.1397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11716213077306747,
|
|
"step": 530,
|
|
"valid_targets_mean": 3100.0,
|
|
"valid_targets_min": 195
|
|
},
|
|
{
|
|
"epoch": 1.0469667318982387,
|
|
"grad_norm": 0.47087470364735173,
|
|
"learning_rate": 3.970568318561947e-05,
|
|
"loss": 0.1447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14262157678604126,
|
|
"step": 535,
|
|
"valid_targets_mean": 3104.8,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 1.0567514677103718,
|
|
"grad_norm": 0.4263249245829921,
|
|
"learning_rate": 3.9688767224721834e-05,
|
|
"loss": 0.127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11953842639923096,
|
|
"step": 540,
|
|
"valid_targets_mean": 2913.2,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 1.0665362035225048,
|
|
"grad_norm": 0.4098653850081825,
|
|
"learning_rate": 3.9671382433749335e-05,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12505799531936646,
|
|
"step": 545,
|
|
"valid_targets_mean": 3437.8,
|
|
"valid_targets_min": 789
|
|
},
|
|
{
|
|
"epoch": 1.076320939334638,
|
|
"grad_norm": 0.4263174933585311,
|
|
"learning_rate": 3.965352922666963e-05,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1353525072336197,
|
|
"step": 550,
|
|
"valid_targets_mean": 3178.6,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 1.086105675146771,
|
|
"grad_norm": 0.34866454920181195,
|
|
"learning_rate": 3.963520802860433e-05,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11077715456485748,
|
|
"step": 555,
|
|
"valid_targets_mean": 3791.6,
|
|
"valid_targets_min": 1366
|
|
},
|
|
{
|
|
"epoch": 1.095890410958904,
|
|
"grad_norm": 0.4611779341003833,
|
|
"learning_rate": 3.961641927581886e-05,
|
|
"loss": 0.1206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1390659213066101,
|
|
"step": 560,
|
|
"valid_targets_mean": 3071.4,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 1.1056751467710373,
|
|
"grad_norm": 0.4407781207124439,
|
|
"learning_rate": 3.9597163415712115e-05,
|
|
"loss": 0.1252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11901605874300003,
|
|
"step": 565,
|
|
"valid_targets_mean": 2988.6,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.1154598825831703,
|
|
"grad_norm": 0.4893014553120929,
|
|
"learning_rate": 3.957744090680575e-05,
|
|
"loss": 0.1323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14788511395454407,
|
|
"step": 570,
|
|
"valid_targets_mean": 2563.8,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 1.1252446183953033,
|
|
"grad_norm": 0.4451966676623463,
|
|
"learning_rate": 3.9557252218733306e-05,
|
|
"loss": 0.1434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13166627287864685,
|
|
"step": 575,
|
|
"valid_targets_mean": 3022.2,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 1.1350293542074363,
|
|
"grad_norm": 0.4128155142273592,
|
|
"learning_rate": 3.9536597832228995e-05,
|
|
"loss": 0.1213,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13153548538684845,
|
|
"step": 580,
|
|
"valid_targets_mean": 3557.9,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 1.1448140900195694,
|
|
"grad_norm": 0.52173362133626,
|
|
"learning_rate": 3.951547823911628e-05,
|
|
"loss": 0.1324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14766138792037964,
|
|
"step": 585,
|
|
"valid_targets_mean": 2381.2,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 1.1545988258317026,
|
|
"grad_norm": 0.43298593648168354,
|
|
"learning_rate": 3.9493893942296146e-05,
|
|
"loss": 0.1309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12547074258327484,
|
|
"step": 590,
|
|
"valid_targets_mean": 3065.9,
|
|
"valid_targets_min": 748
|
|
},
|
|
{
|
|
"epoch": 1.1643835616438356,
|
|
"grad_norm": 0.5197412292961334,
|
|
"learning_rate": 3.947184545573513e-05,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12710896134376526,
|
|
"step": 595,
|
|
"valid_targets_mean": 2627.9,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 1.1741682974559686,
|
|
"grad_norm": 0.4474540656691777,
|
|
"learning_rate": 3.944933330445307e-05,
|
|
"loss": 0.1241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12056642025709152,
|
|
"step": 600,
|
|
"valid_targets_mean": 2639.8,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 1.1839530332681019,
|
|
"grad_norm": 0.40153254889780027,
|
|
"learning_rate": 3.942635802451064e-05,
|
|
"loss": 0.1047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.104750856757164,
|
|
"step": 605,
|
|
"valid_targets_mean": 3113.3,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 1.1937377690802349,
|
|
"grad_norm": 0.41024686531210736,
|
|
"learning_rate": 3.940292016299654e-05,
|
|
"loss": 0.1432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13328008353710175,
|
|
"step": 610,
|
|
"valid_targets_mean": 2628.1,
|
|
"valid_targets_min": 602
|
|
},
|
|
{
|
|
"epoch": 1.203522504892368,
|
|
"grad_norm": 0.4610646996665633,
|
|
"learning_rate": 3.93790202780145e-05,
|
|
"loss": 0.1403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13698866963386536,
|
|
"step": 615,
|
|
"valid_targets_mean": 2848.0,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 1.213307240704501,
|
|
"grad_norm": 0.38926695397791977,
|
|
"learning_rate": 3.935465893866998e-05,
|
|
"loss": 0.1207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10700084269046783,
|
|
"step": 620,
|
|
"valid_targets_mean": 3378.0,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 1.223091976516634,
|
|
"grad_norm": 0.45388226915219076,
|
|
"learning_rate": 3.932983672505661e-05,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12499846518039703,
|
|
"step": 625,
|
|
"valid_targets_mean": 2968.1,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 1.2328767123287672,
|
|
"grad_norm": 0.5155794540649574,
|
|
"learning_rate": 3.9304554228242396e-05,
|
|
"loss": 0.1477,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16853684186935425,
|
|
"step": 630,
|
|
"valid_targets_mean": 2686.6,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 1.2426614481409002,
|
|
"grad_norm": 0.4305098652867405,
|
|
"learning_rate": 3.927881205025562e-05,
|
|
"loss": 0.1172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1281091570854187,
|
|
"step": 635,
|
|
"valid_targets_mean": 2797.1,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 1.2524461839530332,
|
|
"grad_norm": 0.3947701639870653,
|
|
"learning_rate": 3.9252610804070526e-05,
|
|
"loss": 0.1124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11794069409370422,
|
|
"step": 640,
|
|
"valid_targets_mean": 3482.6,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 1.2622309197651664,
|
|
"grad_norm": 0.5334016538211457,
|
|
"learning_rate": 3.9225951113592735e-05,
|
|
"loss": 0.1316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.147422656416893,
|
|
"step": 645,
|
|
"valid_targets_mean": 2707.8,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 1.2720156555772995,
|
|
"grad_norm": 0.5165896986431493,
|
|
"learning_rate": 3.9198833613644333e-05,
|
|
"loss": 0.1227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12175339460372925,
|
|
"step": 650,
|
|
"valid_targets_mean": 2479.2,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 1.2818003913894325,
|
|
"grad_norm": 0.40502772636845047,
|
|
"learning_rate": 3.9171258949948827e-05,
|
|
"loss": 0.1334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11957898736000061,
|
|
"step": 655,
|
|
"valid_targets_mean": 3226.8,
|
|
"valid_targets_min": 891
|
|
},
|
|
{
|
|
"epoch": 1.2915851272015655,
|
|
"grad_norm": 0.48197554939891096,
|
|
"learning_rate": 3.914322777911571e-05,
|
|
"loss": 0.1144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.112558513879776,
|
|
"step": 660,
|
|
"valid_targets_mean": 2785.1,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.3013698630136985,
|
|
"grad_norm": 0.4295467516080876,
|
|
"learning_rate": 3.911474076862487e-05,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14773568511009216,
|
|
"step": 665,
|
|
"valid_targets_mean": 2864.2,
|
|
"valid_targets_min": 795
|
|
},
|
|
{
|
|
"epoch": 1.3111545988258317,
|
|
"grad_norm": 0.4615813117659187,
|
|
"learning_rate": 3.908579859681065e-05,
|
|
"loss": 0.1185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12034747749567032,
|
|
"step": 670,
|
|
"valid_targets_mean": 2775.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 1.3209393346379648,
|
|
"grad_norm": 0.5034226535478493,
|
|
"learning_rate": 3.905640195284574e-05,
|
|
"loss": 0.1457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16286320984363556,
|
|
"step": 675,
|
|
"valid_targets_mean": 2521.2,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 1.3307240704500978,
|
|
"grad_norm": 0.42231990338171094,
|
|
"learning_rate": 3.9026551536724754e-05,
|
|
"loss": 0.1251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13433174788951874,
|
|
"step": 680,
|
|
"valid_targets_mean": 3121.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 1.340508806262231,
|
|
"grad_norm": 0.4038708539406575,
|
|
"learning_rate": 3.899624805924753e-05,
|
|
"loss": 0.1289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1403772532939911,
|
|
"step": 685,
|
|
"valid_targets_mean": 3254.3,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 1.350293542074364,
|
|
"grad_norm": 0.4299434106098187,
|
|
"learning_rate": 3.896549224200225e-05,
|
|
"loss": 0.1255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12135376781225204,
|
|
"step": 690,
|
|
"valid_targets_mean": 2571.9,
|
|
"valid_targets_min": 618
|
|
},
|
|
{
|
|
"epoch": 1.360078277886497,
|
|
"grad_norm": 0.4306682083734945,
|
|
"learning_rate": 3.8934284817348224e-05,
|
|
"loss": 0.1364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10059612989425659,
|
|
"step": 695,
|
|
"valid_targets_mean": 3323.1,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 1.36986301369863,
|
|
"grad_norm": 0.5108627017777738,
|
|
"learning_rate": 3.890262652839847e-05,
|
|
"loss": 0.1308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14139822125434875,
|
|
"step": 700,
|
|
"valid_targets_mean": 3045.6,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 1.379647749510763,
|
|
"grad_norm": 0.43419713202620003,
|
|
"learning_rate": 3.887051812900203e-05,
|
|
"loss": 0.129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15847548842430115,
|
|
"step": 705,
|
|
"valid_targets_mean": 2795.1,
|
|
"valid_targets_min": 803
|
|
},
|
|
{
|
|
"epoch": 1.3894324853228963,
|
|
"grad_norm": 0.40330836063363107,
|
|
"learning_rate": 3.883796038372596e-05,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1492297649383545,
|
|
"step": 710,
|
|
"valid_targets_mean": 3402.1,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 1.3992172211350293,
|
|
"grad_norm": 0.4967106304379057,
|
|
"learning_rate": 3.8804954067837215e-05,
|
|
"loss": 0.1297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14624662697315216,
|
|
"step": 715,
|
|
"valid_targets_mean": 3127.6,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 1.4090019569471623,
|
|
"grad_norm": 0.4217697643614769,
|
|
"learning_rate": 3.87714999672841e-05,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13098879158496857,
|
|
"step": 720,
|
|
"valid_targets_mean": 2735.1,
|
|
"valid_targets_min": 678
|
|
},
|
|
{
|
|
"epoch": 1.4187866927592956,
|
|
"grad_norm": 0.3863038902644223,
|
|
"learning_rate": 3.873759887867762e-05,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1081153154373169,
|
|
"step": 725,
|
|
"valid_targets_mean": 2962.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.4366298043459826,
|
|
"learning_rate": 3.8703251609272464e-05,
|
|
"loss": 0.122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12468526512384415,
|
|
"step": 730,
|
|
"valid_targets_mean": 2868.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.4383561643835616,
|
|
"grad_norm": 0.5389918919734983,
|
|
"learning_rate": 3.8668458976947834e-05,
|
|
"loss": 0.1295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12536370754241943,
|
|
"step": 735,
|
|
"valid_targets_mean": 2728.8,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 1.4481409001956946,
|
|
"grad_norm": 0.4209283462484907,
|
|
"learning_rate": 3.8633221810187914e-05,
|
|
"loss": 0.1116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14346784353256226,
|
|
"step": 740,
|
|
"valid_targets_mean": 2871.4,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.4579256360078277,
|
|
"grad_norm": 0.3944143889984141,
|
|
"learning_rate": 3.859754094806217e-05,
|
|
"loss": 0.1232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11453887820243835,
|
|
"step": 745,
|
|
"valid_targets_mean": 3111.7,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 1.467710371819961,
|
|
"grad_norm": 0.4509652697403836,
|
|
"learning_rate": 3.856141724020539e-05,
|
|
"loss": 0.1282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11700256168842316,
|
|
"step": 750,
|
|
"valid_targets_mean": 2824.9,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 1.477495107632094,
|
|
"grad_norm": 0.4838886367870153,
|
|
"learning_rate": 3.85248515467974e-05,
|
|
"loss": 0.1381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13873013854026794,
|
|
"step": 755,
|
|
"valid_targets_mean": 2238.0,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 1.487279843444227,
|
|
"grad_norm": 0.5509873326146855,
|
|
"learning_rate": 3.848784473854264e-05,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14058071374893188,
|
|
"step": 760,
|
|
"valid_targets_mean": 2273.0,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.4970645792563602,
|
|
"grad_norm": 0.39915637445399327,
|
|
"learning_rate": 3.8450397696649375e-05,
|
|
"loss": 0.118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0962296724319458,
|
|
"step": 765,
|
|
"valid_targets_mean": 2820.8,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 1.5068493150684932,
|
|
"grad_norm": 0.4256620360833589,
|
|
"learning_rate": 3.841251131280877e-05,
|
|
"loss": 0.1366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13869571685791016,
|
|
"step": 770,
|
|
"valid_targets_mean": 2889.6,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 1.5166340508806262,
|
|
"grad_norm": 0.4738644474376625,
|
|
"learning_rate": 3.8374186489173586e-05,
|
|
"loss": 0.1314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15252867341041565,
|
|
"step": 775,
|
|
"valid_targets_mean": 2849.9,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 1.5264187866927594,
|
|
"grad_norm": 0.4586682660406289,
|
|
"learning_rate": 3.833542413833678e-05,
|
|
"loss": 0.1143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11463161557912827,
|
|
"step": 780,
|
|
"valid_targets_mean": 2530.9,
|
|
"valid_targets_min": 578
|
|
},
|
|
{
|
|
"epoch": 1.5362035225048922,
|
|
"grad_norm": 0.5624104088370981,
|
|
"learning_rate": 3.8296225183309717e-05,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13047656416893005,
|
|
"step": 785,
|
|
"valid_targets_mean": 2341.8,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 1.5459882583170255,
|
|
"grad_norm": 0.4198461156087575,
|
|
"learning_rate": 3.825659055750019e-05,
|
|
"loss": 0.1191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13537245988845825,
|
|
"step": 790,
|
|
"valid_targets_mean": 3042.0,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 1.5557729941291585,
|
|
"grad_norm": 0.36831522550408496,
|
|
"learning_rate": 3.821652120469023e-05,
|
|
"loss": 0.1264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12275904417037964,
|
|
"step": 795,
|
|
"valid_targets_mean": 3540.9,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 1.5655577299412915,
|
|
"grad_norm": 0.5195803523592514,
|
|
"learning_rate": 3.81760180790136e-05,
|
|
"loss": 0.1237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17436471581459045,
|
|
"step": 800,
|
|
"valid_targets_mean": 2270.1,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 1.5753424657534247,
|
|
"grad_norm": 0.3900604756464933,
|
|
"learning_rate": 3.81350821449331e-05,
|
|
"loss": 0.1238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12121501564979553,
|
|
"step": 805,
|
|
"valid_targets_mean": 3190.0,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 1.5851272015655578,
|
|
"grad_norm": 0.40674054952886596,
|
|
"learning_rate": 3.809371437721758e-05,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.129991352558136,
|
|
"step": 810,
|
|
"valid_targets_mean": 3047.8,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.5949119373776908,
|
|
"grad_norm": 0.46210844804033524,
|
|
"learning_rate": 3.805191576091874e-05,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609773337841034,
|
|
"step": 815,
|
|
"valid_targets_mean": 2791.4,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 1.604696673189824,
|
|
"grad_norm": 0.44015961841612017,
|
|
"learning_rate": 3.800968729134767e-05,
|
|
"loss": 0.1228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11716459691524506,
|
|
"step": 820,
|
|
"valid_targets_mean": 2432.6,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 1.6144814090019568,
|
|
"grad_norm": 0.3674024934355759,
|
|
"learning_rate": 3.7967029974051166e-05,
|
|
"loss": 0.1266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12538853287696838,
|
|
"step": 825,
|
|
"valid_targets_mean": 3603.0,
|
|
"valid_targets_min": 2751
|
|
},
|
|
{
|
|
"epoch": 1.62426614481409,
|
|
"grad_norm": 0.445318152082194,
|
|
"learning_rate": 3.7923944824787746e-05,
|
|
"loss": 0.1335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12994927167892456,
|
|
"step": 830,
|
|
"valid_targets_mean": 2598.8,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 1.634050880626223,
|
|
"grad_norm": 0.3890277594921652,
|
|
"learning_rate": 3.78804328695035e-05,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1315343677997589,
|
|
"step": 835,
|
|
"valid_targets_mean": 3161.4,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 1.643835616438356,
|
|
"grad_norm": 0.389914488948936,
|
|
"learning_rate": 3.7836495144307644e-05,
|
|
"loss": 0.1307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12948748469352722,
|
|
"step": 840,
|
|
"valid_targets_mean": 3036.4,
|
|
"valid_targets_min": 677
|
|
},
|
|
{
|
|
"epoch": 1.6536203522504893,
|
|
"grad_norm": 0.40333039661437403,
|
|
"learning_rate": 3.779213269544788e-05,
|
|
"loss": 0.1352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1190645694732666,
|
|
"step": 845,
|
|
"valid_targets_mean": 2843.1,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 1.6634050880626223,
|
|
"grad_norm": 0.3781685193021049,
|
|
"learning_rate": 3.77473465792854e-05,
|
|
"loss": 0.1069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10028046369552612,
|
|
"step": 850,
|
|
"valid_targets_mean": 3250.7,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 1.6731898238747553,
|
|
"grad_norm": 0.4260461202222236,
|
|
"learning_rate": 3.770213786226984e-05,
|
|
"loss": 0.1195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11517356336116791,
|
|
"step": 855,
|
|
"valid_targets_mean": 2516.8,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 1.6829745596868886,
|
|
"grad_norm": 0.43990271393461494,
|
|
"learning_rate": 3.7656507620913796e-05,
|
|
"loss": 0.1273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14541923999786377,
|
|
"step": 860,
|
|
"valid_targets_mean": 2481.6,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 1.6927592954990214,
|
|
"grad_norm": 0.44787305256684223,
|
|
"learning_rate": 3.761045694176725e-05,
|
|
"loss": 0.1234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1405627429485321,
|
|
"step": 865,
|
|
"valid_targets_mean": 2722.5,
|
|
"valid_targets_min": 837
|
|
},
|
|
{
|
|
"epoch": 1.7025440313111546,
|
|
"grad_norm": 0.4117925127419459,
|
|
"learning_rate": 3.756398692139165e-05,
|
|
"loss": 0.1183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1328292191028595,
|
|
"step": 870,
|
|
"valid_targets_mean": 3010.8,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.7123287671232876,
|
|
"grad_norm": 0.35516745903521674,
|
|
"learning_rate": 3.751709866633384e-05,
|
|
"loss": 0.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11084195226430893,
|
|
"step": 875,
|
|
"valid_targets_mean": 3389.6,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 1.7221135029354206,
|
|
"grad_norm": 0.38527383980094254,
|
|
"learning_rate": 3.74697932930997e-05,
|
|
"loss": 0.135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11033222079277039,
|
|
"step": 880,
|
|
"valid_targets_mean": 2953.3,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 1.7318982387475539,
|
|
"grad_norm": 0.3706588060490617,
|
|
"learning_rate": 3.742207192812752e-05,
|
|
"loss": 0.1379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14947205781936646,
|
|
"step": 885,
|
|
"valid_targets_mean": 3483.3,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 1.741682974559687,
|
|
"grad_norm": 0.42813076779856735,
|
|
"learning_rate": 3.737393570776124e-05,
|
|
"loss": 0.1388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16240671277046204,
|
|
"step": 890,
|
|
"valid_targets_mean": 3113.2,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 1.75146771037182,
|
|
"grad_norm": 0.4217288729608291,
|
|
"learning_rate": 3.732538577822335e-05,
|
|
"loss": 0.1189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12056794762611389,
|
|
"step": 895,
|
|
"valid_targets_mean": 2541.4,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 1.7612524461839532,
|
|
"grad_norm": 0.4173165892277017,
|
|
"learning_rate": 3.7276423295587614e-05,
|
|
"loss": 0.1337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11108045279979706,
|
|
"step": 900,
|
|
"valid_targets_mean": 3487.0,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 1.771037181996086,
|
|
"grad_norm": 0.42994312549627534,
|
|
"learning_rate": 3.722704942575151e-05,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12259448319673538,
|
|
"step": 905,
|
|
"valid_targets_mean": 2648.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 1.7808219178082192,
|
|
"grad_norm": 0.3880527350360599,
|
|
"learning_rate": 3.717726534440853e-05,
|
|
"loss": 0.1223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13088837265968323,
|
|
"step": 910,
|
|
"valid_targets_mean": 3324.6,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 1.7906066536203522,
|
|
"grad_norm": 0.4434184187700376,
|
|
"learning_rate": 3.71270722370201e-05,
|
|
"loss": 0.1373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1504274606704712,
|
|
"step": 915,
|
|
"valid_targets_mean": 3018.6,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 1.8003913894324852,
|
|
"grad_norm": 0.4292410117909898,
|
|
"learning_rate": 3.7076471298787454e-05,
|
|
"loss": 0.1182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12650364637374878,
|
|
"step": 920,
|
|
"valid_targets_mean": 2849.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 1.8101761252446185,
|
|
"grad_norm": 0.4620328865073727,
|
|
"learning_rate": 3.702546373462307e-05,
|
|
"loss": 0.1197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14168865978717804,
|
|
"step": 925,
|
|
"valid_targets_mean": 2532.8,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 1.8199608610567515,
|
|
"grad_norm": 0.39121311292947436,
|
|
"learning_rate": 3.697405075912204e-05,
|
|
"loss": 0.1198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1334732323884964,
|
|
"step": 930,
|
|
"valid_targets_mean": 2946.3,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 1.8297455968688845,
|
|
"grad_norm": 0.4231935245395829,
|
|
"learning_rate": 3.692223359653316e-05,
|
|
"loss": 0.1258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12490694969892502,
|
|
"step": 935,
|
|
"valid_targets_mean": 2961.6,
|
|
"valid_targets_min": 875
|
|
},
|
|
{
|
|
"epoch": 1.8395303326810177,
|
|
"grad_norm": 0.3836018694964112,
|
|
"learning_rate": 3.6870013480729714e-05,
|
|
"loss": 0.1224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1226375624537468,
|
|
"step": 940,
|
|
"valid_targets_mean": 3168.6,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 1.8493150684931505,
|
|
"grad_norm": 0.3817306405122374,
|
|
"learning_rate": 3.681739165518015e-05,
|
|
"loss": 0.1263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11338752508163452,
|
|
"step": 945,
|
|
"valid_targets_mean": 3203.5,
|
|
"valid_targets_min": 829
|
|
},
|
|
{
|
|
"epoch": 1.8590998043052838,
|
|
"grad_norm": 0.36451803335347466,
|
|
"learning_rate": 3.676436937291845e-05,
|
|
"loss": 0.119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10180258750915527,
|
|
"step": 950,
|
|
"valid_targets_mean": 3313.1,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 1.8688845401174168,
|
|
"grad_norm": 0.4176609159216137,
|
|
"learning_rate": 3.671094789651431e-05,
|
|
"loss": 0.1355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13892988860607147,
|
|
"step": 955,
|
|
"valid_targets_mean": 2564.2,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 1.8786692759295498,
|
|
"grad_norm": 0.34566771786799344,
|
|
"learning_rate": 3.665712849804303e-05,
|
|
"loss": 0.1217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10103757679462433,
|
|
"step": 960,
|
|
"valid_targets_mean": 2706.9,
|
|
"valid_targets_min": 545
|
|
},
|
|
{
|
|
"epoch": 1.888454011741683,
|
|
"grad_norm": 0.3774548068273026,
|
|
"learning_rate": 3.660291245905527e-05,
|
|
"loss": 0.1148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11747957766056061,
|
|
"step": 965,
|
|
"valid_targets_mean": 3305.8,
|
|
"valid_targets_min": 917
|
|
},
|
|
{
|
|
"epoch": 1.898238747553816,
|
|
"grad_norm": 0.3120169544499443,
|
|
"learning_rate": 3.654830107054653e-05,
|
|
"loss": 0.1177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09748544543981552,
|
|
"step": 970,
|
|
"valid_targets_mean": 3459.0,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 1.908023483365949,
|
|
"grad_norm": 0.33933572630751907,
|
|
"learning_rate": 3.649329563292636e-05,
|
|
"loss": 0.1075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09858500957489014,
|
|
"step": 975,
|
|
"valid_targets_mean": 3246.3,
|
|
"valid_targets_min": 675
|
|
},
|
|
{
|
|
"epoch": 1.9178082191780823,
|
|
"grad_norm": 0.40603295725781186,
|
|
"learning_rate": 3.643789745598747e-05,
|
|
"loss": 0.1142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1274852752685547,
|
|
"step": 980,
|
|
"valid_targets_mean": 2795.3,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 1.927592954990215,
|
|
"grad_norm": 0.4191871755564024,
|
|
"learning_rate": 3.6382107858874486e-05,
|
|
"loss": 0.126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10585680603981018,
|
|
"step": 985,
|
|
"valid_targets_mean": 3008.0,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.9373776908023483,
|
|
"grad_norm": 0.3377742800232099,
|
|
"learning_rate": 3.6325928170052544e-05,
|
|
"loss": 0.1336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12053216248750687,
|
|
"step": 990,
|
|
"valid_targets_mean": 3640.9,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 1.9471624266144814,
|
|
"grad_norm": 0.36466290074318763,
|
|
"learning_rate": 3.626935972727568e-05,
|
|
"loss": 0.1107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1167193278670311,
|
|
"step": 995,
|
|
"valid_targets_mean": 3513.2,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 1.9569471624266144,
|
|
"grad_norm": 0.40372660130343563,
|
|
"learning_rate": 3.621240387755494e-05,
|
|
"loss": 0.1149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11789439618587494,
|
|
"step": 1000,
|
|
"valid_targets_mean": 2972.4,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 1.9667318982387476,
|
|
"grad_norm": 0.42797189481651093,
|
|
"learning_rate": 3.615506197712637e-05,
|
|
"loss": 0.1204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1452379822731018,
|
|
"step": 1005,
|
|
"valid_targets_mean": 2670.8,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 1.9765166340508806,
|
|
"grad_norm": 0.35936118436803327,
|
|
"learning_rate": 3.6097335391418634e-05,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10821876674890518,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3538.1,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 1.9863013698630136,
|
|
"grad_norm": 0.4019939735333693,
|
|
"learning_rate": 3.603922549502056e-05,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.122370645403862,
|
|
"step": 1015,
|
|
"valid_targets_mean": 2999.8,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 1.9960861056751469,
|
|
"grad_norm": 0.4079341056255008,
|
|
"learning_rate": 3.59807336716484e-05,
|
|
"loss": 0.1288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13142286241054535,
|
|
"step": 1020,
|
|
"valid_targets_mean": 2939.8,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 2.0058708414872797,
|
|
"grad_norm": 0.37927102783719413,
|
|
"learning_rate": 3.592186131411288e-05,
|
|
"loss": 0.1179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1184164509177208,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3151.8,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 2.015655577299413,
|
|
"grad_norm": 0.35567280100676857,
|
|
"learning_rate": 3.5862609824285995e-05,
|
|
"loss": 0.11,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09433744847774506,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3086.3,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 2.025440313111546,
|
|
"grad_norm": 0.3202690936990612,
|
|
"learning_rate": 3.580298061306769e-05,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08629706501960754,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3638.2,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 2.035225048923679,
|
|
"grad_norm": 0.41815661879008403,
|
|
"learning_rate": 3.574297510035222e-05,
|
|
"loss": 0.1104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13752664625644684,
|
|
"step": 1040,
|
|
"valid_targets_mean": 2789.5,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 2.045009784735812,
|
|
"grad_norm": 0.4285897024481416,
|
|
"learning_rate": 3.568259471499437e-05,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12494560331106186,
|
|
"step": 1045,
|
|
"valid_targets_mean": 2809.8,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 2.0547945205479454,
|
|
"grad_norm": 0.42321458686631774,
|
|
"learning_rate": 3.562184089477539e-05,
|
|
"loss": 0.1203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11364944279193878,
|
|
"step": 1050,
|
|
"valid_targets_mean": 3062.5,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 2.064579256360078,
|
|
"grad_norm": 0.39688700303262975,
|
|
"learning_rate": 3.556071508636879e-05,
|
|
"loss": 0.1132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10698528587818146,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3079.9,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 2.0743639921722115,
|
|
"grad_norm": 0.4363705848578071,
|
|
"learning_rate": 3.5499218745305906e-05,
|
|
"loss": 0.1163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11804713308811188,
|
|
"step": 1060,
|
|
"valid_targets_mean": 2758.2,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 2.0841487279843443,
|
|
"grad_norm": 0.33761894592833736,
|
|
"learning_rate": 3.5437353335941194e-05,
|
|
"loss": 0.0965,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08259192854166031,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3166.1,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 2.0939334637964775,
|
|
"grad_norm": 0.393251902764125,
|
|
"learning_rate": 3.537512033141739e-05,
|
|
"loss": 0.1014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10607272386550903,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3049.6,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.1037181996086107,
|
|
"grad_norm": 0.39166697829631236,
|
|
"learning_rate": 3.531252121363044e-05,
|
|
"loss": 0.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10185883939266205,
|
|
"step": 1075,
|
|
"valid_targets_mean": 2846.7,
|
|
"valid_targets_min": 577
|
|
},
|
|
{
|
|
"epoch": 2.1135029354207435,
|
|
"grad_norm": 0.5046667169575071,
|
|
"learning_rate": 3.524955747319419e-05,
|
|
"loss": 0.1168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11997954547405243,
|
|
"step": 1080,
|
|
"valid_targets_mean": 2414.1,
|
|
"valid_targets_min": 734
|
|
},
|
|
{
|
|
"epoch": 2.1232876712328768,
|
|
"grad_norm": 0.40188627311027003,
|
|
"learning_rate": 3.5186230609404895e-05,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10753701627254486,
|
|
"step": 1085,
|
|
"valid_targets_mean": 3123.8,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.1330724070450096,
|
|
"grad_norm": 0.42426388964255,
|
|
"learning_rate": 3.512254213020554e-05,
|
|
"loss": 0.1073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10298150777816772,
|
|
"step": 1090,
|
|
"valid_targets_mean": 3043.2,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.4492568368515402,
|
|
"learning_rate": 3.5058493552149917e-05,
|
|
"loss": 0.1236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13410577178001404,
|
|
"step": 1095,
|
|
"valid_targets_mean": 3618.4,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 2.152641878669276,
|
|
"grad_norm": 0.417502185720637,
|
|
"learning_rate": 3.499408640036649e-05,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12203118950128555,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3457.2,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 2.162426614481409,
|
|
"grad_norm": 0.4353056795928977,
|
|
"learning_rate": 3.492932220852214e-05,
|
|
"loss": 0.1222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10841433703899384,
|
|
"step": 1105,
|
|
"valid_targets_mean": 2355.2,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.172211350293542,
|
|
"grad_norm": 0.41782255813374414,
|
|
"learning_rate": 3.486420251878557e-05,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10531320422887802,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2684.2,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.1819960861056753,
|
|
"grad_norm": 0.32549855933739147,
|
|
"learning_rate": 3.4798728881790645e-05,
|
|
"loss": 0.1139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11060058325529099,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3967.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 2.191780821917808,
|
|
"grad_norm": 0.41464471586125,
|
|
"learning_rate": 3.4732902856599425e-05,
|
|
"loss": 0.113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11559237539768219,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2882.2,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 2.2015655577299413,
|
|
"grad_norm": 0.42718370156925883,
|
|
"learning_rate": 3.466672601066506e-05,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12083616107702255,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2868.4,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 2.2113502935420746,
|
|
"grad_norm": 0.4615524479310674,
|
|
"learning_rate": 3.460019991979448e-05,
|
|
"loss": 0.1145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12627968192100525,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2956.2,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 2.2211350293542074,
|
|
"grad_norm": 0.3793457912875511,
|
|
"learning_rate": 3.453332616811084e-05,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09939718246459961,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3136.5,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 2.2309197651663406,
|
|
"grad_norm": 0.3594596351840816,
|
|
"learning_rate": 3.446610634801583e-05,
|
|
"loss": 0.108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09593316167593002,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3030.4,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 2.2407045009784734,
|
|
"grad_norm": 0.4171021162376825,
|
|
"learning_rate": 3.439854206015172e-05,
|
|
"loss": 0.1103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1409609317779541,
|
|
"step": 1145,
|
|
"valid_targets_mean": 2861.6,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 2.2504892367906066,
|
|
"grad_norm": 0.38835562412908814,
|
|
"learning_rate": 3.4330634913363283e-05,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13764947652816772,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3205.4,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 2.26027397260274,
|
|
"grad_norm": 0.33531554597240587,
|
|
"learning_rate": 3.426238652465945e-05,
|
|
"loss": 0.1212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10017026215791702,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3359.6,
|
|
"valid_targets_min": 662
|
|
},
|
|
{
|
|
"epoch": 2.2700587084148727,
|
|
"grad_norm": 0.40441508691761036,
|
|
"learning_rate": 3.4193798519174865e-05,
|
|
"loss": 0.1094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10481183230876923,
|
|
"step": 1160,
|
|
"valid_targets_mean": 2804.2,
|
|
"valid_targets_min": 512
|
|
},
|
|
{
|
|
"epoch": 2.279843444227006,
|
|
"grad_norm": 0.3538663740998075,
|
|
"learning_rate": 3.41248725301311e-05,
|
|
"loss": 0.1102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09048883616924286,
|
|
"step": 1165,
|
|
"valid_targets_mean": 3445.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 2.2896281800391387,
|
|
"grad_norm": 0.3712507430976939,
|
|
"learning_rate": 3.405561019879784e-05,
|
|
"loss": 0.1276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09899841248989105,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3065.1,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 2.299412915851272,
|
|
"grad_norm": 0.3637032799817131,
|
|
"learning_rate": 3.398601317445375e-05,
|
|
"loss": 0.1018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08682147413492203,
|
|
"step": 1175,
|
|
"valid_targets_mean": 2818.9,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 2.309197651663405,
|
|
"grad_norm": 0.3549849338540409,
|
|
"learning_rate": 3.391608311434726e-05,
|
|
"loss": 0.1074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11768273264169693,
|
|
"step": 1180,
|
|
"valid_targets_mean": 2909.6,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 2.318982387475538,
|
|
"grad_norm": 0.4136254318220814,
|
|
"learning_rate": 3.384582168365703e-05,
|
|
"loss": 0.1199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13258326053619385,
|
|
"step": 1185,
|
|
"valid_targets_mean": 2655.4,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 2.328767123287671,
|
|
"grad_norm": 0.4216592344674509,
|
|
"learning_rate": 3.377523055545235e-05,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10435128957033157,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3124.4,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 2.3385518590998045,
|
|
"grad_norm": 0.3909841531161173,
|
|
"learning_rate": 3.370431141065329e-05,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11007117480039597,
|
|
"step": 1195,
|
|
"valid_targets_mean": 2939.0,
|
|
"valid_targets_min": 627
|
|
},
|
|
{
|
|
"epoch": 2.3483365949119372,
|
|
"grad_norm": 0.37065177231009755,
|
|
"learning_rate": 3.363306593799066e-05,
|
|
"loss": 0.1044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08931955695152283,
|
|
"step": 1200,
|
|
"valid_targets_mean": 2879.9,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 2.3581213307240705,
|
|
"grad_norm": 0.4199158543373936,
|
|
"learning_rate": 3.3561495833965824e-05,
|
|
"loss": 0.1034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09923933446407318,
|
|
"step": 1205,
|
|
"valid_targets_mean": 2743.2,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 2.3679060665362037,
|
|
"grad_norm": 0.3746622479896516,
|
|
"learning_rate": 3.3489602802810276e-05,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10282056033611298,
|
|
"step": 1210,
|
|
"valid_targets_mean": 2939.0,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 2.3776908023483365,
|
|
"grad_norm": 0.4513364760083478,
|
|
"learning_rate": 3.3417388556445084e-05,
|
|
"loss": 0.1156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1419263631105423,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2800.4,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 2.3874755381604698,
|
|
"grad_norm": 0.40657555459747047,
|
|
"learning_rate": 3.3344854814440076e-05,
|
|
"loss": 0.1218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09741845726966858,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3114.3,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 2.3972602739726026,
|
|
"grad_norm": 0.37832023349019445,
|
|
"learning_rate": 3.327200330397297e-05,
|
|
"loss": 0.1112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08784421533346176,
|
|
"step": 1225,
|
|
"valid_targets_mean": 2816.4,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.407045009784736,
|
|
"grad_norm": 0.41506010334022525,
|
|
"learning_rate": 3.319883575978818e-05,
|
|
"loss": 0.106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10869326442480087,
|
|
"step": 1230,
|
|
"valid_targets_mean": 2765.4,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 2.416829745596869,
|
|
"grad_norm": 0.4792475285631822,
|
|
"learning_rate": 3.312535392415554e-05,
|
|
"loss": 0.1082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11564777046442032,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2251.7,
|
|
"valid_targets_min": 625
|
|
},
|
|
{
|
|
"epoch": 2.426614481409002,
|
|
"grad_norm": 0.39428259324977244,
|
|
"learning_rate": 3.305155954682881e-05,
|
|
"loss": 0.1188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09808069467544556,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3245.1,
|
|
"valid_targets_min": 637
|
|
},
|
|
{
|
|
"epoch": 2.436399217221135,
|
|
"grad_norm": 0.4195848772997183,
|
|
"learning_rate": 3.297745438500402e-05,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11323357373476028,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2847.3,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 2.446183953033268,
|
|
"grad_norm": 0.42741752250777426,
|
|
"learning_rate": 3.290304020327761e-05,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10795131325721741,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2699.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 2.455968688845401,
|
|
"grad_norm": 0.40882254202245477,
|
|
"learning_rate": 3.282831877360441e-05,
|
|
"loss": 0.1166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10642804205417633,
|
|
"step": 1255,
|
|
"valid_targets_mean": 2908.2,
|
|
"valid_targets_min": 657
|
|
},
|
|
{
|
|
"epoch": 2.4657534246575343,
|
|
"grad_norm": 0.35084753480045056,
|
|
"learning_rate": 3.275329187525547e-05,
|
|
"loss": 0.1109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10106814652681351,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3006.5,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 2.475538160469667,
|
|
"grad_norm": 0.36699970096352375,
|
|
"learning_rate": 3.2677961294775686e-05,
|
|
"loss": 0.1061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09151563793420792,
|
|
"step": 1265,
|
|
"valid_targets_mean": 2794.2,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 2.4853228962818004,
|
|
"grad_norm": 0.3556386173650718,
|
|
"learning_rate": 3.2602328825941225e-05,
|
|
"loss": 0.1155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10737474262714386,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3092.9,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 2.4951076320939336,
|
|
"grad_norm": 0.3751799706125244,
|
|
"learning_rate": 3.2526396269716856e-05,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10193692892789841,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3543.3,
|
|
"valid_targets_min": 653
|
|
},
|
|
{
|
|
"epoch": 2.5048923679060664,
|
|
"grad_norm": 0.38573758460528124,
|
|
"learning_rate": 3.245016543421305e-05,
|
|
"loss": 0.1042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09200307726860046,
|
|
"step": 1280,
|
|
"valid_targets_mean": 2936.6,
|
|
"valid_targets_min": 560
|
|
},
|
|
{
|
|
"epoch": 2.5146771037181996,
|
|
"grad_norm": 0.42837841993074,
|
|
"learning_rate": 3.23736381346429e-05,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10868542641401291,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2723.4,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 2.524461839530333,
|
|
"grad_norm": 0.4050058503579518,
|
|
"learning_rate": 3.229681619327894e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14006638526916504,
|
|
"step": 1290,
|
|
"valid_targets_mean": 3020.4,
|
|
"valid_targets_min": 564
|
|
},
|
|
{
|
|
"epoch": 2.5342465753424657,
|
|
"grad_norm": 0.4201197105368221,
|
|
"learning_rate": 3.221970143940973e-05,
|
|
"loss": 0.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10334217548370361,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2442.4,
|
|
"valid_targets_min": 806
|
|
},
|
|
{
|
|
"epoch": 2.544031311154599,
|
|
"grad_norm": 0.41541097510697117,
|
|
"learning_rate": 3.2142295709296274e-05,
|
|
"loss": 0.1216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14761820435523987,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3183.7,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 2.553816046966732,
|
|
"grad_norm": 0.45131929268936527,
|
|
"learning_rate": 3.206460084612835e-05,
|
|
"loss": 0.1153,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12431158125400543,
|
|
"step": 1305,
|
|
"valid_targets_mean": 2544.8,
|
|
"valid_targets_min": 589
|
|
},
|
|
{
|
|
"epoch": 2.563600782778865,
|
|
"grad_norm": 0.39494238913307517,
|
|
"learning_rate": 3.1986618699980586e-05,
|
|
"loss": 0.1021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09494001418352127,
|
|
"step": 1310,
|
|
"valid_targets_mean": 2799.4,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 2.573385518590998,
|
|
"grad_norm": 0.32517864786117584,
|
|
"learning_rate": 3.1908351127768374e-05,
|
|
"loss": 0.1027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07930716872215271,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3285.2,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 2.583170254403131,
|
|
"grad_norm": 0.4330795967908774,
|
|
"learning_rate": 3.182979999320373e-05,
|
|
"loss": 0.1125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11279530823230743,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3316.5,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 2.592954990215264,
|
|
"grad_norm": 0.3903225044565865,
|
|
"learning_rate": 3.175096716675087e-05,
|
|
"loss": 0.1169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14655640721321106,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3319.7,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 2.602739726027397,
|
|
"grad_norm": 0.3349866071473293,
|
|
"learning_rate": 3.1671854525581625e-05,
|
|
"loss": 0.1127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08214075118303299,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3446.7,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 2.6125244618395302,
|
|
"grad_norm": 0.4197218725607564,
|
|
"learning_rate": 3.159246395353086e-05,
|
|
"loss": 0.1064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.110893614590168,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2602.2,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 2.6223091976516635,
|
|
"grad_norm": 0.39357060094694973,
|
|
"learning_rate": 3.151279734105151e-05,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11349768191576004,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2960.3,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 2.6320939334637963,
|
|
"grad_norm": 0.3694651983894364,
|
|
"learning_rate": 3.1432856585169576e-05,
|
|
"loss": 0.1012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11032050102949142,
|
|
"step": 1345,
|
|
"valid_targets_mean": 3003.5,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 2.6418786692759295,
|
|
"grad_norm": 0.3480652593011676,
|
|
"learning_rate": 3.135264358943903e-05,
|
|
"loss": 0.1096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11561337858438492,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3375.7,
|
|
"valid_targets_min": 535
|
|
},
|
|
{
|
|
"epoch": 2.6516634050880628,
|
|
"grad_norm": 0.42225286123376454,
|
|
"learning_rate": 3.127216026389641e-05,
|
|
"loss": 0.107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11898268759250641,
|
|
"step": 1355,
|
|
"valid_targets_mean": 2994.9,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 2.6614481409001955,
|
|
"grad_norm": 0.3668356779097436,
|
|
"learning_rate": 3.1191408525015344e-05,
|
|
"loss": 0.1091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09555927664041519,
|
|
"step": 1360,
|
|
"valid_targets_mean": 2917.9,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 2.671232876712329,
|
|
"grad_norm": 0.4792096383507245,
|
|
"learning_rate": 3.1110390295660976e-05,
|
|
"loss": 0.117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1280497908592224,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2813.3,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 2.681017612524462,
|
|
"grad_norm": 0.4861908230527638,
|
|
"learning_rate": 3.10291075050441e-05,
|
|
"loss": 0.1117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15495100617408752,
|
|
"step": 1370,
|
|
"valid_targets_mean": 2758.3,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 2.690802348336595,
|
|
"grad_norm": 0.3450995082913833,
|
|
"learning_rate": 3.094756208867528e-05,
|
|
"loss": 0.0983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08989180624485016,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3410.1,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 2.700587084148728,
|
|
"grad_norm": 0.4008397494171553,
|
|
"learning_rate": 3.086575598831873e-05,
|
|
"loss": 0.1127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10978572070598602,
|
|
"step": 1380,
|
|
"valid_targets_mean": 2615.2,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 2.7103718199608613,
|
|
"grad_norm": 0.3503551086666885,
|
|
"learning_rate": 3.078369115194609e-05,
|
|
"loss": 0.1055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0917101576924324,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3238.8,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 2.720156555772994,
|
|
"grad_norm": 0.3879357205588297,
|
|
"learning_rate": 3.0701369533690035e-05,
|
|
"loss": 0.0958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09571725130081177,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2725.3,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 2.7299412915851273,
|
|
"grad_norm": 0.33036722394947055,
|
|
"learning_rate": 3.061879309379774e-05,
|
|
"loss": 0.1062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10175631940364838,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3565.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 2.73972602739726,
|
|
"grad_norm": 0.35924320904735907,
|
|
"learning_rate": 3.053596379858422e-05,
|
|
"loss": 0.1178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10558217763900757,
|
|
"step": 1400,
|
|
"valid_targets_mean": 2973.7,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 2.7495107632093934,
|
|
"grad_norm": 0.3803671568191816,
|
|
"learning_rate": 3.045288362038549e-05,
|
|
"loss": 0.103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10167603194713593,
|
|
"step": 1405,
|
|
"valid_targets_mean": 2803.1,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 2.759295499021526,
|
|
"grad_norm": 0.3653145793525553,
|
|
"learning_rate": 3.0369554537511607e-05,
|
|
"loss": 0.1024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1083512157201767,
|
|
"step": 1410,
|
|
"valid_targets_mean": 3030.9,
|
|
"valid_targets_min": 906
|
|
},
|
|
{
|
|
"epoch": 2.7690802348336594,
|
|
"grad_norm": 0.3190701118141104,
|
|
"learning_rate": 3.028597853419955e-05,
|
|
"loss": 0.1065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09129766374826431,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3725.6,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 2.7788649706457926,
|
|
"grad_norm": 0.3889832014166842,
|
|
"learning_rate": 3.0202157600565997e-05,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1031104102730751,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2825.6,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 2.7886497064579254,
|
|
"grad_norm": 0.37801201319116307,
|
|
"learning_rate": 3.0118093732559914e-05,
|
|
"loss": 0.1018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1037970706820488,
|
|
"step": 1425,
|
|
"valid_targets_mean": 2680.2,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 2.7984344422700587,
|
|
"grad_norm": 0.3817751239677767,
|
|
"learning_rate": 3.0033788931915028e-05,
|
|
"loss": 0.1095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10228165984153748,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2815.8,
|
|
"valid_targets_min": 1005
|
|
},
|
|
{
|
|
"epoch": 2.808219178082192,
|
|
"grad_norm": 0.379648144825907,
|
|
"learning_rate": 2.9949245206102185e-05,
|
|
"loss": 0.1231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10076449811458588,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3059.8,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 2.8180039138943247,
|
|
"grad_norm": 0.4402766625587281,
|
|
"learning_rate": 2.986446456828151e-05,
|
|
"loss": 0.1181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11402945220470428,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2989.2,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 2.827788649706458,
|
|
"grad_norm": 0.37883497798659016,
|
|
"learning_rate": 2.977944903725451e-05,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09855394065380096,
|
|
"step": 1445,
|
|
"valid_targets_mean": 2883.6,
|
|
"valid_targets_min": 566
|
|
},
|
|
{
|
|
"epoch": 2.837573385518591,
|
|
"grad_norm": 0.36148811298502337,
|
|
"learning_rate": 2.9694200637415967e-05,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09221038967370987,
|
|
"step": 1450,
|
|
"valid_targets_mean": 2968.4,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 2.847358121330724,
|
|
"grad_norm": 0.3566538664567121,
|
|
"learning_rate": 2.9608721398705768e-05,
|
|
"loss": 0.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08568309247493744,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3442.5,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.37633017266925395,
|
|
"learning_rate": 2.952301335656053e-05,
|
|
"loss": 0.1122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09396082907915115,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2965.3,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 2.8669275929549904,
|
|
"grad_norm": 0.35848214949952517,
|
|
"learning_rate": 2.943707855186515e-05,
|
|
"loss": 0.1089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08908859640359879,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3153.4,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 2.8767123287671232,
|
|
"grad_norm": 0.3822152685430384,
|
|
"learning_rate": 2.9350919030904227e-05,
|
|
"loss": 0.1186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14029645919799805,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3109.6,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 2.8864970645792565,
|
|
"grad_norm": 0.3413018378399703,
|
|
"learning_rate": 2.926453684531329e-05,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08753784000873566,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3042.3,
|
|
"valid_targets_min": 820
|
|
},
|
|
{
|
|
"epoch": 2.8962818003913893,
|
|
"grad_norm": 0.4631072227207729,
|
|
"learning_rate": 2.917793405203001e-05,
|
|
"loss": 0.1037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12335332483053207,
|
|
"step": 1480,
|
|
"valid_targets_mean": 2351.6,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 2.9060665362035225,
|
|
"grad_norm": 0.41434492251896626,
|
|
"learning_rate": 2.9091112713245136e-05,
|
|
"loss": 0.1054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12784239649772644,
|
|
"step": 1485,
|
|
"valid_targets_mean": 2976.8,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 2.9158512720156553,
|
|
"grad_norm": 0.4359166147369341,
|
|
"learning_rate": 2.9004074896353468e-05,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15002021193504333,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2503.5,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 2.9256360078277885,
|
|
"grad_norm": 0.3997076491880868,
|
|
"learning_rate": 2.8916822673904584e-05,
|
|
"loss": 0.1035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11894476413726807,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3113.6,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 2.935420743639922,
|
|
"grad_norm": 0.4175582010143018,
|
|
"learning_rate": 2.882935812355349e-05,
|
|
"loss": 0.1205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17195875942707062,
|
|
"step": 1500,
|
|
"valid_targets_mean": 2791.0,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 2.9452054794520546,
|
|
"grad_norm": 0.3307412019430931,
|
|
"learning_rate": 2.8741683328011184e-05,
|
|
"loss": 0.102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09898516535758972,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3485.9,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 2.954990215264188,
|
|
"grad_norm": 0.3459910596152458,
|
|
"learning_rate": 2.8653800374995008e-05,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09195069968700409,
|
|
"step": 1510,
|
|
"valid_targets_mean": 2914.2,
|
|
"valid_targets_min": 817
|
|
},
|
|
{
|
|
"epoch": 2.964774951076321,
|
|
"grad_norm": 0.35273324567641856,
|
|
"learning_rate": 2.8565711357178964e-05,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08810217678546906,
|
|
"step": 1515,
|
|
"valid_targets_mean": 3367.4,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 2.974559686888454,
|
|
"grad_norm": 0.36045123569179105,
|
|
"learning_rate": 2.8477418372143897e-05,
|
|
"loss": 0.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10645238310098648,
|
|
"step": 1520,
|
|
"valid_targets_mean": 3010.5,
|
|
"valid_targets_min": 834
|
|
},
|
|
{
|
|
"epoch": 2.984344422700587,
|
|
"grad_norm": 0.36580872832620526,
|
|
"learning_rate": 2.838892352232752e-05,
|
|
"loss": 0.1126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09797756373882294,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3353.2,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 2.9941291585127203,
|
|
"grad_norm": 0.3909700612235282,
|
|
"learning_rate": 2.8300228914974365e-05,
|
|
"loss": 0.1079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.097335085272789,
|
|
"step": 1530,
|
|
"valid_targets_mean": 2301.2,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 3.003913894324853,
|
|
"grad_norm": 0.39960294073850516,
|
|
"learning_rate": 2.82113366620856e-05,
|
|
"loss": 0.1194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.098995640873909,
|
|
"step": 1535,
|
|
"valid_targets_mean": 2721.5,
|
|
"valid_targets_min": 751
|
|
},
|
|
{
|
|
"epoch": 3.0136986301369864,
|
|
"grad_norm": 0.4780867112307395,
|
|
"learning_rate": 2.812224888036875e-05,
|
|
"loss": 0.1031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13829249143600464,
|
|
"step": 1540,
|
|
"valid_targets_mean": 2505.8,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 3.0234833659491196,
|
|
"grad_norm": 0.3986599206779102,
|
|
"learning_rate": 2.8032967691187258e-05,
|
|
"loss": 0.1106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10849355161190033,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3316.6,
|
|
"valid_targets_min": 1185
|
|
},
|
|
{
|
|
"epoch": 3.0332681017612524,
|
|
"grad_norm": 0.4161663796512601,
|
|
"learning_rate": 2.794349522051003e-05,
|
|
"loss": 0.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09069333970546722,
|
|
"step": 1550,
|
|
"valid_targets_mean": 2789.6,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 3.0430528375733856,
|
|
"grad_norm": 0.4410690914740329,
|
|
"learning_rate": 2.785383359886075e-05,
|
|
"loss": 0.0989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09545639902353287,
|
|
"step": 1555,
|
|
"valid_targets_mean": 2650.6,
|
|
"valid_targets_min": 574
|
|
},
|
|
{
|
|
"epoch": 3.0528375733855184,
|
|
"grad_norm": 0.5575943198695258,
|
|
"learning_rate": 2.7763984961267202e-05,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09871990978717804,
|
|
"step": 1560,
|
|
"valid_targets_mean": 2915.4,
|
|
"valid_targets_min": 468
|
|
},
|
|
{
|
|
"epoch": 3.0626223091976517,
|
|
"grad_norm": 0.3917830696660751,
|
|
"learning_rate": 2.767395144721038e-05,
|
|
"loss": 0.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08617722988128662,
|
|
"step": 1565,
|
|
"valid_targets_mean": 2731.6,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 3.072407045009785,
|
|
"grad_norm": 0.5227822005219008,
|
|
"learning_rate": 2.758373520057359e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16681335866451263,
|
|
"step": 1570,
|
|
"valid_targets_mean": 2202.9,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 3.0821917808219177,
|
|
"grad_norm": 0.4481983579473897,
|
|
"learning_rate": 2.7493338369591355e-05,
|
|
"loss": 0.1025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10602610558271408,
|
|
"step": 1575,
|
|
"valid_targets_mean": 2457.4,
|
|
"valid_targets_min": 658
|
|
},
|
|
{
|
|
"epoch": 3.091976516634051,
|
|
"grad_norm": 0.41182336536618824,
|
|
"learning_rate": 2.7402763106798295e-05,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09512235224246979,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2693.2,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 3.1017612524461837,
|
|
"grad_norm": 0.4111689440731356,
|
|
"learning_rate": 2.731201156897785e-05,
|
|
"loss": 0.088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08959066867828369,
|
|
"step": 1585,
|
|
"valid_targets_mean": 2895.9,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 3.111545988258317,
|
|
"grad_norm": 0.45106807085617473,
|
|
"learning_rate": 2.722108591711095e-05,
|
|
"loss": 0.1028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10399621725082397,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2582.5,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 3.12133072407045,
|
|
"grad_norm": 0.40429829012168483,
|
|
"learning_rate": 2.7129988316324522e-05,
|
|
"loss": 0.1017,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12241924554109573,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2891.0,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 3.131115459882583,
|
|
"grad_norm": 0.41002787703696647,
|
|
"learning_rate": 2.7038720935839955e-05,
|
|
"loss": 0.1067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10877316445112228,
|
|
"step": 1600,
|
|
"valid_targets_mean": 2865.1,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 3.1409001956947162,
|
|
"grad_norm": 0.46037341512154545,
|
|
"learning_rate": 2.6947285948921447e-05,
|
|
"loss": 0.1013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10007884353399277,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2481.1,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 3.1506849315068495,
|
|
"grad_norm": 5.110272464423244,
|
|
"learning_rate": 2.6855685532824236e-05,
|
|
"loss": 0.093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11062052845954895,
|
|
"step": 1610,
|
|
"valid_targets_mean": 2889.8,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 3.1604696673189823,
|
|
"grad_norm": 0.4114215687770999,
|
|
"learning_rate": 2.676392186874279e-05,
|
|
"loss": 0.0877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09809882938861847,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2879.0,
|
|
"valid_targets_min": 650
|
|
},
|
|
{
|
|
"epoch": 3.1702544031311155,
|
|
"grad_norm": 0.3996926275976193,
|
|
"learning_rate": 2.6671997141758827e-05,
|
|
"loss": 0.0927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12717470526695251,
|
|
"step": 1620,
|
|
"valid_targets_mean": 2880.8,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 3.1800391389432487,
|
|
"grad_norm": 0.3639885387428381,
|
|
"learning_rate": 2.6579913540789322e-05,
|
|
"loss": 0.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07497823983430862,
|
|
"step": 1625,
|
|
"valid_targets_mean": 2640.9,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 3.1898238747553815,
|
|
"grad_norm": 0.3703997916696169,
|
|
"learning_rate": 2.6487673258534354e-05,
|
|
"loss": 0.0885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0767689049243927,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3069.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 3.1996086105675148,
|
|
"grad_norm": 0.4413852282140088,
|
|
"learning_rate": 2.639527849142491e-05,
|
|
"loss": 0.1015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10007453709840775,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2802.6,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 3.2093933463796476,
|
|
"grad_norm": 0.44437000715097036,
|
|
"learning_rate": 2.630273143957058e-05,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09787821769714355,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2599.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 3.219178082191781,
|
|
"grad_norm": 0.4598260445599975,
|
|
"learning_rate": 2.6210034306707165e-05,
|
|
"loss": 0.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09357476234436035,
|
|
"step": 1645,
|
|
"valid_targets_mean": 2352.2,
|
|
"valid_targets_min": 659
|
|
},
|
|
{
|
|
"epoch": 3.228962818003914,
|
|
"grad_norm": 0.42650769322905097,
|
|
"learning_rate": 2.6117189300144212e-05,
|
|
"loss": 0.1012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13607531785964966,
|
|
"step": 1650,
|
|
"valid_targets_mean": 2959.1,
|
|
"valid_targets_min": 684
|
|
},
|
|
{
|
|
"epoch": 3.238747553816047,
|
|
"grad_norm": 0.3954567699571377,
|
|
"learning_rate": 2.6024198630712444e-05,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08541727066040039,
|
|
"step": 1655,
|
|
"valid_targets_mean": 2900.4,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.24853228962818,
|
|
"grad_norm": 0.3799054478005135,
|
|
"learning_rate": 2.5931064512711094e-05,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1218566820025444,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3502.8,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 3.258317025440313,
|
|
"grad_norm": 0.39283413314321314,
|
|
"learning_rate": 2.583778916385522e-05,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08874887228012085,
|
|
"step": 1665,
|
|
"valid_targets_mean": 2809.4,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 3.268101761252446,
|
|
"grad_norm": 0.4381850178027912,
|
|
"learning_rate": 2.5744374805222892e-05,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11543179303407669,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3048.3,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 3.2778864970645794,
|
|
"grad_norm": 0.36917810068552503,
|
|
"learning_rate": 2.565082366120226e-05,
|
|
"loss": 0.0929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08536778390407562,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3376.4,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 3.287671232876712,
|
|
"grad_norm": 0.4385678853719193,
|
|
"learning_rate": 2.555713795943864e-05,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11536893248558044,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2739.6,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 3.2974559686888454,
|
|
"grad_norm": 0.393601306502803,
|
|
"learning_rate": 2.5463319930781415e-05,
|
|
"loss": 0.089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09885405004024506,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3358.1,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 3.3072407045009786,
|
|
"grad_norm": 0.36006488816751797,
|
|
"learning_rate": 2.5369371809230977e-05,
|
|
"loss": 0.0903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08712077885866165,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3766.2,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 3.3170254403131114,
|
|
"grad_norm": 0.37713092406723087,
|
|
"learning_rate": 2.527529583188548e-05,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09263085573911667,
|
|
"step": 1695,
|
|
"valid_targets_mean": 2927.4,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 3.3268101761252447,
|
|
"grad_norm": 0.414881476450131,
|
|
"learning_rate": 2.518109423888758e-05,
|
|
"loss": 0.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09965046495199203,
|
|
"step": 1700,
|
|
"valid_targets_mean": 2728.1,
|
|
"valid_targets_min": 805
|
|
},
|
|
{
|
|
"epoch": 3.336594911937378,
|
|
"grad_norm": 0.3276882930558519,
|
|
"learning_rate": 2.5086769273371122e-05,
|
|
"loss": 0.0902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07712644338607788,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3432.0,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 3.3463796477495107,
|
|
"grad_norm": 0.41120960483108754,
|
|
"learning_rate": 2.499232318140769e-05,
|
|
"loss": 0.0963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09201785922050476,
|
|
"step": 1710,
|
|
"valid_targets_mean": 2984.1,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 3.356164383561644,
|
|
"grad_norm": 0.4114515415368508,
|
|
"learning_rate": 2.4897758211953135e-05,
|
|
"loss": 0.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09025739133358002,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2741.2,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 3.3659491193737767,
|
|
"grad_norm": 0.3677616557466712,
|
|
"learning_rate": 2.4803076616794038e-05,
|
|
"loss": 0.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08147165179252625,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3088.6,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 3.37573385518591,
|
|
"grad_norm": 0.44042021167276707,
|
|
"learning_rate": 2.4708280650494067e-05,
|
|
"loss": 0.0895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09253993630409241,
|
|
"step": 1725,
|
|
"valid_targets_mean": 2585.8,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 3.385518590998043,
|
|
"grad_norm": 0.4215992681842096,
|
|
"learning_rate": 2.461337257034031e-05,
|
|
"loss": 0.0982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09265173971652985,
|
|
"step": 1730,
|
|
"valid_targets_mean": 2482.9,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 3.395303326810176,
|
|
"grad_norm": 0.4142825675183947,
|
|
"learning_rate": 2.4518354636289523e-05,
|
|
"loss": 0.0877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09479625523090363,
|
|
"step": 1735,
|
|
"valid_targets_mean": 2704.0,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 3.4050880626223092,
|
|
"grad_norm": 0.4359979529870605,
|
|
"learning_rate": 2.442322911091428e-05,
|
|
"loss": 0.0986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0992426872253418,
|
|
"step": 1740,
|
|
"valid_targets_mean": 2675.4,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.414872798434442,
|
|
"grad_norm": 0.37594536492115166,
|
|
"learning_rate": 2.4327998259349165e-05,
|
|
"loss": 0.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09966818988323212,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3095.5,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 3.4246575342465753,
|
|
"grad_norm": 0.40001334846317066,
|
|
"learning_rate": 2.4232664349236757e-05,
|
|
"loss": 0.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0902199074625969,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2817.9,
|
|
"valid_targets_min": 635
|
|
},
|
|
{
|
|
"epoch": 3.4344422700587085,
|
|
"grad_norm": 0.4006625889271972,
|
|
"learning_rate": 2.413722965067371e-05,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09919355064630508,
|
|
"step": 1755,
|
|
"valid_targets_mean": 2923.6,
|
|
"valid_targets_min": 636
|
|
},
|
|
{
|
|
"epoch": 3.4442270058708413,
|
|
"grad_norm": 0.3812585798186099,
|
|
"learning_rate": 2.404169643615663e-05,
|
|
"loss": 0.0899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09447631239891052,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3224.1,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 3.4540117416829745,
|
|
"grad_norm": 0.393226956942131,
|
|
"learning_rate": 2.3946066980528e-05,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08800017833709717,
|
|
"step": 1765,
|
|
"valid_targets_mean": 2783.4,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 3.4637964774951078,
|
|
"grad_norm": 0.4130522975010669,
|
|
"learning_rate": 2.3850343560922008e-05,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09055754542350769,
|
|
"step": 1770,
|
|
"valid_targets_mean": 2447.2,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 3.4735812133072406,
|
|
"grad_norm": 0.37842294460082415,
|
|
"learning_rate": 2.37545284567103e-05,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0931730717420578,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3382.8,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.483365949119374,
|
|
"grad_norm": 0.39858769418626966,
|
|
"learning_rate": 2.3658623949447735e-05,
|
|
"loss": 0.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09716151654720306,
|
|
"step": 1780,
|
|
"valid_targets_mean": 2783.8,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 3.493150684931507,
|
|
"grad_norm": 0.3587757194116961,
|
|
"learning_rate": 2.3562632322818047e-05,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08153725415468216,
|
|
"step": 1785,
|
|
"valid_targets_mean": 3316.6,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 3.50293542074364,
|
|
"grad_norm": 0.5073856878677895,
|
|
"learning_rate": 2.346655586257945e-05,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12582001090049744,
|
|
"step": 1790,
|
|
"valid_targets_mean": 2520.9,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 3.512720156555773,
|
|
"grad_norm": 0.3824304763924219,
|
|
"learning_rate": 2.3370396856510228e-05,
|
|
"loss": 0.0915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09068600833415985,
|
|
"step": 1795,
|
|
"valid_targets_mean": 3130.3,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 3.5225048923679063,
|
|
"grad_norm": 0.33517308415172126,
|
|
"learning_rate": 2.3274157594354244e-05,
|
|
"loss": 0.1002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07240046560764313,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3421.6,
|
|
"valid_targets_min": 1003
|
|
},
|
|
{
|
|
"epoch": 3.532289628180039,
|
|
"grad_norm": 0.4151492239970627,
|
|
"learning_rate": 2.317784036776643e-05,
|
|
"loss": 0.095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10990441590547562,
|
|
"step": 1805,
|
|
"valid_targets_mean": 2744.4,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 3.5420743639921723,
|
|
"grad_norm": 0.3897935534208523,
|
|
"learning_rate": 2.308144747025822e-05,
|
|
"loss": 0.1068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09307827055454254,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3265.9,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 3.551859099804305,
|
|
"grad_norm": 0.43581519464806656,
|
|
"learning_rate": 2.2984981197142903e-05,
|
|
"loss": 0.0993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1167551726102829,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2591.2,
|
|
"valid_targets_min": 836
|
|
},
|
|
{
|
|
"epoch": 3.5616438356164384,
|
|
"grad_norm": 0.386659833532582,
|
|
"learning_rate": 2.2888443845481014e-05,
|
|
"loss": 0.0933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09008432179689407,
|
|
"step": 1820,
|
|
"valid_targets_mean": 3245.9,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.3543818030871814,
|
|
"learning_rate": 2.2791837714025605e-05,
|
|
"loss": 0.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09043369442224503,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3304.0,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 3.5812133072407044,
|
|
"grad_norm": 0.438729662155011,
|
|
"learning_rate": 2.269516510316753e-05,
|
|
"loss": 0.1043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09659003466367722,
|
|
"step": 1830,
|
|
"valid_targets_mean": 2356.2,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 3.5909980430528377,
|
|
"grad_norm": 0.4367444363023162,
|
|
"learning_rate": 2.2598428314880626e-05,
|
|
"loss": 0.1196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1389397382736206,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2930.7,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 3.6007827788649704,
|
|
"grad_norm": 0.3533335852440453,
|
|
"learning_rate": 2.250162965266696e-05,
|
|
"loss": 0.0959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08077824115753174,
|
|
"step": 1840,
|
|
"valid_targets_mean": 3471.1,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 3.6105675146771037,
|
|
"grad_norm": 0.412577131838221,
|
|
"learning_rate": 2.2404771421501924e-05,
|
|
"loss": 0.1041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12000207602977753,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3292.5,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 3.620352250489237,
|
|
"grad_norm": 0.34314515066987156,
|
|
"learning_rate": 2.230785592777938e-05,
|
|
"loss": 0.1096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0764012485742569,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3464.9,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 3.6301369863013697,
|
|
"grad_norm": 0.5021484175858066,
|
|
"learning_rate": 2.2210885479256713e-05,
|
|
"loss": 0.101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0931086540222168,
|
|
"step": 1855,
|
|
"valid_targets_mean": 2372.5,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 3.639921722113503,
|
|
"grad_norm": 0.418925470817628,
|
|
"learning_rate": 2.2113862384999908e-05,
|
|
"loss": 0.0962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09805671125650406,
|
|
"step": 1860,
|
|
"valid_targets_mean": 2864.6,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 3.649706457925636,
|
|
"grad_norm": 0.40788258413865075,
|
|
"learning_rate": 2.2016788955328555e-05,
|
|
"loss": 0.0938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08214108645915985,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3143.8,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 3.659491193737769,
|
|
"grad_norm": 0.39469198310409365,
|
|
"learning_rate": 2.191966750176083e-05,
|
|
"loss": 0.0949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09547490626573563,
|
|
"step": 1870,
|
|
"valid_targets_mean": 3034.0,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 3.6692759295499022,
|
|
"grad_norm": 0.45914037172512734,
|
|
"learning_rate": 2.182250033695846e-05,
|
|
"loss": 0.1033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11296926438808441,
|
|
"step": 1875,
|
|
"valid_targets_mean": 2711.8,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 3.6790606653620355,
|
|
"grad_norm": 0.3475562297830893,
|
|
"learning_rate": 2.1725289774671638e-05,
|
|
"loss": 0.0995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07544345408678055,
|
|
"step": 1880,
|
|
"valid_targets_mean": 3378.5,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 3.6888454011741683,
|
|
"grad_norm": 0.3326326001204827,
|
|
"learning_rate": 2.1628038129683957e-05,
|
|
"loss": 0.0934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0732976496219635,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3177.5,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 3.6986301369863015,
|
|
"grad_norm": 0.4293275842467327,
|
|
"learning_rate": 2.1530747717757263e-05,
|
|
"loss": 0.1007,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1031743511557579,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3229.9,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 3.7084148727984343,
|
|
"grad_norm": 0.4391241516655618,
|
|
"learning_rate": 2.1433420855576527e-05,
|
|
"loss": 0.1078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11853335797786713,
|
|
"step": 1895,
|
|
"valid_targets_mean": 2822.1,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 3.7181996086105675,
|
|
"grad_norm": 0.33615772425302315,
|
|
"learning_rate": 2.1336059860694672e-05,
|
|
"loss": 0.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07403817027807236,
|
|
"step": 1900,
|
|
"valid_targets_mean": 3502.2,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 3.7279843444227003,
|
|
"grad_norm": 0.4432964958725013,
|
|
"learning_rate": 2.12386670514774e-05,
|
|
"loss": 0.1045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12059329450130463,
|
|
"step": 1905,
|
|
"valid_targets_mean": 2732.8,
|
|
"valid_targets_min": 1022
|
|
},
|
|
{
|
|
"epoch": 3.7377690802348336,
|
|
"grad_norm": 0.4070557595034096,
|
|
"learning_rate": 2.1141244747047966e-05,
|
|
"loss": 0.0951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1020825058221817,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2716.7,
|
|
"valid_targets_min": 727
|
|
},
|
|
{
|
|
"epoch": 3.747553816046967,
|
|
"grad_norm": 0.457268061002794,
|
|
"learning_rate": 2.1043795267231966e-05,
|
|
"loss": 0.0961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11039022356271744,
|
|
"step": 1915,
|
|
"valid_targets_mean": 2666.3,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 3.7573385518590996,
|
|
"grad_norm": 0.4136493991453706,
|
|
"learning_rate": 2.094632093250212e-05,
|
|
"loss": 0.091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08572959899902344,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3254.5,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 3.767123287671233,
|
|
"grad_norm": 0.32806235850432924,
|
|
"learning_rate": 2.084882406392297e-05,
|
|
"loss": 0.0888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07594846934080124,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3780.8,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 3.776908023483366,
|
|
"grad_norm": 0.45759987074773256,
|
|
"learning_rate": 2.0751306983095643e-05,
|
|
"loss": 0.0906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12067622691392899,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2477.0,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 3.786692759295499,
|
|
"grad_norm": 0.3576601656361029,
|
|
"learning_rate": 2.065377201210256e-05,
|
|
"loss": 0.0958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08292445540428162,
|
|
"step": 1935,
|
|
"valid_targets_mean": 3148.9,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 3.796477495107632,
|
|
"grad_norm": 0.3888288020581319,
|
|
"learning_rate": 2.0556221473452148e-05,
|
|
"loss": 0.097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0956001952290535,
|
|
"step": 1940,
|
|
"valid_targets_mean": 3289.1,
|
|
"valid_targets_min": 791
|
|
},
|
|
{
|
|
"epoch": 3.8062622309197653,
|
|
"grad_norm": 0.3536271628449205,
|
|
"learning_rate": 2.0458657690023525e-05,
|
|
"loss": 0.1016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08662386238574982,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3108.9,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 3.816046966731898,
|
|
"grad_norm": 0.4445545144675986,
|
|
"learning_rate": 2.036108298501121e-05,
|
|
"loss": 0.0974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10066284239292145,
|
|
"step": 1950,
|
|
"valid_targets_mean": 2228.6,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 3.8258317025440314,
|
|
"grad_norm": 0.3486495806571903,
|
|
"learning_rate": 2.026349968186977e-05,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08311164379119873,
|
|
"step": 1955,
|
|
"valid_targets_mean": 2806.6,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 3.8356164383561646,
|
|
"grad_norm": 0.41762766327312373,
|
|
"learning_rate": 2.0165910104258518e-05,
|
|
"loss": 0.0952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09838832169771194,
|
|
"step": 1960,
|
|
"valid_targets_mean": 2575.5,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 3.8454011741682974,
|
|
"grad_norm": 0.4116450919445935,
|
|
"learning_rate": 2.0068316575986185e-05,
|
|
"loss": 0.0977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09968436509370804,
|
|
"step": 1965,
|
|
"valid_targets_mean": 2753.2,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 3.8551859099804306,
|
|
"grad_norm": 0.44362431242875416,
|
|
"learning_rate": 1.997072142095554e-05,
|
|
"loss": 0.0975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10917310416698456,
|
|
"step": 1970,
|
|
"valid_targets_mean": 2599.4,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 3.8649706457925634,
|
|
"grad_norm": 0.36533007507160525,
|
|
"learning_rate": 1.9873126963108136e-05,
|
|
"loss": 0.0909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08436676114797592,
|
|
"step": 1975,
|
|
"valid_targets_mean": 2983.3,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 3.8747553816046967,
|
|
"grad_norm": 0.3873930571500393,
|
|
"learning_rate": 1.9775535526368896e-05,
|
|
"loss": 0.1085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12226013839244843,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3220.3,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 3.8845401174168295,
|
|
"grad_norm": 0.3982111947251814,
|
|
"learning_rate": 1.9677949434590807e-05,
|
|
"loss": 0.0888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09624812752008438,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3241.6,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 3.8943248532289627,
|
|
"grad_norm": 0.4262008952175965,
|
|
"learning_rate": 1.9580371011499587e-05,
|
|
"loss": 0.099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0822620540857315,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2368.4,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 3.904109589041096,
|
|
"grad_norm": 0.4735174382000528,
|
|
"learning_rate": 1.9482802580638346e-05,
|
|
"loss": 0.0986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11047929525375366,
|
|
"step": 1995,
|
|
"valid_targets_mean": 2749.8,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 3.9138943248532287,
|
|
"grad_norm": 0.44187199238337815,
|
|
"learning_rate": 1.9385246465312252e-05,
|
|
"loss": 0.1061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10588258504867554,
|
|
"step": 2000,
|
|
"valid_targets_mean": 2721.5,
|
|
"valid_targets_min": 711
|
|
},
|
|
{
|
|
"epoch": 3.923679060665362,
|
|
"grad_norm": 0.44168863210644604,
|
|
"learning_rate": 1.928770498853323e-05,
|
|
"loss": 0.0954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09524992108345032,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3425.9,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 3.9334637964774952,
|
|
"grad_norm": 0.37881242527643244,
|
|
"learning_rate": 1.9190180472964614e-05,
|
|
"loss": 0.0957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09194761514663696,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2941.8,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 3.943248532289628,
|
|
"grad_norm": 0.443444952622372,
|
|
"learning_rate": 1.9092675240865863e-05,
|
|
"loss": 0.0992,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1167190670967102,
|
|
"step": 2015,
|
|
"valid_targets_mean": 2865.1,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 3.9530332681017613,
|
|
"grad_norm": 0.40398307970988073,
|
|
"learning_rate": 1.899519161403726e-05,
|
|
"loss": 0.1158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11057884991168976,
|
|
"step": 2020,
|
|
"valid_targets_mean": 2692.5,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 3.9628180039138945,
|
|
"grad_norm": 0.35462175087471715,
|
|
"learning_rate": 1.8897731913764624e-05,
|
|
"loss": 0.0921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08501636981964111,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3447.5,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 3.9726027397260273,
|
|
"grad_norm": 0.40113304138042305,
|
|
"learning_rate": 1.880029846076402e-05,
|
|
"loss": 0.0969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0958636999130249,
|
|
"step": 2030,
|
|
"valid_targets_mean": 2927.4,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.9823874755381605,
|
|
"grad_norm": 0.3499095157228217,
|
|
"learning_rate": 1.8702893575126522e-05,
|
|
"loss": 0.0903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08162295818328857,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3658.2,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 3.9921722113502938,
|
|
"grad_norm": 0.389382700380517,
|
|
"learning_rate": 1.8605519576262957e-05,
|
|
"loss": 0.0915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09373045712709427,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3264.7,
|
|
"valid_targets_min": 771
|
|
},
|
|
{
|
|
"epoch": 4.001956947162427,
|
|
"grad_norm": 0.33347090417729486,
|
|
"learning_rate": 1.850817878284867e-05,
|
|
"loss": 0.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07215394079685211,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3224.1,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 4.011741682974559,
|
|
"grad_norm": 0.4279822957204113,
|
|
"learning_rate": 1.8410873512768318e-05,
|
|
"loss": 0.0842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10365812480449677,
|
|
"step": 2050,
|
|
"valid_targets_mean": 2869.9,
|
|
"valid_targets_min": 486
|
|
},
|
|
{
|
|
"epoch": 4.021526418786693,
|
|
"grad_norm": 0.5174724407207372,
|
|
"learning_rate": 1.831360608306068e-05,
|
|
"loss": 0.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09613613039255142,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2579.1,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 4.031311154598826,
|
|
"grad_norm": 0.4758726038547127,
|
|
"learning_rate": 1.8216378809863465e-05,
|
|
"loss": 0.0751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08645018935203552,
|
|
"step": 2060,
|
|
"valid_targets_mean": 2447.8,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 4.041095890410959,
|
|
"grad_norm": 0.4569397341530332,
|
|
"learning_rate": 1.8119194008358187e-05,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1112334281206131,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3251.8,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 4.050880626223092,
|
|
"grad_norm": 0.40211824561355514,
|
|
"learning_rate": 1.8022053992715007e-05,
|
|
"loss": 0.0798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08205370604991913,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3019.1,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 4.060665362035225,
|
|
"grad_norm": 0.43322593298423395,
|
|
"learning_rate": 1.7924961076037655e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07359033077955246,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2844.8,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 4.070450097847358,
|
|
"grad_norm": 0.4406952972720065,
|
|
"learning_rate": 1.782791757030833e-05,
|
|
"loss": 0.0892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08964337408542633,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2993.1,
|
|
"valid_targets_min": 704
|
|
},
|
|
{
|
|
"epoch": 4.080234833659492,
|
|
"grad_norm": 0.41185891067535985,
|
|
"learning_rate": 1.773092578633265e-05,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10361534357070923,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3335.3,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 4.090019569471624,
|
|
"grad_norm": 0.45078174424393247,
|
|
"learning_rate": 1.763398803368466e-05,
|
|
"loss": 0.1072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10514619201421738,
|
|
"step": 2090,
|
|
"valid_targets_mean": 2871.2,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 4.099804305283757,
|
|
"grad_norm": 0.6106485028695341,
|
|
"learning_rate": 1.7537106620651766e-05,
|
|
"loss": 0.0962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10618409514427185,
|
|
"step": 2095,
|
|
"valid_targets_mean": 1862.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 4.109589041095891,
|
|
"grad_norm": 0.33022730463004335,
|
|
"learning_rate": 1.7440283854179842e-05,
|
|
"loss": 0.0849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06401461362838745,
|
|
"step": 2100,
|
|
"valid_targets_mean": 3686.1,
|
|
"valid_targets_min": 920
|
|
},
|
|
{
|
|
"epoch": 4.119373776908024,
|
|
"grad_norm": 0.4126400761141411,
|
|
"learning_rate": 1.7343522039818258e-05,
|
|
"loss": 0.0827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07285851240158081,
|
|
"step": 2105,
|
|
"valid_targets_mean": 2948.2,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 4.129158512720156,
|
|
"grad_norm": 0.4086389799740897,
|
|
"learning_rate": 1.724682348166498e-05,
|
|
"loss": 0.0901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07260704785585403,
|
|
"step": 2110,
|
|
"valid_targets_mean": 2825.8,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.138943248532289,
|
|
"grad_norm": 0.46286261572756504,
|
|
"learning_rate": 1.7150190482311727e-05,
|
|
"loss": 0.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07903758436441422,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2571.9,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 4.148727984344423,
|
|
"grad_norm": 0.4020407338776383,
|
|
"learning_rate": 1.705362534278912e-05,
|
|
"loss": 0.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08354012668132782,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3056.5,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 4.158512720156556,
|
|
"grad_norm": 0.39781399587220173,
|
|
"learning_rate": 1.6957130362511907e-05,
|
|
"loss": 0.0833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07108549028635025,
|
|
"step": 2125,
|
|
"valid_targets_mean": 2955.3,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 4.1682974559686885,
|
|
"grad_norm": 0.41645999440413334,
|
|
"learning_rate": 1.6860707839224183e-05,
|
|
"loss": 0.0888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08625232428312302,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2813.1,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 4.178082191780822,
|
|
"grad_norm": 0.4644596872947445,
|
|
"learning_rate": 1.6764360068944706e-05,
|
|
"loss": 0.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09085105359554291,
|
|
"step": 2135,
|
|
"valid_targets_mean": 2999.6,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 4.187866927592955,
|
|
"grad_norm": 0.3706643100998044,
|
|
"learning_rate": 1.6668089345912217e-05,
|
|
"loss": 0.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07670444250106812,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2918.2,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 4.197651663405088,
|
|
"grad_norm": 0.47220192026093816,
|
|
"learning_rate": 1.6571897962530784e-05,
|
|
"loss": 0.0855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08821834623813629,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2610.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 4.2074363992172215,
|
|
"grad_norm": 0.405018454979078,
|
|
"learning_rate": 1.6475788209315248e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09472696483135223,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3579.1,
|
|
"valid_targets_min": 912
|
|
},
|
|
{
|
|
"epoch": 4.217221135029354,
|
|
"grad_norm": 0.44603853536731675,
|
|
"learning_rate": 1.6379762374836665e-05,
|
|
"loss": 0.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08254195749759674,
|
|
"step": 2155,
|
|
"valid_targets_mean": 3158.2,
|
|
"valid_targets_min": 246
|
|
},
|
|
{
|
|
"epoch": 4.227005870841487,
|
|
"grad_norm": 0.470220253122075,
|
|
"learning_rate": 1.6283822745667818e-05,
|
|
"loss": 0.0921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383131593465805,
|
|
"step": 2160,
|
|
"valid_targets_mean": 2484.4,
|
|
"valid_targets_min": 889
|
|
},
|
|
{
|
|
"epoch": 4.236790606653621,
|
|
"grad_norm": 0.41664049920215673,
|
|
"learning_rate": 1.6187971606328744e-05,
|
|
"loss": 0.0843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08246999979019165,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3189.9,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 4.2465753424657535,
|
|
"grad_norm": 0.4177172165571193,
|
|
"learning_rate": 1.609221123923239e-05,
|
|
"loss": 0.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07430468499660492,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2448.2,
|
|
"valid_targets_min": 796
|
|
},
|
|
{
|
|
"epoch": 4.256360078277886,
|
|
"grad_norm": 0.3659569689790677,
|
|
"learning_rate": 1.5996543924630197e-05,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06855691969394684,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3914.1,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 4.266144814090019,
|
|
"grad_norm": 0.3928665815103277,
|
|
"learning_rate": 1.590097194055786e-05,
|
|
"loss": 0.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07608282566070557,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3281.5,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 4.275929549902153,
|
|
"grad_norm": 0.38118551530157957,
|
|
"learning_rate": 1.5805497562781018e-05,
|
|
"loss": 0.0835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08345082402229309,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3308.7,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.3140135252262881,
|
|
"learning_rate": 1.571012306474115e-05,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0600694976747036,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4122.0,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 4.295499021526418,
|
|
"grad_norm": 0.36985203539245803,
|
|
"learning_rate": 1.5614850717501374e-05,
|
|
"loss": 0.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06799148768186569,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3213.6,
|
|
"valid_targets_min": 447
|
|
},
|
|
{
|
|
"epoch": 4.305283757338552,
|
|
"grad_norm": 0.49624335733136077,
|
|
"learning_rate": 1.5519682789692375e-05,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0989300012588501,
|
|
"step": 2200,
|
|
"valid_targets_mean": 2505.1,
|
|
"valid_targets_min": 776
|
|
},
|
|
{
|
|
"epoch": 4.315068493150685,
|
|
"grad_norm": 0.5817276658624355,
|
|
"learning_rate": 1.5424621547458416e-05,
|
|
"loss": 0.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08102203160524368,
|
|
"step": 2205,
|
|
"valid_targets_mean": 2599.9,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.324853228962818,
|
|
"grad_norm": 0.40804741152655644,
|
|
"learning_rate": 1.5329669254403342e-05,
|
|
"loss": 0.0822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09205059707164764,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3102.4,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 4.334637964774951,
|
|
"grad_norm": 0.3994952601025495,
|
|
"learning_rate": 1.5234828171536705e-05,
|
|
"loss": 0.079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06924515962600708,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2688.7,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 4.344422700587084,
|
|
"grad_norm": 0.43795940048070464,
|
|
"learning_rate": 1.5140100557219902e-05,
|
|
"loss": 0.0885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08188264071941376,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3017.8,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 4.354207436399217,
|
|
"grad_norm": 0.39285934151052326,
|
|
"learning_rate": 1.5045488667112421e-05,
|
|
"loss": 0.083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08715667575597763,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3435.9,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 4.363992172211351,
|
|
"grad_norm": 0.3927584507586724,
|
|
"learning_rate": 1.4950994754118115e-05,
|
|
"loss": 0.0818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07635986059904099,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3151.9,
|
|
"valid_targets_min": 904
|
|
},
|
|
{
|
|
"epoch": 4.373776908023483,
|
|
"grad_norm": 0.40139400189745017,
|
|
"learning_rate": 1.4856621068331557e-05,
|
|
"loss": 0.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07851336896419525,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3499.4,
|
|
"valid_targets_min": 703
|
|
},
|
|
{
|
|
"epoch": 4.383561643835616,
|
|
"grad_norm": 0.5472252641844141,
|
|
"learning_rate": 1.4762369856984466e-05,
|
|
"loss": 0.0893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11168396472930908,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2343.6,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 4.39334637964775,
|
|
"grad_norm": 0.44988499850357433,
|
|
"learning_rate": 1.4668243364392187e-05,
|
|
"loss": 0.0853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08482237160205841,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2604.9,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 4.403131115459883,
|
|
"grad_norm": 0.40278145909096286,
|
|
"learning_rate": 1.4574243831900252e-05,
|
|
"loss": 0.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08804011344909668,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2919.4,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 4.4129158512720155,
|
|
"grad_norm": 0.4283698076896282,
|
|
"learning_rate": 1.4480373497831034e-05,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08546335250139236,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3130.6,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 4.422700587084149,
|
|
"grad_norm": 0.42452807333687326,
|
|
"learning_rate": 1.4386634597430402e-05,
|
|
"loss": 0.0862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0829804390668869,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3186.8,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 4.432485322896282,
|
|
"grad_norm": 0.5080896782394324,
|
|
"learning_rate": 1.4293029362814521e-05,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08919325470924377,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3136.6,
|
|
"valid_targets_min": 608
|
|
},
|
|
{
|
|
"epoch": 4.442270058708415,
|
|
"grad_norm": 0.3654051449018453,
|
|
"learning_rate": 1.4199560022916701e-05,
|
|
"loss": 0.0802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061577536165714264,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3570.6,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 4.4520547945205475,
|
|
"grad_norm": 0.4071984690405413,
|
|
"learning_rate": 1.4106228803434329e-05,
|
|
"loss": 0.0911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08632341772317886,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3279.2,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 4.461839530332681,
|
|
"grad_norm": 0.5465175072506565,
|
|
"learning_rate": 1.4013037926775857e-05,
|
|
"loss": 0.0942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11121866852045059,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2290.5,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 4.471624266144814,
|
|
"grad_norm": 0.4545421717381925,
|
|
"learning_rate": 1.3919989612007873e-05,
|
|
"loss": 0.0863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09390892088413239,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2565.9,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 4.481409001956947,
|
|
"grad_norm": 0.4744600265214128,
|
|
"learning_rate": 1.3827086074802288e-05,
|
|
"loss": 0.0904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09152168780565262,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3018.2,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 4.4911937377690805,
|
|
"grad_norm": 0.37848480874679646,
|
|
"learning_rate": 1.373432952738355e-05,
|
|
"loss": 0.0887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07510063052177429,
|
|
"step": 2295,
|
|
"valid_targets_mean": 3468.9,
|
|
"valid_targets_min": 947
|
|
},
|
|
{
|
|
"epoch": 4.500978473581213,
|
|
"grad_norm": 0.41100746477510236,
|
|
"learning_rate": 1.3641722178475982e-05,
|
|
"loss": 0.0882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07598906755447388,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3540.9,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 4.510763209393346,
|
|
"grad_norm": 0.4031977134950344,
|
|
"learning_rate": 1.354926623325119e-05,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08031619340181351,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3444.3,
|
|
"valid_targets_min": 763
|
|
},
|
|
{
|
|
"epoch": 4.52054794520548,
|
|
"grad_norm": 0.3907727106261878,
|
|
"learning_rate": 1.3456963893275541e-05,
|
|
"loss": 0.0894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0799025148153305,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3235.8,
|
|
"valid_targets_min": 785
|
|
},
|
|
{
|
|
"epoch": 4.5303326810176126,
|
|
"grad_norm": 0.41542931620683693,
|
|
"learning_rate": 1.3364817356457739e-05,
|
|
"loss": 0.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0947611927986145,
|
|
"step": 2315,
|
|
"valid_targets_mean": 3364.6,
|
|
"valid_targets_min": 929
|
|
},
|
|
{
|
|
"epoch": 4.540117416829745,
|
|
"grad_norm": 0.47481824122948996,
|
|
"learning_rate": 1.3272828816996498e-05,
|
|
"loss": 0.0908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11458331346511841,
|
|
"step": 2320,
|
|
"valid_targets_mean": 2688.8,
|
|
"valid_targets_min": 730
|
|
},
|
|
{
|
|
"epoch": 4.549902152641879,
|
|
"grad_norm": 0.4169815831506829,
|
|
"learning_rate": 1.3181000465328298e-05,
|
|
"loss": 0.0877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07972556352615356,
|
|
"step": 2325,
|
|
"valid_targets_mean": 2968.4,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 4.559686888454012,
|
|
"grad_norm": 0.42758434189152333,
|
|
"learning_rate": 1.3089334488075207e-05,
|
|
"loss": 0.1022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09930993616580963,
|
|
"step": 2330,
|
|
"valid_targets_mean": 2937.4,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 4.569471624266145,
|
|
"grad_norm": 0.4036679246953518,
|
|
"learning_rate": 1.2997833067992827e-05,
|
|
"loss": 0.0821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08978946506977081,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3032.6,
|
|
"valid_targets_min": 612
|
|
},
|
|
{
|
|
"epoch": 4.579256360078277,
|
|
"grad_norm": 0.42899233045163393,
|
|
"learning_rate": 1.2906498383918327e-05,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09562341123819351,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2699.2,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 4.589041095890411,
|
|
"grad_norm": 0.527541585974065,
|
|
"learning_rate": 1.2815332610718522e-05,
|
|
"loss": 0.087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12526696920394897,
|
|
"step": 2345,
|
|
"valid_targets_mean": 2469.9,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 4.598825831702544,
|
|
"grad_norm": 0.46389236440630605,
|
|
"learning_rate": 1.2724337919238133e-05,
|
|
"loss": 0.0892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09370893239974976,
|
|
"step": 2350,
|
|
"valid_targets_mean": 2425.7,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 4.608610567514678,
|
|
"grad_norm": 0.4627174077448799,
|
|
"learning_rate": 1.2633516476248067e-05,
|
|
"loss": 0.0863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08172088861465454,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2239.6,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 4.61839530332681,
|
|
"grad_norm": 0.4273871422554128,
|
|
"learning_rate": 1.2542870444393831e-05,
|
|
"loss": 0.0879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07641121745109558,
|
|
"step": 2360,
|
|
"valid_targets_mean": 2943.8,
|
|
"valid_targets_min": 705
|
|
},
|
|
{
|
|
"epoch": 4.628180039138943,
|
|
"grad_norm": 0.45482968385454625,
|
|
"learning_rate": 1.2452401982144027e-05,
|
|
"loss": 0.0848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07942764461040497,
|
|
"step": 2365,
|
|
"valid_targets_mean": 2497.5,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 4.637964774951076,
|
|
"grad_norm": 0.3831987122114139,
|
|
"learning_rate": 1.2362113243738957e-05,
|
|
"loss": 0.0849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06884678453207016,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3136.9,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 4.64774951076321,
|
|
"grad_norm": 0.3426993876504314,
|
|
"learning_rate": 1.227200637913934e-05,
|
|
"loss": 0.0775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06089651957154274,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3657.6,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 4.657534246575342,
|
|
"grad_norm": 0.36555463416806805,
|
|
"learning_rate": 1.2182083533975093e-05,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07065147161483765,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3312.8,
|
|
"valid_targets_min": 939
|
|
},
|
|
{
|
|
"epoch": 4.667318982387475,
|
|
"grad_norm": 0.49916195706518923,
|
|
"learning_rate": 1.2092346849494249e-05,
|
|
"loss": 0.0932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1260618418455124,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2458.0,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 4.677103718199609,
|
|
"grad_norm": 0.4401026984347728,
|
|
"learning_rate": 1.2002798462511991e-05,
|
|
"loss": 0.0948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08726265281438828,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2878.9,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 4.686888454011742,
|
|
"grad_norm": 0.41329131996894747,
|
|
"learning_rate": 1.1913440505359737e-05,
|
|
"loss": 0.0888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09846547991037369,
|
|
"step": 2395,
|
|
"valid_targets_mean": 2795.2,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 4.6966731898238745,
|
|
"grad_norm": 0.40015455540562234,
|
|
"learning_rate": 1.1824275105834376e-05,
|
|
"loss": 0.0955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09469325095415115,
|
|
"step": 2400,
|
|
"valid_targets_mean": 2817.9,
|
|
"valid_targets_min": 760
|
|
},
|
|
{
|
|
"epoch": 4.706457925636008,
|
|
"grad_norm": 0.3638478848724825,
|
|
"learning_rate": 1.173530438714761e-05,
|
|
"loss": 0.0854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0795590728521347,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3600.8,
|
|
"valid_targets_min": 792
|
|
},
|
|
{
|
|
"epoch": 4.716242661448141,
|
|
"grad_norm": 0.3610144363902236,
|
|
"learning_rate": 1.1646530467875384e-05,
|
|
"loss": 0.0795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06894271075725555,
|
|
"step": 2410,
|
|
"valid_targets_mean": 2944.6,
|
|
"valid_targets_min": 708
|
|
},
|
|
{
|
|
"epoch": 4.726027397260274,
|
|
"grad_norm": 0.4720220662855206,
|
|
"learning_rate": 1.1557955461907468e-05,
|
|
"loss": 0.0813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09234504401683807,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2605.2,
|
|
"valid_targets_min": 839
|
|
},
|
|
{
|
|
"epoch": 4.7358121330724074,
|
|
"grad_norm": 0.4231419000389196,
|
|
"learning_rate": 1.1469581478397044e-05,
|
|
"loss": 0.0793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07564327120780945,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3238.0,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 4.74559686888454,
|
|
"grad_norm": 0.5160752220286023,
|
|
"learning_rate": 1.1381410621710582e-05,
|
|
"loss": 0.0891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10082191228866577,
|
|
"step": 2425,
|
|
"valid_targets_mean": 2365.6,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 4.755381604696673,
|
|
"grad_norm": 0.41958078295292695,
|
|
"learning_rate": 1.1293444991377683e-05,
|
|
"loss": 0.0874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07534124702215195,
|
|
"step": 2430,
|
|
"valid_targets_mean": 2718.6,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 4.765166340508806,
|
|
"grad_norm": 0.3993376985467739,
|
|
"learning_rate": 1.120568668204105e-05,
|
|
"loss": 0.0833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07338666170835495,
|
|
"step": 2435,
|
|
"valid_targets_mean": 3466.2,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 4.7749510763209395,
|
|
"grad_norm": 0.40708648042993384,
|
|
"learning_rate": 1.1118137783406691e-05,
|
|
"loss": 0.0801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07422689348459244,
|
|
"step": 2440,
|
|
"valid_targets_mean": 2852.9,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 4.784735812133072,
|
|
"grad_norm": 0.32933199228045734,
|
|
"learning_rate": 1.1030800380194075e-05,
|
|
"loss": 0.0824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05903619900345802,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3809.5,
|
|
"valid_targets_min": 981
|
|
},
|
|
{
|
|
"epoch": 4.794520547945205,
|
|
"grad_norm": 0.45145938340577096,
|
|
"learning_rate": 1.0943676552086555e-05,
|
|
"loss": 0.0823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09547198563814163,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3029.9,
|
|
"valid_targets_min": 493
|
|
},
|
|
{
|
|
"epoch": 4.804305283757339,
|
|
"grad_norm": 0.39536994402553677,
|
|
"learning_rate": 1.0856768373681824e-05,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06591817736625671,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3374.6,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 4.814090019569472,
|
|
"grad_norm": 0.43737528144487636,
|
|
"learning_rate": 1.0770077914442481e-05,
|
|
"loss": 0.0972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08177588880062103,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2486.7,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.823874755381604,
|
|
"grad_norm": 0.4029529691750347,
|
|
"learning_rate": 1.0683607238646823e-05,
|
|
"loss": 0.0861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07464135438203812,
|
|
"step": 2465,
|
|
"valid_targets_mean": 2826.1,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 4.833659491193738,
|
|
"grad_norm": 0.40915386401021575,
|
|
"learning_rate": 1.0597358405339612e-05,
|
|
"loss": 0.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.083827443420887,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3118.3,
|
|
"valid_targets_min": 672
|
|
},
|
|
{
|
|
"epoch": 4.843444227005871,
|
|
"grad_norm": 0.4673013862594274,
|
|
"learning_rate": 1.0511333468283123e-05,
|
|
"loss": 0.0876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09946395456790924,
|
|
"step": 2475,
|
|
"valid_targets_mean": 2776.6,
|
|
"valid_targets_min": 802
|
|
},
|
|
{
|
|
"epoch": 4.853228962818004,
|
|
"grad_norm": 0.4147413930838571,
|
|
"learning_rate": 1.0425534475908162e-05,
|
|
"loss": 0.0815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07261813431978226,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3035.1,
|
|
"valid_targets_min": 530
|
|
},
|
|
{
|
|
"epoch": 4.863013698630137,
|
|
"grad_norm": 0.39749212592351296,
|
|
"learning_rate": 1.0339963471265354e-05,
|
|
"loss": 0.0886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06496164202690125,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3029.6,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 4.87279843444227,
|
|
"grad_norm": 0.44617161341849004,
|
|
"learning_rate": 1.0254622491976467e-05,
|
|
"loss": 0.0877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08341960608959198,
|
|
"step": 2490,
|
|
"valid_targets_mean": 2654.7,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 4.882583170254403,
|
|
"grad_norm": 0.44433835158297286,
|
|
"learning_rate": 1.0169513570185873e-05,
|
|
"loss": 0.0774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09123141318559647,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2940.3,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 4.892367906066536,
|
|
"grad_norm": 0.43946592148289093,
|
|
"learning_rate": 1.0084638732512193e-05,
|
|
"loss": 0.0885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09572343528270721,
|
|
"step": 2500,
|
|
"valid_targets_mean": 2890.5,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 4.902152641878669,
|
|
"grad_norm": 0.37271846213586,
|
|
"learning_rate": 1.0000000000000006e-05,
|
|
"loss": 0.0777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07236606627702713,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3438.8,
|
|
"valid_targets_min": 871
|
|
},
|
|
{
|
|
"epoch": 4.911937377690802,
|
|
"grad_norm": 0.4134697517013373,
|
|
"learning_rate": 9.91559938807176e-06,
|
|
"loss": 0.0888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0818052664399147,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3295.9,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 4.921722113502936,
|
|
"grad_norm": 0.5292638132574928,
|
|
"learning_rate": 9.831438906479764e-06,
|
|
"loss": 0.0857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09691952913999557,
|
|
"step": 2515,
|
|
"valid_targets_mean": 2324.8,
|
|
"valid_targets_min": 538
|
|
},
|
|
{
|
|
"epoch": 4.931506849315069,
|
|
"grad_norm": 0.4261666106299731,
|
|
"learning_rate": 9.747520559258304e-06,
|
|
"loss": 0.0794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07897660881280899,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2536.9,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 4.9412915851272015,
|
|
"grad_norm": 0.36788850762651626,
|
|
"learning_rate": 9.663846344675982e-06,
|
|
"loss": 0.0847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08979177474975586,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3350.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 4.951076320939334,
|
|
"grad_norm": 0.5905666803453788,
|
|
"learning_rate": 9.580418255188064e-06,
|
|
"loss": 0.0864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08794091641902924,
|
|
"step": 2530,
|
|
"valid_targets_mean": 2570.4,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 4.960861056751468,
|
|
"grad_norm": 0.455660084440653,
|
|
"learning_rate": 9.497238277389096e-06,
|
|
"loss": 0.0863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09039708971977234,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2890.1,
|
|
"valid_targets_min": 890
|
|
},
|
|
{
|
|
"epoch": 4.970645792563601,
|
|
"grad_norm": 0.43123529859668797,
|
|
"learning_rate": 9.414308391965576e-06,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08583714812994003,
|
|
"step": 2540,
|
|
"valid_targets_mean": 3090.9,
|
|
"valid_targets_min": 804
|
|
},
|
|
{
|
|
"epoch": 4.9804305283757335,
|
|
"grad_norm": 0.38267209615286657,
|
|
"learning_rate": 9.331630573648762e-06,
|
|
"loss": 0.0877,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0717456191778183,
|
|
"step": 2545,
|
|
"valid_targets_mean": 3247.4,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 4.990215264187867,
|
|
"grad_norm": 0.4869270630648989,
|
|
"learning_rate": 9.249206791167708e-06,
|
|
"loss": 0.0925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1435987800359726,
|
|
"step": 2550,
|
|
"valid_targets_mean": 2629.9,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.37216419041648807,
|
|
"learning_rate": 9.167039007202318e-06,
|
|
"loss": 0.0869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0866527259349823,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3667.2,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 5.009784735812133,
|
|
"grad_norm": 0.3846448015454443,
|
|
"learning_rate": 9.08512917833668e-06,
|
|
"loss": 0.08,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07525740563869476,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3151.8,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 5.0195694716242665,
|
|
"grad_norm": 0.4566927718008459,
|
|
"learning_rate": 9.003479255012407e-06,
|
|
"loss": 0.0819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07278042286634445,
|
|
"step": 2565,
|
|
"valid_targets_mean": 2763.4,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 5.029354207436399,
|
|
"grad_norm": 0.44155540925429854,
|
|
"learning_rate": 8.922091181482244e-06,
|
|
"loss": 0.0723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07004959881305695,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3266.8,
|
|
"valid_targets_min": 605
|
|
},
|
|
{
|
|
"epoch": 5.039138943248532,
|
|
"grad_norm": 0.41575306410926377,
|
|
"learning_rate": 8.84096689576377e-06,
|
|
"loss": 0.0775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07973407208919525,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3170.4,
|
|
"valid_targets_min": 739
|
|
},
|
|
{
|
|
"epoch": 5.048923679060666,
|
|
"grad_norm": 0.40927473295170586,
|
|
"learning_rate": 8.760108329593182e-06,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06959472596645355,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3163.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 5.0587084148727985,
|
|
"grad_norm": 0.4647202504030572,
|
|
"learning_rate": 8.679517408379397e-06,
|
|
"loss": 0.0678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06403348594903946,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3153.7,
|
|
"valid_targets_min": 644
|
|
},
|
|
{
|
|
"epoch": 5.068493150684931,
|
|
"grad_norm": 0.4472345809309543,
|
|
"learning_rate": 8.599196051158138e-06,
|
|
"loss": 0.0801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07315093278884888,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3103.4,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 5.078277886497064,
|
|
"grad_norm": 0.4327382824717028,
|
|
"learning_rate": 8.519146170546244e-06,
|
|
"loss": 0.0748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06694217026233673,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3028.6,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 5.088062622309198,
|
|
"grad_norm": 0.48093617379902154,
|
|
"learning_rate": 8.439369672696163e-06,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11695721000432968,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2941.0,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 5.097847358121331,
|
|
"grad_norm": 0.4947290913856816,
|
|
"learning_rate": 8.3598684572505e-06,
|
|
"loss": 0.0749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06659847497940063,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3053.4,
|
|
"valid_targets_min": 645
|
|
},
|
|
{
|
|
"epoch": 5.107632093933463,
|
|
"grad_norm": 0.425722662758008,
|
|
"learning_rate": 8.280644417296857e-06,
|
|
"loss": 0.0746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05943864956498146,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3356.5,
|
|
"valid_targets_min": 841
|
|
},
|
|
{
|
|
"epoch": 5.117416829745597,
|
|
"grad_norm": 0.7010546552125257,
|
|
"learning_rate": 8.201699439322683e-06,
|
|
"loss": 0.0743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08390269428491592,
|
|
"step": 2615,
|
|
"valid_targets_mean": 2372.9,
|
|
"valid_targets_min": 654
|
|
},
|
|
{
|
|
"epoch": 5.12720156555773,
|
|
"grad_norm": 0.4707701192161638,
|
|
"learning_rate": 8.123035403170416e-06,
|
|
"loss": 0.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09458690881729126,
|
|
"step": 2620,
|
|
"valid_targets_mean": 3179.9,
|
|
"valid_targets_min": 740
|
|
},
|
|
{
|
|
"epoch": 5.136986301369863,
|
|
"grad_norm": 0.3733882899924583,
|
|
"learning_rate": 8.044654181992681e-06,
|
|
"loss": 0.0829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06267109513282776,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3535.1,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 5.146771037181996,
|
|
"grad_norm": 0.5670266134338218,
|
|
"learning_rate": 7.96655764220768e-06,
|
|
"loss": 0.083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10448236763477325,
|
|
"step": 2630,
|
|
"valid_targets_mean": 2141.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 5.156555772994129,
|
|
"grad_norm": 0.4579181411339998,
|
|
"learning_rate": 7.888747643454795e-06,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07108033448457718,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3233.6,
|
|
"valid_targets_min": 898
|
|
},
|
|
{
|
|
"epoch": 5.166340508806262,
|
|
"grad_norm": 0.4796639740717722,
|
|
"learning_rate": 7.811226038550243e-06,
|
|
"loss": 0.0833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08697977662086487,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2687.1,
|
|
"valid_targets_min": 579
|
|
},
|
|
{
|
|
"epoch": 5.176125244618396,
|
|
"grad_norm": 0.4281783389607264,
|
|
"learning_rate": 7.733994673443022e-06,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0809246152639389,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3109.3,
|
|
"valid_targets_min": 826
|
|
},
|
|
{
|
|
"epoch": 5.185909980430528,
|
|
"grad_norm": 0.4700576121724099,
|
|
"learning_rate": 7.657055387170902e-06,
|
|
"loss": 0.0695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.075184665620327,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2657.9,
|
|
"valid_targets_min": 575
|
|
},
|
|
{
|
|
"epoch": 5.195694716242661,
|
|
"grad_norm": 0.46934057298939746,
|
|
"learning_rate": 7.580410011816653e-06,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.074135422706604,
|
|
"step": 2655,
|
|
"valid_targets_mean": 2797.4,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 5.205479452054795,
|
|
"grad_norm": 0.4586698081794364,
|
|
"learning_rate": 7.504060372464441e-06,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0656193196773529,
|
|
"step": 2660,
|
|
"valid_targets_mean": 2879.9,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.215264187866928,
|
|
"grad_norm": 0.42532010620654703,
|
|
"learning_rate": 7.428008287156323e-06,
|
|
"loss": 0.0675,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06322677433490753,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3072.4,
|
|
"valid_targets_min": 664
|
|
},
|
|
{
|
|
"epoch": 5.2250489236790605,
|
|
"grad_norm": 0.4546147009432678,
|
|
"learning_rate": 7.352255566849005e-06,
|
|
"loss": 0.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07324878871440887,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3465.8,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 5.234833659491194,
|
|
"grad_norm": 0.4004826507870159,
|
|
"learning_rate": 7.276804015370691e-06,
|
|
"loss": 0.0735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06853301078081131,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3300.2,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 5.244618395303327,
|
|
"grad_norm": 0.522264738532023,
|
|
"learning_rate": 7.201655429378118e-06,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11054161190986633,
|
|
"step": 2680,
|
|
"valid_targets_mean": 2592.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 5.25440313111546,
|
|
"grad_norm": 0.5331610897574944,
|
|
"learning_rate": 7.1268115983138164e-06,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1116286963224411,
|
|
"step": 2685,
|
|
"valid_targets_mean": 2340.2,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 5.2641878669275926,
|
|
"grad_norm": 0.49689270254400725,
|
|
"learning_rate": 7.052274304363449e-06,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.083046555519104,
|
|
"step": 2690,
|
|
"valid_targets_mean": 2871.1,
|
|
"valid_targets_min": 572
|
|
},
|
|
{
|
|
"epoch": 5.273972602739726,
|
|
"grad_norm": 0.4064264879916974,
|
|
"learning_rate": 6.978045322413425e-06,
|
|
"loss": 0.0867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07062041759490967,
|
|
"step": 2695,
|
|
"valid_targets_mean": 3404.1,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 5.283757338551859,
|
|
"grad_norm": 0.43874521235403546,
|
|
"learning_rate": 6.904126420008583e-06,
|
|
"loss": 0.0786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07893872261047363,
|
|
"step": 2700,
|
|
"valid_targets_mean": 3099.3,
|
|
"valid_targets_min": 743
|
|
},
|
|
{
|
|
"epoch": 5.293542074363992,
|
|
"grad_norm": 0.42947030250371915,
|
|
"learning_rate": 6.830519357310153e-06,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07591693103313446,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3025.2,
|
|
"valid_targets_min": 700
|
|
},
|
|
{
|
|
"epoch": 5.3033268101761255,
|
|
"grad_norm": 0.41983088407002433,
|
|
"learning_rate": 6.757225887053815e-06,
|
|
"loss": 0.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06995223462581635,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3067.0,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 5.313111545988258,
|
|
"grad_norm": 0.5065180079964318,
|
|
"learning_rate": 6.684247754507951e-06,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08915503323078156,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2829.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 5.322896281800391,
|
|
"grad_norm": 0.3514651583143728,
|
|
"learning_rate": 6.611586697432124e-06,
|
|
"loss": 0.0678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056846678256988525,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3205.9,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.332681017612525,
|
|
"grad_norm": 0.46576323988464474,
|
|
"learning_rate": 6.539244446035657e-06,
|
|
"loss": 0.0823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0915067046880722,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3027.2,
|
|
"valid_targets_min": 885
|
|
},
|
|
{
|
|
"epoch": 5.342465753424658,
|
|
"grad_norm": 0.38295613031553155,
|
|
"learning_rate": 6.467222722936481e-06,
|
|
"loss": 0.0765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08527641743421555,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3871.1,
|
|
"valid_targets_min": 1785
|
|
},
|
|
{
|
|
"epoch": 5.35225048923679,
|
|
"grad_norm": 0.48047512841445,
|
|
"learning_rate": 6.395523243120061e-06,
|
|
"loss": 0.0743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07413574308156967,
|
|
"step": 2735,
|
|
"valid_targets_mean": 2652.9,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 5.362035225048924,
|
|
"grad_norm": 0.40613604935428527,
|
|
"learning_rate": 6.324147713898592e-06,
|
|
"loss": 0.067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06498594582080841,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3446.8,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 5.371819960861057,
|
|
"grad_norm": 0.4421159325694742,
|
|
"learning_rate": 6.253097834870358e-06,
|
|
"loss": 0.0732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06856618821620941,
|
|
"step": 2745,
|
|
"valid_targets_mean": 2949.9,
|
|
"valid_targets_min": 681
|
|
},
|
|
{
|
|
"epoch": 5.38160469667319,
|
|
"grad_norm": 0.5016463840499853,
|
|
"learning_rate": 6.1823752978792125e-06,
|
|
"loss": 0.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08905217051506042,
|
|
"step": 2750,
|
|
"valid_targets_mean": 2479.9,
|
|
"valid_targets_min": 342
|
|
},
|
|
{
|
|
"epoch": 5.391389432485322,
|
|
"grad_norm": 0.4924121530285662,
|
|
"learning_rate": 6.111981786974346e-06,
|
|
"loss": 0.0808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09011845290660858,
|
|
"step": 2755,
|
|
"valid_targets_mean": 2914.0,
|
|
"valid_targets_min": 744
|
|
},
|
|
{
|
|
"epoch": 5.401174168297456,
|
|
"grad_norm": 0.5061843867577648,
|
|
"learning_rate": 6.0419189783701514e-06,
|
|
"loss": 0.0749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08524833619594574,
|
|
"step": 2760,
|
|
"valid_targets_mean": 2597.8,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 5.410958904109589,
|
|
"grad_norm": 0.48279546078078917,
|
|
"learning_rate": 5.972188540406312e-06,
|
|
"loss": 0.0893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09057684987783432,
|
|
"step": 2765,
|
|
"valid_targets_mean": 2810.7,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 5.420743639921722,
|
|
"grad_norm": 0.4277493595292542,
|
|
"learning_rate": 5.902792133508095e-06,
|
|
"loss": 0.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06424663215875626,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3190.0,
|
|
"valid_targets_min": 676
|
|
},
|
|
{
|
|
"epoch": 5.430528375733855,
|
|
"grad_norm": 0.5086478426477666,
|
|
"learning_rate": 5.833731410146786e-06,
|
|
"loss": 0.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07968039065599442,
|
|
"step": 2775,
|
|
"valid_targets_mean": 2663.5,
|
|
"valid_targets_min": 652
|
|
},
|
|
{
|
|
"epoch": 5.440313111545988,
|
|
"grad_norm": 0.5557090459869544,
|
|
"learning_rate": 5.765008014800375e-06,
|
|
"loss": 0.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08440135419368744,
|
|
"step": 2780,
|
|
"valid_targets_mean": 2660.8,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 5.450097847358121,
|
|
"grad_norm": 0.3792929646660218,
|
|
"learning_rate": 5.6966235839143495e-06,
|
|
"loss": 0.0738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08909265697002411,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3611.2,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 5.459882583170255,
|
|
"grad_norm": 0.45468140912134825,
|
|
"learning_rate": 5.628579745862777e-06,
|
|
"loss": 0.0758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07230260968208313,
|
|
"step": 2790,
|
|
"valid_targets_mean": 2914.3,
|
|
"valid_targets_min": 323
|
|
},
|
|
{
|
|
"epoch": 5.4696673189823874,
|
|
"grad_norm": 0.4473717214046915,
|
|
"learning_rate": 5.560878120909512e-06,
|
|
"loss": 0.0739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0659240335226059,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3016.2,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 5.47945205479452,
|
|
"grad_norm": 0.3767059140003713,
|
|
"learning_rate": 5.493520321169587e-06,
|
|
"loss": 0.0815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06713557243347168,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3682.9,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 5.489236790606654,
|
|
"grad_norm": 0.4532874827402321,
|
|
"learning_rate": 5.426507950570874e-06,
|
|
"loss": 0.0796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08205291628837585,
|
|
"step": 2805,
|
|
"valid_targets_mean": 3044.5,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 5.499021526418787,
|
|
"grad_norm": 0.5602610792291699,
|
|
"learning_rate": 5.359842604815853e-06,
|
|
"loss": 0.0781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0752958208322525,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2457.2,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 5.5088062622309195,
|
|
"grad_norm": 0.40646962416999005,
|
|
"learning_rate": 5.293525871343619e-06,
|
|
"loss": 0.0772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06652592867612839,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3495.7,
|
|
"valid_targets_min": 761
|
|
},
|
|
{
|
|
"epoch": 5.518590998043053,
|
|
"grad_norm": 0.45587818448470707,
|
|
"learning_rate": 5.227559329292115e-06,
|
|
"loss": 0.0886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07505495846271515,
|
|
"step": 2820,
|
|
"valid_targets_mean": 2507.4,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 5.528375733855186,
|
|
"grad_norm": 0.5332414159903734,
|
|
"learning_rate": 5.161944549460483e-06,
|
|
"loss": 0.0791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08048176765441895,
|
|
"step": 2825,
|
|
"valid_targets_mean": 2633.4,
|
|
"valid_targets_min": 622
|
|
},
|
|
{
|
|
"epoch": 5.538160469667319,
|
|
"grad_norm": 0.5913396187384118,
|
|
"learning_rate": 5.096683094271702e-06,
|
|
"loss": 0.0823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09726202487945557,
|
|
"step": 2830,
|
|
"valid_targets_mean": 2740.9,
|
|
"valid_targets_min": 531
|
|
},
|
|
{
|
|
"epoch": 5.5479452054794525,
|
|
"grad_norm": 0.39863791888320843,
|
|
"learning_rate": 5.031776517735341e-06,
|
|
"loss": 0.0654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05999351665377617,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3267.8,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 5.557729941291585,
|
|
"grad_norm": 0.4516136822713863,
|
|
"learning_rate": 4.967226365410598e-06,
|
|
"loss": 0.0805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07914609462022781,
|
|
"step": 2840,
|
|
"valid_targets_mean": 2839.3,
|
|
"valid_targets_min": 945
|
|
},
|
|
{
|
|
"epoch": 5.567514677103718,
|
|
"grad_norm": 0.42885281973077843,
|
|
"learning_rate": 4.903034174369477e-06,
|
|
"loss": 0.0741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08505196124315262,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3390.4,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 5.577299412915851,
|
|
"grad_norm": 0.40360506605925617,
|
|
"learning_rate": 4.839201473160162e-06,
|
|
"loss": 0.0652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05721169710159302,
|
|
"step": 2850,
|
|
"valid_targets_mean": 2749.6,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 5.5870841487279845,
|
|
"grad_norm": 0.4655567417756694,
|
|
"learning_rate": 4.775729781770682e-06,
|
|
"loss": 0.0837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0737919956445694,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3181.2,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 5.596868884540117,
|
|
"grad_norm": 0.45830977254087935,
|
|
"learning_rate": 4.7126206115926375e-06,
|
|
"loss": 0.0648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06951932609081268,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2819.4,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 5.60665362035225,
|
|
"grad_norm": 0.4894743876633723,
|
|
"learning_rate": 4.649875465385294e-06,
|
|
"loss": 0.0834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08948928862810135,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2493.1,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 5.616438356164384,
|
|
"grad_norm": 0.4244925832158591,
|
|
"learning_rate": 4.587495837239722e-06,
|
|
"loss": 0.083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06952004879713058,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3181.3,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 5.626223091976517,
|
|
"grad_norm": 0.40823168515859165,
|
|
"learning_rate": 4.525483212543273e-06,
|
|
"loss": 0.0797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06525617837905884,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3030.6,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 5.636007827788649,
|
|
"grad_norm": 0.4223584270445933,
|
|
"learning_rate": 4.463839067944196e-06,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07718521356582642,
|
|
"step": 2880,
|
|
"valid_targets_mean": 3210.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 5.645792563600783,
|
|
"grad_norm": 0.4102616492959249,
|
|
"learning_rate": 4.402564871316455e-06,
|
|
"loss": 0.0777,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05997679382562637,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3926.1,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 5.655577299412916,
|
|
"grad_norm": 0.4603342285016962,
|
|
"learning_rate": 4.341662081724794e-06,
|
|
"loss": 0.0739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07202913612127304,
|
|
"step": 2890,
|
|
"valid_targets_mean": 2729.5,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 5.665362035225049,
|
|
"grad_norm": 0.6329113399161261,
|
|
"learning_rate": 4.281132149390004e-06,
|
|
"loss": 0.073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08393596112728119,
|
|
"step": 2895,
|
|
"valid_targets_mean": 2008.6,
|
|
"valid_targets_min": 567
|
|
},
|
|
{
|
|
"epoch": 5.675146771037182,
|
|
"grad_norm": 0.4694934388592357,
|
|
"learning_rate": 4.220976515654358e-06,
|
|
"loss": 0.0769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08040758222341537,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2670.1,
|
|
"valid_targets_min": 613
|
|
},
|
|
{
|
|
"epoch": 5.684931506849315,
|
|
"grad_norm": 0.3448575873042548,
|
|
"learning_rate": 4.1611966129473336e-06,
|
|
"loss": 0.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.051838748157024384,
|
|
"step": 2905,
|
|
"valid_targets_mean": 3567.4,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 5.694716242661448,
|
|
"grad_norm": 0.45465205799302394,
|
|
"learning_rate": 4.101793864751449e-06,
|
|
"loss": 0.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09528183937072754,
|
|
"step": 2910,
|
|
"valid_targets_mean": 2994.5,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 5.704500978473581,
|
|
"grad_norm": 0.4707601342793054,
|
|
"learning_rate": 4.0427696855684266e-06,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07119297236204147,
|
|
"step": 2915,
|
|
"valid_targets_mean": 2591.8,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 5.714285714285714,
|
|
"grad_norm": 0.4030225676073155,
|
|
"learning_rate": 3.984125480885455e-06,
|
|
"loss": 0.0702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06661651283502579,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3427.6,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 5.724070450097847,
|
|
"grad_norm": 0.5059078539553407,
|
|
"learning_rate": 3.925862647141769e-06,
|
|
"loss": 0.0779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07960377633571625,
|
|
"step": 2925,
|
|
"valid_targets_mean": 2849.5,
|
|
"valid_targets_min": 784
|
|
},
|
|
{
|
|
"epoch": 5.733855185909981,
|
|
"grad_norm": 0.44174550198775075,
|
|
"learning_rate": 3.867982571695372e-06,
|
|
"loss": 0.0753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0717354342341423,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3005.4,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 5.743639921722114,
|
|
"grad_norm": 0.4890108032015441,
|
|
"learning_rate": 3.810486632789987e-06,
|
|
"loss": 0.0774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09158261865377426,
|
|
"step": 2935,
|
|
"valid_targets_mean": 3100.3,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 5.7534246575342465,
|
|
"grad_norm": 0.4451920276397767,
|
|
"learning_rate": 3.753376199522285e-06,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06903664767742157,
|
|
"step": 2940,
|
|
"valid_targets_mean": 2797.9,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 5.763209393346379,
|
|
"grad_norm": 0.5124581684682721,
|
|
"learning_rate": 3.696652631809221e-06,
|
|
"loss": 0.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07900827378034592,
|
|
"step": 2945,
|
|
"valid_targets_mean": 2342.2,
|
|
"valid_targets_min": 619
|
|
},
|
|
{
|
|
"epoch": 5.772994129158513,
|
|
"grad_norm": 0.4517907195455391,
|
|
"learning_rate": 3.640317280355712e-06,
|
|
"loss": 0.074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06947002559900284,
|
|
"step": 2950,
|
|
"valid_targets_mean": 2831.8,
|
|
"valid_targets_min": 961
|
|
},
|
|
{
|
|
"epoch": 5.782778864970646,
|
|
"grad_norm": 0.40988865690038767,
|
|
"learning_rate": 3.5843714866224376e-06,
|
|
"loss": 0.0801,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07078046351671219,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3242.0,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 5.7925636007827785,
|
|
"grad_norm": 0.3907386140442345,
|
|
"learning_rate": 3.528816582793899e-06,
|
|
"loss": 0.067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060267020016908646,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3141.2,
|
|
"valid_targets_min": 702
|
|
},
|
|
{
|
|
"epoch": 5.802348336594912,
|
|
"grad_norm": 0.4028211436491414,
|
|
"learning_rate": 3.4736538917467222e-06,
|
|
"loss": 0.0749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07580472528934479,
|
|
"step": 2965,
|
|
"valid_targets_mean": 3886.1,
|
|
"valid_targets_min": 990
|
|
},
|
|
{
|
|
"epoch": 5.812133072407045,
|
|
"grad_norm": 0.41123716912616415,
|
|
"learning_rate": 3.418884727018108e-06,
|
|
"loss": 0.071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06308360397815704,
|
|
"step": 2970,
|
|
"valid_targets_mean": 2827.6,
|
|
"valid_targets_min": 719
|
|
},
|
|
{
|
|
"epoch": 5.821917808219178,
|
|
"grad_norm": 0.4472389010099648,
|
|
"learning_rate": 3.364510392774616e-06,
|
|
"loss": 0.0832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07658056914806366,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2843.1,
|
|
"valid_targets_min": 604
|
|
},
|
|
{
|
|
"epoch": 5.8317025440313115,
|
|
"grad_norm": 0.4451983511580819,
|
|
"learning_rate": 3.3105321837810722e-06,
|
|
"loss": 0.0748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07372932881116867,
|
|
"step": 2980,
|
|
"valid_targets_mean": 3195.2,
|
|
"valid_targets_min": 759
|
|
},
|
|
{
|
|
"epoch": 5.841487279843444,
|
|
"grad_norm": 0.46011692123417103,
|
|
"learning_rate": 3.2569513853697333e-06,
|
|
"loss": 0.0755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07479192316532135,
|
|
"step": 2985,
|
|
"valid_targets_mean": 3105.9,
|
|
"valid_targets_min": 736
|
|
},
|
|
{
|
|
"epoch": 5.851272015655577,
|
|
"grad_norm": 0.6002060396100081,
|
|
"learning_rate": 3.203769273409707e-06,
|
|
"loss": 0.0788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11194086074829102,
|
|
"step": 2990,
|
|
"valid_targets_mean": 2463.3,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 5.861056751467711,
|
|
"grad_norm": 0.4753432159430569,
|
|
"learning_rate": 3.1509871142765423e-06,
|
|
"loss": 0.0807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09354139864444733,
|
|
"step": 2995,
|
|
"valid_targets_mean": 2870.1,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 5.870841487279844,
|
|
"grad_norm": 0.48413719078598255,
|
|
"learning_rate": 3.0986061648221e-06,
|
|
"loss": 0.072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08323981612920761,
|
|
"step": 3000,
|
|
"valid_targets_mean": 2966.8,
|
|
"valid_targets_min": 953
|
|
},
|
|
{
|
|
"epoch": 5.880626223091976,
|
|
"grad_norm": 0.4569897092743185,
|
|
"learning_rate": 3.046627672344602e-06,
|
|
"loss": 0.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07903550565242767,
|
|
"step": 3005,
|
|
"valid_targets_mean": 2830.2,
|
|
"valid_targets_min": 747
|
|
},
|
|
{
|
|
"epoch": 5.890410958904109,
|
|
"grad_norm": 0.462909444059214,
|
|
"learning_rate": 2.99505287455895e-06,
|
|
"loss": 0.076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06472785770893097,
|
|
"step": 3010,
|
|
"valid_targets_mean": 2494.8,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 5.900195694716243,
|
|
"grad_norm": 0.4807548754280305,
|
|
"learning_rate": 2.9438829995672446e-06,
|
|
"loss": 0.0846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09636728465557098,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3200.9,
|
|
"valid_targets_min": 823
|
|
},
|
|
{
|
|
"epoch": 5.909980430528376,
|
|
"grad_norm": 0.4374839076350857,
|
|
"learning_rate": 2.893119265829527e-06,
|
|
"loss": 0.0754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07287952303886414,
|
|
"step": 3020,
|
|
"valid_targets_mean": 2857.5,
|
|
"valid_targets_min": 697
|
|
},
|
|
{
|
|
"epoch": 5.919765166340508,
|
|
"grad_norm": 0.5220099840364439,
|
|
"learning_rate": 2.8427628821347997e-06,
|
|
"loss": 0.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09079423546791077,
|
|
"step": 3025,
|
|
"valid_targets_mean": 2285.8,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 5.929549902152642,
|
|
"grad_norm": 0.5213731033603919,
|
|
"learning_rate": 2.7928150475722015e-06,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07722978293895721,
|
|
"step": 3030,
|
|
"valid_targets_mean": 2516.9,
|
|
"valid_targets_min": 525
|
|
},
|
|
{
|
|
"epoch": 5.939334637964775,
|
|
"grad_norm": 0.5552580974539469,
|
|
"learning_rate": 2.7432769515024917e-06,
|
|
"loss": 0.0745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0762719139456749,
|
|
"step": 3035,
|
|
"valid_targets_mean": 2393.5,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 5.949119373776908,
|
|
"grad_norm": 0.5116590588913543,
|
|
"learning_rate": 2.694149773529715e-06,
|
|
"loss": 0.0836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06904571503400803,
|
|
"step": 3040,
|
|
"valid_targets_mean": 3790.9,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 5.958904109589041,
|
|
"grad_norm": 0.4453120019573562,
|
|
"learning_rate": 2.6454346834730826e-06,
|
|
"loss": 0.0916,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07235158234834671,
|
|
"step": 3045,
|
|
"valid_targets_mean": 2963.2,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 5.968688845401174,
|
|
"grad_norm": 0.5147122188137311,
|
|
"learning_rate": 2.5971328413391805e-06,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07566611468791962,
|
|
"step": 3050,
|
|
"valid_targets_mean": 2326.0,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 5.978473581213307,
|
|
"grad_norm": 0.40827862937111237,
|
|
"learning_rate": 2.549245397294282e-06,
|
|
"loss": 0.0725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06662864983081818,
|
|
"step": 3055,
|
|
"valid_targets_mean": 3237.1,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 5.988258317025441,
|
|
"grad_norm": 0.5107580927974414,
|
|
"learning_rate": 2.5017734916370073e-06,
|
|
"loss": 0.0724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0984416976571083,
|
|
"step": 3060,
|
|
"valid_targets_mean": 3002.3,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 5.998043052837573,
|
|
"grad_norm": 0.501114134610983,
|
|
"learning_rate": 2.454718254771149e-06,
|
|
"loss": 0.084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08588661998510361,
|
|
"step": 3065,
|
|
"valid_targets_mean": 2286.1,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 6.007827788649706,
|
|
"grad_norm": 0.44192958420733774,
|
|
"learning_rate": 2.4080808071787475e-06,
|
|
"loss": 0.0665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07303124666213989,
|
|
"step": 3070,
|
|
"valid_targets_mean": 2771.9,
|
|
"valid_targets_min": 482
|
|
},
|
|
{
|
|
"epoch": 6.01761252446184,
|
|
"grad_norm": 0.43635614879975476,
|
|
"learning_rate": 2.361862259393437e-06,
|
|
"loss": 0.0757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0739935114979744,
|
|
"step": 3075,
|
|
"valid_targets_mean": 3080.9,
|
|
"valid_targets_min": 687
|
|
},
|
|
{
|
|
"epoch": 6.027397260273973,
|
|
"grad_norm": 0.43422797068567826,
|
|
"learning_rate": 2.316063711973966e-06,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0661170482635498,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3041.1,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 6.0371819960861055,
|
|
"grad_norm": 0.4754450126513109,
|
|
"learning_rate": 2.270686255478025e-06,
|
|
"loss": 0.0637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07159139215946198,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2441.1,
|
|
"valid_targets_min": 592
|
|
},
|
|
{
|
|
"epoch": 6.046966731898239,
|
|
"grad_norm": 0.4981193042856778,
|
|
"learning_rate": 2.225730970436264e-06,
|
|
"loss": 0.062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07770034670829773,
|
|
"step": 3090,
|
|
"valid_targets_mean": 2525.2,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 6.056751467710372,
|
|
"grad_norm": 0.49360571619910937,
|
|
"learning_rate": 2.1811989273265464e-06,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07154182344675064,
|
|
"step": 3095,
|
|
"valid_targets_mean": 2613.0,
|
|
"valid_targets_min": 752
|
|
},
|
|
{
|
|
"epoch": 6.066536203522505,
|
|
"grad_norm": 0.4828277732842118,
|
|
"learning_rate": 2.13709118654849e-06,
|
|
"loss": 0.0838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07128246128559113,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2490.1,
|
|
"valid_targets_min": 621
|
|
},
|
|
{
|
|
"epoch": 6.076320939334638,
|
|
"grad_norm": 0.41670521030012386,
|
|
"learning_rate": 2.0934087983981865e-06,
|
|
"loss": 0.0698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060906365513801575,
|
|
"step": 3105,
|
|
"valid_targets_mean": 2875.4,
|
|
"valid_targets_min": 583
|
|
},
|
|
{
|
|
"epoch": 6.086105675146771,
|
|
"grad_norm": 0.6321266492958042,
|
|
"learning_rate": 2.0501528030432193e-06,
|
|
"loss": 0.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0788595974445343,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2716.3,
|
|
"valid_targets_min": 683
|
|
},
|
|
{
|
|
"epoch": 6.095890410958904,
|
|
"grad_norm": 0.42036326945027286,
|
|
"learning_rate": 2.0073242304978714e-06,
|
|
"loss": 0.0695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058495331555604935,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3160.2,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 6.105675146771037,
|
|
"grad_norm": 0.4576932419686335,
|
|
"learning_rate": 1.9649241005986196e-06,
|
|
"loss": 0.0683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07226675748825073,
|
|
"step": 3120,
|
|
"valid_targets_mean": 2632.1,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.1154598825831705,
|
|
"grad_norm": 0.4238466831137611,
|
|
"learning_rate": 1.92295342297983e-06,
|
|
"loss": 0.0609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05905325710773468,
|
|
"step": 3125,
|
|
"valid_targets_mean": 3000.3,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.125244618395303,
|
|
"grad_norm": 0.45936995523354823,
|
|
"learning_rate": 1.881413197049722e-06,
|
|
"loss": 0.09,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08321767300367355,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3155.2,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 6.135029354207436,
|
|
"grad_norm": 0.40115814381067516,
|
|
"learning_rate": 1.840304411966587e-06,
|
|
"loss": 0.0684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.058111824095249176,
|
|
"step": 3135,
|
|
"valid_targets_mean": 3388.8,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 6.14481409001957,
|
|
"grad_norm": 0.40435655215447924,
|
|
"learning_rate": 1.7996280466152206e-06,
|
|
"loss": 0.0642,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06856388598680496,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3130.8,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 6.154598825831703,
|
|
"grad_norm": 0.4650210024055785,
|
|
"learning_rate": 1.7593850695836012e-06,
|
|
"loss": 0.073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08073366433382034,
|
|
"step": 3145,
|
|
"valid_targets_mean": 2809.2,
|
|
"valid_targets_min": 518
|
|
},
|
|
{
|
|
"epoch": 6.164383561643835,
|
|
"grad_norm": 0.47086624358290463,
|
|
"learning_rate": 1.719576439139854e-06,
|
|
"loss": 0.0638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06494764238595963,
|
|
"step": 3150,
|
|
"valid_targets_mean": 2855.2,
|
|
"valid_targets_min": 529
|
|
},
|
|
{
|
|
"epoch": 6.174168297455969,
|
|
"grad_norm": 0.44044476778002606,
|
|
"learning_rate": 1.6802031032094079e-06,
|
|
"loss": 0.0715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.062370240688323975,
|
|
"step": 3155,
|
|
"valid_targets_mean": 2845.0,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 6.183953033268102,
|
|
"grad_norm": 0.5499889346906841,
|
|
"learning_rate": 1.6412659993524416e-06,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10551338642835617,
|
|
"step": 3160,
|
|
"valid_targets_mean": 2796.4,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 6.193737769080235,
|
|
"grad_norm": 0.3840436421973751,
|
|
"learning_rate": 1.602766054741538e-06,
|
|
"loss": 0.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06305300444364548,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3447.1,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 6.2035225048923675,
|
|
"grad_norm": 0.46815680240348373,
|
|
"learning_rate": 1.564704186139634e-06,
|
|
"loss": 0.0692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07610894739627838,
|
|
"step": 3170,
|
|
"valid_targets_mean": 3054.6,
|
|
"valid_targets_min": 766
|
|
},
|
|
{
|
|
"epoch": 6.213307240704501,
|
|
"grad_norm": 0.47070496004221424,
|
|
"learning_rate": 1.5270812998781658e-06,
|
|
"loss": 0.0747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07871316373348236,
|
|
"step": 3175,
|
|
"valid_targets_mean": 3014.9,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 6.223091976516634,
|
|
"grad_norm": 0.40290413823371585,
|
|
"learning_rate": 1.4898982918354942e-06,
|
|
"loss": 0.0662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05782357230782509,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3368.4,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 6.232876712328767,
|
|
"grad_norm": 0.41283880434326553,
|
|
"learning_rate": 1.4531560474155849e-06,
|
|
"loss": 0.0771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061546504497528076,
|
|
"step": 3185,
|
|
"valid_targets_mean": 3097.3,
|
|
"valid_targets_min": 669
|
|
},
|
|
{
|
|
"epoch": 6.2426614481409,
|
|
"grad_norm": 0.4529313049393873,
|
|
"learning_rate": 1.4168554415268987e-06,
|
|
"loss": 0.0849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10040386766195297,
|
|
"step": 3190,
|
|
"valid_targets_mean": 3039.8,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 6.252446183953033,
|
|
"grad_norm": 0.5206840406534334,
|
|
"learning_rate": 1.3809973385615916e-06,
|
|
"loss": 0.0696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07968272268772125,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2537.8,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 6.262230919765166,
|
|
"grad_norm": 0.4212637058908629,
|
|
"learning_rate": 1.345582592374901e-06,
|
|
"loss": 0.0622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.056147150695323944,
|
|
"step": 3200,
|
|
"valid_targets_mean": 2918.4,
|
|
"valid_targets_min": 745
|
|
},
|
|
{
|
|
"epoch": 6.2720156555773,
|
|
"grad_norm": 0.42506169113076164,
|
|
"learning_rate": 1.3106120462648275e-06,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06854747235774994,
|
|
"step": 3205,
|
|
"valid_targets_mean": 3473.9,
|
|
"valid_targets_min": 767
|
|
},
|
|
{
|
|
"epoch": 6.2818003913894325,
|
|
"grad_norm": 0.4367962042686225,
|
|
"learning_rate": 1.2760865329520655e-06,
|
|
"loss": 0.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06835062801837921,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3223.1,
|
|
"valid_targets_min": 847
|
|
},
|
|
{
|
|
"epoch": 6.291585127201565,
|
|
"grad_norm": 0.5787526761733716,
|
|
"learning_rate": 1.2420068745601466e-06,
|
|
"loss": 0.077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08949454128742218,
|
|
"step": 3215,
|
|
"valid_targets_mean": 2135.1,
|
|
"valid_targets_min": 614
|
|
},
|
|
{
|
|
"epoch": 6.301369863013699,
|
|
"grad_norm": 0.4526991495398931,
|
|
"learning_rate": 1.208373882595888e-06,
|
|
"loss": 0.0816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06896747648715973,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3314.7,
|
|
"valid_targets_min": 533
|
|
},
|
|
{
|
|
"epoch": 6.311154598825832,
|
|
"grad_norm": 0.45990264333851216,
|
|
"learning_rate": 1.1751883579300638e-06,
|
|
"loss": 0.0665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06581242382526398,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2982.8,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 6.3209393346379645,
|
|
"grad_norm": 0.4957572419180614,
|
|
"learning_rate": 1.142451090778316e-06,
|
|
"loss": 0.0694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0708361566066742,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2542.9,
|
|
"valid_targets_min": 458
|
|
},
|
|
{
|
|
"epoch": 6.330724070450098,
|
|
"grad_norm": 0.4311209078972947,
|
|
"learning_rate": 1.1101628606823712e-06,
|
|
"loss": 0.0691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07917267084121704,
|
|
"step": 3235,
|
|
"valid_targets_mean": 3413.6,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 6.340508806262231,
|
|
"grad_norm": 0.5037246170434637,
|
|
"learning_rate": 1.0783244364914424e-06,
|
|
"loss": 0.0698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07734787464141846,
|
|
"step": 3240,
|
|
"valid_targets_mean": 2585.9,
|
|
"valid_targets_min": 544
|
|
},
|
|
{
|
|
"epoch": 6.350293542074364,
|
|
"grad_norm": 0.4115960837292546,
|
|
"learning_rate": 1.0469365763439532e-06,
|
|
"loss": 0.0716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05661565810441971,
|
|
"step": 3245,
|
|
"valid_targets_mean": 2866.4,
|
|
"valid_targets_min": 609
|
|
},
|
|
{
|
|
"epoch": 6.3600782778864975,
|
|
"grad_norm": 0.453432086965716,
|
|
"learning_rate": 1.016000027649453e-06,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06349624693393707,
|
|
"step": 3250,
|
|
"valid_targets_mean": 2563.8,
|
|
"valid_targets_min": 818
|
|
},
|
|
{
|
|
"epoch": 6.36986301369863,
|
|
"grad_norm": 0.4503359296980998,
|
|
"learning_rate": 9.855155270708505e-07,
|
|
"loss": 0.0635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06908641755580902,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3333.1,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 6.379647749510763,
|
|
"grad_norm": 0.4427969727086593,
|
|
"learning_rate": 9.554838005068578e-07,
|
|
"loss": 0.0728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06669929623603821,
|
|
"step": 3260,
|
|
"valid_targets_mean": 2871.0,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 6.389432485322896,
|
|
"grad_norm": 0.382983514257635,
|
|
"learning_rate": 9.259055630746939e-07,
|
|
"loss": 0.0689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.061314281076192856,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3473.1,
|
|
"valid_targets_min": 798
|
|
},
|
|
{
|
|
"epoch": 6.3992172211350296,
|
|
"grad_norm": 0.4697420687550243,
|
|
"learning_rate": 8.96781519093084e-07,
|
|
"loss": 0.0676,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06169832870364189,
|
|
"step": 3270,
|
|
"valid_targets_mean": 2443.7,
|
|
"valid_targets_min": 582
|
|
},
|
|
{
|
|
"epoch": 6.409001956947162,
|
|
"grad_norm": 0.4143290111512886,
|
|
"learning_rate": 8.681123620654563e-07,
|
|
"loss": 0.0825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06625143438577652,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3436.2,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 6.418786692759295,
|
|
"grad_norm": 0.484009641221671,
|
|
"learning_rate": 8.398987746634546e-07,
|
|
"loss": 0.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06878200173377991,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3051.4,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 6.428571428571429,
|
|
"grad_norm": 0.4403520522409024,
|
|
"learning_rate": 8.121414287106711e-07,
|
|
"loss": 0.07,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06902438402175903,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3071.1,
|
|
"valid_targets_min": 526
|
|
},
|
|
{
|
|
"epoch": 6.438356164383562,
|
|
"grad_norm": 0.4799058710132896,
|
|
"learning_rate": 7.848409851666461e-07,
|
|
"loss": 0.0759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0697348341345787,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3066.8,
|
|
"valid_targets_min": 1083
|
|
},
|
|
{
|
|
"epoch": 6.448140900195694,
|
|
"grad_norm": 0.5018899520017504,
|
|
"learning_rate": 7.579980941111387e-07,
|
|
"loss": 0.0678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08869493752717972,
|
|
"step": 3295,
|
|
"valid_targets_mean": 2888.8,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 6.457925636007828,
|
|
"grad_norm": 0.5397448470834869,
|
|
"learning_rate": 7.316133947286342e-07,
|
|
"loss": 0.0706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09075383841991425,
|
|
"step": 3300,
|
|
"valid_targets_mean": 2435.1,
|
|
"valid_targets_min": 459
|
|
},
|
|
{
|
|
"epoch": 6.467710371819961,
|
|
"grad_norm": 0.4343565829140892,
|
|
"learning_rate": 7.056875152931386e-07,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06514838337898254,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3577.0,
|
|
"valid_targets_min": 884
|
|
},
|
|
{
|
|
"epoch": 6.477495107632094,
|
|
"grad_norm": 0.437505635101395,
|
|
"learning_rate": 6.802210731532066e-07,
|
|
"loss": 0.0703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0722024068236351,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3294.1,
|
|
"valid_targets_min": 540
|
|
},
|
|
{
|
|
"epoch": 6.487279843444227,
|
|
"grad_norm": 0.43250932688840876,
|
|
"learning_rate": 6.552146747172416e-07,
|
|
"loss": 0.0669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06441313028335571,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3179.9,
|
|
"valid_targets_min": 665
|
|
},
|
|
{
|
|
"epoch": 6.49706457925636,
|
|
"grad_norm": 0.5306303137510542,
|
|
"learning_rate": 6.3066891543907e-07,
|
|
"loss": 0.069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08177472651004791,
|
|
"step": 3320,
|
|
"valid_targets_mean": 2607.1,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 6.506849315068493,
|
|
"grad_norm": 0.4645763344346858,
|
|
"learning_rate": 6.065843798037362e-07,
|
|
"loss": 0.0766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07787041366100311,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3014.7,
|
|
"valid_targets_min": 507
|
|
},
|
|
{
|
|
"epoch": 6.516634050880626,
|
|
"grad_norm": 0.46590886033192663,
|
|
"learning_rate": 5.829616413136196e-07,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0689203292131424,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2799.1,
|
|
"valid_targets_min": 516
|
|
},
|
|
{
|
|
"epoch": 6.526418786692759,
|
|
"grad_norm": 0.5280078983823107,
|
|
"learning_rate": 5.598012624747396e-07,
|
|
"loss": 0.0718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08582378178834915,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2405.5,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 6.536203522504892,
|
|
"grad_norm": 0.4094668239027261,
|
|
"learning_rate": 5.371037947833935e-07,
|
|
"loss": 0.0608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05724884569644928,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3165.3,
|
|
"valid_targets_min": 673
|
|
},
|
|
{
|
|
"epoch": 6.545988258317026,
|
|
"grad_norm": 0.3927725451944761,
|
|
"learning_rate": 5.148697787130097e-07,
|
|
"loss": 0.0707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06178641691803932,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3628.3,
|
|
"valid_targets_min": 641
|
|
},
|
|
{
|
|
"epoch": 6.555772994129159,
|
|
"grad_norm": 0.4402791629455773,
|
|
"learning_rate": 4.930997437012708e-07,
|
|
"loss": 0.073,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0625002384185791,
|
|
"step": 3350,
|
|
"valid_targets_mean": 2967.3,
|
|
"valid_targets_min": 623
|
|
},
|
|
{
|
|
"epoch": 6.5655577299412915,
|
|
"grad_norm": 0.44037028426323266,
|
|
"learning_rate": 4.7179420813752817e-07,
|
|
"loss": 0.0787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08126027137041092,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3116.8,
|
|
"valid_targets_min": 756
|
|
},
|
|
{
|
|
"epoch": 6.575342465753424,
|
|
"grad_norm": 0.43892558836696705,
|
|
"learning_rate": 4.5095367935043654e-07,
|
|
"loss": 0.0671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07232559472322464,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3449.4,
|
|
"valid_targets_min": 856
|
|
},
|
|
{
|
|
"epoch": 6.585127201565558,
|
|
"grad_norm": 0.4451158102887051,
|
|
"learning_rate": 4.3057865359588336e-07,
|
|
"loss": 0.0815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06538708508014679,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2891.5,
|
|
"valid_targets_min": 593
|
|
},
|
|
{
|
|
"epoch": 6.594911937377691,
|
|
"grad_norm": 0.5049095904166596,
|
|
"learning_rate": 4.1066961604517173e-07,
|
|
"loss": 0.0727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07348017394542694,
|
|
"step": 3370,
|
|
"valid_targets_mean": 2615.9,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 6.604696673189824,
|
|
"grad_norm": 0.4665931012172164,
|
|
"learning_rate": 3.912270407734653e-07,
|
|
"loss": 0.0709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07094801217317581,
|
|
"step": 3375,
|
|
"valid_targets_mean": 2681.4,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 6.614481409001957,
|
|
"grad_norm": 0.43757819898632666,
|
|
"learning_rate": 3.722513907485059e-07,
|
|
"loss": 0.069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05991899222135544,
|
|
"step": 3380,
|
|
"valid_targets_mean": 3376.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.62426614481409,
|
|
"grad_norm": 0.4827208072192271,
|
|
"learning_rate": 3.53743117819576e-07,
|
|
"loss": 0.0691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07711789011955261,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2911.2,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 6.634050880626223,
|
|
"grad_norm": 0.4603145031106069,
|
|
"learning_rate": 3.357026627067517e-07,
|
|
"loss": 0.0693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0726405456662178,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3347.3,
|
|
"valid_targets_min": 718
|
|
},
|
|
{
|
|
"epoch": 6.6438356164383565,
|
|
"grad_norm": 0.4512525039794043,
|
|
"learning_rate": 3.1813045499040853e-07,
|
|
"loss": 0.0729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06396128982305527,
|
|
"step": 3395,
|
|
"valid_targets_mean": 2948.3,
|
|
"valid_targets_min": 488
|
|
},
|
|
{
|
|
"epoch": 6.653620352250489,
|
|
"grad_norm": 0.5590748700269601,
|
|
"learning_rate": 3.0102691310097465e-07,
|
|
"loss": 0.0664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09083080291748047,
|
|
"step": 3400,
|
|
"valid_targets_mean": 2479.1,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 6.663405088062622,
|
|
"grad_norm": 0.4601230840272086,
|
|
"learning_rate": 2.843924443089963e-07,
|
|
"loss": 0.0698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08319628238677979,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3018.4,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 6.673189823874756,
|
|
"grad_norm": 0.45005808469478187,
|
|
"learning_rate": 2.6822744471540986e-07,
|
|
"loss": 0.0701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08596012741327286,
|
|
"step": 3410,
|
|
"valid_targets_mean": 2817.6,
|
|
"valid_targets_min": 720
|
|
},
|
|
{
|
|
"epoch": 6.682974559686889,
|
|
"grad_norm": 0.3919190742162424,
|
|
"learning_rate": 2.5253229924213197e-07,
|
|
"loss": 0.0691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05439390987157822,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3104.9,
|
|
"valid_targets_min": 861
|
|
},
|
|
{
|
|
"epoch": 6.692759295499021,
|
|
"grad_norm": 0.42935587377345263,
|
|
"learning_rate": 2.3730738162288214e-07,
|
|
"loss": 0.079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06405270844697952,
|
|
"step": 3420,
|
|
"valid_targets_mean": 3171.0,
|
|
"valid_targets_min": 797
|
|
},
|
|
{
|
|
"epoch": 6.702544031311154,
|
|
"grad_norm": 0.4050572131233319,
|
|
"learning_rate": 2.2255305439428775e-07,
|
|
"loss": 0.0735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0682176873087883,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3082.6,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 6.712328767123288,
|
|
"grad_norm": 0.5112504640863977,
|
|
"learning_rate": 2.082696688872554e-07,
|
|
"loss": 0.0744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07272009551525116,
|
|
"step": 3430,
|
|
"valid_targets_mean": 2604.7,
|
|
"valid_targets_min": 585
|
|
},
|
|
{
|
|
"epoch": 6.722113502935421,
|
|
"grad_norm": 0.4520469516904392,
|
|
"learning_rate": 1.944575652185865e-07,
|
|
"loss": 0.0632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0747576355934143,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3447.1,
|
|
"valid_targets_min": 778
|
|
},
|
|
{
|
|
"epoch": 6.731898238747553,
|
|
"grad_norm": 0.4959728539370891,
|
|
"learning_rate": 1.8111707228290587e-07,
|
|
"loss": 0.0803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10230695456266403,
|
|
"step": 3440,
|
|
"valid_targets_mean": 2490.7,
|
|
"valid_targets_min": 680
|
|
},
|
|
{
|
|
"epoch": 6.741682974559687,
|
|
"grad_norm": 0.564687612939018,
|
|
"learning_rate": 1.6824850774480817e-07,
|
|
"loss": 0.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10522986948490143,
|
|
"step": 3445,
|
|
"valid_targets_mean": 2524.9,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 6.75146771037182,
|
|
"grad_norm": 0.4564262808981618,
|
|
"learning_rate": 1.5585217803130382e-07,
|
|
"loss": 0.0693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06908312439918518,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3246.7,
|
|
"valid_targets_min": 816
|
|
},
|
|
{
|
|
"epoch": 6.761252446183953,
|
|
"grad_norm": 0.3685097171137825,
|
|
"learning_rate": 1.4392837832452044e-07,
|
|
"loss": 0.0624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.04976704716682434,
|
|
"step": 3455,
|
|
"valid_targets_mean": 3558.4,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 6.771037181996086,
|
|
"grad_norm": 0.44998166826049446,
|
|
"learning_rate": 1.3247739255467073e-07,
|
|
"loss": 0.0683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06746148318052292,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3382.1,
|
|
"valid_targets_min": 800
|
|
},
|
|
{
|
|
"epoch": 6.780821917808219,
|
|
"grad_norm": 0.412114167098292,
|
|
"learning_rate": 1.2149949339330224e-07,
|
|
"loss": 0.072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06159614771604538,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3328.3,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 6.790606653620352,
|
|
"grad_norm": 0.5001898542531955,
|
|
"learning_rate": 1.1099494224678265e-07,
|
|
"loss": 0.0756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07178150862455368,
|
|
"step": 3470,
|
|
"valid_targets_mean": 2874.9,
|
|
"valid_targets_min": 717
|
|
},
|
|
{
|
|
"epoch": 6.800391389432486,
|
|
"grad_norm": 0.45394214339484407,
|
|
"learning_rate": 1.0096398925010464e-07,
|
|
"loss": 0.0782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06360410153865814,
|
|
"step": 3475,
|
|
"valid_targets_mean": 3023.1,
|
|
"valid_targets_min": 758
|
|
},
|
|
{
|
|
"epoch": 6.8101761252446185,
|
|
"grad_norm": 0.4031908330874765,
|
|
"learning_rate": 9.140687326090192e-08,
|
|
"loss": 0.075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06069003790616989,
|
|
"step": 3480,
|
|
"valid_targets_mean": 3315.9,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 6.819960861056751,
|
|
"grad_norm": 0.5352073918425082,
|
|
"learning_rate": 8.232382185378252e-08,
|
|
"loss": 0.0726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09013031423091888,
|
|
"step": 3485,
|
|
"valid_targets_mean": 2478.2,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 6.829745596868884,
|
|
"grad_norm": 0.4742305907566785,
|
|
"learning_rate": 7.37150513148932e-08,
|
|
"loss": 0.0762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06776893138885498,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2488.6,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 6.839530332681018,
|
|
"grad_norm": 0.5029427253629741,
|
|
"learning_rate": 6.558076663678137e-08,
|
|
"loss": 0.0731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07862377166748047,
|
|
"step": 3495,
|
|
"valid_targets_mean": 2839.6,
|
|
"valid_targets_min": 656
|
|
},
|
|
{
|
|
"epoch": 6.8493150684931505,
|
|
"grad_norm": 0.42216015174860255,
|
|
"learning_rate": 5.7921161513512237e-08,
|
|
"loss": 0.0763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05871112644672394,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2906.5,
|
|
"valid_targets_min": 629
|
|
},
|
|
{
|
|
"epoch": 6.859099804305284,
|
|
"grad_norm": 0.4509016571919049,
|
|
"learning_rate": 5.0736418336043705e-08,
|
|
"loss": 0.0674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.060274116694927216,
|
|
"step": 3505,
|
|
"valid_targets_mean": 2768.0,
|
|
"valid_targets_min": 549
|
|
},
|
|
{
|
|
"epoch": 6.868884540117417,
|
|
"grad_norm": 0.4267381143120202,
|
|
"learning_rate": 4.402670818790755e-08,
|
|
"loss": 0.0665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.05474133789539337,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3061.9,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 6.87866927592955,
|
|
"grad_norm": 0.4342700484129526,
|
|
"learning_rate": 3.77921908411083e-08,
|
|
"loss": 0.0732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06307805329561234,
|
|
"step": 3515,
|
|
"valid_targets_mean": 3074.4,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 6.888454011741683,
|
|
"grad_norm": 0.5326321563392985,
|
|
"learning_rate": 3.203301475233955e-08,
|
|
"loss": 0.0806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09981432557106018,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2756.1,
|
|
"valid_targets_min": 801
|
|
},
|
|
{
|
|
"epoch": 6.898238747553816,
|
|
"grad_norm": 0.45488358604153306,
|
|
"learning_rate": 2.674931705943573e-08,
|
|
"loss": 0.069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06855349242687225,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3244.5,
|
|
"valid_targets_min": 827
|
|
},
|
|
{
|
|
"epoch": 6.908023483365949,
|
|
"grad_norm": 0.4535624735503632,
|
|
"learning_rate": 2.194122357811912e-08,
|
|
"loss": 0.0897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08300478011369705,
|
|
"step": 3530,
|
|
"valid_targets_mean": 2724.9,
|
|
"valid_targets_min": 634
|
|
},
|
|
{
|
|
"epoch": 6.917808219178082,
|
|
"grad_norm": 0.5408105625931828,
|
|
"learning_rate": 1.760884879898894e-08,
|
|
"loss": 0.078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07406459748744965,
|
|
"step": 3535,
|
|
"valid_targets_mean": 2703.8,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 6.9275929549902155,
|
|
"grad_norm": 0.4034744719739111,
|
|
"learning_rate": 1.3752295884807976e-08,
|
|
"loss": 0.0719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06537080556154251,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3100.1,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 6.937377690802348,
|
|
"grad_norm": 0.4841177004223412,
|
|
"learning_rate": 1.0371656668037855e-08,
|
|
"loss": 0.0814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10481493175029755,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3090.2,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 6.947162426614481,
|
|
"grad_norm": 0.4857124568122188,
|
|
"learning_rate": 7.467011648660816e-09,
|
|
"loss": 0.0817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09597225487232208,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3196.7,
|
|
"valid_targets_min": 811
|
|
},
|
|
{
|
|
"epoch": 6.956947162426615,
|
|
"grad_norm": 0.4388488828589182,
|
|
"learning_rate": 5.0384299922501266e-09,
|
|
"loss": 0.0663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07025769352912903,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3822.1,
|
|
"valid_targets_min": 954
|
|
},
|
|
{
|
|
"epoch": 6.966731898238748,
|
|
"grad_norm": 0.4993622905131659,
|
|
"learning_rate": 3.085969528333621e-09,
|
|
"loss": 0.069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0812491849064827,
|
|
"step": 3560,
|
|
"valid_targets_mean": 2753.4,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 6.97651663405088,
|
|
"grad_norm": 0.4550823355534768,
|
|
"learning_rate": 1.6096767490170195e-09,
|
|
"loss": 0.0686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06270751357078552,
|
|
"step": 3565,
|
|
"valid_targets_mean": 3089.9,
|
|
"valid_targets_min": 864
|
|
},
|
|
{
|
|
"epoch": 6.986301369863014,
|
|
"grad_norm": 0.5623088551886786,
|
|
"learning_rate": 6.095868078670464e-10,
|
|
"loss": 0.0687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08164788782596588,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2565.1,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 6.996086105675147,
|
|
"grad_norm": 0.4922155503670224,
|
|
"learning_rate": 8.572351908542331e-11,
|
|
"loss": 0.082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08383533358573914,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2658.2,
|
|
"valid_targets_min": 559
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06287752836942673,
|
|
"step": 3577,
|
|
"total_flos": 1086238443962368.0,
|
|
"train_loss": 0.10740660232207214,
|
|
"train_runtime": 20738.2442,
|
|
"train_samples_per_second": 2.758,
|
|
"train_steps_per_second": 0.172,
|
|
"valid_targets_mean": 3093.3,
|
|
"valid_targets_min": 804
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 3577,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1086238443962368.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|