6230 lines
173 KiB
JSON
6230 lines
173 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 2814,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.012437810945273632,
|
|
"grad_norm": 13.380951406456147,
|
|
"learning_rate": 5.673758865248227e-07,
|
|
"loss": 0.7822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7657574415206909,
|
|
"step": 5,
|
|
"valid_targets_mean": 4010.9,
|
|
"valid_targets_min": 1767
|
|
},
|
|
{
|
|
"epoch": 0.024875621890547265,
|
|
"grad_norm": 12.545654526351226,
|
|
"learning_rate": 1.276595744680851e-06,
|
|
"loss": 0.8056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8114880919456482,
|
|
"step": 10,
|
|
"valid_targets_mean": 3892.8,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 0.03731343283582089,
|
|
"grad_norm": 9.667916880642556,
|
|
"learning_rate": 1.9858156028368797e-06,
|
|
"loss": 0.7418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6924527287483215,
|
|
"step": 15,
|
|
"valid_targets_mean": 4415.2,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 0.04975124378109453,
|
|
"grad_norm": 6.112913024339798,
|
|
"learning_rate": 2.695035460992908e-06,
|
|
"loss": 0.6973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.668594241142273,
|
|
"step": 20,
|
|
"valid_targets_mean": 3674.9,
|
|
"valid_targets_min": 191
|
|
},
|
|
{
|
|
"epoch": 0.06218905472636816,
|
|
"grad_norm": 4.866305911621686,
|
|
"learning_rate": 3.4042553191489363e-06,
|
|
"loss": 0.6624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.644311785697937,
|
|
"step": 25,
|
|
"valid_targets_mean": 3697.1,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 0.07462686567164178,
|
|
"grad_norm": 4.31903460986635,
|
|
"learning_rate": 4.113475177304965e-06,
|
|
"loss": 0.6025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.569460391998291,
|
|
"step": 30,
|
|
"valid_targets_mean": 4055.9,
|
|
"valid_targets_min": 1675
|
|
},
|
|
{
|
|
"epoch": 0.08706467661691543,
|
|
"grad_norm": 1.8781712293982455,
|
|
"learning_rate": 4.822695035460993e-06,
|
|
"loss": 0.5493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5322237014770508,
|
|
"step": 35,
|
|
"valid_targets_mean": 3793.4,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 0.09950248756218906,
|
|
"grad_norm": 1.3648445070279718,
|
|
"learning_rate": 5.531914893617022e-06,
|
|
"loss": 0.5353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5094773769378662,
|
|
"step": 40,
|
|
"valid_targets_mean": 3781.3,
|
|
"valid_targets_min": 1421
|
|
},
|
|
{
|
|
"epoch": 0.11194029850746269,
|
|
"grad_norm": 0.9598535483295971,
|
|
"learning_rate": 6.24113475177305e-06,
|
|
"loss": 0.4963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4716443717479706,
|
|
"step": 45,
|
|
"valid_targets_mean": 4314.4,
|
|
"valid_targets_min": 1362
|
|
},
|
|
{
|
|
"epoch": 0.12437810945273632,
|
|
"grad_norm": 0.8095195355924959,
|
|
"learning_rate": 6.950354609929079e-06,
|
|
"loss": 0.4842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4684237241744995,
|
|
"step": 50,
|
|
"valid_targets_mean": 4619.2,
|
|
"valid_targets_min": 2128
|
|
},
|
|
{
|
|
"epoch": 0.13681592039800994,
|
|
"grad_norm": 0.9535042747879249,
|
|
"learning_rate": 7.659574468085107e-06,
|
|
"loss": 0.4799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.502250611782074,
|
|
"step": 55,
|
|
"valid_targets_mean": 3554.8,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 0.14925373134328357,
|
|
"grad_norm": 0.7738334376355982,
|
|
"learning_rate": 8.368794326241135e-06,
|
|
"loss": 0.4574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49030762910842896,
|
|
"step": 60,
|
|
"valid_targets_mean": 3948.2,
|
|
"valid_targets_min": 2060
|
|
},
|
|
{
|
|
"epoch": 0.16169154228855723,
|
|
"grad_norm": 0.7133304690095129,
|
|
"learning_rate": 9.078014184397164e-06,
|
|
"loss": 0.4319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41507917642593384,
|
|
"step": 65,
|
|
"valid_targets_mean": 4287.1,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 0.17412935323383086,
|
|
"grad_norm": 0.6718539217454166,
|
|
"learning_rate": 9.787234042553192e-06,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41251397132873535,
|
|
"step": 70,
|
|
"valid_targets_mean": 4147.2,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 0.1865671641791045,
|
|
"grad_norm": 0.6888672437166001,
|
|
"learning_rate": 1.049645390070922e-05,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4331628978252411,
|
|
"step": 75,
|
|
"valid_targets_mean": 3984.5,
|
|
"valid_targets_min": 1357
|
|
},
|
|
{
|
|
"epoch": 0.19900497512437812,
|
|
"grad_norm": 0.7612883410877886,
|
|
"learning_rate": 1.120567375886525e-05,
|
|
"loss": 0.3944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3932691514492035,
|
|
"step": 80,
|
|
"valid_targets_mean": 3577.6,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 0.21144278606965175,
|
|
"grad_norm": 0.6889250639530377,
|
|
"learning_rate": 1.1914893617021277e-05,
|
|
"loss": 0.3975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4268104135990143,
|
|
"step": 85,
|
|
"valid_targets_mean": 4420.2,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 0.22388059701492538,
|
|
"grad_norm": 0.7389874437127749,
|
|
"learning_rate": 1.2624113475177307e-05,
|
|
"loss": 0.3817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38703107833862305,
|
|
"step": 90,
|
|
"valid_targets_mean": 3759.1,
|
|
"valid_targets_min": 2311
|
|
},
|
|
{
|
|
"epoch": 0.236318407960199,
|
|
"grad_norm": 0.6632832871080014,
|
|
"learning_rate": 1.3333333333333333e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3471001982688904,
|
|
"step": 95,
|
|
"valid_targets_mean": 4546.7,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 0.24875621890547264,
|
|
"grad_norm": 0.6773454789521931,
|
|
"learning_rate": 1.4042553191489363e-05,
|
|
"loss": 0.3705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37445783615112305,
|
|
"step": 100,
|
|
"valid_targets_mean": 3990.8,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 0.26119402985074625,
|
|
"grad_norm": 0.5679953534246097,
|
|
"learning_rate": 1.475177304964539e-05,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3382793068885803,
|
|
"step": 105,
|
|
"valid_targets_mean": 5048.9,
|
|
"valid_targets_min": 1979
|
|
},
|
|
{
|
|
"epoch": 0.2736318407960199,
|
|
"grad_norm": 0.6335472342807328,
|
|
"learning_rate": 1.546099290780142e-05,
|
|
"loss": 0.3527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36811989545822144,
|
|
"step": 110,
|
|
"valid_targets_mean": 3544.6,
|
|
"valid_targets_min": 2019
|
|
},
|
|
{
|
|
"epoch": 0.2860696517412935,
|
|
"grad_norm": 0.5873605193729509,
|
|
"learning_rate": 1.6170212765957446e-05,
|
|
"loss": 0.35,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3602096438407898,
|
|
"step": 115,
|
|
"valid_targets_mean": 4335.9,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 0.29850746268656714,
|
|
"grad_norm": 0.6603180715772641,
|
|
"learning_rate": 1.6879432624113476e-05,
|
|
"loss": 0.3513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36116451025009155,
|
|
"step": 120,
|
|
"valid_targets_mean": 3368.5,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 0.31094527363184077,
|
|
"grad_norm": 0.6177290895347114,
|
|
"learning_rate": 1.7588652482269506e-05,
|
|
"loss": 0.3454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35631102323532104,
|
|
"step": 125,
|
|
"valid_targets_mean": 4271.0,
|
|
"valid_targets_min": 2572
|
|
},
|
|
{
|
|
"epoch": 0.32338308457711445,
|
|
"grad_norm": 0.5970279991046994,
|
|
"learning_rate": 1.8297872340425533e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3269408345222473,
|
|
"step": 130,
|
|
"valid_targets_mean": 4144.0,
|
|
"valid_targets_min": 2433
|
|
},
|
|
{
|
|
"epoch": 0.3358208955223881,
|
|
"grad_norm": 0.6988436544880555,
|
|
"learning_rate": 1.9007092198581563e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3329390287399292,
|
|
"step": 135,
|
|
"valid_targets_mean": 3773.1,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 0.3482587064676617,
|
|
"grad_norm": 0.5663119370039671,
|
|
"learning_rate": 1.971631205673759e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33959251642227173,
|
|
"step": 140,
|
|
"valid_targets_mean": 5226.4,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 0.36069651741293535,
|
|
"grad_norm": 0.7382656927862868,
|
|
"learning_rate": 2.0425531914893616e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2998434901237488,
|
|
"step": 145,
|
|
"valid_targets_mean": 3405.1,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 0.373134328358209,
|
|
"grad_norm": 0.6399598041440727,
|
|
"learning_rate": 2.113475177304965e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3149760365486145,
|
|
"step": 150,
|
|
"valid_targets_mean": 4147.7,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 0.3855721393034826,
|
|
"grad_norm": 0.6190347784845001,
|
|
"learning_rate": 2.1843971631205676e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32194218039512634,
|
|
"step": 155,
|
|
"valid_targets_mean": 3982.0,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 0.39800995024875624,
|
|
"grad_norm": 0.6629989034109828,
|
|
"learning_rate": 2.2553191489361703e-05,
|
|
"loss": 0.3105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29769569635391235,
|
|
"step": 160,
|
|
"valid_targets_mean": 3653.1,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 0.41044776119402987,
|
|
"grad_norm": 0.6705243113369573,
|
|
"learning_rate": 2.326241134751773e-05,
|
|
"loss": 0.3093,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31587380170822144,
|
|
"step": 165,
|
|
"valid_targets_mean": 4081.2,
|
|
"valid_targets_min": 2034
|
|
},
|
|
{
|
|
"epoch": 0.4228855721393035,
|
|
"grad_norm": 0.9424068832053736,
|
|
"learning_rate": 2.3971631205673763e-05,
|
|
"loss": 0.3142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3107619285583496,
|
|
"step": 170,
|
|
"valid_targets_mean": 3648.3,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 0.43532338308457713,
|
|
"grad_norm": 0.6511358668082933,
|
|
"learning_rate": 2.468085106382979e-05,
|
|
"loss": 0.3245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34112197160720825,
|
|
"step": 175,
|
|
"valid_targets_mean": 3819.4,
|
|
"valid_targets_min": 1782
|
|
},
|
|
{
|
|
"epoch": 0.44776119402985076,
|
|
"grad_norm": 0.6476380044242603,
|
|
"learning_rate": 2.5390070921985816e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29842424392700195,
|
|
"step": 180,
|
|
"valid_targets_mean": 3789.5,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 0.4601990049751244,
|
|
"grad_norm": 0.627368462104055,
|
|
"learning_rate": 2.609929078014185e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33267688751220703,
|
|
"step": 185,
|
|
"valid_targets_mean": 4517.8,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 0.472636815920398,
|
|
"grad_norm": 0.6466632344322205,
|
|
"learning_rate": 2.6808510638297876e-05,
|
|
"loss": 0.3025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29927682876586914,
|
|
"step": 190,
|
|
"valid_targets_mean": 4185.9,
|
|
"valid_targets_min": 1492
|
|
},
|
|
{
|
|
"epoch": 0.48507462686567165,
|
|
"grad_norm": 0.6662800469045165,
|
|
"learning_rate": 2.7517730496453903e-05,
|
|
"loss": 0.3151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3514325022697449,
|
|
"step": 195,
|
|
"valid_targets_mean": 4175.0,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 0.4975124378109453,
|
|
"grad_norm": 0.6517689188685865,
|
|
"learning_rate": 2.822695035460993e-05,
|
|
"loss": 0.3084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3168841302394867,
|
|
"step": 200,
|
|
"valid_targets_mean": 4177.4,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 0.5099502487562189,
|
|
"grad_norm": 0.6855031086226197,
|
|
"learning_rate": 2.8936170212765963e-05,
|
|
"loss": 0.297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2854161858558655,
|
|
"step": 205,
|
|
"valid_targets_mean": 3589.5,
|
|
"valid_targets_min": 1106
|
|
},
|
|
{
|
|
"epoch": 0.5223880597014925,
|
|
"grad_norm": 0.6720194490881026,
|
|
"learning_rate": 2.964539007092199e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2903139591217041,
|
|
"step": 210,
|
|
"valid_targets_mean": 3750.4,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 0.5348258706467661,
|
|
"grad_norm": 0.6995838272080971,
|
|
"learning_rate": 3.0354609929078016e-05,
|
|
"loss": 0.3066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29320502281188965,
|
|
"step": 215,
|
|
"valid_targets_mean": 3562.6,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 0.5472636815920398,
|
|
"grad_norm": 0.5930224509626889,
|
|
"learning_rate": 3.1063829787234046e-05,
|
|
"loss": 0.3155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33783021569252014,
|
|
"step": 220,
|
|
"valid_targets_mean": 4651.5,
|
|
"valid_targets_min": 2381
|
|
},
|
|
{
|
|
"epoch": 0.5597014925373134,
|
|
"grad_norm": 0.6173823876530793,
|
|
"learning_rate": 3.1773049645390076e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30158767104148865,
|
|
"step": 225,
|
|
"valid_targets_mean": 4075.6,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 0.572139303482587,
|
|
"grad_norm": 0.7805956934434903,
|
|
"learning_rate": 3.24822695035461e-05,
|
|
"loss": 0.3008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31823039054870605,
|
|
"step": 230,
|
|
"valid_targets_mean": 3906.0,
|
|
"valid_targets_min": 1425
|
|
},
|
|
{
|
|
"epoch": 0.5845771144278606,
|
|
"grad_norm": 0.6419587125168111,
|
|
"learning_rate": 3.319148936170213e-05,
|
|
"loss": 0.302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31326407194137573,
|
|
"step": 235,
|
|
"valid_targets_mean": 3665.2,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 0.5970149253731343,
|
|
"grad_norm": 0.8679447185514305,
|
|
"learning_rate": 3.390070921985816e-05,
|
|
"loss": 0.3012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29827624559402466,
|
|
"step": 240,
|
|
"valid_targets_mean": 3904.9,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 0.6094527363184079,
|
|
"grad_norm": 0.6810499039895173,
|
|
"learning_rate": 3.460992907801419e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2755545377731323,
|
|
"step": 245,
|
|
"valid_targets_mean": 3971.8,
|
|
"valid_targets_min": 2000
|
|
},
|
|
{
|
|
"epoch": 0.6218905472636815,
|
|
"grad_norm": 0.6597104835502321,
|
|
"learning_rate": 3.531914893617022e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824779152870178,
|
|
"step": 250,
|
|
"valid_targets_mean": 3968.2,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 0.6343283582089553,
|
|
"grad_norm": 0.740278069571726,
|
|
"learning_rate": 3.602836879432624e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2923567593097687,
|
|
"step": 255,
|
|
"valid_targets_mean": 3380.2,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 0.6467661691542289,
|
|
"grad_norm": 0.6593440154558078,
|
|
"learning_rate": 3.673758865248227e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.315481960773468,
|
|
"step": 260,
|
|
"valid_targets_mean": 3845.1,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 0.6592039800995025,
|
|
"grad_norm": 0.5518441987402686,
|
|
"learning_rate": 3.74468085106383e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2802160084247589,
|
|
"step": 265,
|
|
"valid_targets_mean": 4580.6,
|
|
"valid_targets_min": 2473
|
|
},
|
|
{
|
|
"epoch": 0.6716417910447762,
|
|
"grad_norm": 0.8104837314241858,
|
|
"learning_rate": 3.815602836879433e-05,
|
|
"loss": 0.3062,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3234109878540039,
|
|
"step": 270,
|
|
"valid_targets_mean": 3906.8,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 0.6840796019900498,
|
|
"grad_norm": 0.574610630199438,
|
|
"learning_rate": 3.8865248226950355e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2707560956478119,
|
|
"step": 275,
|
|
"valid_targets_mean": 4401.9,
|
|
"valid_targets_min": 2550
|
|
},
|
|
{
|
|
"epoch": 0.6965174129353234,
|
|
"grad_norm": 0.6473677332767744,
|
|
"learning_rate": 3.9574468085106385e-05,
|
|
"loss": 0.2886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29438745975494385,
|
|
"step": 280,
|
|
"valid_targets_mean": 4070.2,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 0.7089552238805971,
|
|
"grad_norm": 0.6669914526756555,
|
|
"learning_rate": 3.999993842107385e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29622241854667664,
|
|
"step": 285,
|
|
"valid_targets_mean": 3670.9,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 0.7213930348258707,
|
|
"grad_norm": 0.6040016655187556,
|
|
"learning_rate": 3.999924566250946e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25055813789367676,
|
|
"step": 290,
|
|
"valid_targets_mean": 4318.6,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 0.7338308457711443,
|
|
"grad_norm": 0.6412537411386398,
|
|
"learning_rate": 3.999778319847388e-05,
|
|
"loss": 0.275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2983705401420593,
|
|
"step": 295,
|
|
"valid_targets_mean": 4027.8,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 0.746268656716418,
|
|
"grad_norm": 0.7602229991895589,
|
|
"learning_rate": 3.999555108525255e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838355004787445,
|
|
"step": 300,
|
|
"valid_targets_mean": 4391.4,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 0.7587064676616916,
|
|
"grad_norm": 0.5443397707250325,
|
|
"learning_rate": 3.999254940875221e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30530381202697754,
|
|
"step": 305,
|
|
"valid_targets_mean": 4607.9,
|
|
"valid_targets_min": 1976
|
|
},
|
|
{
|
|
"epoch": 0.7711442786069652,
|
|
"grad_norm": 0.7644982978652062,
|
|
"learning_rate": 3.998877828449755e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29389941692352295,
|
|
"step": 310,
|
|
"valid_targets_mean": 3338.8,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 0.7835820895522388,
|
|
"grad_norm": 0.6097921931378489,
|
|
"learning_rate": 3.99842378576268e-05,
|
|
"loss": 0.2791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719549834728241,
|
|
"step": 315,
|
|
"valid_targets_mean": 4149.0,
|
|
"valid_targets_min": 2106
|
|
},
|
|
{
|
|
"epoch": 0.7960199004975125,
|
|
"grad_norm": 0.6026543056079762,
|
|
"learning_rate": 3.997892830288611e-05,
|
|
"loss": 0.2763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28515109419822693,
|
|
"step": 320,
|
|
"valid_targets_mean": 3764.9,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 0.8084577114427861,
|
|
"grad_norm": 0.7013246853914047,
|
|
"learning_rate": 3.997284982462286e-05,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32851818203926086,
|
|
"step": 325,
|
|
"valid_targets_mean": 3250.0,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 0.8208955223880597,
|
|
"grad_norm": 0.6272103188781436,
|
|
"learning_rate": 3.9966002656777775e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2576582133769989,
|
|
"step": 330,
|
|
"valid_targets_mean": 3494.9,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.6106794281804707,
|
|
"learning_rate": 3.9958387062875924e-05,
|
|
"loss": 0.2803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28204280138015747,
|
|
"step": 335,
|
|
"valid_targets_mean": 3829.8,
|
|
"valid_targets_min": 1935
|
|
},
|
|
{
|
|
"epoch": 0.845771144278607,
|
|
"grad_norm": 0.5489443050050972,
|
|
"learning_rate": 3.9950003336016564e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2528287172317505,
|
|
"step": 340,
|
|
"valid_targets_mean": 4614.7,
|
|
"valid_targets_min": 2051
|
|
},
|
|
{
|
|
"epoch": 0.8582089552238806,
|
|
"grad_norm": 0.5465751723442666,
|
|
"learning_rate": 3.99408517988619e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26588577032089233,
|
|
"step": 345,
|
|
"valid_targets_mean": 4694.1,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 0.8706467661691543,
|
|
"grad_norm": 0.5320142976407558,
|
|
"learning_rate": 3.993093280362462e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25479674339294434,
|
|
"step": 350,
|
|
"valid_targets_mean": 4793.0,
|
|
"valid_targets_min": 2890
|
|
},
|
|
{
|
|
"epoch": 0.8830845771144279,
|
|
"grad_norm": 0.643787133545081,
|
|
"learning_rate": 3.9920246732054374e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2818455398082733,
|
|
"step": 355,
|
|
"valid_targets_mean": 4366.5,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 0.8955223880597015,
|
|
"grad_norm": 0.6844127977814671,
|
|
"learning_rate": 3.990879399542305e-05,
|
|
"loss": 0.2673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2798788845539093,
|
|
"step": 360,
|
|
"valid_targets_mean": 3420.6,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 0.9079601990049752,
|
|
"grad_norm": 0.5798388818266013,
|
|
"learning_rate": 3.989657503450898e-05,
|
|
"loss": 0.2859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28030702471733093,
|
|
"step": 365,
|
|
"valid_targets_mean": 4205.9,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 0.9203980099502488,
|
|
"grad_norm": 0.5693045180329982,
|
|
"learning_rate": 3.9883590319579966e-05,
|
|
"loss": 0.2952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3048206567764282,
|
|
"step": 370,
|
|
"valid_targets_mean": 4680.9,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 0.9328358208955224,
|
|
"grad_norm": 0.77488630073332,
|
|
"learning_rate": 3.986984035037514e-05,
|
|
"loss": 0.2759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2770192325115204,
|
|
"step": 375,
|
|
"valid_targets_mean": 3701.6,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 0.945273631840796,
|
|
"grad_norm": 0.5726365998589211,
|
|
"learning_rate": 3.9855325656085815e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29798316955566406,
|
|
"step": 380,
|
|
"valid_targets_mean": 4057.5,
|
|
"valid_targets_min": 1639
|
|
},
|
|
{
|
|
"epoch": 0.9577114427860697,
|
|
"grad_norm": 0.5794218327334973,
|
|
"learning_rate": 3.984004679533502e-05,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2974611520767212,
|
|
"step": 385,
|
|
"valid_targets_mean": 4386.4,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 0.9701492537313433,
|
|
"grad_norm": 0.6200804315597975,
|
|
"learning_rate": 3.982400435615608e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26094967126846313,
|
|
"step": 390,
|
|
"valid_targets_mean": 3871.4,
|
|
"valid_targets_min": 2061
|
|
},
|
|
{
|
|
"epoch": 0.9825870646766169,
|
|
"grad_norm": 0.5663415236256595,
|
|
"learning_rate": 3.980719895596994e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790575921535492,
|
|
"step": 395,
|
|
"valid_targets_mean": 4268.0,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 0.9950248756218906,
|
|
"grad_norm": 0.5335095669700068,
|
|
"learning_rate": 3.978963124156141e-05,
|
|
"loss": 0.2692,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25512856245040894,
|
|
"step": 400,
|
|
"valid_targets_mean": 4742.2,
|
|
"valid_targets_min": 2403
|
|
},
|
|
{
|
|
"epoch": 1.007462686567164,
|
|
"grad_norm": 0.6706949371800399,
|
|
"learning_rate": 3.977130188905429e-05,
|
|
"loss": 0.2671,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25862938165664673,
|
|
"step": 405,
|
|
"valid_targets_mean": 3975.2,
|
|
"valid_targets_min": 1886
|
|
},
|
|
{
|
|
"epoch": 1.0199004975124377,
|
|
"grad_norm": 0.6198638973234687,
|
|
"learning_rate": 3.975221160388535e-05,
|
|
"loss": 0.2586,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2816004753112793,
|
|
"step": 410,
|
|
"valid_targets_mean": 4115.3,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 1.0323383084577114,
|
|
"grad_norm": 0.5477295121301582,
|
|
"learning_rate": 3.973236112077712e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2588885724544525,
|
|
"step": 415,
|
|
"valid_targets_mean": 4426.1,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 1.044776119402985,
|
|
"grad_norm": 0.6101761931142692,
|
|
"learning_rate": 3.971175120370971e-05,
|
|
"loss": 0.2552,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26513034105300903,
|
|
"step": 420,
|
|
"valid_targets_mean": 4070.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 1.0572139303482586,
|
|
"grad_norm": 0.5897683101564732,
|
|
"learning_rate": 3.969038264589132e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25092267990112305,
|
|
"step": 425,
|
|
"valid_targets_mean": 4073.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 1.0696517412935322,
|
|
"grad_norm": 0.5625409270111561,
|
|
"learning_rate": 3.966825626972777e-05,
|
|
"loss": 0.2694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2630409002304077,
|
|
"step": 430,
|
|
"valid_targets_mean": 4102.7,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 1.0820895522388059,
|
|
"grad_norm": 0.565867121844965,
|
|
"learning_rate": 3.964537292679081e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2534593641757965,
|
|
"step": 435,
|
|
"valid_targets_mean": 4240.8,
|
|
"valid_targets_min": 1908
|
|
},
|
|
{
|
|
"epoch": 1.0945273631840795,
|
|
"grad_norm": 0.6406440477946179,
|
|
"learning_rate": 3.962173349778538e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28820428252220154,
|
|
"step": 440,
|
|
"valid_targets_mean": 3382.1,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 1.1069651741293531,
|
|
"grad_norm": 0.5787142049618801,
|
|
"learning_rate": 3.959733889251569e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2697809636592865,
|
|
"step": 445,
|
|
"valid_targets_mean": 4141.3,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 1.1194029850746268,
|
|
"grad_norm": 0.5835970548274867,
|
|
"learning_rate": 3.9572190049850186e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2650759518146515,
|
|
"step": 450,
|
|
"valid_targets_mean": 3681.7,
|
|
"valid_targets_min": 1929
|
|
},
|
|
{
|
|
"epoch": 1.1318407960199006,
|
|
"grad_norm": 0.6578060953434757,
|
|
"learning_rate": 3.9546287937685485e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592794895172119,
|
|
"step": 455,
|
|
"valid_targets_mean": 3309.7,
|
|
"valid_targets_min": 1400
|
|
},
|
|
{
|
|
"epoch": 1.144278606965174,
|
|
"grad_norm": 0.5127241563740503,
|
|
"learning_rate": 3.9519633552909054e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23262007534503937,
|
|
"step": 460,
|
|
"valid_targets_mean": 5108.5,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 1.1567164179104479,
|
|
"grad_norm": 0.6164622352052561,
|
|
"learning_rate": 3.949222792136087e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23169252276420593,
|
|
"step": 465,
|
|
"valid_targets_mean": 4548.3,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 1.1691542288557213,
|
|
"grad_norm": 0.6496210536118021,
|
|
"learning_rate": 3.946407209779395e-05,
|
|
"loss": 0.264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2703050374984741,
|
|
"step": 470,
|
|
"valid_targets_mean": 3854.2,
|
|
"valid_targets_min": 2534
|
|
},
|
|
{
|
|
"epoch": 1.1815920398009951,
|
|
"grad_norm": 0.6983143179255165,
|
|
"learning_rate": 3.9435167165833724e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26550132036209106,
|
|
"step": 475,
|
|
"valid_targets_mean": 3896.3,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 1.1940298507462686,
|
|
"grad_norm": 0.5555184412354105,
|
|
"learning_rate": 3.940551423793638e-05,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535971403121948,
|
|
"step": 480,
|
|
"valid_targets_mean": 4125.7,
|
|
"valid_targets_min": 1715
|
|
},
|
|
{
|
|
"epoch": 1.2064676616915424,
|
|
"grad_norm": 0.5704921624189617,
|
|
"learning_rate": 3.937511445534599e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26805728673934937,
|
|
"step": 485,
|
|
"valid_targets_mean": 3877.7,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 1.2189054726368158,
|
|
"grad_norm": 0.6495765923740222,
|
|
"learning_rate": 3.934396898805064e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24718452990055084,
|
|
"step": 490,
|
|
"valid_targets_mean": 3588.3,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 1.2313432835820897,
|
|
"grad_norm": 0.5876505289888774,
|
|
"learning_rate": 3.931207903473737e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2569234371185303,
|
|
"step": 495,
|
|
"valid_targets_mean": 4021.1,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 1.243781094527363,
|
|
"grad_norm": 0.5703559162652219,
|
|
"learning_rate": 3.9279445822746045e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23905381560325623,
|
|
"step": 500,
|
|
"valid_targets_mean": 3682.4,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 1.256218905472637,
|
|
"grad_norm": 0.6506750265611713,
|
|
"learning_rate": 3.9246070608022125e-05,
|
|
"loss": 0.263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26591765880584717,
|
|
"step": 505,
|
|
"valid_targets_mean": 3393.1,
|
|
"valid_targets_min": 2128
|
|
},
|
|
{
|
|
"epoch": 1.2686567164179103,
|
|
"grad_norm": 0.5787258957804087,
|
|
"learning_rate": 3.921195467506833e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2495618611574173,
|
|
"step": 510,
|
|
"valid_targets_mean": 3888.8,
|
|
"valid_targets_min": 1134
|
|
},
|
|
{
|
|
"epoch": 1.2810945273631842,
|
|
"grad_norm": 0.519436549423975,
|
|
"learning_rate": 3.917709933689519e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26761433482170105,
|
|
"step": 515,
|
|
"valid_targets_mean": 4347.1,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 1.2935323383084576,
|
|
"grad_norm": 0.6076968588802545,
|
|
"learning_rate": 3.914150593497054e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2475341260433197,
|
|
"step": 520,
|
|
"valid_targets_mean": 3474.6,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 1.3059701492537314,
|
|
"grad_norm": 0.6306744703358103,
|
|
"learning_rate": 3.910517583916783e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28335511684417725,
|
|
"step": 525,
|
|
"valid_targets_mean": 3507.3,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 1.3184079601990049,
|
|
"grad_norm": 0.5110706633146148,
|
|
"learning_rate": 3.90681104477135e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23450972139835358,
|
|
"step": 530,
|
|
"valid_targets_mean": 4537.1,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 1.3308457711442787,
|
|
"grad_norm": 0.6389986218399706,
|
|
"learning_rate": 3.903031118713307e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26319772005081177,
|
|
"step": 535,
|
|
"valid_targets_mean": 3198.9,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 1.3432835820895521,
|
|
"grad_norm": 0.6465328887813366,
|
|
"learning_rate": 3.8991779512196294e-05,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25991159677505493,
|
|
"step": 540,
|
|
"valid_targets_mean": 3375.8,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 1.355721393034826,
|
|
"grad_norm": 0.5736019923931159,
|
|
"learning_rate": 3.8952516905861155e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2686164379119873,
|
|
"step": 545,
|
|
"valid_targets_mean": 4258.2,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 1.3681592039800994,
|
|
"grad_norm": 0.5067100499995646,
|
|
"learning_rate": 3.89125248792168e-05,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23678192496299744,
|
|
"step": 550,
|
|
"valid_targets_mean": 4387.9,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 1.3805970149253732,
|
|
"grad_norm": 0.5477380240295115,
|
|
"learning_rate": 3.8871804971425353e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2501421570777893,
|
|
"step": 555,
|
|
"valid_targets_mean": 3848.6,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 1.3930348258706466,
|
|
"grad_norm": 0.5863449356154877,
|
|
"learning_rate": 3.883035874966273e-05,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24096009135246277,
|
|
"step": 560,
|
|
"valid_targets_mean": 3734.2,
|
|
"valid_targets_min": 1872
|
|
},
|
|
{
|
|
"epoch": 1.4054726368159205,
|
|
"grad_norm": 0.5379477926228984,
|
|
"learning_rate": 3.878818780905826e-05,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23347872495651245,
|
|
"step": 565,
|
|
"valid_targets_mean": 4070.9,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 1.417910447761194,
|
|
"grad_norm": 0.5419932690009873,
|
|
"learning_rate": 3.874529377263335e-05,
|
|
"loss": 0.2626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25849100947380066,
|
|
"step": 570,
|
|
"valid_targets_mean": 4307.4,
|
|
"valid_targets_min": 1746
|
|
},
|
|
{
|
|
"epoch": 1.4303482587064678,
|
|
"grad_norm": 0.5729702092975528,
|
|
"learning_rate": 3.870167829123899e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2920796871185303,
|
|
"step": 575,
|
|
"valid_targets_mean": 4220.1,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 1.4427860696517412,
|
|
"grad_norm": 0.5205776739120929,
|
|
"learning_rate": 3.865734304349224e-05,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23339340090751648,
|
|
"step": 580,
|
|
"valid_targets_mean": 4367.3,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 1.455223880597015,
|
|
"grad_norm": 0.5536951564209288,
|
|
"learning_rate": 3.861228973571158e-05,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24707631766796112,
|
|
"step": 585,
|
|
"valid_targets_mean": 4527.9,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 1.4676616915422884,
|
|
"grad_norm": 0.564511967740949,
|
|
"learning_rate": 3.856652010185128e-05,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2429649531841278,
|
|
"step": 590,
|
|
"valid_targets_mean": 3914.8,
|
|
"valid_targets_min": 2402
|
|
},
|
|
{
|
|
"epoch": 1.4800995024875623,
|
|
"grad_norm": 0.5579953289935093,
|
|
"learning_rate": 3.852003590343467e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21945007145404816,
|
|
"step": 595,
|
|
"valid_targets_mean": 4183.3,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.4925373134328357,
|
|
"grad_norm": 0.5631837154917557,
|
|
"learning_rate": 3.847283892948631e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27015361189842224,
|
|
"step": 600,
|
|
"valid_targets_mean": 3908.9,
|
|
"valid_targets_min": 1638
|
|
},
|
|
{
|
|
"epoch": 1.5049751243781095,
|
|
"grad_norm": 0.5901864513013043,
|
|
"learning_rate": 3.8424930996463173e-05,
|
|
"loss": 0.2565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25428202748298645,
|
|
"step": 605,
|
|
"valid_targets_mean": 4185.3,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 1.517412935323383,
|
|
"grad_norm": 0.5161889221577463,
|
|
"learning_rate": 3.837631394818471e-05,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2310006320476532,
|
|
"step": 610,
|
|
"valid_targets_mean": 4516.8,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 1.5298507462686568,
|
|
"grad_norm": 0.5359807008019226,
|
|
"learning_rate": 3.832698965576189e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2637181878089905,
|
|
"step": 615,
|
|
"valid_targets_mean": 3867.1,
|
|
"valid_targets_min": 2465
|
|
},
|
|
{
|
|
"epoch": 1.5422885572139302,
|
|
"grad_norm": 0.5674145217928909,
|
|
"learning_rate": 3.8276960017525197e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22988495230674744,
|
|
"step": 620,
|
|
"valid_targets_mean": 4242.8,
|
|
"valid_targets_min": 1588
|
|
},
|
|
{
|
|
"epoch": 1.554726368159204,
|
|
"grad_norm": 0.5723411242257961,
|
|
"learning_rate": 3.822622695895157e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25821536779403687,
|
|
"step": 625,
|
|
"valid_targets_mean": 4392.1,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 1.5671641791044775,
|
|
"grad_norm": 0.5376996090192159,
|
|
"learning_rate": 3.8174792432590294e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23149748146533966,
|
|
"step": 630,
|
|
"valid_targets_mean": 4030.4,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 1.5796019900497513,
|
|
"grad_norm": 0.5681703694899468,
|
|
"learning_rate": 3.8122658417987854e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24532127380371094,
|
|
"step": 635,
|
|
"valid_targets_mean": 3592.2,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 1.5920398009950247,
|
|
"grad_norm": 0.5672170181926013,
|
|
"learning_rate": 3.8069826921611736e-05,
|
|
"loss": 0.2527,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25750595331192017,
|
|
"step": 640,
|
|
"valid_targets_mean": 3691.6,
|
|
"valid_targets_min": 2389
|
|
},
|
|
{
|
|
"epoch": 1.6044776119402986,
|
|
"grad_norm": 0.5840248420927169,
|
|
"learning_rate": 3.8016299976773215e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24948638677597046,
|
|
"step": 645,
|
|
"valid_targets_mean": 3554.7,
|
|
"valid_targets_min": 2311
|
|
},
|
|
{
|
|
"epoch": 1.616915422885572,
|
|
"grad_norm": 0.5359793017604382,
|
|
"learning_rate": 3.796207964354911e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.236485093832016,
|
|
"step": 650,
|
|
"valid_targets_mean": 4295.7,
|
|
"valid_targets_min": 2176
|
|
},
|
|
{
|
|
"epoch": 1.6293532338308458,
|
|
"grad_norm": 0.5407696413708218,
|
|
"learning_rate": 3.7907168008702485e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24842476844787598,
|
|
"step": 655,
|
|
"valid_targets_mean": 4364.2,
|
|
"valid_targets_min": 1789
|
|
},
|
|
{
|
|
"epoch": 1.6417910447761193,
|
|
"grad_norm": 0.5209593522870282,
|
|
"learning_rate": 3.785156718560234e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23498183488845825,
|
|
"step": 660,
|
|
"valid_targets_mean": 4318.7,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 1.654228855721393,
|
|
"grad_norm": 0.550433073135447,
|
|
"learning_rate": 3.779527931414227e-05,
|
|
"loss": 0.2384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22987571358680725,
|
|
"step": 665,
|
|
"valid_targets_mean": 4133.3,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.5574429313585283,
|
|
"learning_rate": 3.773830656065811e-05,
|
|
"loss": 0.249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2494204193353653,
|
|
"step": 670,
|
|
"valid_targets_mean": 4016.2,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 1.6791044776119404,
|
|
"grad_norm": 0.6329586119901996,
|
|
"learning_rate": 3.768065111784457e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2279907613992691,
|
|
"step": 675,
|
|
"valid_targets_mean": 3598.1,
|
|
"valid_targets_min": 819
|
|
},
|
|
{
|
|
"epoch": 1.6915422885572138,
|
|
"grad_norm": 0.5334914217295829,
|
|
"learning_rate": 3.762231520467082e-05,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509036362171173,
|
|
"step": 680,
|
|
"valid_targets_mean": 3799.6,
|
|
"valid_targets_min": 1225
|
|
},
|
|
{
|
|
"epoch": 1.7039800995024876,
|
|
"grad_norm": 0.5489580465491168,
|
|
"learning_rate": 3.7563301066295144e-05,
|
|
"loss": 0.2369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2305251657962799,
|
|
"step": 685,
|
|
"valid_targets_mean": 3473.2,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 1.716417910447761,
|
|
"grad_norm": 0.5288516281459692,
|
|
"learning_rate": 3.750361097397844e-05,
|
|
"loss": 0.2382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23850487172603607,
|
|
"step": 690,
|
|
"valid_targets_mean": 4001.9,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 1.728855721393035,
|
|
"grad_norm": 0.5800907388120042,
|
|
"learning_rate": 3.74432472249969e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25247111916542053,
|
|
"step": 695,
|
|
"valid_targets_mean": 3964.8,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 1.7412935323383083,
|
|
"grad_norm": 0.5153130568766624,
|
|
"learning_rate": 3.7382212142553526e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23832815885543823,
|
|
"step": 700,
|
|
"valid_targets_mean": 5047.1,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 1.7537313432835822,
|
|
"grad_norm": 0.4775135414376218,
|
|
"learning_rate": 3.732050807568878e-05,
|
|
"loss": 0.2472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2324645072221756,
|
|
"step": 705,
|
|
"valid_targets_mean": 5206.6,
|
|
"valid_targets_min": 2405
|
|
},
|
|
{
|
|
"epoch": 1.7661691542288556,
|
|
"grad_norm": 0.6038057766815212,
|
|
"learning_rate": 3.7258137399190104e-05,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23930123448371887,
|
|
"step": 710,
|
|
"valid_targets_mean": 4152.9,
|
|
"valid_targets_min": 1787
|
|
},
|
|
{
|
|
"epoch": 1.7786069651741294,
|
|
"grad_norm": 0.5565167118827314,
|
|
"learning_rate": 3.71951025135006e-05,
|
|
"loss": 0.2629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660183310508728,
|
|
"step": 715,
|
|
"valid_targets_mean": 3864.9,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 1.7910447761194028,
|
|
"grad_norm": 0.6001026329663603,
|
|
"learning_rate": 3.713140584462659e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2475225031375885,
|
|
"step": 720,
|
|
"valid_targets_mean": 3926.4,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 1.8034825870646767,
|
|
"grad_norm": 0.5652238036041524,
|
|
"learning_rate": 3.7067049844044246e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24355122447013855,
|
|
"step": 725,
|
|
"valid_targets_mean": 4200.6,
|
|
"valid_targets_min": 2117
|
|
},
|
|
{
|
|
"epoch": 1.81592039800995,
|
|
"grad_norm": 0.5488019914228596,
|
|
"learning_rate": 3.700203698860528e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2313595861196518,
|
|
"step": 730,
|
|
"valid_targets_mean": 3598.3,
|
|
"valid_targets_min": 2223
|
|
},
|
|
{
|
|
"epoch": 1.828358208955224,
|
|
"grad_norm": 0.51796472681004,
|
|
"learning_rate": 3.6936369780441605e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24363797903060913,
|
|
"step": 735,
|
|
"valid_targets_mean": 4339.6,
|
|
"valid_targets_min": 2214
|
|
},
|
|
{
|
|
"epoch": 1.8407960199004973,
|
|
"grad_norm": 0.5659545424872905,
|
|
"learning_rate": 3.6870050746869e-05,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24636013805866241,
|
|
"step": 740,
|
|
"valid_targets_mean": 4091.0,
|
|
"valid_targets_min": 1926
|
|
},
|
|
{
|
|
"epoch": 1.8532338308457712,
|
|
"grad_norm": 0.5096718996130041,
|
|
"learning_rate": 3.680308244028988e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21994280815124512,
|
|
"step": 745,
|
|
"valid_targets_mean": 4172.7,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 1.8656716417910446,
|
|
"grad_norm": 0.5065361527427243,
|
|
"learning_rate": 3.673546743809507e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23863369226455688,
|
|
"step": 750,
|
|
"valid_targets_mean": 4798.6,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 1.8781094527363185,
|
|
"grad_norm": 0.5931017170469689,
|
|
"learning_rate": 3.666720834256456e-05,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2788572311401367,
|
|
"step": 755,
|
|
"valid_targets_mean": 4347.8,
|
|
"valid_targets_min": 2099
|
|
},
|
|
{
|
|
"epoch": 1.890547263681592,
|
|
"grad_norm": 0.5455237703990018,
|
|
"learning_rate": 3.659830778076741e-05,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23518797755241394,
|
|
"step": 760,
|
|
"valid_targets_mean": 4198.4,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 1.9029850746268657,
|
|
"grad_norm": 0.5393054024523214,
|
|
"learning_rate": 3.65287684044606e-05,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2718955874443054,
|
|
"step": 765,
|
|
"valid_targets_mean": 3854.9,
|
|
"valid_targets_min": 2330
|
|
},
|
|
{
|
|
"epoch": 1.9154228855721394,
|
|
"grad_norm": 0.8356397481562335,
|
|
"learning_rate": 3.6458592889986986e-05,
|
|
"loss": 0.2567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25400418043136597,
|
|
"step": 770,
|
|
"valid_targets_mean": 4668.4,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 1.927860696517413,
|
|
"grad_norm": 0.5065732254922442,
|
|
"learning_rate": 3.638778393817233e-05,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2371484339237213,
|
|
"step": 775,
|
|
"valid_targets_mean": 4130.5,
|
|
"valid_targets_min": 2548
|
|
},
|
|
{
|
|
"epoch": 1.9402985074626866,
|
|
"grad_norm": 0.5421381515387088,
|
|
"learning_rate": 3.6316344274221276e-05,
|
|
"loss": 0.2365,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25802528858184814,
|
|
"step": 780,
|
|
"valid_targets_mean": 4095.2,
|
|
"valid_targets_min": 2527
|
|
},
|
|
{
|
|
"epoch": 1.9527363184079602,
|
|
"grad_norm": 0.5365802561261709,
|
|
"learning_rate": 3.624427664761254e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23089924454689026,
|
|
"step": 785,
|
|
"valid_targets_mean": 3962.5,
|
|
"valid_targets_min": 2058
|
|
},
|
|
{
|
|
"epoch": 1.9651741293532339,
|
|
"grad_norm": 0.5643291821086128,
|
|
"learning_rate": 3.6171583831993076e-05,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25705310702323914,
|
|
"step": 790,
|
|
"valid_targets_mean": 4278.8,
|
|
"valid_targets_min": 2126
|
|
},
|
|
{
|
|
"epoch": 1.9776119402985075,
|
|
"grad_norm": 0.5790941804858589,
|
|
"learning_rate": 3.609826862507128e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2411389946937561,
|
|
"step": 795,
|
|
"valid_targets_mean": 3969.7,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 1.9900497512437811,
|
|
"grad_norm": 0.48846796864819103,
|
|
"learning_rate": 3.6024333848509384e-05,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23043224215507507,
|
|
"step": 800,
|
|
"valid_targets_mean": 4327.9,
|
|
"valid_targets_min": 1962
|
|
},
|
|
{
|
|
"epoch": 2.0024875621890548,
|
|
"grad_norm": 0.5849877152700085,
|
|
"learning_rate": 3.594978234781481e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23692670464515686,
|
|
"step": 805,
|
|
"valid_targets_mean": 3665.8,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 2.014925373134328,
|
|
"grad_norm": 0.538247528051884,
|
|
"learning_rate": 3.587461699223067e-05,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2252245843410492,
|
|
"step": 810,
|
|
"valid_targets_mean": 4359.8,
|
|
"valid_targets_min": 1739
|
|
},
|
|
{
|
|
"epoch": 2.027363184079602,
|
|
"grad_norm": 0.5381268521719818,
|
|
"learning_rate": 3.579884067462535e-05,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2488606870174408,
|
|
"step": 815,
|
|
"valid_targets_mean": 4062.9,
|
|
"valid_targets_min": 2410
|
|
},
|
|
{
|
|
"epoch": 2.0398009950248754,
|
|
"grad_norm": 0.6115694003336873,
|
|
"learning_rate": 3.572245631138116e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2483055591583252,
|
|
"step": 820,
|
|
"valid_targets_mean": 3527.8,
|
|
"valid_targets_min": 1282
|
|
},
|
|
{
|
|
"epoch": 2.0522388059701493,
|
|
"grad_norm": 0.5094263989797099,
|
|
"learning_rate": 3.564546684228209e-05,
|
|
"loss": 0.223,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1991591900587082,
|
|
"step": 825,
|
|
"valid_targets_mean": 4397.8,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 2.0646766169154227,
|
|
"grad_norm": 0.5601385200195886,
|
|
"learning_rate": 3.556787523040069e-05,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2520599961280823,
|
|
"step": 830,
|
|
"valid_targets_mean": 3847.5,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 2.0771144278606966,
|
|
"grad_norm": 0.5380726404824564,
|
|
"learning_rate": 3.548968446198398e-05,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21053871512413025,
|
|
"step": 835,
|
|
"valid_targets_mean": 4242.5,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 2.08955223880597,
|
|
"grad_norm": 0.5025117595976427,
|
|
"learning_rate": 3.54108975463386e-05,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2069125920534134,
|
|
"step": 840,
|
|
"valid_targets_mean": 4396.9,
|
|
"valid_targets_min": 2427
|
|
},
|
|
{
|
|
"epoch": 2.101990049751244,
|
|
"grad_norm": 0.5354301465480238,
|
|
"learning_rate": 3.533151751571489e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244517207145691,
|
|
"step": 845,
|
|
"valid_targets_mean": 3890.4,
|
|
"valid_targets_min": 1859
|
|
},
|
|
{
|
|
"epoch": 2.1144278606965172,
|
|
"grad_norm": 0.5610413426394177,
|
|
"learning_rate": 3.5251547425190294e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24040347337722778,
|
|
"step": 850,
|
|
"valid_targets_mean": 4252.7,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 2.126865671641791,
|
|
"grad_norm": 0.7445240143199634,
|
|
"learning_rate": 3.51709903525517e-05,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22202104330062866,
|
|
"step": 855,
|
|
"valid_targets_mean": 3389.6,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 2.1393034825870645,
|
|
"grad_norm": 0.5584087371143899,
|
|
"learning_rate": 3.5089849398177013e-05,
|
|
"loss": 0.237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25943320989608765,
|
|
"step": 860,
|
|
"valid_targets_mean": 4247.4,
|
|
"valid_targets_min": 1713
|
|
},
|
|
{
|
|
"epoch": 2.1517412935323383,
|
|
"grad_norm": 0.5832938350626173,
|
|
"learning_rate": 3.500812768491586e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2365805208683014,
|
|
"step": 865,
|
|
"valid_targets_mean": 4043.2,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 2.1641791044776117,
|
|
"grad_norm": 0.5400590413107126,
|
|
"learning_rate": 3.4925828357969344e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21506357192993164,
|
|
"step": 870,
|
|
"valid_targets_mean": 3890.4,
|
|
"valid_targets_min": 2165
|
|
},
|
|
{
|
|
"epoch": 2.1766169154228856,
|
|
"grad_norm": 0.5408725543370927,
|
|
"learning_rate": 3.484295458476905e-05,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2199130356311798,
|
|
"step": 875,
|
|
"valid_targets_mean": 3965.0,
|
|
"valid_targets_min": 1830
|
|
},
|
|
{
|
|
"epoch": 2.189054726368159,
|
|
"grad_norm": 0.5544609623772936,
|
|
"learning_rate": 3.475950955485511e-05,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2255849838256836,
|
|
"step": 880,
|
|
"valid_targets_mean": 3941.9,
|
|
"valid_targets_min": 1907
|
|
},
|
|
{
|
|
"epoch": 2.201492537313433,
|
|
"grad_norm": 0.5335810864251953,
|
|
"learning_rate": 3.467549647975346e-05,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2269669473171234,
|
|
"step": 885,
|
|
"valid_targets_mean": 3739.8,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 2.2139303482587063,
|
|
"grad_norm": 0.544212423789675,
|
|
"learning_rate": 3.4590918592852214e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22404810786247253,
|
|
"step": 890,
|
|
"valid_targets_mean": 3947.4,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 2.22636815920398,
|
|
"grad_norm": 0.48777043633608497,
|
|
"learning_rate": 3.450577914927728e-05,
|
|
"loss": 0.2283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2288726568222046,
|
|
"step": 895,
|
|
"valid_targets_mean": 4936.0,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 2.2388059701492535,
|
|
"grad_norm": 0.5375629176173219,
|
|
"learning_rate": 3.442008142576701e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21632421016693115,
|
|
"step": 900,
|
|
"valid_targets_mean": 4131.2,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 2.2512437810945274,
|
|
"grad_norm": 0.5159769756167506,
|
|
"learning_rate": 3.433382872054614e-05,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24016112089157104,
|
|
"step": 905,
|
|
"valid_targets_mean": 4466.8,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 2.2636815920398012,
|
|
"grad_norm": 0.5464064642346769,
|
|
"learning_rate": 3.4247024353198826e-05,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23476913571357727,
|
|
"step": 910,
|
|
"valid_targets_mean": 4390.9,
|
|
"valid_targets_min": 1993
|
|
},
|
|
{
|
|
"epoch": 2.2761194029850746,
|
|
"grad_norm": 0.5493349140412681,
|
|
"learning_rate": 3.415967166454091e-05,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21979095041751862,
|
|
"step": 915,
|
|
"valid_targets_mean": 3712.5,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 2.288557213930348,
|
|
"grad_norm": 0.5988218138371607,
|
|
"learning_rate": 3.4071774016491295e-05,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22979789972305298,
|
|
"step": 920,
|
|
"valid_targets_mean": 3520.1,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 2.300995024875622,
|
|
"grad_norm": 0.6119392010234547,
|
|
"learning_rate": 3.398333479194261e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2250669300556183,
|
|
"step": 925,
|
|
"valid_targets_mean": 3029.0,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 2.3134328358208958,
|
|
"grad_norm": 0.5738059500201181,
|
|
"learning_rate": 3.389435739463099e-05,
|
|
"loss": 0.2282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22356033325195312,
|
|
"step": 930,
|
|
"valid_targets_mean": 3890.4,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 2.325870646766169,
|
|
"grad_norm": 0.5200786239469339,
|
|
"learning_rate": 3.380484524900506e-05,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.217308908700943,
|
|
"step": 935,
|
|
"valid_targets_mean": 4040.2,
|
|
"valid_targets_min": 1742
|
|
},
|
|
{
|
|
"epoch": 2.3383084577114426,
|
|
"grad_norm": 0.5347052584780078,
|
|
"learning_rate": 3.371480180009418e-05,
|
|
"loss": 0.2142,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21474695205688477,
|
|
"step": 940,
|
|
"valid_targets_mean": 3967.4,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 2.3507462686567164,
|
|
"grad_norm": 0.6357404551408977,
|
|
"learning_rate": 3.362423051337581e-05,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2433660328388214,
|
|
"step": 945,
|
|
"valid_targets_mean": 3678.4,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 2.3631840796019903,
|
|
"grad_norm": 0.5200162752679304,
|
|
"learning_rate": 3.353313487464217e-05,
|
|
"loss": 0.2204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21824005246162415,
|
|
"step": 950,
|
|
"valid_targets_mean": 4523.8,
|
|
"valid_targets_min": 2556
|
|
},
|
|
{
|
|
"epoch": 2.3756218905472637,
|
|
"grad_norm": 0.5521210931060869,
|
|
"learning_rate": 3.3441518389866075e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2285742461681366,
|
|
"step": 955,
|
|
"valid_targets_mean": 4061.0,
|
|
"valid_targets_min": 933
|
|
},
|
|
{
|
|
"epoch": 2.388059701492537,
|
|
"grad_norm": 0.5509197209245766,
|
|
"learning_rate": 3.334938458506599e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24184472858905792,
|
|
"step": 960,
|
|
"valid_targets_mean": 4080.4,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 2.400497512437811,
|
|
"grad_norm": 0.5236287052530746,
|
|
"learning_rate": 3.325673700617035e-05,
|
|
"loss": 0.2226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20668141543865204,
|
|
"step": 965,
|
|
"valid_targets_mean": 4257.0,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 2.412935323383085,
|
|
"grad_norm": 0.6083605097982535,
|
|
"learning_rate": 3.316357921888104e-05,
|
|
"loss": 0.2263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2471916675567627,
|
|
"step": 970,
|
|
"valid_targets_mean": 3378.9,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 2.425373134328358,
|
|
"grad_norm": 0.5335881223915271,
|
|
"learning_rate": 3.306991480853624e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20531508326530457,
|
|
"step": 975,
|
|
"valid_targets_mean": 4332.2,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 2.4378109452736316,
|
|
"grad_norm": 0.491874909775028,
|
|
"learning_rate": 3.2975747379972345e-05,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21635055541992188,
|
|
"step": 980,
|
|
"valid_targets_mean": 4316.7,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 2.4502487562189055,
|
|
"grad_norm": 0.5761426750570783,
|
|
"learning_rate": 3.288108055738531e-05,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2470484972000122,
|
|
"step": 985,
|
|
"valid_targets_mean": 3830.1,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 2.4626865671641793,
|
|
"grad_norm": 0.5320370757070529,
|
|
"learning_rate": 3.278591798419112e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2289409041404724,
|
|
"step": 990,
|
|
"valid_targets_mean": 4497.8,
|
|
"valid_targets_min": 1957
|
|
},
|
|
{
|
|
"epoch": 2.4751243781094527,
|
|
"grad_norm": 0.5771119980643729,
|
|
"learning_rate": 3.2690263322885564e-05,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23160231113433838,
|
|
"step": 995,
|
|
"valid_targets_mean": 3809.6,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 2.487562189054726,
|
|
"grad_norm": 0.5428978241749973,
|
|
"learning_rate": 3.259412025490331e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22234313189983368,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3825.9,
|
|
"valid_targets_min": 2537
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.4988498596419294,
|
|
"learning_rate": 3.249749248047619e-05,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2289588749408722,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4525.5,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 2.512437810945274,
|
|
"grad_norm": 0.5359144087715436,
|
|
"learning_rate": 3.24003837184908e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.221797913312912,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3942.2,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 2.5248756218905473,
|
|
"grad_norm": 0.5568260899912753,
|
|
"learning_rate": 3.230279770634538e-05,
|
|
"loss": 0.2276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21767503023147583,
|
|
"step": 1015,
|
|
"valid_targets_mean": 4057.7,
|
|
"valid_targets_min": 2287
|
|
},
|
|
{
|
|
"epoch": 2.5373134328358207,
|
|
"grad_norm": 0.6004807280832435,
|
|
"learning_rate": 3.220473819980594e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23624536395072937,
|
|
"step": 1020,
|
|
"valid_targets_mean": 3945.8,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 2.5497512437810945,
|
|
"grad_norm": 0.5323221469131362,
|
|
"learning_rate": 3.2106208972861775e-05,
|
|
"loss": 0.2309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22079509496688843,
|
|
"step": 1025,
|
|
"valid_targets_mean": 4226.3,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 2.5621890547263684,
|
|
"grad_norm": 0.5648978978257522,
|
|
"learning_rate": 3.2007213817580165e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22475269436836243,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4073.6,
|
|
"valid_targets_min": 2117
|
|
},
|
|
{
|
|
"epoch": 2.574626865671642,
|
|
"grad_norm": 0.5796358749507854,
|
|
"learning_rate": 3.1907756543960425e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23154105246067047,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3477.8,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 2.587064676616915,
|
|
"grad_norm": 0.5229192190104138,
|
|
"learning_rate": 3.180784097978732e-05,
|
|
"loss": 0.2195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20573227107524872,
|
|
"step": 1040,
|
|
"valid_targets_mean": 4331.4,
|
|
"valid_targets_min": 2708
|
|
},
|
|
{
|
|
"epoch": 2.599502487562189,
|
|
"grad_norm": 0.8210781641217509,
|
|
"learning_rate": 3.1707470970483716e-05,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21882575750350952,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4022.0,
|
|
"valid_targets_min": 1396
|
|
},
|
|
{
|
|
"epoch": 2.611940298507463,
|
|
"grad_norm": 0.4797756242590163,
|
|
"learning_rate": 3.160665037896256e-05,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21537092328071594,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5050.7,
|
|
"valid_targets_min": 2642
|
|
},
|
|
{
|
|
"epoch": 2.6243781094527363,
|
|
"grad_norm": 0.5768753494873544,
|
|
"learning_rate": 3.150538308547826e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2303762435913086,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3295.4,
|
|
"valid_targets_min": 840
|
|
},
|
|
{
|
|
"epoch": 2.6368159203980097,
|
|
"grad_norm": 0.5551715330405659,
|
|
"learning_rate": 3.14036729874773e-05,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2432781457901001,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3878.2,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 2.6492537313432836,
|
|
"grad_norm": 0.5533747207127068,
|
|
"learning_rate": 3.130152399944827e-05,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21668297052383423,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3560.9,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 2.6616915422885574,
|
|
"grad_norm": 0.6091887065700332,
|
|
"learning_rate": 3.1198940052771196e-05,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22194230556488037,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3523.8,
|
|
"valid_targets_min": 1943
|
|
},
|
|
{
|
|
"epoch": 2.674129353233831,
|
|
"grad_norm": 0.5217859543197857,
|
|
"learning_rate": 3.109592509556625e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19868099689483643,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3734.4,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 2.6865671641791042,
|
|
"grad_norm": 0.5907560446630902,
|
|
"learning_rate": 3.0992483092541757e-05,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22035574913024902,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4186.8,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 2.699004975124378,
|
|
"grad_norm": 0.5354879248422757,
|
|
"learning_rate": 3.088861802484168e-05,
|
|
"loss": 0.2178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22888147830963135,
|
|
"step": 1085,
|
|
"valid_targets_mean": 4406.8,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 2.711442786069652,
|
|
"grad_norm": 0.5354309571052726,
|
|
"learning_rate": 3.078433388989232e-05,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23584789037704468,
|
|
"step": 1090,
|
|
"valid_targets_mean": 4082.8,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 2.7238805970149254,
|
|
"grad_norm": 0.5230105200073858,
|
|
"learning_rate": 3.067963470124852e-05,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21761374175548553,
|
|
"step": 1095,
|
|
"valid_targets_mean": 4048.8,
|
|
"valid_targets_min": 1515
|
|
},
|
|
{
|
|
"epoch": 2.7363184079601988,
|
|
"grad_norm": 0.5254789832854005,
|
|
"learning_rate": 3.0574524488439166e-05,
|
|
"loss": 0.2158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21649402379989624,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3900.8,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 2.7487562189054726,
|
|
"grad_norm": 0.5512516143404952,
|
|
"learning_rate": 3.046900729681215e-05,
|
|
"loss": 0.2148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2197253406047821,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3859.6,
|
|
"valid_targets_min": 1241
|
|
},
|
|
{
|
|
"epoch": 2.7611940298507465,
|
|
"grad_norm": 0.4780381184240064,
|
|
"learning_rate": 3.0363087187378618e-05,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21492695808410645,
|
|
"step": 1110,
|
|
"valid_targets_mean": 4405.6,
|
|
"valid_targets_min": 2322
|
|
},
|
|
{
|
|
"epoch": 2.77363184079602,
|
|
"grad_norm": 0.6383173460840905,
|
|
"learning_rate": 3.025676823665671e-05,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23645994067192078,
|
|
"step": 1115,
|
|
"valid_targets_mean": 3986.8,
|
|
"valid_targets_min": 2007
|
|
},
|
|
{
|
|
"epoch": 2.7860696517412933,
|
|
"grad_norm": 0.5606483903400612,
|
|
"learning_rate": 3.0150054536514655e-05,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2141602337360382,
|
|
"step": 1120,
|
|
"valid_targets_mean": 3552.6,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 2.798507462686567,
|
|
"grad_norm": 0.5372938232844723,
|
|
"learning_rate": 3.0042950194013313e-05,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22424790263175964,
|
|
"step": 1125,
|
|
"valid_targets_mean": 4286.3,
|
|
"valid_targets_min": 2746
|
|
},
|
|
{
|
|
"epoch": 2.810945273631841,
|
|
"grad_norm": 0.647264121448103,
|
|
"learning_rate": 2.993545933124807e-05,
|
|
"loss": 0.2335,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541235089302063,
|
|
"step": 1130,
|
|
"valid_targets_mean": 3984.4,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 2.8233830845771144,
|
|
"grad_norm": 0.5395140967298383,
|
|
"learning_rate": 2.9827586085190217e-05,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20689839124679565,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3842.4,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 2.835820895522388,
|
|
"grad_norm": 0.5132101606150065,
|
|
"learning_rate": 2.971933460752773e-05,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2214384526014328,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4829.9,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 2.8482587064676617,
|
|
"grad_norm": 0.545837953147779,
|
|
"learning_rate": 2.961070906450548e-05,
|
|
"loss": 0.2229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24275824427604675,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4090.0,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 2.8606965174129355,
|
|
"grad_norm": 0.545483447551859,
|
|
"learning_rate": 2.950171363676488e-05,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21436379849910736,
|
|
"step": 1150,
|
|
"valid_targets_mean": 3576.0,
|
|
"valid_targets_min": 1297
|
|
},
|
|
{
|
|
"epoch": 2.873134328358209,
|
|
"grad_norm": 0.5046264457103875,
|
|
"learning_rate": 2.9392352519183003e-05,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2066916823387146,
|
|
"step": 1155,
|
|
"valid_targets_mean": 4762.8,
|
|
"valid_targets_min": 2454
|
|
},
|
|
{
|
|
"epoch": 2.8855721393034823,
|
|
"grad_norm": 0.5186125948476298,
|
|
"learning_rate": 2.928262992071113e-05,
|
|
"loss": 0.2123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2164272964000702,
|
|
"step": 1160,
|
|
"valid_targets_mean": 3770.9,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 2.898009950248756,
|
|
"grad_norm": 0.5004644338207715,
|
|
"learning_rate": 2.9172550064212747e-05,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1974021941423416,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4147.8,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 2.91044776119403,
|
|
"grad_norm": 0.5284009585202385,
|
|
"learning_rate": 2.906211718630104e-05,
|
|
"loss": 0.2219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23019076883792877,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3926.6,
|
|
"valid_targets_min": 2084
|
|
},
|
|
{
|
|
"epoch": 2.9228855721393034,
|
|
"grad_norm": 0.5437530454710895,
|
|
"learning_rate": 2.895133553717582e-05,
|
|
"loss": 0.2186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22214023768901825,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3957.7,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 2.935323383084577,
|
|
"grad_norm": 0.5309034940862213,
|
|
"learning_rate": 2.8840209380459983e-05,
|
|
"loss": 0.2174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2027333378791809,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3732.1,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 2.9477611940298507,
|
|
"grad_norm": 0.502018444365329,
|
|
"learning_rate": 2.8728742993035376e-05,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20462360978126526,
|
|
"step": 1185,
|
|
"valid_targets_mean": 3885.9,
|
|
"valid_targets_min": 2023
|
|
},
|
|
{
|
|
"epoch": 2.9601990049751246,
|
|
"grad_norm": 0.5097178679874955,
|
|
"learning_rate": 2.8616940664878217e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21697774529457092,
|
|
"step": 1190,
|
|
"valid_targets_mean": 4445.0,
|
|
"valid_targets_min": 1363
|
|
},
|
|
{
|
|
"epoch": 2.972636815920398,
|
|
"grad_norm": 0.568255587186467,
|
|
"learning_rate": 2.850480669889397e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2217128425836563,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3645.9,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 2.9850746268656714,
|
|
"grad_norm": 0.5435581128114583,
|
|
"learning_rate": 2.839234541075178e-05,
|
|
"loss": 0.2137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23033751547336578,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3913.1,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 2.9975124378109452,
|
|
"grad_norm": 0.5344645879564283,
|
|
"learning_rate": 2.8279561128718324e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1959632784128189,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3608.3,
|
|
"valid_targets_min": 2089
|
|
},
|
|
{
|
|
"epoch": 3.009950248756219,
|
|
"grad_norm": 0.49623627025746214,
|
|
"learning_rate": 2.8166458193491287e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20732742547988892,
|
|
"step": 1210,
|
|
"valid_targets_mean": 4868.1,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 3.0223880597014925,
|
|
"grad_norm": 0.5737814175353435,
|
|
"learning_rate": 2.805304095803222e-05,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21487167477607727,
|
|
"step": 1215,
|
|
"valid_targets_mean": 4191.0,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 3.0348258706467663,
|
|
"grad_norm": 0.5106023876957765,
|
|
"learning_rate": 2.7939313787399118e-05,
|
|
"loss": 0.2221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2161100208759308,
|
|
"step": 1220,
|
|
"valid_targets_mean": 4569.1,
|
|
"valid_targets_min": 2916
|
|
},
|
|
{
|
|
"epoch": 3.0472636815920398,
|
|
"grad_norm": 0.5564896880436795,
|
|
"learning_rate": 2.7825281058578326e-05,
|
|
"loss": 0.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2229604572057724,
|
|
"step": 1225,
|
|
"valid_targets_mean": 4257.8,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 3.0597014925373136,
|
|
"grad_norm": 0.557147418201146,
|
|
"learning_rate": 2.7710947160316145e-05,
|
|
"loss": 0.2019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20052112638950348,
|
|
"step": 1230,
|
|
"valid_targets_mean": 4028.7,
|
|
"valid_targets_min": 1645
|
|
},
|
|
{
|
|
"epoch": 3.072139303482587,
|
|
"grad_norm": 0.5878465938206581,
|
|
"learning_rate": 2.759631649294989e-05,
|
|
"loss": 0.2057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22095069289207458,
|
|
"step": 1235,
|
|
"valid_targets_mean": 4023.8,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 3.084577114427861,
|
|
"grad_norm": 0.5194814837667161,
|
|
"learning_rate": 2.7481393468238558e-05,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19190478324890137,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4377.2,
|
|
"valid_targets_min": 2378
|
|
},
|
|
{
|
|
"epoch": 3.0970149253731343,
|
|
"grad_norm": 0.5898646554241795,
|
|
"learning_rate": 2.7366182509193e-05,
|
|
"loss": 0.2065,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20672670006752014,
|
|
"step": 1245,
|
|
"valid_targets_mean": 3683.2,
|
|
"valid_targets_min": 1530
|
|
},
|
|
{
|
|
"epoch": 3.109452736318408,
|
|
"grad_norm": 0.5408382733969814,
|
|
"learning_rate": 2.725068804990575e-05,
|
|
"loss": 0.2145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22049081325531006,
|
|
"step": 1250,
|
|
"valid_targets_mean": 4423.6,
|
|
"valid_targets_min": 1725
|
|
},
|
|
{
|
|
"epoch": 3.1218905472636815,
|
|
"grad_norm": 0.5240915497470742,
|
|
"learning_rate": 2.7134914535380305e-05,
|
|
"loss": 0.2081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19481924176216125,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4028.4,
|
|
"valid_targets_min": 831
|
|
},
|
|
{
|
|
"epoch": 3.1343283582089554,
|
|
"grad_norm": 0.586082749576896,
|
|
"learning_rate": 2.7018866421360114e-05,
|
|
"loss": 0.2181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22566986083984375,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3726.2,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 3.146766169154229,
|
|
"grad_norm": 0.5224048615979382,
|
|
"learning_rate": 2.6902548174157028e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.219328373670578,
|
|
"step": 1265,
|
|
"valid_targets_mean": 4112.4,
|
|
"valid_targets_min": 1498
|
|
},
|
|
{
|
|
"epoch": 3.1592039800995027,
|
|
"grad_norm": 0.501751299595383,
|
|
"learning_rate": 2.6785964270479472e-05,
|
|
"loss": 0.2027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1844957172870636,
|
|
"step": 1270,
|
|
"valid_targets_mean": 4384.2,
|
|
"valid_targets_min": 2813
|
|
},
|
|
{
|
|
"epoch": 3.171641791044776,
|
|
"grad_norm": 0.5831574633686978,
|
|
"learning_rate": 2.66691191972601e-05,
|
|
"loss": 0.2103,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2090991735458374,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3494.6,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 3.18407960199005,
|
|
"grad_norm": 0.5850385454163992,
|
|
"learning_rate": 2.6552017451483136e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21853281557559967,
|
|
"step": 1280,
|
|
"valid_targets_mean": 4045.5,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 3.1965174129353233,
|
|
"grad_norm": 0.5599684535438738,
|
|
"learning_rate": 2.6434663540011278e-05,
|
|
"loss": 0.2087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.219620943069458,
|
|
"step": 1285,
|
|
"valid_targets_mean": 4112.5,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 3.208955223880597,
|
|
"grad_norm": 0.9513235022639203,
|
|
"learning_rate": 2.631706197941227e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1958315670490265,
|
|
"step": 1290,
|
|
"valid_targets_mean": 4007.0,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 3.2213930348258706,
|
|
"grad_norm": 0.548449991410438,
|
|
"learning_rate": 2.619921729578504e-05,
|
|
"loss": 0.2016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20220553874969482,
|
|
"step": 1295,
|
|
"valid_targets_mean": 3707.1,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 3.2338308457711444,
|
|
"grad_norm": 0.5435395454910702,
|
|
"learning_rate": 2.6081134024585558e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19983190298080444,
|
|
"step": 1300,
|
|
"valid_targets_mean": 3682.4,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 3.246268656716418,
|
|
"grad_norm": 0.5396400851922742,
|
|
"learning_rate": 2.5962816710452217e-05,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19172373414039612,
|
|
"step": 1305,
|
|
"valid_targets_mean": 4202.8,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 3.2587064676616917,
|
|
"grad_norm": 0.5528015303255912,
|
|
"learning_rate": 2.5844269907030972e-05,
|
|
"loss": 0.2008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21026234328746796,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3956.2,
|
|
"valid_targets_min": 2237
|
|
},
|
|
{
|
|
"epoch": 3.271144278606965,
|
|
"grad_norm": 0.6555416969802464,
|
|
"learning_rate": 2.5725498176800053e-05,
|
|
"loss": 0.2059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20781803131103516,
|
|
"step": 1315,
|
|
"valid_targets_mean": 3484.7,
|
|
"valid_targets_min": 1823
|
|
},
|
|
{
|
|
"epoch": 3.283582089552239,
|
|
"grad_norm": 0.5520065160976478,
|
|
"learning_rate": 2.560650609089441e-05,
|
|
"loss": 0.2113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21545690298080444,
|
|
"step": 1320,
|
|
"valid_targets_mean": 3796.1,
|
|
"valid_targets_min": 2150
|
|
},
|
|
{
|
|
"epoch": 3.2960199004975124,
|
|
"grad_norm": 0.5594356265872277,
|
|
"learning_rate": 2.5487298228929746e-05,
|
|
"loss": 0.1982,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19716203212738037,
|
|
"step": 1325,
|
|
"valid_targets_mean": 4177.7,
|
|
"valid_targets_min": 2666
|
|
},
|
|
{
|
|
"epoch": 3.308457711442786,
|
|
"grad_norm": 0.5673243925575643,
|
|
"learning_rate": 2.5367879178826278e-05,
|
|
"loss": 0.2106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21999700367450714,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3577.8,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 3.3208955223880596,
|
|
"grad_norm": 0.5112451211401966,
|
|
"learning_rate": 2.5248253536632176e-05,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20441888272762299,
|
|
"step": 1335,
|
|
"valid_targets_mean": 4252.4,
|
|
"valid_targets_min": 1758
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.5701315285018725,
|
|
"learning_rate": 2.512842590634664e-05,
|
|
"loss": 0.2116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.228561133146286,
|
|
"step": 1340,
|
|
"valid_targets_mean": 3937.6,
|
|
"valid_targets_min": 1865
|
|
},
|
|
{
|
|
"epoch": 3.345771144278607,
|
|
"grad_norm": 0.5421666508199918,
|
|
"learning_rate": 2.5008400899742757e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20633219182491302,
|
|
"step": 1345,
|
|
"valid_targets_mean": 4439.9,
|
|
"valid_targets_min": 3260
|
|
},
|
|
{
|
|
"epoch": 3.3582089552238807,
|
|
"grad_norm": 0.49279685109536914,
|
|
"learning_rate": 2.4888183136189952e-05,
|
|
"loss": 0.2082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1922161877155304,
|
|
"step": 1350,
|
|
"valid_targets_mean": 4147.8,
|
|
"valid_targets_min": 2538
|
|
},
|
|
{
|
|
"epoch": 3.370646766169154,
|
|
"grad_norm": 0.4688762074597621,
|
|
"learning_rate": 2.4767777242476263e-05,
|
|
"loss": 0.1997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19103196263313293,
|
|
"step": 1355,
|
|
"valid_targets_mean": 5005.8,
|
|
"valid_targets_min": 2533
|
|
},
|
|
{
|
|
"epoch": 3.383084577114428,
|
|
"grad_norm": 0.5500017320569016,
|
|
"learning_rate": 2.4647187852630227e-05,
|
|
"loss": 0.2028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20763391256332397,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3853.6,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 3.3955223880597014,
|
|
"grad_norm": 0.5872147896176038,
|
|
"learning_rate": 2.4526419607742543e-05,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22904819250106812,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3977.2,
|
|
"valid_targets_min": 2209
|
|
},
|
|
{
|
|
"epoch": 3.4079601990049753,
|
|
"grad_norm": 0.5155674931487689,
|
|
"learning_rate": 2.4405477155787457e-05,
|
|
"loss": 0.2084,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21794819831848145,
|
|
"step": 1370,
|
|
"valid_targets_mean": 4300.1,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 3.4203980099502487,
|
|
"grad_norm": 0.5958084420992633,
|
|
"learning_rate": 2.4284365151443892e-05,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21897193789482117,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3848.8,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 3.4328358208955225,
|
|
"grad_norm": 0.5269624601178255,
|
|
"learning_rate": 2.416308825591626e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1867186725139618,
|
|
"step": 1380,
|
|
"valid_targets_mean": 4206.9,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 3.445273631840796,
|
|
"grad_norm": 0.5745335400755156,
|
|
"learning_rate": 2.4041651136755112e-05,
|
|
"loss": 0.208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21894606947898865,
|
|
"step": 1385,
|
|
"valid_targets_mean": 4212.0,
|
|
"valid_targets_min": 1793
|
|
},
|
|
{
|
|
"epoch": 3.45771144278607,
|
|
"grad_norm": 0.5062989296925714,
|
|
"learning_rate": 2.3920058467677475e-05,
|
|
"loss": 0.2,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19788777828216553,
|
|
"step": 1390,
|
|
"valid_targets_mean": 4500.6,
|
|
"valid_targets_min": 1771
|
|
},
|
|
{
|
|
"epoch": 3.470149253731343,
|
|
"grad_norm": 0.5291231334084273,
|
|
"learning_rate": 2.3798314928386986e-05,
|
|
"loss": 0.207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19889307022094727,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4059.6,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 3.482587064676617,
|
|
"grad_norm": 0.6204408757812566,
|
|
"learning_rate": 2.367642520439378e-05,
|
|
"loss": 0.2072,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2038464993238449,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3189.2,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 3.4950248756218905,
|
|
"grad_norm": 0.5860309965160757,
|
|
"learning_rate": 2.3554393986834173e-05,
|
|
"loss": 0.2109,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18992196023464203,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4013.2,
|
|
"valid_targets_min": 777
|
|
},
|
|
{
|
|
"epoch": 3.5074626865671643,
|
|
"grad_norm": 0.5323469940034676,
|
|
"learning_rate": 2.3432225972290086e-05,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1906590759754181,
|
|
"step": 1410,
|
|
"valid_targets_mean": 4144.9,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 3.5199004975124377,
|
|
"grad_norm": 0.5720751778006824,
|
|
"learning_rate": 2.3309925862608318e-05,
|
|
"loss": 0.204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1970103681087494,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3570.9,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 3.5323383084577116,
|
|
"grad_norm": 0.5077487997895391,
|
|
"learning_rate": 2.3187498364719576e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1936759650707245,
|
|
"step": 1420,
|
|
"valid_targets_mean": 4038.2,
|
|
"valid_targets_min": 2313
|
|
},
|
|
{
|
|
"epoch": 3.544776119402985,
|
|
"grad_norm": 0.5828341436838554,
|
|
"learning_rate": 2.3064948190457335e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18977367877960205,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3396.9,
|
|
"valid_targets_min": 2106
|
|
},
|
|
{
|
|
"epoch": 3.557213930348259,
|
|
"grad_norm": 0.5735838721354387,
|
|
"learning_rate": 2.2942280056376457e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20034313201904297,
|
|
"step": 1430,
|
|
"valid_targets_mean": 3937.6,
|
|
"valid_targets_min": 1627
|
|
},
|
|
{
|
|
"epoch": 3.5696517412935322,
|
|
"grad_norm": 0.5234257291474523,
|
|
"learning_rate": 2.2819498683571718e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20040658116340637,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4346.8,
|
|
"valid_targets_min": 2323
|
|
},
|
|
{
|
|
"epoch": 3.582089552238806,
|
|
"grad_norm": 0.5271233727171688,
|
|
"learning_rate": 2.2696608797496045e-05,
|
|
"loss": 0.2132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1866839975118637,
|
|
"step": 1440,
|
|
"valid_targets_mean": 3945.6,
|
|
"valid_targets_min": 1378
|
|
},
|
|
{
|
|
"epoch": 3.5945273631840795,
|
|
"grad_norm": 0.5242920703023999,
|
|
"learning_rate": 2.2573615127778733e-05,
|
|
"loss": 0.1984,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19095249474048615,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3907.6,
|
|
"valid_targets_min": 1780
|
|
},
|
|
{
|
|
"epoch": 3.6069651741293534,
|
|
"grad_norm": 0.6005933256936844,
|
|
"learning_rate": 2.2450522408043324e-05,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2235599309206009,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3177.5,
|
|
"valid_targets_min": 992
|
|
},
|
|
{
|
|
"epoch": 3.6194029850746268,
|
|
"grad_norm": 0.5536958757152657,
|
|
"learning_rate": 2.232733537572551e-05,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20974500477313995,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3999.3,
|
|
"valid_targets_min": 1643
|
|
},
|
|
{
|
|
"epoch": 3.6318407960199006,
|
|
"grad_norm": 0.45147835839349537,
|
|
"learning_rate": 2.2204058771890735e-05,
|
|
"loss": 0.2029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19078528881072998,
|
|
"step": 1460,
|
|
"valid_targets_mean": 4792.8,
|
|
"valid_targets_min": 1685
|
|
},
|
|
{
|
|
"epoch": 3.644278606965174,
|
|
"grad_norm": 0.6125570787690607,
|
|
"learning_rate": 2.2080697341051777e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21356597542762756,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3559.8,
|
|
"valid_targets_min": 2038
|
|
},
|
|
{
|
|
"epoch": 3.656716417910448,
|
|
"grad_norm": 0.5118359172048365,
|
|
"learning_rate": 2.195725583098611e-05,
|
|
"loss": 0.2055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20302622020244598,
|
|
"step": 1470,
|
|
"valid_targets_mean": 4243.6,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 3.6691542288557213,
|
|
"grad_norm": 0.5524385866849025,
|
|
"learning_rate": 2.183373899255321e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1972339153289795,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3786.9,
|
|
"valid_targets_min": 2602
|
|
},
|
|
{
|
|
"epoch": 3.681592039800995,
|
|
"grad_norm": 0.5519280039906248,
|
|
"learning_rate": 2.171015157951169e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23621603846549988,
|
|
"step": 1480,
|
|
"valid_targets_mean": 4710.5,
|
|
"valid_targets_min": 2121
|
|
},
|
|
{
|
|
"epoch": 3.6940298507462686,
|
|
"grad_norm": 0.4961853443443449,
|
|
"learning_rate": 2.158649834833636e-05,
|
|
"loss": 0.2006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21436795592308044,
|
|
"step": 1485,
|
|
"valid_targets_mean": 4526.8,
|
|
"valid_targets_min": 2490
|
|
},
|
|
{
|
|
"epoch": 3.7064676616915424,
|
|
"grad_norm": 0.5544971340683077,
|
|
"learning_rate": 2.146278405803512e-05,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20669138431549072,
|
|
"step": 1490,
|
|
"valid_targets_mean": 3391.8,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 3.718905472636816,
|
|
"grad_norm": 0.5059854792152241,
|
|
"learning_rate": 2.133901346996588e-05,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2086849957704544,
|
|
"step": 1495,
|
|
"valid_targets_mean": 4446.4,
|
|
"valid_targets_min": 2480
|
|
},
|
|
{
|
|
"epoch": 3.7313432835820897,
|
|
"grad_norm": 0.5008031450948552,
|
|
"learning_rate": 2.1215191347653227e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20051947236061096,
|
|
"step": 1500,
|
|
"valid_targets_mean": 4183.6,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 3.743781094527363,
|
|
"grad_norm": 0.5016740412477303,
|
|
"learning_rate": 2.109132245660517e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2014276683330536,
|
|
"step": 1505,
|
|
"valid_targets_mean": 4293.1,
|
|
"valid_targets_min": 2065
|
|
},
|
|
{
|
|
"epoch": 3.756218905472637,
|
|
"grad_norm": 0.5773401034416584,
|
|
"learning_rate": 2.096741156412967e-05,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24956290423870087,
|
|
"step": 1510,
|
|
"valid_targets_mean": 4016.8,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 3.7686567164179103,
|
|
"grad_norm": 0.5557509017649421,
|
|
"learning_rate": 2.084346343915118e-05,
|
|
"loss": 0.2061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21007095277309418,
|
|
"step": 1515,
|
|
"valid_targets_mean": 4184.1,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 3.781094527363184,
|
|
"grad_norm": 0.5069995725105243,
|
|
"learning_rate": 2.0719482852027122e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19801217317581177,
|
|
"step": 1520,
|
|
"valid_targets_mean": 4527.2,
|
|
"valid_targets_min": 2586
|
|
},
|
|
{
|
|
"epoch": 3.7935323383084576,
|
|
"grad_norm": 0.5072359725349612,
|
|
"learning_rate": 2.059547457436429e-05,
|
|
"loss": 0.2089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21030068397521973,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4119.2,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 3.8059701492537314,
|
|
"grad_norm": 0.5809162860083377,
|
|
"learning_rate": 2.0471443378835173e-05,
|
|
"loss": 0.2037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20429758727550507,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3375.6,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 3.818407960199005,
|
|
"grad_norm": 0.6146168591855865,
|
|
"learning_rate": 2.0347394038994305e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20681461691856384,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3487.4,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 3.8308457711442787,
|
|
"grad_norm": 0.5243852804360046,
|
|
"learning_rate": 2.0223331329094534e-05,
|
|
"loss": 0.2002,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20334434509277344,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3998.9,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 3.843283582089552,
|
|
"grad_norm": 0.5291633419029611,
|
|
"learning_rate": 2.0099260023903286e-05,
|
|
"loss": 0.211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18365240097045898,
|
|
"step": 1545,
|
|
"valid_targets_mean": 4010.1,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 3.855721393034826,
|
|
"grad_norm": 0.5121951623491455,
|
|
"learning_rate": 1.997518489851878e-05,
|
|
"loss": 0.2021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20143933594226837,
|
|
"step": 1550,
|
|
"valid_targets_mean": 4428.3,
|
|
"valid_targets_min": 2098
|
|
},
|
|
{
|
|
"epoch": 3.8681592039800994,
|
|
"grad_norm": 0.5327816071630699,
|
|
"learning_rate": 1.985111072818626e-05,
|
|
"loss": 0.2111,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2264530062675476,
|
|
"step": 1555,
|
|
"valid_targets_mean": 4157.4,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 3.8805970149253732,
|
|
"grad_norm": 0.5872325233516333,
|
|
"learning_rate": 1.9727042288114223e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21333108842372894,
|
|
"step": 1560,
|
|
"valid_targets_mean": 3713.8,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 3.8930348258706466,
|
|
"grad_norm": 0.5465903196708187,
|
|
"learning_rate": 1.9602984353290627e-05,
|
|
"loss": 0.2041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20426753163337708,
|
|
"step": 1565,
|
|
"valid_targets_mean": 3976.4,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 3.9054726368159205,
|
|
"grad_norm": 0.48170818915584446,
|
|
"learning_rate": 1.9478941698299108e-05,
|
|
"loss": 0.2018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19036681950092316,
|
|
"step": 1570,
|
|
"valid_targets_mean": 5013.6,
|
|
"valid_targets_min": 1987
|
|
},
|
|
{
|
|
"epoch": 3.917910447761194,
|
|
"grad_norm": 0.5470516146085361,
|
|
"learning_rate": 1.9354919097135233e-05,
|
|
"loss": 0.2033,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20237231254577637,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3921.6,
|
|
"valid_targets_min": 2194
|
|
},
|
|
{
|
|
"epoch": 3.9303482587064678,
|
|
"grad_norm": 0.49608737593410446,
|
|
"learning_rate": 1.9230921323022777e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19408315420150757,
|
|
"step": 1580,
|
|
"valid_targets_mean": 4462.3,
|
|
"valid_targets_min": 2876
|
|
},
|
|
{
|
|
"epoch": 3.942786069651741,
|
|
"grad_norm": 0.5215118271002951,
|
|
"learning_rate": 1.9106953148229986e-05,
|
|
"loss": 0.2063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20697835087776184,
|
|
"step": 1585,
|
|
"valid_targets_mean": 4175.0,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 3.955223880597015,
|
|
"grad_norm": 0.5461860904207121,
|
|
"learning_rate": 1.8983019343885937e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2104094922542572,
|
|
"step": 1590,
|
|
"valid_targets_mean": 3759.0,
|
|
"valid_targets_min": 1717
|
|
},
|
|
{
|
|
"epoch": 3.9676616915422884,
|
|
"grad_norm": 0.5231042143924045,
|
|
"learning_rate": 1.8859124679796893e-05,
|
|
"loss": 0.2049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21831703186035156,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4261.8,
|
|
"valid_targets_min": 2399
|
|
},
|
|
{
|
|
"epoch": 3.9800995024875623,
|
|
"grad_norm": 0.5222686881020943,
|
|
"learning_rate": 1.8735273924262727e-05,
|
|
"loss": 0.2038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2033132016658783,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4387.1,
|
|
"valid_targets_min": 2804
|
|
},
|
|
{
|
|
"epoch": 3.9925373134328357,
|
|
"grad_norm": 0.6234021033332222,
|
|
"learning_rate": 1.8611471843893447e-05,
|
|
"loss": 0.2024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19351506233215332,
|
|
"step": 1605,
|
|
"valid_targets_mean": 3928.3,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 4.0049751243781095,
|
|
"grad_norm": 0.5228099382124228,
|
|
"learning_rate": 1.848772320342568e-05,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1864427924156189,
|
|
"step": 1610,
|
|
"valid_targets_mean": 4102.9,
|
|
"valid_targets_min": 2681
|
|
},
|
|
{
|
|
"epoch": 4.017412935323383,
|
|
"grad_norm": 0.5441470808752659,
|
|
"learning_rate": 1.8364032765539355e-05,
|
|
"loss": 0.2005,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2033585011959076,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4462.2,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 4.029850746268656,
|
|
"grad_norm": 0.5740596060113697,
|
|
"learning_rate": 1.8240405290674348e-05,
|
|
"loss": 0.1899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1972273290157318,
|
|
"step": 1620,
|
|
"valid_targets_mean": 3756.9,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 4.04228855721393,
|
|
"grad_norm": 0.5719711729617718,
|
|
"learning_rate": 1.8116845536847306e-05,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19884632527828217,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4049.6,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 4.054726368159204,
|
|
"grad_norm": 0.4974824417143702,
|
|
"learning_rate": 1.799335825946853e-05,
|
|
"loss": 0.1904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18048778176307678,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4554.6,
|
|
"valid_targets_min": 2925
|
|
},
|
|
{
|
|
"epoch": 4.067164179104478,
|
|
"grad_norm": 0.544654409825013,
|
|
"learning_rate": 1.7869948211158898e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17982539534568787,
|
|
"step": 1635,
|
|
"valid_targets_mean": 4247.0,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 4.079601990049751,
|
|
"grad_norm": 0.5337906477339353,
|
|
"learning_rate": 1.774662014156705e-05,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1838865578174591,
|
|
"step": 1640,
|
|
"valid_targets_mean": 3586.6,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 4.092039800995025,
|
|
"grad_norm": 0.5334091034891687,
|
|
"learning_rate": 1.762337879718649e-05,
|
|
"loss": 0.1908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20184041559696198,
|
|
"step": 1645,
|
|
"valid_targets_mean": 4037.2,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 4.104477611940299,
|
|
"grad_norm": 0.48859337981552636,
|
|
"learning_rate": 1.750022892117296e-05,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17627070844173431,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4640.4,
|
|
"valid_targets_min": 2534
|
|
},
|
|
{
|
|
"epoch": 4.116915422885572,
|
|
"grad_norm": 0.5661329566773484,
|
|
"learning_rate": 1.7377175253161907e-05,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17362037301063538,
|
|
"step": 1655,
|
|
"valid_targets_mean": 4436.4,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 4.129353233830845,
|
|
"grad_norm": 0.5703288632207718,
|
|
"learning_rate": 1.7254222529086024e-05,
|
|
"loss": 0.187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19699379801750183,
|
|
"step": 1660,
|
|
"valid_targets_mean": 4017.1,
|
|
"valid_targets_min": 2457
|
|
},
|
|
{
|
|
"epoch": 4.141791044776119,
|
|
"grad_norm": 0.681732512062585,
|
|
"learning_rate": 1.7131375480993014e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18208718299865723,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4043.7,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 4.154228855721393,
|
|
"grad_norm": 0.4723457137036097,
|
|
"learning_rate": 1.7008638836863455e-05,
|
|
"loss": 0.1843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17454874515533447,
|
|
"step": 1670,
|
|
"valid_targets_mean": 4918.2,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.5581086668881344,
|
|
"learning_rate": 1.6886017320428817e-05,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18187645077705383,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3673.4,
|
|
"valid_targets_min": 2213
|
|
},
|
|
{
|
|
"epoch": 4.17910447761194,
|
|
"grad_norm": 0.6292356323245095,
|
|
"learning_rate": 1.676351565098973e-05,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19597412645816803,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3223.8,
|
|
"valid_targets_min": 1178
|
|
},
|
|
{
|
|
"epoch": 4.191542288557214,
|
|
"grad_norm": 0.5771443261900594,
|
|
"learning_rate": 1.6641138543234253e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23651202023029327,
|
|
"step": 1685,
|
|
"valid_targets_mean": 3848.0,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 4.203980099502488,
|
|
"grad_norm": 0.5522616677962544,
|
|
"learning_rate": 1.6518890707056522e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1811617612838745,
|
|
"step": 1690,
|
|
"valid_targets_mean": 4084.1,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 4.2164179104477615,
|
|
"grad_norm": 0.5796056383907187,
|
|
"learning_rate": 1.639677684737539e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18784770369529724,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3328.5,
|
|
"valid_targets_min": 1507
|
|
},
|
|
{
|
|
"epoch": 4.2288557213930345,
|
|
"grad_norm": 0.622905968717353,
|
|
"learning_rate": 1.6274801663953415e-05,
|
|
"loss": 0.1836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1970984935760498,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3623.7,
|
|
"valid_targets_min": 1520
|
|
},
|
|
{
|
|
"epoch": 4.241293532338308,
|
|
"grad_norm": 0.5349694808917531,
|
|
"learning_rate": 1.6152969851215966e-05,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17795711755752563,
|
|
"step": 1705,
|
|
"valid_targets_mean": 4193.2,
|
|
"valid_targets_min": 1270
|
|
},
|
|
{
|
|
"epoch": 4.253731343283582,
|
|
"grad_norm": 0.6065208601441333,
|
|
"learning_rate": 1.6031286098070523e-05,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19792214035987854,
|
|
"step": 1710,
|
|
"valid_targets_mean": 3305.4,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 4.266169154228856,
|
|
"grad_norm": 0.6573570535633596,
|
|
"learning_rate": 1.5909755087726265e-05,
|
|
"loss": 0.1921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19432350993156433,
|
|
"step": 1715,
|
|
"valid_targets_mean": 4008.4,
|
|
"valid_targets_min": 200
|
|
},
|
|
{
|
|
"epoch": 4.278606965174129,
|
|
"grad_norm": 0.561968603462814,
|
|
"learning_rate": 1.5788381497513784e-05,
|
|
"loss": 0.1971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1960953325033188,
|
|
"step": 1720,
|
|
"valid_targets_mean": 4200.1,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 4.291044776119403,
|
|
"grad_norm": 0.5705642184565684,
|
|
"learning_rate": 1.5667169998705065e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19628682732582092,
|
|
"step": 1725,
|
|
"valid_targets_mean": 4268.7,
|
|
"valid_targets_min": 2160
|
|
},
|
|
{
|
|
"epoch": 4.303482587064677,
|
|
"grad_norm": 0.5543526015290202,
|
|
"learning_rate": 1.5546125256333778e-05,
|
|
"loss": 0.1881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20755350589752197,
|
|
"step": 1730,
|
|
"valid_targets_mean": 4264.7,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 4.3159203980099505,
|
|
"grad_norm": 0.5131024835801369,
|
|
"learning_rate": 1.5425251929015635e-05,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1865994930267334,
|
|
"step": 1735,
|
|
"valid_targets_mean": 4723.0,
|
|
"valid_targets_min": 2825
|
|
},
|
|
{
|
|
"epoch": 4.3283582089552235,
|
|
"grad_norm": 0.5369465865720151,
|
|
"learning_rate": 1.5304554668769175e-05,
|
|
"loss": 0.1817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16630160808563232,
|
|
"step": 1740,
|
|
"valid_targets_mean": 4633.6,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 4.340796019900497,
|
|
"grad_norm": 0.5488638067704708,
|
|
"learning_rate": 1.5184038120836678e-05,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19109171628952026,
|
|
"step": 1745,
|
|
"valid_targets_mean": 4214.2,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 4.353233830845771,
|
|
"grad_norm": 0.6194076089336067,
|
|
"learning_rate": 1.5063706923505392e-05,
|
|
"loss": 0.188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2046584188938141,
|
|
"step": 1750,
|
|
"valid_targets_mean": 3853.9,
|
|
"valid_targets_min": 2765
|
|
},
|
|
{
|
|
"epoch": 4.365671641791045,
|
|
"grad_norm": 0.5294295584701465,
|
|
"learning_rate": 1.494356570792905e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18276120722293854,
|
|
"step": 1755,
|
|
"valid_targets_mean": 4236.4,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 4.378109452736318,
|
|
"grad_norm": 0.585302204807222,
|
|
"learning_rate": 1.4823619097949584e-05,
|
|
"loss": 0.1914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19066128134727478,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3614.5,
|
|
"valid_targets_min": 2120
|
|
},
|
|
{
|
|
"epoch": 4.390547263681592,
|
|
"grad_norm": 0.5450990664960823,
|
|
"learning_rate": 1.4703871709919217e-05,
|
|
"loss": 0.1886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1853064000606537,
|
|
"step": 1765,
|
|
"valid_targets_mean": 4034.2,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 4.402985074626866,
|
|
"grad_norm": 0.5070494966282225,
|
|
"learning_rate": 1.4584328152522762e-05,
|
|
"loss": 0.183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19450125098228455,
|
|
"step": 1770,
|
|
"valid_targets_mean": 4716.2,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 4.41542288557214,
|
|
"grad_norm": 0.5501844594868129,
|
|
"learning_rate": 1.446499302660024e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17513319849967957,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3711.8,
|
|
"valid_targets_min": 2079
|
|
},
|
|
{
|
|
"epoch": 4.4278606965174125,
|
|
"grad_norm": 0.591362418900047,
|
|
"learning_rate": 1.4345870924969862e-05,
|
|
"loss": 0.1972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20799198746681213,
|
|
"step": 1780,
|
|
"valid_targets_mean": 4011.0,
|
|
"valid_targets_min": 2539
|
|
},
|
|
{
|
|
"epoch": 4.440298507462686,
|
|
"grad_norm": 0.5193747556433063,
|
|
"learning_rate": 1.4226966432251201e-05,
|
|
"loss": 0.1895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19595052301883698,
|
|
"step": 1785,
|
|
"valid_targets_mean": 4659.1,
|
|
"valid_targets_min": 2807
|
|
},
|
|
{
|
|
"epoch": 4.45273631840796,
|
|
"grad_norm": 0.5569115821469196,
|
|
"learning_rate": 1.4108284124688796e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19918416440486908,
|
|
"step": 1790,
|
|
"valid_targets_mean": 4438.2,
|
|
"valid_targets_min": 1874
|
|
},
|
|
{
|
|
"epoch": 4.465174129353234,
|
|
"grad_norm": 0.5283360905638507,
|
|
"learning_rate": 1.3989828569976003e-05,
|
|
"loss": 0.1918,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1961868703365326,
|
|
"step": 1795,
|
|
"valid_targets_mean": 4511.8,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 4.477611940298507,
|
|
"grad_norm": 0.5532204571218482,
|
|
"learning_rate": 1.3871604327079184e-05,
|
|
"loss": 0.1883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1693786382675171,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3788.8,
|
|
"valid_targets_min": 1567
|
|
},
|
|
{
|
|
"epoch": 4.490049751243781,
|
|
"grad_norm": 0.5359674168093468,
|
|
"learning_rate": 1.37536159460623e-05,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.183126300573349,
|
|
"step": 1805,
|
|
"valid_targets_mean": 4236.6,
|
|
"valid_targets_min": 1831
|
|
},
|
|
{
|
|
"epoch": 4.502487562189055,
|
|
"grad_norm": 0.5263751071985473,
|
|
"learning_rate": 1.3635867967911734e-05,
|
|
"loss": 0.1854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18391427397727966,
|
|
"step": 1810,
|
|
"valid_targets_mean": 4034.3,
|
|
"valid_targets_min": 2682
|
|
},
|
|
{
|
|
"epoch": 4.514925373134329,
|
|
"grad_norm": 0.6145126374437491,
|
|
"learning_rate": 1.3518364924361564e-05,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18755874037742615,
|
|
"step": 1815,
|
|
"valid_targets_mean": 3663.2,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 4.5273631840796025,
|
|
"grad_norm": 0.5461908468497769,
|
|
"learning_rate": 1.340111133771913e-05,
|
|
"loss": 0.1942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19034013152122498,
|
|
"step": 1820,
|
|
"valid_targets_mean": 4258.0,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 4.539800995024875,
|
|
"grad_norm": 0.5437588969087739,
|
|
"learning_rate": 1.3284111720690987e-05,
|
|
"loss": 0.1919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18442508578300476,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3990.9,
|
|
"valid_targets_min": 2466
|
|
},
|
|
{
|
|
"epoch": 4.552238805970149,
|
|
"grad_norm": 0.5315792588743522,
|
|
"learning_rate": 1.3167370576209253e-05,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18224917352199554,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4163.6,
|
|
"valid_targets_min": 2181
|
|
},
|
|
{
|
|
"epoch": 4.564676616915423,
|
|
"grad_norm": 0.5757247651612372,
|
|
"learning_rate": 1.305089239725826e-05,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20078767836093903,
|
|
"step": 1835,
|
|
"valid_targets_mean": 4150.7,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 4.577114427860696,
|
|
"grad_norm": 0.5938129394057137,
|
|
"learning_rate": 1.2934681666701674e-05,
|
|
"loss": 0.1894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18216751515865326,
|
|
"step": 1840,
|
|
"valid_targets_mean": 4025.4,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 4.58955223880597,
|
|
"grad_norm": 0.6028091843290723,
|
|
"learning_rate": 1.2818742857109947e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19109377264976501,
|
|
"step": 1845,
|
|
"valid_targets_mean": 3510.1,
|
|
"valid_targets_min": 228
|
|
},
|
|
{
|
|
"epoch": 4.601990049751244,
|
|
"grad_norm": 0.5776897498789417,
|
|
"learning_rate": 1.270308043058816e-05,
|
|
"loss": 0.1905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18680351972579956,
|
|
"step": 1850,
|
|
"valid_targets_mean": 3375.8,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 4.614427860696518,
|
|
"grad_norm": 0.5240221774849542,
|
|
"learning_rate": 1.2587698838604357e-05,
|
|
"loss": 0.1925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1797061264514923,
|
|
"step": 1855,
|
|
"valid_targets_mean": 4271.2,
|
|
"valid_targets_min": 1977
|
|
},
|
|
{
|
|
"epoch": 4.6268656716417915,
|
|
"grad_norm": 0.5639717377124501,
|
|
"learning_rate": 1.2472602521818136e-05,
|
|
"loss": 0.1948,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21001404523849487,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3991.2,
|
|
"valid_targets_min": 1970
|
|
},
|
|
{
|
|
"epoch": 4.6393034825870645,
|
|
"grad_norm": 0.5205449157527914,
|
|
"learning_rate": 1.2357795909909831e-05,
|
|
"loss": 0.1924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21091000735759735,
|
|
"step": 1865,
|
|
"valid_targets_mean": 4543.5,
|
|
"valid_targets_min": 1236
|
|
},
|
|
{
|
|
"epoch": 4.651741293532338,
|
|
"grad_norm": 0.5642643394569002,
|
|
"learning_rate": 1.2243283421409944e-05,
|
|
"loss": 0.1966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19856856763362885,
|
|
"step": 1870,
|
|
"valid_targets_mean": 4086.1,
|
|
"valid_targets_min": 2486
|
|
},
|
|
{
|
|
"epoch": 4.664179104477612,
|
|
"grad_norm": 0.5328298872378295,
|
|
"learning_rate": 1.2129069463529147e-05,
|
|
"loss": 0.1861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1959315538406372,
|
|
"step": 1875,
|
|
"valid_targets_mean": 4242.9,
|
|
"valid_targets_min": 2666
|
|
},
|
|
{
|
|
"epoch": 4.676616915422885,
|
|
"grad_norm": 0.553203140113736,
|
|
"learning_rate": 1.2015158431988656e-05,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1867968738079071,
|
|
"step": 1880,
|
|
"valid_targets_mean": 4032.2,
|
|
"valid_targets_min": 1997
|
|
},
|
|
{
|
|
"epoch": 4.689054726368159,
|
|
"grad_norm": 0.579092184466775,
|
|
"learning_rate": 1.1901554710851022e-05,
|
|
"loss": 0.2058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20128527283668518,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3413.8,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 4.701492537313433,
|
|
"grad_norm": 0.6107118718654273,
|
|
"learning_rate": 1.1788262672351451e-05,
|
|
"loss": 0.1835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19431200623512268,
|
|
"step": 1890,
|
|
"valid_targets_mean": 3529.1,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 4.713930348258707,
|
|
"grad_norm": 0.5389866374015265,
|
|
"learning_rate": 1.1675286676729489e-05,
|
|
"loss": 0.1941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19274672865867615,
|
|
"step": 1895,
|
|
"valid_targets_mean": 4071.8,
|
|
"valid_targets_min": 2330
|
|
},
|
|
{
|
|
"epoch": 4.726368159203981,
|
|
"grad_norm": 0.5466037683250415,
|
|
"learning_rate": 1.1562631072061214e-05,
|
|
"loss": 0.189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1851121038198471,
|
|
"step": 1900,
|
|
"valid_targets_mean": 4066.6,
|
|
"valid_targets_min": 2216
|
|
},
|
|
{
|
|
"epoch": 4.7388059701492535,
|
|
"grad_norm": 0.5172604730124416,
|
|
"learning_rate": 1.1450300194091936e-05,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18047800660133362,
|
|
"step": 1905,
|
|
"valid_targets_mean": 4594.6,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 4.751243781094527,
|
|
"grad_norm": 0.5097432319490798,
|
|
"learning_rate": 1.1338298366069282e-05,
|
|
"loss": 0.1899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17912769317626953,
|
|
"step": 1910,
|
|
"valid_targets_mean": 4337.2,
|
|
"valid_targets_min": 921
|
|
},
|
|
{
|
|
"epoch": 4.763681592039801,
|
|
"grad_norm": 0.527120343823522,
|
|
"learning_rate": 1.1226629898576818e-05,
|
|
"loss": 0.1912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22057533264160156,
|
|
"step": 1915,
|
|
"valid_targets_mean": 4766.2,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 4.776119402985074,
|
|
"grad_norm": 0.5341316468563161,
|
|
"learning_rate": 1.1115299089368163e-05,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18534281849861145,
|
|
"step": 1920,
|
|
"valid_targets_mean": 3921.1,
|
|
"valid_targets_min": 2193
|
|
},
|
|
{
|
|
"epoch": 4.788557213930348,
|
|
"grad_norm": 0.5460035866192121,
|
|
"learning_rate": 1.1004310223201567e-05,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20613934099674225,
|
|
"step": 1925,
|
|
"valid_targets_mean": 4485.4,
|
|
"valid_targets_min": 2111
|
|
},
|
|
{
|
|
"epoch": 4.800995024875622,
|
|
"grad_norm": 0.5517532913974614,
|
|
"learning_rate": 1.089366757167504e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1839025318622589,
|
|
"step": 1930,
|
|
"valid_targets_mean": 3908.1,
|
|
"valid_targets_min": 2332
|
|
},
|
|
{
|
|
"epoch": 4.813432835820896,
|
|
"grad_norm": 0.529510753958286,
|
|
"learning_rate": 1.0783375393061867e-05,
|
|
"loss": 0.1863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19102586805820465,
|
|
"step": 1935,
|
|
"valid_targets_mean": 4116.6,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 4.82587064676617,
|
|
"grad_norm": 0.5194175454337181,
|
|
"learning_rate": 1.0673437932146844e-05,
|
|
"loss": 0.1946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1900445818901062,
|
|
"step": 1940,
|
|
"valid_targets_mean": 5161.1,
|
|
"valid_targets_min": 2400
|
|
},
|
|
{
|
|
"epoch": 4.838308457711443,
|
|
"grad_norm": 0.5563197452474851,
|
|
"learning_rate": 1.05638594200628e-05,
|
|
"loss": 0.1906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1914559304714203,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3629.1,
|
|
"valid_targets_min": 1381
|
|
},
|
|
{
|
|
"epoch": 4.850746268656716,
|
|
"grad_norm": 0.5125531536441792,
|
|
"learning_rate": 1.0454644074127818e-05,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17317280173301697,
|
|
"step": 1950,
|
|
"valid_targets_mean": 4402.2,
|
|
"valid_targets_min": 1409
|
|
},
|
|
{
|
|
"epoch": 4.86318407960199,
|
|
"grad_norm": 0.5514274931255213,
|
|
"learning_rate": 1.0345796097682896e-05,
|
|
"loss": 0.1912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1964603066444397,
|
|
"step": 1955,
|
|
"valid_targets_mean": 4132.4,
|
|
"valid_targets_min": 2369
|
|
},
|
|
{
|
|
"epoch": 4.875621890547263,
|
|
"grad_norm": 0.542246915785968,
|
|
"learning_rate": 1.023731967993018e-05,
|
|
"loss": 0.1949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19377470016479492,
|
|
"step": 1960,
|
|
"valid_targets_mean": 4014.6,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 4.888059701492537,
|
|
"grad_norm": 0.5081914366033573,
|
|
"learning_rate": 1.0129218995771766e-05,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17151615023612976,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3851.6,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 4.900497512437811,
|
|
"grad_norm": 0.5987169529933362,
|
|
"learning_rate": 1.002149820564897e-05,
|
|
"loss": 0.1888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2023550570011139,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3578.8,
|
|
"valid_targets_min": 1055
|
|
},
|
|
{
|
|
"epoch": 4.912935323383085,
|
|
"grad_norm": 0.5582434934098669,
|
|
"learning_rate": 9.914161455382215e-06,
|
|
"loss": 0.1929,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1893100142478943,
|
|
"step": 1975,
|
|
"valid_targets_mean": 3755.3,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 4.925373134328359,
|
|
"grad_norm": 0.524299212994188,
|
|
"learning_rate": 9.807212876011528e-06,
|
|
"loss": 0.1922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19495365023612976,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4259.8,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 4.937810945273632,
|
|
"grad_norm": 0.5458717305766948,
|
|
"learning_rate": 9.700656583637484e-06,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18475620448589325,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3955.7,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 4.9502487562189055,
|
|
"grad_norm": 0.5988339564997255,
|
|
"learning_rate": 9.594496679262822e-06,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20608876645565033,
|
|
"step": 1990,
|
|
"valid_targets_mean": 3641.8,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 4.962686567164179,
|
|
"grad_norm": 0.5140756054780553,
|
|
"learning_rate": 9.488737248634603e-06,
|
|
"loss": 0.1953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21765117347240448,
|
|
"step": 1995,
|
|
"valid_targets_mean": 5012.4,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 4.975124378109452,
|
|
"grad_norm": 0.5750911579001797,
|
|
"learning_rate": 9.383382362086959e-06,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19809523224830627,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3847.8,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 4.987562189054726,
|
|
"grad_norm": 0.5469643211901647,
|
|
"learning_rate": 9.27843607438447e-06,
|
|
"loss": 0.1951,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19203123450279236,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3831.5,
|
|
"valid_targets_min": 1988
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.5145051979175338,
|
|
"learning_rate": 9.173902424566057e-06,
|
|
"loss": 0.1912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19573429226875305,
|
|
"step": 2010,
|
|
"valid_targets_mean": 4327.5,
|
|
"valid_targets_min": 2066
|
|
},
|
|
{
|
|
"epoch": 5.012437810945274,
|
|
"grad_norm": 0.48975408551998934,
|
|
"learning_rate": 9.06978543578957e-06,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17476843297481537,
|
|
"step": 2015,
|
|
"valid_targets_mean": 4369.6,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 5.024875621890548,
|
|
"grad_norm": 1.1782763959202165,
|
|
"learning_rate": 8.966089115176945e-06,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17875716090202332,
|
|
"step": 2020,
|
|
"valid_targets_mean": 4059.2,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 5.037313432835821,
|
|
"grad_norm": 0.5373043390636772,
|
|
"learning_rate": 8.862817453659968e-06,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1860535442829132,
|
|
"step": 2025,
|
|
"valid_targets_mean": 4277.5,
|
|
"valid_targets_min": 2642
|
|
},
|
|
{
|
|
"epoch": 5.0497512437810945,
|
|
"grad_norm": 0.5331643182583649,
|
|
"learning_rate": 8.759974425826696e-06,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16362561285495758,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3765.5,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 5.062189054726368,
|
|
"grad_norm": 0.544019833677962,
|
|
"learning_rate": 8.657563989768467e-06,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2051907181739807,
|
|
"step": 2035,
|
|
"valid_targets_mean": 4519.1,
|
|
"valid_targets_min": 2674
|
|
},
|
|
{
|
|
"epoch": 5.074626865671641,
|
|
"grad_norm": 0.62327450747123,
|
|
"learning_rate": 8.555590086927602e-06,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19175231456756592,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3685.4,
|
|
"valid_targets_min": 853
|
|
},
|
|
{
|
|
"epoch": 5.087064676616915,
|
|
"grad_norm": 0.5308764481118281,
|
|
"learning_rate": 8.454056641945665e-06,
|
|
"loss": 0.1859,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1750047206878662,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4141.5,
|
|
"valid_targets_min": 2422
|
|
},
|
|
{
|
|
"epoch": 5.099502487562189,
|
|
"grad_norm": 0.5185262701369572,
|
|
"learning_rate": 8.352967562512448e-06,
|
|
"loss": 0.1811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1702880859375,
|
|
"step": 2050,
|
|
"valid_targets_mean": 4598.6,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 5.111940298507463,
|
|
"grad_norm": 0.5829644925735894,
|
|
"learning_rate": 8.252326739215568e-06,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1757672280073166,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3722.4,
|
|
"valid_targets_min": 1948
|
|
},
|
|
{
|
|
"epoch": 5.124378109452737,
|
|
"grad_norm": 0.5984769834446014,
|
|
"learning_rate": 8.152138045390725e-06,
|
|
"loss": 0.1792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19725893437862396,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3535.5,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 5.13681592039801,
|
|
"grad_norm": 0.4884187116919498,
|
|
"learning_rate": 8.052405336972659e-06,
|
|
"loss": 0.1708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16476622223854065,
|
|
"step": 2065,
|
|
"valid_targets_mean": 4477.9,
|
|
"valid_targets_min": 1939
|
|
},
|
|
{
|
|
"epoch": 5.149253731343284,
|
|
"grad_norm": 0.8229280153452522,
|
|
"learning_rate": 7.953132452346693e-06,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16132110357284546,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3924.3,
|
|
"valid_targets_min": 1278
|
|
},
|
|
{
|
|
"epoch": 5.161691542288557,
|
|
"grad_norm": 0.4724798431111937,
|
|
"learning_rate": 7.854323212201047e-06,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17464956641197205,
|
|
"step": 2075,
|
|
"valid_targets_mean": 5130.9,
|
|
"valid_targets_min": 3182
|
|
},
|
|
{
|
|
"epoch": 5.174129353233831,
|
|
"grad_norm": 0.6676453133494172,
|
|
"learning_rate": 7.75598141937981e-06,
|
|
"loss": 0.1842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17902524769306183,
|
|
"step": 2080,
|
|
"valid_targets_mean": 3899.2,
|
|
"valid_targets_min": 1769
|
|
},
|
|
{
|
|
"epoch": 5.186567164179104,
|
|
"grad_norm": 0.581722657791125,
|
|
"learning_rate": 7.658110858736523e-06,
|
|
"loss": 0.1839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1738991141319275,
|
|
"step": 2085,
|
|
"valid_targets_mean": 4717.8,
|
|
"valid_targets_min": 1597
|
|
},
|
|
{
|
|
"epoch": 5.199004975124378,
|
|
"grad_norm": 0.6119229068009544,
|
|
"learning_rate": 7.560715296988554e-06,
|
|
"loss": 0.167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16843843460083008,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3586.2,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 5.211442786069652,
|
|
"grad_norm": 0.6296917134765727,
|
|
"learning_rate": 7.463798482572122e-06,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17999431490898132,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3528.2,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 5.223880597014926,
|
|
"grad_norm": 0.545873150726275,
|
|
"learning_rate": 7.36736414549802e-06,
|
|
"loss": 0.1718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18782277405261993,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4359.8,
|
|
"valid_targets_min": 2033
|
|
},
|
|
{
|
|
"epoch": 5.236318407960199,
|
|
"grad_norm": 0.5785180015454172,
|
|
"learning_rate": 7.271415997208093e-06,
|
|
"loss": 0.1849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1869148463010788,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3707.5,
|
|
"valid_targets_min": 2328
|
|
},
|
|
{
|
|
"epoch": 5.248756218905473,
|
|
"grad_norm": 0.5764954136094295,
|
|
"learning_rate": 7.17595773043233e-06,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18479612469673157,
|
|
"step": 2110,
|
|
"valid_targets_mean": 4026.6,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 5.2611940298507465,
|
|
"grad_norm": 0.5204559401076309,
|
|
"learning_rate": 7.080993019046827e-06,
|
|
"loss": 0.1822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17545653879642487,
|
|
"step": 2115,
|
|
"valid_targets_mean": 4619.4,
|
|
"valid_targets_min": 2520
|
|
},
|
|
{
|
|
"epoch": 5.273631840796019,
|
|
"grad_norm": 0.5629684650741945,
|
|
"learning_rate": 6.986525517932321e-06,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16429266333580017,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3826.1,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 5.286069651741293,
|
|
"grad_norm": 0.5207487254296341,
|
|
"learning_rate": 6.892558862833569e-06,
|
|
"loss": 0.19,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17473994195461273,
|
|
"step": 2125,
|
|
"valid_targets_mean": 4406.2,
|
|
"valid_targets_min": 2004
|
|
},
|
|
{
|
|
"epoch": 5.298507462686567,
|
|
"grad_norm": 0.6137205942170112,
|
|
"learning_rate": 6.799096670219396e-06,
|
|
"loss": 0.1832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18868446350097656,
|
|
"step": 2130,
|
|
"valid_targets_mean": 4076.2,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 5.310945273631841,
|
|
"grad_norm": 0.4900273933795682,
|
|
"learning_rate": 6.706142537143518e-06,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17624013125896454,
|
|
"step": 2135,
|
|
"valid_targets_mean": 5214.7,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 5.323383084577115,
|
|
"grad_norm": 0.5288471175274457,
|
|
"learning_rate": 6.613700041106119e-06,
|
|
"loss": 0.1701,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18341976404190063,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4289.6,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 5.335820895522388,
|
|
"grad_norm": 0.5519799885784733,
|
|
"learning_rate": 6.521772739916137e-06,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16220718622207642,
|
|
"step": 2145,
|
|
"valid_targets_mean": 4097.6,
|
|
"valid_targets_min": 1881
|
|
},
|
|
{
|
|
"epoch": 5.348258706467662,
|
|
"grad_norm": 0.641394208147078,
|
|
"learning_rate": 6.430364171554329e-06,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1815165877342224,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3265.1,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 5.3606965174129355,
|
|
"grad_norm": 0.5017060911996063,
|
|
"learning_rate": 6.33947785403716e-06,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15326166152954102,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4282.4,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 5.373134328358209,
|
|
"grad_norm": 0.5629109675071312,
|
|
"learning_rate": 6.249117285281348e-06,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19157464802265167,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4362.0,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 5.385572139303482,
|
|
"grad_norm": 0.5573924452067587,
|
|
"learning_rate": 6.159285942969266e-06,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18228746950626373,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3975.6,
|
|
"valid_targets_min": 2189
|
|
},
|
|
{
|
|
"epoch": 5.398009950248756,
|
|
"grad_norm": 0.5588862231529654,
|
|
"learning_rate": 6.0699872844151e-06,
|
|
"loss": 0.1909,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20605233311653137,
|
|
"step": 2170,
|
|
"valid_targets_mean": 4185.6,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 5.41044776119403,
|
|
"grad_norm": 0.5819160509013732,
|
|
"learning_rate": 5.9812247464317685e-06,
|
|
"loss": 0.1873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1820629984140396,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3783.1,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 5.422885572139304,
|
|
"grad_norm": 0.5625161894644168,
|
|
"learning_rate": 5.893001745198692e-06,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17581865191459656,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3561.9,
|
|
"valid_targets_min": 2275
|
|
},
|
|
{
|
|
"epoch": 5.435323383084577,
|
|
"grad_norm": 0.5946540446785107,
|
|
"learning_rate": 5.805321676130262e-06,
|
|
"loss": 0.1841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1829841583967209,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3860.6,
|
|
"valid_targets_min": 2281
|
|
},
|
|
{
|
|
"epoch": 5.447761194029851,
|
|
"grad_norm": 0.5531530928449198,
|
|
"learning_rate": 5.718187913745199e-06,
|
|
"loss": 0.1755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1640455424785614,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4010.9,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 5.460199004975125,
|
|
"grad_norm": 0.5893338522051409,
|
|
"learning_rate": 5.631603811536668e-06,
|
|
"loss": 0.1892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1882874071598053,
|
|
"step": 2195,
|
|
"valid_targets_mean": 3535.2,
|
|
"valid_targets_min": 1706
|
|
},
|
|
{
|
|
"epoch": 5.472636815920398,
|
|
"grad_norm": 0.6073509265616175,
|
|
"learning_rate": 5.545572701843216e-06,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18738335371017456,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4425.8,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 5.485074626865671,
|
|
"grad_norm": 0.5310642730413633,
|
|
"learning_rate": 5.4600978957205135e-06,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.168120339512825,
|
|
"step": 2205,
|
|
"valid_targets_mean": 3844.4,
|
|
"valid_targets_min": 1734
|
|
},
|
|
{
|
|
"epoch": 5.497512437810945,
|
|
"grad_norm": 0.555820405124083,
|
|
"learning_rate": 5.375182682813929e-06,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1976512372493744,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3939.1,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 5.509950248756219,
|
|
"grad_norm": 0.5509023684550313,
|
|
"learning_rate": 5.290830331231933e-06,
|
|
"loss": 0.1743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1776900589466095,
|
|
"step": 2215,
|
|
"valid_targets_mean": 4289.1,
|
|
"valid_targets_min": 1658
|
|
},
|
|
{
|
|
"epoch": 5.522388059701493,
|
|
"grad_norm": 0.5113496796090481,
|
|
"learning_rate": 5.2070440874202925e-06,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17290140688419342,
|
|
"step": 2220,
|
|
"valid_targets_mean": 4486.5,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 5.534825870646766,
|
|
"grad_norm": 0.5419706723077836,
|
|
"learning_rate": 5.123827176037146e-06,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18113742768764496,
|
|
"step": 2225,
|
|
"valid_targets_mean": 4176.4,
|
|
"valid_targets_min": 1097
|
|
},
|
|
{
|
|
"epoch": 5.54726368159204,
|
|
"grad_norm": 0.5714071952278112,
|
|
"learning_rate": 5.041182799828888e-06,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16627778112888336,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3533.5,
|
|
"valid_targets_min": 1673
|
|
},
|
|
{
|
|
"epoch": 5.559701492537314,
|
|
"grad_norm": 0.5631880598259031,
|
|
"learning_rate": 4.959114139506909e-06,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16612115502357483,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3968.1,
|
|
"valid_targets_min": 2003
|
|
},
|
|
{
|
|
"epoch": 5.572139303482587,
|
|
"grad_norm": 0.5954530663996263,
|
|
"learning_rate": 4.877624353625197e-06,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18126454949378967,
|
|
"step": 2240,
|
|
"valid_targets_mean": 3962.8,
|
|
"valid_targets_min": 2013
|
|
},
|
|
{
|
|
"epoch": 5.58457711442786,
|
|
"grad_norm": 0.5504249794441187,
|
|
"learning_rate": 4.7967165784587284e-06,
|
|
"loss": 0.1832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19141951203346252,
|
|
"step": 2245,
|
|
"valid_targets_mean": 4483.5,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 5.597014925373134,
|
|
"grad_norm": 0.6159216942243276,
|
|
"learning_rate": 4.71639392788281e-06,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18392285704612732,
|
|
"step": 2250,
|
|
"valid_targets_mean": 3742.2,
|
|
"valid_targets_min": 1851
|
|
},
|
|
{
|
|
"epoch": 5.609452736318408,
|
|
"grad_norm": 0.6025337544936586,
|
|
"learning_rate": 4.6366594932532285e-06,
|
|
"loss": 0.1781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19583985209465027,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3859.6,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 5.621890547263682,
|
|
"grad_norm": 0.5398477942967179,
|
|
"learning_rate": 4.557516343287251e-06,
|
|
"loss": 0.173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1676681786775589,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3899.4,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 5.634328358208955,
|
|
"grad_norm": 0.5503845662196336,
|
|
"learning_rate": 4.4789675239455385e-06,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17880871891975403,
|
|
"step": 2265,
|
|
"valid_targets_mean": 3832.9,
|
|
"valid_targets_min": 1595
|
|
},
|
|
{
|
|
"epoch": 5.646766169154229,
|
|
"grad_norm": 0.5103257331663372,
|
|
"learning_rate": 4.401016058314913e-06,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17229273915290833,
|
|
"step": 2270,
|
|
"valid_targets_mean": 4335.1,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 5.659203980099503,
|
|
"grad_norm": 0.5094018756134759,
|
|
"learning_rate": 4.3236649464919986e-06,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16803838312625885,
|
|
"step": 2275,
|
|
"valid_targets_mean": 4230.3,
|
|
"valid_targets_min": 2119
|
|
},
|
|
{
|
|
"epoch": 5.6716417910447765,
|
|
"grad_norm": 0.5319736807745282,
|
|
"learning_rate": 4.246917165467799e-06,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19026246666908264,
|
|
"step": 2280,
|
|
"valid_targets_mean": 4721.9,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 5.6840796019900495,
|
|
"grad_norm": 0.5869835916274105,
|
|
"learning_rate": 4.170775669013041e-06,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18648019433021545,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3507.3,
|
|
"valid_targets_min": 2093
|
|
},
|
|
{
|
|
"epoch": 5.696517412935323,
|
|
"grad_norm": 0.5334102609956934,
|
|
"learning_rate": 4.095243387564593e-06,
|
|
"loss": 0.1732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18063801527023315,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4553.8,
|
|
"valid_targets_min": 2042
|
|
},
|
|
{
|
|
"epoch": 5.708955223880597,
|
|
"grad_norm": 0.5694193724204722,
|
|
"learning_rate": 4.020323228112604e-06,
|
|
"loss": 0.1927,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1846069097518921,
|
|
"step": 2295,
|
|
"valid_targets_mean": 4064.2,
|
|
"valid_targets_min": 1370
|
|
},
|
|
{
|
|
"epoch": 5.721393034825871,
|
|
"grad_norm": 0.5452728006991922,
|
|
"learning_rate": 3.9460180740886625e-06,
|
|
"loss": 0.1775,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17648135125637054,
|
|
"step": 2300,
|
|
"valid_targets_mean": 4065.1,
|
|
"valid_targets_min": 2102
|
|
},
|
|
{
|
|
"epoch": 5.733830845771144,
|
|
"grad_norm": 0.5669921219625754,
|
|
"learning_rate": 3.872330785254803e-06,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18603132665157318,
|
|
"step": 2305,
|
|
"valid_targets_mean": 3896.8,
|
|
"valid_targets_min": 1993
|
|
},
|
|
{
|
|
"epoch": 5.746268656716418,
|
|
"grad_norm": 0.5623459293769465,
|
|
"learning_rate": 3.7992641975934595e-06,
|
|
"loss": 0.1897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1948518455028534,
|
|
"step": 2310,
|
|
"valid_targets_mean": 4391.6,
|
|
"valid_targets_min": 1950
|
|
},
|
|
{
|
|
"epoch": 5.758706467661692,
|
|
"grad_norm": 0.5330298017777138,
|
|
"learning_rate": 3.7268211231983185e-06,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1722792387008667,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4296.8,
|
|
"valid_targets_min": 2070
|
|
},
|
|
{
|
|
"epoch": 5.7711442786069655,
|
|
"grad_norm": 0.6779738524621803,
|
|
"learning_rate": 3.6550043501660736e-06,
|
|
"loss": 0.1829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17438241839408875,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3621.8,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 5.7835820895522385,
|
|
"grad_norm": 0.6308227572737273,
|
|
"learning_rate": 3.583816642489113e-06,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18285539746284485,
|
|
"step": 2325,
|
|
"valid_targets_mean": 3364.1,
|
|
"valid_targets_min": 1441
|
|
},
|
|
{
|
|
"epoch": 5.796019900497512,
|
|
"grad_norm": 0.5466656922264711,
|
|
"learning_rate": 3.513260739949196e-06,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.160654678940773,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3844.6,
|
|
"valid_targets_min": 2190
|
|
},
|
|
{
|
|
"epoch": 5.808457711442786,
|
|
"grad_norm": 0.5473761351277354,
|
|
"learning_rate": 3.4433393580119436e-06,
|
|
"loss": 0.1821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17680670320987701,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3974.6,
|
|
"valid_targets_min": 1692
|
|
},
|
|
{
|
|
"epoch": 5.82089552238806,
|
|
"grad_norm": 0.549317498826441,
|
|
"learning_rate": 3.3740551877223647e-06,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17757967114448547,
|
|
"step": 2340,
|
|
"valid_targets_mean": 3935.1,
|
|
"valid_targets_min": 2554
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 0.5224545749951351,
|
|
"learning_rate": 3.30541089560128e-06,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16635212302207947,
|
|
"step": 2345,
|
|
"valid_targets_mean": 4472.9,
|
|
"valid_targets_min": 2049
|
|
},
|
|
{
|
|
"epoch": 5.845771144278607,
|
|
"grad_norm": 0.5477440996260752,
|
|
"learning_rate": 3.2374091235426918e-06,
|
|
"loss": 0.1759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16781067848205566,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3725.6,
|
|
"valid_targets_min": 2020
|
|
},
|
|
{
|
|
"epoch": 5.858208955223881,
|
|
"grad_norm": 0.6293176391340207,
|
|
"learning_rate": 3.1700524887121188e-06,
|
|
"loss": 0.1786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.193925142288208,
|
|
"step": 2355,
|
|
"valid_targets_mean": 3524.7,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 5.870646766169155,
|
|
"grad_norm": 0.5849523473688234,
|
|
"learning_rate": 3.103343583445848e-06,
|
|
"loss": 0.1794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18686872720718384,
|
|
"step": 2360,
|
|
"valid_targets_mean": 3800.3,
|
|
"valid_targets_min": 2279
|
|
},
|
|
{
|
|
"epoch": 5.883084577114428,
|
|
"grad_norm": 0.546446441498615,
|
|
"learning_rate": 3.037284975151182e-06,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.174645334482193,
|
|
"step": 2365,
|
|
"valid_targets_mean": 4354.5,
|
|
"valid_targets_min": 2242
|
|
},
|
|
{
|
|
"epoch": 5.895522388059701,
|
|
"grad_norm": 0.6294684870590153,
|
|
"learning_rate": 2.9718792062076264e-06,
|
|
"loss": 0.1773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16901081800460815,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3182.9,
|
|
"valid_targets_min": 960
|
|
},
|
|
{
|
|
"epoch": 5.907960199004975,
|
|
"grad_norm": 0.545529756238248,
|
|
"learning_rate": 2.9071287938690298e-06,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19544681906700134,
|
|
"step": 2375,
|
|
"valid_targets_mean": 4001.1,
|
|
"valid_targets_min": 915
|
|
},
|
|
{
|
|
"epoch": 5.920398009950249,
|
|
"grad_norm": 0.5761291485378018,
|
|
"learning_rate": 2.843036230166718e-06,
|
|
"loss": 0.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17292159795761108,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3945.6,
|
|
"valid_targets_min": 1765
|
|
},
|
|
{
|
|
"epoch": 5.932835820895522,
|
|
"grad_norm": 0.5200481295816748,
|
|
"learning_rate": 2.779603981813568e-06,
|
|
"loss": 0.1672,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1651022732257843,
|
|
"step": 2385,
|
|
"valid_targets_mean": 4494.5,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 5.945273631840796,
|
|
"grad_norm": 0.5690349697582774,
|
|
"learning_rate": 2.7168344901091016e-06,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17301228642463684,
|
|
"step": 2390,
|
|
"valid_targets_mean": 4024.2,
|
|
"valid_targets_min": 2194
|
|
},
|
|
{
|
|
"epoch": 5.95771144278607,
|
|
"grad_norm": 0.5426341756011361,
|
|
"learning_rate": 2.6547301708454877e-06,
|
|
"loss": 0.184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19446370005607605,
|
|
"step": 2395,
|
|
"valid_targets_mean": 4282.0,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 5.970149253731344,
|
|
"grad_norm": 0.6008319137622877,
|
|
"learning_rate": 2.5932934142145906e-06,
|
|
"loss": 0.1695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17186178267002106,
|
|
"step": 2400,
|
|
"valid_targets_mean": 4482.7,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 5.982587064676617,
|
|
"grad_norm": 0.5514765334050616,
|
|
"learning_rate": 2.5325265847159798e-06,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.167934849858284,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3971.0,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 5.9950248756218905,
|
|
"grad_norm": 0.5743859815483373,
|
|
"learning_rate": 2.472432021065918e-06,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17497491836547852,
|
|
"step": 2410,
|
|
"valid_targets_mean": 4070.7,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 6.007462686567164,
|
|
"grad_norm": 0.5125255425588743,
|
|
"learning_rate": 2.4130120361073716e-06,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1637192666530609,
|
|
"step": 2415,
|
|
"valid_targets_mean": 4396.6,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 6.019900497512438,
|
|
"grad_norm": 0.5724876553676014,
|
|
"learning_rate": 2.3542689167209563e-06,
|
|
"loss": 0.1782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17785580456256866,
|
|
"step": 2420,
|
|
"valid_targets_mean": 3476.4,
|
|
"valid_targets_min": 1979
|
|
},
|
|
{
|
|
"epoch": 6.032338308457711,
|
|
"grad_norm": 0.5879062795130893,
|
|
"learning_rate": 2.2962049237369643e-06,
|
|
"loss": 0.1727,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16875743865966797,
|
|
"step": 2425,
|
|
"valid_targets_mean": 3416.4,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 6.044776119402985,
|
|
"grad_norm": 0.5588828300455307,
|
|
"learning_rate": 2.238822291848344e-06,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17179271578788757,
|
|
"step": 2430,
|
|
"valid_targets_mean": 3605.9,
|
|
"valid_targets_min": 1418
|
|
},
|
|
{
|
|
"epoch": 6.057213930348259,
|
|
"grad_norm": 0.5295142510715316,
|
|
"learning_rate": 2.182123229524673e-06,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1968148648738861,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4323.1,
|
|
"valid_targets_min": 2235
|
|
},
|
|
{
|
|
"epoch": 6.069651741293533,
|
|
"grad_norm": 0.6168460567108859,
|
|
"learning_rate": 2.1261099189271792e-06,
|
|
"loss": 0.1715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1779354065656662,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3457.3,
|
|
"valid_targets_min": 1629
|
|
},
|
|
{
|
|
"epoch": 6.082089552238806,
|
|
"grad_norm": 0.5621656463077643,
|
|
"learning_rate": 2.070784515824753e-06,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.159359872341156,
|
|
"step": 2445,
|
|
"valid_targets_mean": 4689.1,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 6.0945273631840795,
|
|
"grad_norm": 0.5487542240351966,
|
|
"learning_rate": 2.016149149510975e-06,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16500002145767212,
|
|
"step": 2450,
|
|
"valid_targets_mean": 4167.4,
|
|
"valid_targets_min": 2067
|
|
},
|
|
{
|
|
"epoch": 6.106965174129353,
|
|
"grad_norm": 0.5686330625834844,
|
|
"learning_rate": 1.9622059227221825e-06,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17375610768795013,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3917.0,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 6.119402985074627,
|
|
"grad_norm": 0.5233617770605969,
|
|
"learning_rate": 1.9089569115565052e-06,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16628089547157288,
|
|
"step": 2460,
|
|
"valid_targets_mean": 4445.4,
|
|
"valid_targets_min": 2398
|
|
},
|
|
{
|
|
"epoch": 6.1318407960199,
|
|
"grad_norm": 0.5914278966941198,
|
|
"learning_rate": 1.8564041653940123e-06,
|
|
"loss": 0.1717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16870436072349548,
|
|
"step": 2465,
|
|
"valid_targets_mean": 4021.4,
|
|
"valid_targets_min": 2352
|
|
},
|
|
{
|
|
"epoch": 6.144278606965174,
|
|
"grad_norm": 0.5693055333467447,
|
|
"learning_rate": 1.8045497068177975e-06,
|
|
"loss": 0.1798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1836708039045334,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3719.6,
|
|
"valid_targets_min": 1833
|
|
},
|
|
{
|
|
"epoch": 6.156716417910448,
|
|
"grad_norm": 0.652372737993125,
|
|
"learning_rate": 1.7533955315361551e-06,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1608482003211975,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3182.6,
|
|
"valid_targets_min": 2079
|
|
},
|
|
{
|
|
"epoch": 6.169154228855722,
|
|
"grad_norm": 0.533482071857129,
|
|
"learning_rate": 1.7029436083057715e-06,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16928087174892426,
|
|
"step": 2480,
|
|
"valid_targets_mean": 4331.8,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 6.181592039800995,
|
|
"grad_norm": 0.5464490007192712,
|
|
"learning_rate": 1.6531958788559465e-06,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16682176291942596,
|
|
"step": 2485,
|
|
"valid_targets_mean": 3949.8,
|
|
"valid_targets_min": 1289
|
|
},
|
|
{
|
|
"epoch": 6.1940298507462686,
|
|
"grad_norm": 0.5623683876558216,
|
|
"learning_rate": 1.6041542578138746e-06,
|
|
"loss": 0.1749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18091854453086853,
|
|
"step": 2490,
|
|
"valid_targets_mean": 4234.9,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 6.206467661691542,
|
|
"grad_norm": 0.5238269588411919,
|
|
"learning_rate": 1.5558206326309511e-06,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16900426149368286,
|
|
"step": 2495,
|
|
"valid_targets_mean": 4697.7,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 6.218905472636816,
|
|
"grad_norm": 0.5716740018491285,
|
|
"learning_rate": 1.5081968635101097e-06,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18251916766166687,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3668.0,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 6.231343283582089,
|
|
"grad_norm": 0.5363180031909048,
|
|
"learning_rate": 1.4612847833342759e-06,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1818065345287323,
|
|
"step": 2505,
|
|
"valid_targets_mean": 4083.4,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 6.243781094527363,
|
|
"grad_norm": 0.6429529839019501,
|
|
"learning_rate": 1.4150861975957786e-06,
|
|
"loss": 0.1699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18589231371879578,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3552.3,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 6.256218905472637,
|
|
"grad_norm": 0.4975492608341589,
|
|
"learning_rate": 1.3696028843268993e-06,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15583984553813934,
|
|
"step": 2515,
|
|
"valid_targets_mean": 4623.1,
|
|
"valid_targets_min": 1947
|
|
},
|
|
{
|
|
"epoch": 6.268656716417911,
|
|
"grad_norm": 0.5345304367276562,
|
|
"learning_rate": 1.3248365940314067e-06,
|
|
"loss": 0.1729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1705108880996704,
|
|
"step": 2520,
|
|
"valid_targets_mean": 4393.9,
|
|
"valid_targets_min": 1990
|
|
},
|
|
{
|
|
"epoch": 6.281094527363184,
|
|
"grad_norm": 0.5373113443994114,
|
|
"learning_rate": 1.2807890496172103e-06,
|
|
"loss": 0.1713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1581442952156067,
|
|
"step": 2525,
|
|
"valid_targets_mean": 4299.2,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 6.293532338308458,
|
|
"grad_norm": 0.514081511794592,
|
|
"learning_rate": 1.237461946330054e-06,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15877807140350342,
|
|
"step": 2530,
|
|
"valid_targets_mean": 4525.9,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 6.3059701492537314,
|
|
"grad_norm": 0.560992308158361,
|
|
"learning_rate": 1.1948569516882503e-06,
|
|
"loss": 0.1706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16327780485153198,
|
|
"step": 2535,
|
|
"valid_targets_mean": 4197.6,
|
|
"valid_targets_min": 2315
|
|
},
|
|
{
|
|
"epoch": 6.318407960199005,
|
|
"grad_norm": 0.5265783790926104,
|
|
"learning_rate": 1.1529757054185176e-06,
|
|
"loss": 0.174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17675918340682983,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4462.6,
|
|
"valid_targets_min": 1880
|
|
},
|
|
{
|
|
"epoch": 6.330845771144278,
|
|
"grad_norm": 0.5416867779723658,
|
|
"learning_rate": 1.111819819392872e-06,
|
|
"loss": 0.1768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17835187911987305,
|
|
"step": 2545,
|
|
"valid_targets_mean": 4455.2,
|
|
"valid_targets_min": 1971
|
|
},
|
|
{
|
|
"epoch": 6.343283582089552,
|
|
"grad_norm": 0.5881401941231864,
|
|
"learning_rate": 1.0713908775665827e-06,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16701434552669525,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3613.5,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 6.355721393034826,
|
|
"grad_norm": 0.5504035044096116,
|
|
"learning_rate": 1.0316904359172297e-06,
|
|
"loss": 0.1789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16181015968322754,
|
|
"step": 2555,
|
|
"valid_targets_mean": 3846.8,
|
|
"valid_targets_min": 1976
|
|
},
|
|
{
|
|
"epoch": 6.3681592039801,
|
|
"grad_norm": 0.566462536612002,
|
|
"learning_rate": 9.92720022384792e-07,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1657314896583557,
|
|
"step": 2560,
|
|
"valid_targets_mean": 4052.9,
|
|
"valid_targets_min": 2216
|
|
},
|
|
{
|
|
"epoch": 6.380597014925373,
|
|
"grad_norm": 0.5700973608301066,
|
|
"learning_rate": 9.544811368128703e-07,
|
|
"loss": 0.1645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17204223573207855,
|
|
"step": 2565,
|
|
"valid_targets_mean": 4212.5,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 6.393034825870647,
|
|
"grad_norm": 0.5934427501781491,
|
|
"learning_rate": 9.169752508909413e-07,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17751480638980865,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3961.2,
|
|
"valid_targets_min": 2303
|
|
},
|
|
{
|
|
"epoch": 6.4054726368159205,
|
|
"grad_norm": 0.5691373349683894,
|
|
"learning_rate": 8.802038080977305e-07,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17300844192504883,
|
|
"step": 2575,
|
|
"valid_targets_mean": 3961.4,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 6.417910447761194,
|
|
"grad_norm": 0.6340863357974218,
|
|
"learning_rate": 8.441682236456472e-07,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17821021378040314,
|
|
"step": 2580,
|
|
"valid_targets_mean": 3086.1,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 6.430348258706467,
|
|
"grad_norm": 0.579617792634382,
|
|
"learning_rate": 8.088698844263243e-07,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16770654916763306,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3542.5,
|
|
"valid_targets_min": 2121
|
|
},
|
|
{
|
|
"epoch": 6.442786069651741,
|
|
"grad_norm": 0.5395856283137651,
|
|
"learning_rate": 7.743101489572491e-07,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19851364195346832,
|
|
"step": 2590,
|
|
"valid_targets_mean": 4706.3,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 6.455223880597015,
|
|
"grad_norm": 0.5238594468934492,
|
|
"learning_rate": 7.404903473294612e-07,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15577292442321777,
|
|
"step": 2595,
|
|
"valid_targets_mean": 4178.8,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 6.467661691542289,
|
|
"grad_norm": 0.632754230583396,
|
|
"learning_rate": 7.074117811563619e-07,
|
|
"loss": 0.1698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17483484745025635,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3031.8,
|
|
"valid_targets_min": 344
|
|
},
|
|
{
|
|
"epoch": 6.480099502487562,
|
|
"grad_norm": 0.5671291151476788,
|
|
"learning_rate": 6.750757235236461e-07,
|
|
"loss": 0.1739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17332680523395538,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3747.0,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 6.492537313432836,
|
|
"grad_norm": 0.6093460900077653,
|
|
"learning_rate": 6.434834189402716e-07,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18352875113487244,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3692.0,
|
|
"valid_targets_min": 1907
|
|
},
|
|
{
|
|
"epoch": 6.5049751243781095,
|
|
"grad_norm": 0.574114276787967,
|
|
"learning_rate": 6.126360832905831e-07,
|
|
"loss": 0.1819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18569818139076233,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3639.9,
|
|
"valid_targets_min": 1544
|
|
},
|
|
{
|
|
"epoch": 6.517412935323383,
|
|
"grad_norm": 0.5146553892569846,
|
|
"learning_rate": 5.825349037875106e-07,
|
|
"loss": 0.1779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16424797475337982,
|
|
"step": 2620,
|
|
"valid_targets_mean": 4595.1,
|
|
"valid_targets_min": 1975
|
|
},
|
|
{
|
|
"epoch": 6.529850746268656,
|
|
"grad_norm": 0.5252261416414551,
|
|
"learning_rate": 5.531810389268732e-07,
|
|
"loss": 0.1601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15995575487613678,
|
|
"step": 2625,
|
|
"valid_targets_mean": 4328.8,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 6.54228855721393,
|
|
"grad_norm": 0.5685319838301356,
|
|
"learning_rate": 5.245756184428041e-07,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18082097172737122,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4233.4,
|
|
"valid_targets_min": 1916
|
|
},
|
|
{
|
|
"epoch": 6.554726368159204,
|
|
"grad_norm": 0.5341982017596594,
|
|
"learning_rate": 4.967197432642579e-07,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17173001170158386,
|
|
"step": 2635,
|
|
"valid_targets_mean": 4326.6,
|
|
"valid_targets_min": 2650
|
|
},
|
|
{
|
|
"epoch": 6.567164179104478,
|
|
"grad_norm": 0.6401995160355601,
|
|
"learning_rate": 4.69614485472647e-07,
|
|
"loss": 0.1765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17902860045433044,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3158.5,
|
|
"valid_targets_min": 191
|
|
},
|
|
{
|
|
"epoch": 6.579601990049751,
|
|
"grad_norm": 0.5667151478749745,
|
|
"learning_rate": 4.432608882605771e-07,
|
|
"loss": 0.1724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17057490348815918,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3628.1,
|
|
"valid_targets_min": 2236
|
|
},
|
|
{
|
|
"epoch": 6.592039800995025,
|
|
"grad_norm": 0.5485021773091285,
|
|
"learning_rate": 4.1765996589170353e-07,
|
|
"loss": 0.1807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.179216206073761,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4233.1,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 6.604477611940299,
|
|
"grad_norm": 0.519072304817314,
|
|
"learning_rate": 3.928127036616869e-07,
|
|
"loss": 0.1752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18321016430854797,
|
|
"step": 2655,
|
|
"valid_targets_mean": 4888.4,
|
|
"valid_targets_min": 1732
|
|
},
|
|
{
|
|
"epoch": 6.616915422885572,
|
|
"grad_norm": 0.5786793950538549,
|
|
"learning_rate": 3.687200578602812e-07,
|
|
"loss": 0.1723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17171373963356018,
|
|
"step": 2660,
|
|
"valid_targets_mean": 4240.3,
|
|
"valid_targets_min": 1550
|
|
},
|
|
{
|
|
"epoch": 6.629353233830845,
|
|
"grad_norm": 0.5589571697627682,
|
|
"learning_rate": 3.453829557345212e-07,
|
|
"loss": 0.1626,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1652020364999771,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3949.5,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 6.641791044776119,
|
|
"grad_norm": 0.5511197102525519,
|
|
"learning_rate": 3.228022954530463e-07,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1774759292602539,
|
|
"step": 2670,
|
|
"valid_targets_mean": 4197.3,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 6.654228855721393,
|
|
"grad_norm": 0.5864879776585974,
|
|
"learning_rate": 3.009789460715218e-07,
|
|
"loss": 0.1902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16247567534446716,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3430.1,
|
|
"valid_targets_min": 926
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.5572934258361881,
|
|
"learning_rate": 2.799137474991942e-07,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18331031501293182,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3912.9,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 6.67910447761194,
|
|
"grad_norm": 0.5559560602699174,
|
|
"learning_rate": 2.5960751046657296e-07,
|
|
"loss": 0.1733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17147520184516907,
|
|
"step": 2685,
|
|
"valid_targets_mean": 4053.0,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 6.691542288557214,
|
|
"grad_norm": 0.5575426373568327,
|
|
"learning_rate": 2.400610164942241e-07,
|
|
"loss": 0.1679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16202571988105774,
|
|
"step": 2690,
|
|
"valid_targets_mean": 4100.9,
|
|
"valid_targets_min": 2530
|
|
},
|
|
{
|
|
"epoch": 6.703980099502488,
|
|
"grad_norm": 0.5562136135591627,
|
|
"learning_rate": 2.2127501786268546e-07,
|
|
"loss": 0.169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15551359951496124,
|
|
"step": 2695,
|
|
"valid_targets_mean": 4108.9,
|
|
"valid_targets_min": 2029
|
|
},
|
|
{
|
|
"epoch": 6.7164179104477615,
|
|
"grad_norm": 0.5198839932899565,
|
|
"learning_rate": 2.0325023758352545e-07,
|
|
"loss": 0.1741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17046837508678436,
|
|
"step": 2700,
|
|
"valid_targets_mean": 4698.8,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 6.7288557213930345,
|
|
"grad_norm": 0.5938343168062021,
|
|
"learning_rate": 1.859873693715075e-07,
|
|
"loss": 0.1756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1763916015625,
|
|
"step": 2705,
|
|
"valid_targets_mean": 4298.5,
|
|
"valid_targets_min": 1533
|
|
},
|
|
{
|
|
"epoch": 6.741293532338308,
|
|
"grad_norm": 0.5495808847371538,
|
|
"learning_rate": 1.6948707761789807e-07,
|
|
"loss": 0.1751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17928853631019592,
|
|
"step": 2710,
|
|
"valid_targets_mean": 4255.4,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 6.753731343283582,
|
|
"grad_norm": 0.6045663915002609,
|
|
"learning_rate": 1.5374999736488927e-07,
|
|
"loss": 0.1703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1754097044467926,
|
|
"step": 2715,
|
|
"valid_targets_mean": 3834.8,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 6.766169154228856,
|
|
"grad_norm": 0.5553708025379133,
|
|
"learning_rate": 1.3877673428116302e-07,
|
|
"loss": 0.179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19348856806755066,
|
|
"step": 2720,
|
|
"valid_targets_mean": 4249.7,
|
|
"valid_targets_min": 3032
|
|
},
|
|
{
|
|
"epoch": 6.778606965174129,
|
|
"grad_norm": 0.629594904612257,
|
|
"learning_rate": 1.245678646385784e-07,
|
|
"loss": 0.1668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17597973346710205,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3250.0,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 6.791044776119403,
|
|
"grad_norm": 0.5963075561787823,
|
|
"learning_rate": 1.1112393529000288e-07,
|
|
"loss": 0.1738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1901685893535614,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3795.7,
|
|
"valid_targets_min": 2540
|
|
},
|
|
{
|
|
"epoch": 6.803482587064677,
|
|
"grad_norm": 0.5609315377417177,
|
|
"learning_rate": 9.844546364824459e-08,
|
|
"loss": 0.1721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17084071040153503,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3876.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 6.8159203980099505,
|
|
"grad_norm": 0.5043275099355118,
|
|
"learning_rate": 8.653293766615945e-08,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16672548651695251,
|
|
"step": 2740,
|
|
"valid_targets_mean": 4784.5,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 6.8283582089552235,
|
|
"grad_norm": 0.6108276462294715,
|
|
"learning_rate": 7.538681581785945e-08,
|
|
"loss": 0.176,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18459632992744446,
|
|
"step": 2745,
|
|
"valid_targets_mean": 3379.5,
|
|
"valid_targets_min": 852
|
|
},
|
|
{
|
|
"epoch": 6.840796019900497,
|
|
"grad_norm": 0.5499395587166259,
|
|
"learning_rate": 6.50075270810735e-08,
|
|
"loss": 0.1778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16682520508766174,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4087.9,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 6.853233830845771,
|
|
"grad_norm": 0.5762377369105647,
|
|
"learning_rate": 5.539547092063391e-08,
|
|
"loss": 0.1763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16152673959732056,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3740.1,
|
|
"valid_targets_min": 862
|
|
},
|
|
{
|
|
"epoch": 6.865671641791045,
|
|
"grad_norm": 0.6162665248042457,
|
|
"learning_rate": 4.655101727310651e-08,
|
|
"loss": 0.168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17481867969036102,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3653.2,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 6.878109452736318,
|
|
"grad_norm": 0.5537138349691514,
|
|
"learning_rate": 3.847450653254425e-08,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19818416237831116,
|
|
"step": 2765,
|
|
"valid_targets_mean": 4187.0,
|
|
"valid_targets_min": 2037
|
|
},
|
|
{
|
|
"epoch": 6.890547263681592,
|
|
"grad_norm": 0.5538646057123637,
|
|
"learning_rate": 3.1166249537402104e-08,
|
|
"loss": 0.1747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1677202731370926,
|
|
"step": 2770,
|
|
"valid_targets_mean": 4246.5,
|
|
"valid_targets_min": 1887
|
|
},
|
|
{
|
|
"epoch": 6.902985074626866,
|
|
"grad_norm": 0.5458989012174927,
|
|
"learning_rate": 2.4626527558551106e-08,
|
|
"loss": 0.1684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16506440937519073,
|
|
"step": 2775,
|
|
"valid_targets_mean": 4147.2,
|
|
"valid_targets_min": 1054
|
|
},
|
|
{
|
|
"epoch": 6.91542288557214,
|
|
"grad_norm": 0.5428319526484747,
|
|
"learning_rate": 1.885559228847811e-08,
|
|
"loss": 0.1753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16628624498844147,
|
|
"step": 2780,
|
|
"valid_targets_mean": 4325.8,
|
|
"valid_targets_min": 2837
|
|
},
|
|
{
|
|
"epoch": 6.927860696517413,
|
|
"grad_norm": 0.5215809087161279,
|
|
"learning_rate": 1.385366583158243e-08,
|
|
"loss": 0.1767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19465038180351257,
|
|
"step": 2785,
|
|
"valid_targets_mean": 4546.7,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 6.940298507462686,
|
|
"grad_norm": 0.5104612235616637,
|
|
"learning_rate": 9.620940695633797e-09,
|
|
"loss": 0.183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1838764250278473,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4532.4,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 6.95273631840796,
|
|
"grad_norm": 0.5508437850202684,
|
|
"learning_rate": 6.15757978435827e-09,
|
|
"loss": 0.1744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17743362486362457,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4273.7,
|
|
"valid_targets_min": 2596
|
|
},
|
|
{
|
|
"epoch": 6.965174129353234,
|
|
"grad_norm": 0.5364623622959384,
|
|
"learning_rate": 3.463716391176597e-09,
|
|
"loss": 0.1799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2021486759185791,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4783.9,
|
|
"valid_targets_min": 1910
|
|
},
|
|
{
|
|
"epoch": 6.977611940298507,
|
|
"grad_norm": 0.5071144651033146,
|
|
"learning_rate": 1.5394541940705332e-09,
|
|
"loss": 0.1691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16921557486057281,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4896.3,
|
|
"valid_targets_min": 1519
|
|
},
|
|
{
|
|
"epoch": 6.990049751243781,
|
|
"grad_norm": 0.5760603012343628,
|
|
"learning_rate": 3.848672515882612e-10,
|
|
"loss": 0.1735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16937696933746338,
|
|
"step": 2810,
|
|
"valid_targets_mean": 3735.2,
|
|
"valid_targets_min": 2097
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17162613570690155,
|
|
"step": 2814,
|
|
"total_flos": 1157609273950208.0,
|
|
"train_loss": 0.22598929736066897,
|
|
"train_runtime": 20554.2284,
|
|
"train_samples_per_second": 2.189,
|
|
"train_steps_per_second": 0.137,
|
|
"valid_targets_mean": 3578.4,
|
|
"valid_targets_min": 1983
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 2814,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1157609273950208.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|