9255 lines
256 KiB
JSON
9255 lines
256 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4186,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.012626262626262626,
|
|
"grad_norm": 8.8502213152326,
|
|
"learning_rate": 5.755395683453238e-07,
|
|
"loss": 0.9749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 1.0252163410186768,
|
|
"step": 5,
|
|
"valid_targets_mean": 3810.4,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 0.025252525252525252,
|
|
"grad_norm": 7.0326382192271595,
|
|
"learning_rate": 1.2949640287769785e-06,
|
|
"loss": 0.97,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8746949434280396,
|
|
"step": 10,
|
|
"valid_targets_mean": 4736.2,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 0.03787878787878788,
|
|
"grad_norm": 6.6940046336920975,
|
|
"learning_rate": 2.0143884892086333e-06,
|
|
"loss": 0.9064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8456529378890991,
|
|
"step": 15,
|
|
"valid_targets_mean": 3504.3,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 0.050505050505050504,
|
|
"grad_norm": 7.365976748970077,
|
|
"learning_rate": 2.733812949640288e-06,
|
|
"loss": 0.8841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.9217684268951416,
|
|
"step": 20,
|
|
"valid_targets_mean": 2712.1,
|
|
"valid_targets_min": 1633
|
|
},
|
|
{
|
|
"epoch": 0.06313131313131314,
|
|
"grad_norm": 4.630647013342811,
|
|
"learning_rate": 3.453237410071943e-06,
|
|
"loss": 0.8408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7999893426895142,
|
|
"step": 25,
|
|
"valid_targets_mean": 2972.6,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 0.07575757575757576,
|
|
"grad_norm": 2.9138235176363723,
|
|
"learning_rate": 4.172661870503597e-06,
|
|
"loss": 0.7736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7608543634414673,
|
|
"step": 30,
|
|
"valid_targets_mean": 3347.9,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 0.08838383838383838,
|
|
"grad_norm": 1.7558069634979983,
|
|
"learning_rate": 4.892086330935253e-06,
|
|
"loss": 0.7821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.826799750328064,
|
|
"step": 35,
|
|
"valid_targets_mean": 4368.2,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 0.10101010101010101,
|
|
"grad_norm": 1.7134790575388017,
|
|
"learning_rate": 5.611510791366906e-06,
|
|
"loss": 0.7603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8243893384933472,
|
|
"step": 40,
|
|
"valid_targets_mean": 3075.2,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 0.11363636363636363,
|
|
"grad_norm": 1.2388060565695977,
|
|
"learning_rate": 6.330935251798561e-06,
|
|
"loss": 0.6669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6428935527801514,
|
|
"step": 45,
|
|
"valid_targets_mean": 3558.9,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 0.12626262626262627,
|
|
"grad_norm": 1.0686895415900157,
|
|
"learning_rate": 7.050359712230216e-06,
|
|
"loss": 0.6699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6036921143531799,
|
|
"step": 50,
|
|
"valid_targets_mean": 2548.7,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 0.1388888888888889,
|
|
"grad_norm": 0.8408313638128855,
|
|
"learning_rate": 7.769784172661872e-06,
|
|
"loss": 0.6825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6488921642303467,
|
|
"step": 55,
|
|
"valid_targets_mean": 3712.4,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 0.15151515151515152,
|
|
"grad_norm": 0.8793404803472428,
|
|
"learning_rate": 8.489208633093526e-06,
|
|
"loss": 0.6406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5900353193283081,
|
|
"step": 60,
|
|
"valid_targets_mean": 2931.1,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 0.16414141414141414,
|
|
"grad_norm": 0.9233082070664455,
|
|
"learning_rate": 9.20863309352518e-06,
|
|
"loss": 0.6118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6843211650848389,
|
|
"step": 65,
|
|
"valid_targets_mean": 2688.2,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 0.17676767676767677,
|
|
"grad_norm": 0.7695618266666612,
|
|
"learning_rate": 9.928057553956835e-06,
|
|
"loss": 0.5512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5856298208236694,
|
|
"step": 70,
|
|
"valid_targets_mean": 2935.6,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 0.1893939393939394,
|
|
"grad_norm": 0.7101539830507525,
|
|
"learning_rate": 1.0647482014388491e-05,
|
|
"loss": 0.5928,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.602681577205658,
|
|
"step": 75,
|
|
"valid_targets_mean": 3589.7,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 0.20202020202020202,
|
|
"grad_norm": 0.6757736826618855,
|
|
"learning_rate": 1.1366906474820146e-05,
|
|
"loss": 0.5919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5044267177581787,
|
|
"step": 80,
|
|
"valid_targets_mean": 3180.0,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 0.21464646464646464,
|
|
"grad_norm": 0.7052512337384288,
|
|
"learning_rate": 1.20863309352518e-05,
|
|
"loss": 0.5713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.546467661857605,
|
|
"step": 85,
|
|
"valid_targets_mean": 2828.8,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 0.22727272727272727,
|
|
"grad_norm": 0.8245207595088883,
|
|
"learning_rate": 1.2805755395683454e-05,
|
|
"loss": 0.5641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.605029284954071,
|
|
"step": 90,
|
|
"valid_targets_mean": 2803.2,
|
|
"valid_targets_min": 643
|
|
},
|
|
{
|
|
"epoch": 0.2398989898989899,
|
|
"grad_norm": 0.7141934894188182,
|
|
"learning_rate": 1.3525179856115109e-05,
|
|
"loss": 0.5706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5871100425720215,
|
|
"step": 95,
|
|
"valid_targets_mean": 3075.2,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 0.25252525252525254,
|
|
"grad_norm": 0.7508217175713096,
|
|
"learning_rate": 1.4244604316546765e-05,
|
|
"loss": 0.5615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5834653973579407,
|
|
"step": 100,
|
|
"valid_targets_mean": 3024.4,
|
|
"valid_targets_min": 1610
|
|
},
|
|
{
|
|
"epoch": 0.26515151515151514,
|
|
"grad_norm": 0.6869296589904229,
|
|
"learning_rate": 1.496402877697842e-05,
|
|
"loss": 0.5107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5773706436157227,
|
|
"step": 105,
|
|
"valid_targets_mean": 3346.2,
|
|
"valid_targets_min": 465
|
|
},
|
|
{
|
|
"epoch": 0.2777777777777778,
|
|
"grad_norm": 0.6649670199118626,
|
|
"learning_rate": 1.5683453237410072e-05,
|
|
"loss": 0.6192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5906274318695068,
|
|
"step": 110,
|
|
"valid_targets_mean": 4020.4,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 0.2904040404040404,
|
|
"grad_norm": 0.749817497328354,
|
|
"learning_rate": 1.640287769784173e-05,
|
|
"loss": 0.542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6468950510025024,
|
|
"step": 115,
|
|
"valid_targets_mean": 3753.2,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"grad_norm": 0.6555186253660183,
|
|
"learning_rate": 1.7122302158273384e-05,
|
|
"loss": 0.5134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5901004076004028,
|
|
"step": 120,
|
|
"valid_targets_mean": 4881.6,
|
|
"valid_targets_min": 1494
|
|
},
|
|
{
|
|
"epoch": 0.31565656565656564,
|
|
"grad_norm": 0.7273014966297405,
|
|
"learning_rate": 1.784172661870504e-05,
|
|
"loss": 0.5702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5654794573783875,
|
|
"step": 125,
|
|
"valid_targets_mean": 3368.1,
|
|
"valid_targets_min": 1521
|
|
},
|
|
{
|
|
"epoch": 0.3282828282828283,
|
|
"grad_norm": 0.7586180547020986,
|
|
"learning_rate": 1.8561151079136693e-05,
|
|
"loss": 0.5293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41630545258522034,
|
|
"step": 130,
|
|
"valid_targets_mean": 3200.5,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 0.3409090909090909,
|
|
"grad_norm": 0.7688770234195088,
|
|
"learning_rate": 1.9280575539568347e-05,
|
|
"loss": 0.5299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5642590522766113,
|
|
"step": 135,
|
|
"valid_targets_mean": 3158.9,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 0.35353535353535354,
|
|
"grad_norm": 0.8258141160820167,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.4706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43767279386520386,
|
|
"step": 140,
|
|
"valid_targets_mean": 2534.0,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 0.3661616161616162,
|
|
"grad_norm": 0.6638208230368448,
|
|
"learning_rate": 2.0719424460431656e-05,
|
|
"loss": 0.5226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5216605067253113,
|
|
"step": 145,
|
|
"valid_targets_mean": 3478.1,
|
|
"valid_targets_min": 1455
|
|
},
|
|
{
|
|
"epoch": 0.3787878787878788,
|
|
"grad_norm": 0.6659508828600877,
|
|
"learning_rate": 2.143884892086331e-05,
|
|
"loss": 0.5054,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46430742740631104,
|
|
"step": 150,
|
|
"valid_targets_mean": 3356.1,
|
|
"valid_targets_min": 1804
|
|
},
|
|
{
|
|
"epoch": 0.39141414141414144,
|
|
"grad_norm": 0.6857469917039041,
|
|
"learning_rate": 2.2158273381294965e-05,
|
|
"loss": 0.4803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46997326612472534,
|
|
"step": 155,
|
|
"valid_targets_mean": 3069.1,
|
|
"valid_targets_min": 712
|
|
},
|
|
{
|
|
"epoch": 0.40404040404040403,
|
|
"grad_norm": 0.7497478551719995,
|
|
"learning_rate": 2.287769784172662e-05,
|
|
"loss": 0.5096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5357323884963989,
|
|
"step": 160,
|
|
"valid_targets_mean": 2978.5,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 0.4166666666666667,
|
|
"grad_norm": 0.6266767386519286,
|
|
"learning_rate": 2.3597122302158274e-05,
|
|
"loss": 0.4854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49465256929397583,
|
|
"step": 165,
|
|
"valid_targets_mean": 4385.0,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 0.4292929292929293,
|
|
"grad_norm": 0.7198305240954321,
|
|
"learning_rate": 2.431654676258993e-05,
|
|
"loss": 0.5133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4758256673812866,
|
|
"step": 170,
|
|
"valid_targets_mean": 3358.2,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 0.44191919191919193,
|
|
"grad_norm": 0.744564295446295,
|
|
"learning_rate": 2.5035971223021586e-05,
|
|
"loss": 0.4285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4645718038082123,
|
|
"step": 175,
|
|
"valid_targets_mean": 3257.9,
|
|
"valid_targets_min": 713
|
|
},
|
|
{
|
|
"epoch": 0.45454545454545453,
|
|
"grad_norm": 0.741277701622229,
|
|
"learning_rate": 2.575539568345324e-05,
|
|
"loss": 0.4867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5038046836853027,
|
|
"step": 180,
|
|
"valid_targets_mean": 3023.1,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 0.4671717171717172,
|
|
"grad_norm": 0.7440421968116456,
|
|
"learning_rate": 2.6474820143884895e-05,
|
|
"loss": 0.5095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.570155918598175,
|
|
"step": 185,
|
|
"valid_targets_mean": 3422.4,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 0.4797979797979798,
|
|
"grad_norm": 0.8024642384967607,
|
|
"learning_rate": 2.719424460431655e-05,
|
|
"loss": 0.4493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5823109149932861,
|
|
"step": 190,
|
|
"valid_targets_mean": 2894.9,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 0.49242424242424243,
|
|
"grad_norm": 0.7498359662712905,
|
|
"learning_rate": 2.7913669064748203e-05,
|
|
"loss": 0.5014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5892109870910645,
|
|
"step": 195,
|
|
"valid_targets_mean": 3757.8,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 0.5050505050505051,
|
|
"grad_norm": 0.6969972771455335,
|
|
"learning_rate": 2.8633093525179858e-05,
|
|
"loss": 0.4966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4585283696651459,
|
|
"step": 200,
|
|
"valid_targets_mean": 3301.2,
|
|
"valid_targets_min": 1200
|
|
},
|
|
{
|
|
"epoch": 0.5176767676767676,
|
|
"grad_norm": 0.7428922534210859,
|
|
"learning_rate": 2.9352517985611512e-05,
|
|
"loss": 0.4873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35433435440063477,
|
|
"step": 205,
|
|
"valid_targets_mean": 2534.4,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 0.5303030303030303,
|
|
"grad_norm": 0.7952927627365796,
|
|
"learning_rate": 3.0071942446043167e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3344321846961975,
|
|
"step": 210,
|
|
"valid_targets_mean": 2273.5,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 0.5429292929292929,
|
|
"grad_norm": 0.7452719253142043,
|
|
"learning_rate": 3.0791366906474824e-05,
|
|
"loss": 0.4887,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4398002624511719,
|
|
"step": 215,
|
|
"valid_targets_mean": 2894.6,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 0.793089120317565,
|
|
"learning_rate": 3.1510791366906475e-05,
|
|
"loss": 0.4446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3552176058292389,
|
|
"step": 220,
|
|
"valid_targets_mean": 2370.2,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 0.5681818181818182,
|
|
"grad_norm": 0.6957948039355799,
|
|
"learning_rate": 3.223021582733813e-05,
|
|
"loss": 0.4677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4744777977466583,
|
|
"step": 225,
|
|
"valid_targets_mean": 3270.9,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 0.5808080808080808,
|
|
"grad_norm": 0.9598066573051235,
|
|
"learning_rate": 3.2949640287769784e-05,
|
|
"loss": 0.4678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.53702712059021,
|
|
"step": 230,
|
|
"valid_targets_mean": 3305.6,
|
|
"valid_targets_min": 1465
|
|
},
|
|
{
|
|
"epoch": 0.5934343434343434,
|
|
"grad_norm": 0.7385990473183633,
|
|
"learning_rate": 3.366906474820144e-05,
|
|
"loss": 0.4269,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4913618564605713,
|
|
"step": 235,
|
|
"valid_targets_mean": 3022.6,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"grad_norm": 0.8954334270151011,
|
|
"learning_rate": 3.438848920863309e-05,
|
|
"loss": 0.4773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4261593222618103,
|
|
"step": 240,
|
|
"valid_targets_mean": 3027.9,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 0.6186868686868687,
|
|
"grad_norm": 0.7460534615902407,
|
|
"learning_rate": 3.510791366906475e-05,
|
|
"loss": 0.4695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5014075040817261,
|
|
"step": 245,
|
|
"valid_targets_mean": 3182.8,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 0.6313131313131313,
|
|
"grad_norm": 0.7476045212031468,
|
|
"learning_rate": 3.582733812949641e-05,
|
|
"loss": 0.4897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5569243431091309,
|
|
"step": 250,
|
|
"valid_targets_mean": 4160.6,
|
|
"valid_targets_min": 1184
|
|
},
|
|
{
|
|
"epoch": 0.6439393939393939,
|
|
"grad_norm": 0.7290825841839375,
|
|
"learning_rate": 3.654676258992806e-05,
|
|
"loss": 0.5141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5026414394378662,
|
|
"step": 255,
|
|
"valid_targets_mean": 3161.4,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 0.6565656565656566,
|
|
"grad_norm": 0.7490321855827192,
|
|
"learning_rate": 3.726618705035972e-05,
|
|
"loss": 0.5036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46991053223609924,
|
|
"step": 260,
|
|
"valid_targets_mean": 2653.6,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.6691919191919192,
|
|
"grad_norm": 0.739603256968017,
|
|
"learning_rate": 3.798561151079137e-05,
|
|
"loss": 0.4898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4988292157649994,
|
|
"step": 265,
|
|
"valid_targets_mean": 3293.9,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 0.6818181818181818,
|
|
"grad_norm": 0.8025519110811695,
|
|
"learning_rate": 3.8705035971223026e-05,
|
|
"loss": 0.5011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5377944707870483,
|
|
"step": 270,
|
|
"valid_targets_mean": 3180.2,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 0.6944444444444444,
|
|
"grad_norm": 0.7900087471325877,
|
|
"learning_rate": 3.942446043165468e-05,
|
|
"loss": 0.4673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5039912462234497,
|
|
"step": 275,
|
|
"valid_targets_mean": 2830.6,
|
|
"valid_targets_min": 1799
|
|
},
|
|
{
|
|
"epoch": 0.7070707070707071,
|
|
"grad_norm": 0.7659391333417415,
|
|
"learning_rate": 3.999998413256274e-05,
|
|
"loss": 0.5003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4912080764770508,
|
|
"step": 280,
|
|
"valid_targets_mean": 3327.0,
|
|
"valid_targets_min": 790
|
|
},
|
|
{
|
|
"epoch": 0.7196969696969697,
|
|
"grad_norm": 0.694845702448467,
|
|
"learning_rate": 3.9999428774902425e-05,
|
|
"loss": 0.4588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5030688643455505,
|
|
"step": 285,
|
|
"valid_targets_mean": 3079.2,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 0.7323232323232324,
|
|
"grad_norm": 0.7144245421458943,
|
|
"learning_rate": 3.999808007055667e-05,
|
|
"loss": 0.4796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5815045237541199,
|
|
"step": 290,
|
|
"valid_targets_mean": 4749.9,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 0.7449494949494949,
|
|
"grad_norm": 0.8137606805905706,
|
|
"learning_rate": 3.999593807302654e-05,
|
|
"loss": 0.4778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3889826536178589,
|
|
"step": 295,
|
|
"valid_targets_mean": 2409.6,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 0.7575757575757576,
|
|
"grad_norm": 0.7671755034689156,
|
|
"learning_rate": 3.999300286728176e-05,
|
|
"loss": 0.4424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40401798486709595,
|
|
"step": 300,
|
|
"valid_targets_mean": 2374.2,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 0.7702020202020202,
|
|
"grad_norm": 0.775435011663138,
|
|
"learning_rate": 3.9989274569757467e-05,
|
|
"loss": 0.4529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5113080143928528,
|
|
"step": 305,
|
|
"valid_targets_mean": 3065.1,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 0.7828282828282829,
|
|
"grad_norm": 0.7829159491303057,
|
|
"learning_rate": 3.99847533283495e-05,
|
|
"loss": 0.4483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4134717285633087,
|
|
"step": 310,
|
|
"valid_targets_mean": 2551.4,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 0.7954545454545454,
|
|
"grad_norm": 0.70594805762661,
|
|
"learning_rate": 3.9979439322408575e-05,
|
|
"loss": 0.4722,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5452604293823242,
|
|
"step": 315,
|
|
"valid_targets_mean": 4207.9,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 0.8080808080808081,
|
|
"grad_norm": 0.741059134063674,
|
|
"learning_rate": 3.9973332762733165e-05,
|
|
"loss": 0.4703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.523596465587616,
|
|
"step": 320,
|
|
"valid_targets_mean": 3050.4,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 0.8207070707070707,
|
|
"grad_norm": 0.646169301673137,
|
|
"learning_rate": 3.996643389156114e-05,
|
|
"loss": 0.4531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4629998803138733,
|
|
"step": 325,
|
|
"valid_targets_mean": 3761.1,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.7311851901523451,
|
|
"learning_rate": 3.995874298256014e-05,
|
|
"loss": 0.4769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49332910776138306,
|
|
"step": 330,
|
|
"valid_targets_mean": 3003.4,
|
|
"valid_targets_min": 1840
|
|
},
|
|
{
|
|
"epoch": 0.8459595959595959,
|
|
"grad_norm": 0.684697792793549,
|
|
"learning_rate": 3.9950260340816723e-05,
|
|
"loss": 0.4772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.476440966129303,
|
|
"step": 335,
|
|
"valid_targets_mean": 3521.4,
|
|
"valid_targets_min": 1763
|
|
},
|
|
{
|
|
"epoch": 0.8585858585858586,
|
|
"grad_norm": 0.7235672828868595,
|
|
"learning_rate": 3.9940986302824305e-05,
|
|
"loss": 0.4573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4808931350708008,
|
|
"step": 340,
|
|
"valid_targets_mean": 3157.8,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 0.8712121212121212,
|
|
"grad_norm": 0.618583293178677,
|
|
"learning_rate": 3.993092123646975e-05,
|
|
"loss": 0.4419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4222564101219177,
|
|
"step": 345,
|
|
"valid_targets_mean": 3462.4,
|
|
"valid_targets_min": 1697
|
|
},
|
|
{
|
|
"epoch": 0.8838383838383839,
|
|
"grad_norm": 0.9731914681849075,
|
|
"learning_rate": 3.992006554101881e-05,
|
|
"loss": 0.4431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4798777997493744,
|
|
"step": 350,
|
|
"valid_targets_mean": 2746.6,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 0.8964646464646465,
|
|
"grad_norm": 0.6711603679996875,
|
|
"learning_rate": 3.99084196471003e-05,
|
|
"loss": 0.4315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39052852988243103,
|
|
"step": 355,
|
|
"valid_targets_mean": 2697.2,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 0.6756824530106742,
|
|
"learning_rate": 3.9895984016688956e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4884558618068695,
|
|
"step": 360,
|
|
"valid_targets_mean": 3316.8,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 0.9217171717171717,
|
|
"grad_norm": 0.6730754745335135,
|
|
"learning_rate": 3.9882759143087194e-05,
|
|
"loss": 0.4558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4529365003108978,
|
|
"step": 365,
|
|
"valid_targets_mean": 2792.6,
|
|
"valid_targets_min": 1934
|
|
},
|
|
{
|
|
"epoch": 0.9343434343434344,
|
|
"grad_norm": 0.7396239570533569,
|
|
"learning_rate": 3.9868745550905475e-05,
|
|
"loss": 0.4806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48611992597579956,
|
|
"step": 370,
|
|
"valid_targets_mean": 3224.1,
|
|
"valid_targets_min": 1756
|
|
},
|
|
{
|
|
"epoch": 0.946969696969697,
|
|
"grad_norm": 0.5859949295622379,
|
|
"learning_rate": 3.985394379604152e-05,
|
|
"loss": 0.4543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4721411466598511,
|
|
"step": 375,
|
|
"valid_targets_mean": 4448.3,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 0.9595959595959596,
|
|
"grad_norm": 0.6661047355223618,
|
|
"learning_rate": 3.983835446565826e-05,
|
|
"loss": 0.4424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49498146772384644,
|
|
"step": 380,
|
|
"valid_targets_mean": 3239.3,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 0.9722222222222222,
|
|
"grad_norm": 0.5805898239548394,
|
|
"learning_rate": 3.982197817816054e-05,
|
|
"loss": 0.4422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38239431381225586,
|
|
"step": 385,
|
|
"valid_targets_mean": 3780.7,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 0.9848484848484849,
|
|
"grad_norm": 0.7798921127063227,
|
|
"learning_rate": 3.980481558317057e-05,
|
|
"loss": 0.4662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5424267053604126,
|
|
"step": 390,
|
|
"valid_targets_mean": 3867.1,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 0.9974747474747475,
|
|
"grad_norm": 0.6339986265381534,
|
|
"learning_rate": 3.978686736150221e-05,
|
|
"loss": 0.4535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47920912504196167,
|
|
"step": 395,
|
|
"valid_targets_mean": 3606.8,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 1.0101010101010102,
|
|
"grad_norm": 0.6706130776528586,
|
|
"learning_rate": 3.976813422513388e-05,
|
|
"loss": 0.437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4376629590988159,
|
|
"step": 400,
|
|
"valid_targets_mean": 3430.6,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 1.0227272727272727,
|
|
"grad_norm": 0.6708127261682855,
|
|
"learning_rate": 3.9748616917180406e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3455820381641388,
|
|
"step": 405,
|
|
"valid_targets_mean": 2637.0,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 1.0353535353535352,
|
|
"grad_norm": 0.7742495725599213,
|
|
"learning_rate": 3.972831621186345e-05,
|
|
"loss": 0.417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3599938154220581,
|
|
"step": 410,
|
|
"valid_targets_mean": 2649.8,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 1.047979797979798,
|
|
"grad_norm": 0.6102251044562367,
|
|
"learning_rate": 3.970723291448092e-05,
|
|
"loss": 0.4354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.445651650428772,
|
|
"step": 415,
|
|
"valid_targets_mean": 4083.1,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 1.0606060606060606,
|
|
"grad_norm": 0.6976342123087943,
|
|
"learning_rate": 3.968536786137487e-05,
|
|
"loss": 0.4382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40597984194755554,
|
|
"step": 420,
|
|
"valid_targets_mean": 2881.2,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 1.0732323232323233,
|
|
"grad_norm": 0.8976764193372789,
|
|
"learning_rate": 3.966272191989847e-05,
|
|
"loss": 0.4593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5112510323524475,
|
|
"step": 425,
|
|
"valid_targets_mean": 3042.1,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 1.0858585858585859,
|
|
"grad_norm": 0.7365732301930217,
|
|
"learning_rate": 3.963929598838149e-05,
|
|
"loss": 0.4145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43491220474243164,
|
|
"step": 430,
|
|
"valid_targets_mean": 2794.9,
|
|
"valid_targets_min": 1679
|
|
},
|
|
{
|
|
"epoch": 1.0984848484848484,
|
|
"grad_norm": 0.6773959614029044,
|
|
"learning_rate": 3.9615090996094726e-05,
|
|
"loss": 0.4226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5162553787231445,
|
|
"step": 435,
|
|
"valid_targets_mean": 4462.0,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 0.5970043671496138,
|
|
"learning_rate": 3.959010790321313e-05,
|
|
"loss": 0.4334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33793818950653076,
|
|
"step": 440,
|
|
"valid_targets_mean": 3196.2,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 1.1237373737373737,
|
|
"grad_norm": 0.7763112041835692,
|
|
"learning_rate": 3.9564347700777705e-05,
|
|
"loss": 0.4279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4716646075248718,
|
|
"step": 445,
|
|
"valid_targets_mean": 2790.6,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 1.1363636363636362,
|
|
"grad_norm": 0.7191861233677,
|
|
"learning_rate": 3.953781141065619e-05,
|
|
"loss": 0.4571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4991220235824585,
|
|
"step": 450,
|
|
"valid_targets_mean": 2805.2,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 1.148989898989899,
|
|
"grad_norm": 0.6931761780564214,
|
|
"learning_rate": 3.9510500085502556e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3412482738494873,
|
|
"step": 455,
|
|
"valid_targets_mean": 2871.2,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 1.1616161616161615,
|
|
"grad_norm": 0.5776459149074374,
|
|
"learning_rate": 3.9482414808715194e-05,
|
|
"loss": 0.4203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4245602786540985,
|
|
"step": 460,
|
|
"valid_targets_mean": 4715.2,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 1.1742424242424243,
|
|
"grad_norm": 0.6555627012964024,
|
|
"learning_rate": 3.945355669439399e-05,
|
|
"loss": 0.4475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44374722242355347,
|
|
"step": 465,
|
|
"valid_targets_mean": 3508.2,
|
|
"valid_targets_min": 882
|
|
},
|
|
{
|
|
"epoch": 1.1868686868686869,
|
|
"grad_norm": 1.2615252581904304,
|
|
"learning_rate": 3.9423926887296114e-05,
|
|
"loss": 0.4471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4283289909362793,
|
|
"step": 470,
|
|
"valid_targets_mean": 3297.8,
|
|
"valid_targets_min": 1445
|
|
},
|
|
{
|
|
"epoch": 1.1994949494949494,
|
|
"grad_norm": 0.7227367427497255,
|
|
"learning_rate": 3.939352656279059e-05,
|
|
"loss": 0.4105,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4885120689868927,
|
|
"step": 475,
|
|
"valid_targets_mean": 3030.8,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 1.2121212121212122,
|
|
"grad_norm": 0.7171565868676774,
|
|
"learning_rate": 3.93623569268117e-05,
|
|
"loss": 0.4334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37661659717559814,
|
|
"step": 480,
|
|
"valid_targets_mean": 2516.6,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 1.2247474747474747,
|
|
"grad_norm": 0.6709298722578017,
|
|
"learning_rate": 3.933041921581113e-05,
|
|
"loss": 0.4524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4951278269290924,
|
|
"step": 485,
|
|
"valid_targets_mean": 3910.1,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 1.2373737373737375,
|
|
"grad_norm": 0.6284344251816701,
|
|
"learning_rate": 3.929771469670892e-05,
|
|
"loss": 0.4364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4901544451713562,
|
|
"step": 490,
|
|
"valid_targets_mean": 5372.7,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 0.695150714261409,
|
|
"learning_rate": 3.926424466684321e-05,
|
|
"loss": 0.4036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3937031328678131,
|
|
"step": 495,
|
|
"valid_targets_mean": 2620.4,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 1.2626262626262625,
|
|
"grad_norm": 0.5659681496879514,
|
|
"learning_rate": 3.92300104539188e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43232929706573486,
|
|
"step": 500,
|
|
"valid_targets_mean": 4517.9,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 1.2752525252525253,
|
|
"grad_norm": 0.6499437965902374,
|
|
"learning_rate": 3.919501341595442e-05,
|
|
"loss": 0.4463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4581197500228882,
|
|
"step": 505,
|
|
"valid_targets_mean": 3634.6,
|
|
"valid_targets_min": 1176
|
|
},
|
|
{
|
|
"epoch": 1.2878787878787878,
|
|
"grad_norm": 0.5715005441806082,
|
|
"learning_rate": 3.9159254941228944e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3313661217689514,
|
|
"step": 510,
|
|
"valid_targets_mean": 3508.3,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 1.3005050505050506,
|
|
"grad_norm": 0.7308800974579575,
|
|
"learning_rate": 3.912273644822625e-05,
|
|
"loss": 0.4295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4216563105583191,
|
|
"step": 515,
|
|
"valid_targets_mean": 2695.7,
|
|
"valid_targets_min": 1010
|
|
},
|
|
{
|
|
"epoch": 1.3131313131313131,
|
|
"grad_norm": 0.6837093491179472,
|
|
"learning_rate": 3.9085459385578984e-05,
|
|
"loss": 0.3947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4423089027404785,
|
|
"step": 520,
|
|
"valid_targets_mean": 2883.6,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 1.3257575757575757,
|
|
"grad_norm": 0.6554872754713126,
|
|
"learning_rate": 3.9047425232011076e-05,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3657465875148773,
|
|
"step": 525,
|
|
"valid_targets_mean": 3170.8,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 1.3383838383838385,
|
|
"grad_norm": 0.6685453762625018,
|
|
"learning_rate": 3.900863549627911e-05,
|
|
"loss": 0.428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4607548713684082,
|
|
"step": 530,
|
|
"valid_targets_mean": 3880.3,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 1.351010101010101,
|
|
"grad_norm": 0.6443269502166363,
|
|
"learning_rate": 3.896909171711245e-05,
|
|
"loss": 0.4348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4501422941684723,
|
|
"step": 535,
|
|
"valid_targets_mean": 3417.6,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 1.3636363636363638,
|
|
"grad_norm": 0.6126406593256342,
|
|
"learning_rate": 3.8928795463152186e-05,
|
|
"loss": 0.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3543352782726288,
|
|
"step": 540,
|
|
"valid_targets_mean": 3163.1,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 1.3762626262626263,
|
|
"grad_norm": 0.6881727131350188,
|
|
"learning_rate": 3.888774833288898e-05,
|
|
"loss": 0.4257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4091816842556,
|
|
"step": 545,
|
|
"valid_targets_mean": 3140.7,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 1.3888888888888888,
|
|
"grad_norm": 0.6580723165353671,
|
|
"learning_rate": 3.8845951954599545e-05,
|
|
"loss": 0.3958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3906816244125366,
|
|
"step": 550,
|
|
"valid_targets_mean": 2895.7,
|
|
"valid_targets_min": 1240
|
|
},
|
|
{
|
|
"epoch": 1.4015151515151514,
|
|
"grad_norm": 0.6770136577009271,
|
|
"learning_rate": 3.8803407986282156e-05,
|
|
"loss": 0.4221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35806700587272644,
|
|
"step": 555,
|
|
"valid_targets_mean": 2287.6,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 1.4141414141414141,
|
|
"grad_norm": 0.6405715120436977,
|
|
"learning_rate": 3.876011811559084e-05,
|
|
"loss": 0.4279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4252093434333801,
|
|
"step": 560,
|
|
"valid_targets_mean": 3118.0,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 1.4267676767676767,
|
|
"grad_norm": 0.6944966556141179,
|
|
"learning_rate": 3.871608405976838e-05,
|
|
"loss": 0.4115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42029836773872375,
|
|
"step": 565,
|
|
"valid_targets_mean": 2919.8,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 1.4393939393939394,
|
|
"grad_norm": 0.6466971885955511,
|
|
"learning_rate": 3.867130756557832e-05,
|
|
"loss": 0.4312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3371127247810364,
|
|
"step": 570,
|
|
"valid_targets_mean": 2752.6,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 1.452020202020202,
|
|
"grad_norm": 0.6100566892783015,
|
|
"learning_rate": 3.862579040923552e-05,
|
|
"loss": 0.3838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3892483115196228,
|
|
"step": 575,
|
|
"valid_targets_mean": 3388.8,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 1.4646464646464645,
|
|
"grad_norm": 0.6600534442172271,
|
|
"learning_rate": 3.8579534396335835e-05,
|
|
"loss": 0.412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46965640783309937,
|
|
"step": 580,
|
|
"valid_targets_mean": 3274.4,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 1.4772727272727273,
|
|
"grad_norm": 0.6946470349793337,
|
|
"learning_rate": 3.853254136178437e-05,
|
|
"loss": 0.4195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4128877818584442,
|
|
"step": 585,
|
|
"valid_targets_mean": 2874.7,
|
|
"valid_targets_min": 1690
|
|
},
|
|
{
|
|
"epoch": 1.4898989898989898,
|
|
"grad_norm": 0.658007836439284,
|
|
"learning_rate": 3.8484813169722794e-05,
|
|
"loss": 0.4202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4265574812889099,
|
|
"step": 590,
|
|
"valid_targets_mean": 2866.8,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 1.5025252525252526,
|
|
"grad_norm": 0.7613954671864829,
|
|
"learning_rate": 3.843635171345532e-05,
|
|
"loss": 0.4369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44407981634140015,
|
|
"step": 595,
|
|
"valid_targets_mean": 3021.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 1.5151515151515151,
|
|
"grad_norm": 0.7625903826308273,
|
|
"learning_rate": 3.838715891537365e-05,
|
|
"loss": 0.3805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34579938650131226,
|
|
"step": 600,
|
|
"valid_targets_mean": 2343.3,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 1.5277777777777777,
|
|
"grad_norm": 0.6482084611263487,
|
|
"learning_rate": 3.8337236726880674e-05,
|
|
"loss": 0.441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41672322154045105,
|
|
"step": 605,
|
|
"valid_targets_mean": 2920.0,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 1.5404040404040404,
|
|
"grad_norm": 0.6065329237085881,
|
|
"learning_rate": 3.828658712831311e-05,
|
|
"loss": 0.398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3971773386001587,
|
|
"step": 610,
|
|
"valid_targets_mean": 3118.1,
|
|
"valid_targets_min": 1473
|
|
},
|
|
{
|
|
"epoch": 1.553030303030303,
|
|
"grad_norm": 0.6544115401255524,
|
|
"learning_rate": 3.823521212886287e-05,
|
|
"loss": 0.441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45314595103263855,
|
|
"step": 615,
|
|
"valid_targets_mean": 3466.2,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 1.5656565656565657,
|
|
"grad_norm": 0.7218840612228915,
|
|
"learning_rate": 3.818311376649746e-05,
|
|
"loss": 0.3962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40665388107299805,
|
|
"step": 620,
|
|
"valid_targets_mean": 3132.6,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 1.5782828282828283,
|
|
"grad_norm": 0.6457705553977882,
|
|
"learning_rate": 3.813029410787906e-05,
|
|
"loss": 0.4014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4007648229598999,
|
|
"step": 625,
|
|
"valid_targets_mean": 3385.1,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 1.5909090909090908,
|
|
"grad_norm": 0.6340707674027326,
|
|
"learning_rate": 3.807675524828255e-05,
|
|
"loss": 0.3921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3663696050643921,
|
|
"step": 630,
|
|
"valid_targets_mean": 2994.5,
|
|
"valid_targets_min": 1839
|
|
},
|
|
{
|
|
"epoch": 1.6035353535353534,
|
|
"grad_norm": 0.6510799288150312,
|
|
"learning_rate": 3.802249931151245e-05,
|
|
"loss": 0.4083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4528045356273651,
|
|
"step": 635,
|
|
"valid_targets_mean": 3057.3,
|
|
"valid_targets_min": 724
|
|
},
|
|
{
|
|
"epoch": 1.6161616161616161,
|
|
"grad_norm": 0.7096957049322412,
|
|
"learning_rate": 3.7967528449818604e-05,
|
|
"loss": 0.4129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3862096667289734,
|
|
"step": 640,
|
|
"valid_targets_mean": 2390.7,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 1.628787878787879,
|
|
"grad_norm": 0.7345826232266963,
|
|
"learning_rate": 3.791184484381083e-05,
|
|
"loss": 0.433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3896709382534027,
|
|
"step": 645,
|
|
"valid_targets_mean": 2601.5,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 1.6414141414141414,
|
|
"grad_norm": 0.6381339146897167,
|
|
"learning_rate": 3.7855450702372446e-05,
|
|
"loss": 0.3921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42096877098083496,
|
|
"step": 650,
|
|
"valid_targets_mean": 2983.1,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 1.654040404040404,
|
|
"grad_norm": 0.7088141347121194,
|
|
"learning_rate": 3.7798348262572595e-05,
|
|
"loss": 0.4184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38489383459091187,
|
|
"step": 655,
|
|
"valid_targets_mean": 2589.2,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.6158104461556841,
|
|
"learning_rate": 3.774053978957754e-05,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33309605717658997,
|
|
"step": 660,
|
|
"valid_targets_mean": 2628.7,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 1.6792929292929293,
|
|
"grad_norm": 0.6621621785950462,
|
|
"learning_rate": 3.76820275765608e-05,
|
|
"loss": 0.4482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3812219202518463,
|
|
"step": 665,
|
|
"valid_targets_mean": 2696.2,
|
|
"valid_targets_min": 1315
|
|
},
|
|
{
|
|
"epoch": 1.691919191919192,
|
|
"grad_norm": 0.5915638746228544,
|
|
"learning_rate": 3.7622813944612184e-05,
|
|
"loss": 0.4268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45549070835113525,
|
|
"step": 670,
|
|
"valid_targets_mean": 4037.2,
|
|
"valid_targets_min": 1593
|
|
},
|
|
{
|
|
"epoch": 1.7045454545454546,
|
|
"grad_norm": 0.6214323792581383,
|
|
"learning_rate": 3.75629012426457e-05,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30740225315093994,
|
|
"step": 675,
|
|
"valid_targets_mean": 2484.4,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 1.7171717171717171,
|
|
"grad_norm": 0.6181383119204046,
|
|
"learning_rate": 3.750229184730643e-05,
|
|
"loss": 0.4034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41900163888931274,
|
|
"step": 680,
|
|
"valid_targets_mean": 3214.8,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 1.7297979797979797,
|
|
"grad_norm": 0.7022543694921178,
|
|
"learning_rate": 3.744098816287616e-05,
|
|
"loss": 0.3934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3738209009170532,
|
|
"step": 685,
|
|
"valid_targets_mean": 2518.6,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 1.7424242424242424,
|
|
"grad_norm": 0.6444777112364757,
|
|
"learning_rate": 3.73789926211781e-05,
|
|
"loss": 0.4013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40667980909347534,
|
|
"step": 690,
|
|
"valid_targets_mean": 2943.8,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 1.7550505050505052,
|
|
"grad_norm": 0.6456649483546918,
|
|
"learning_rate": 3.7316307681480364e-05,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5095163583755493,
|
|
"step": 695,
|
|
"valid_targets_mean": 3686.2,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 1.7676767676767677,
|
|
"grad_norm": 0.6312723135470779,
|
|
"learning_rate": 3.725293583039843e-05,
|
|
"loss": 0.4029,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3778528869152069,
|
|
"step": 700,
|
|
"valid_targets_mean": 3107.6,
|
|
"valid_targets_min": 1432
|
|
},
|
|
{
|
|
"epoch": 1.7803030303030303,
|
|
"grad_norm": 0.6535661127784683,
|
|
"learning_rate": 3.71888795817965e-05,
|
|
"loss": 0.4127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47286659479141235,
|
|
"step": 705,
|
|
"valid_targets_mean": 4064.8,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 1.7929292929292928,
|
|
"grad_norm": 0.6896428501647339,
|
|
"learning_rate": 3.712414147668777e-05,
|
|
"loss": 0.4215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4112127423286438,
|
|
"step": 710,
|
|
"valid_targets_mean": 2654.4,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 1.8055555555555556,
|
|
"grad_norm": 0.5820441201531631,
|
|
"learning_rate": 3.7058724083133645e-05,
|
|
"loss": 0.4077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34681016206741333,
|
|
"step": 715,
|
|
"valid_targets_mean": 2884.4,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 1.8181818181818183,
|
|
"grad_norm": 0.6800591266970959,
|
|
"learning_rate": 3.6992629996141864e-05,
|
|
"loss": 0.4424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4916991591453552,
|
|
"step": 720,
|
|
"valid_targets_mean": 3417.7,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 1.8308080808080809,
|
|
"grad_norm": 0.6563197557373552,
|
|
"learning_rate": 3.692586183756354e-05,
|
|
"loss": 0.3807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3805094361305237,
|
|
"step": 725,
|
|
"valid_targets_mean": 2503.5,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 1.8434343434343434,
|
|
"grad_norm": 0.7047851378134344,
|
|
"learning_rate": 3.685842225598919e-05,
|
|
"loss": 0.4278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4543723464012146,
|
|
"step": 730,
|
|
"valid_targets_mean": 2930.4,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 1.856060606060606,
|
|
"grad_norm": 0.6499844550915188,
|
|
"learning_rate": 3.6790313926643664e-05,
|
|
"loss": 0.4119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3960087299346924,
|
|
"step": 735,
|
|
"valid_targets_mean": 2956.1,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 1.8686868686868687,
|
|
"grad_norm": 0.649385668021803,
|
|
"learning_rate": 3.6721539551279966e-05,
|
|
"loss": 0.4369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4332810640335083,
|
|
"step": 740,
|
|
"valid_targets_mean": 3076.4,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 1.8813131313131313,
|
|
"grad_norm": 0.7091254200389234,
|
|
"learning_rate": 3.665210185807217e-05,
|
|
"loss": 0.4388,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44163447618484497,
|
|
"step": 745,
|
|
"valid_targets_mean": 3328.9,
|
|
"valid_targets_min": 1660
|
|
},
|
|
{
|
|
"epoch": 1.893939393939394,
|
|
"grad_norm": 0.5985641510783979,
|
|
"learning_rate": 3.658200360150713e-05,
|
|
"loss": 0.4091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37661778926849365,
|
|
"step": 750,
|
|
"valid_targets_mean": 3146.1,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 1.9065656565656566,
|
|
"grad_norm": 0.6268419140387759,
|
|
"learning_rate": 3.651124756227525e-05,
|
|
"loss": 0.401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4080426096916199,
|
|
"step": 755,
|
|
"valid_targets_mean": 3422.2,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 1.9191919191919191,
|
|
"grad_norm": 0.7426620476089505,
|
|
"learning_rate": 3.6439836547160144e-05,
|
|
"loss": 0.4044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38891446590423584,
|
|
"step": 760,
|
|
"valid_targets_mean": 3295.2,
|
|
"valid_targets_min": 1546
|
|
},
|
|
{
|
|
"epoch": 1.9318181818181817,
|
|
"grad_norm": 0.6671491506286603,
|
|
"learning_rate": 3.636777338892732e-05,
|
|
"loss": 0.4466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44117462635040283,
|
|
"step": 765,
|
|
"valid_targets_mean": 2958.4,
|
|
"valid_targets_min": 1837
|
|
},
|
|
{
|
|
"epoch": 1.9444444444444444,
|
|
"grad_norm": 0.6342292838426378,
|
|
"learning_rate": 3.629506094621183e-05,
|
|
"loss": 0.3886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3897709250450134,
|
|
"step": 770,
|
|
"valid_targets_mean": 2692.6,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 1.9570707070707072,
|
|
"grad_norm": 0.621675970891514,
|
|
"learning_rate": 3.622170210340482e-05,
|
|
"loss": 0.4183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38866496086120605,
|
|
"step": 775,
|
|
"valid_targets_mean": 2999.8,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 1.9696969696969697,
|
|
"grad_norm": 0.5905010144602377,
|
|
"learning_rate": 3.614769977053914e-05,
|
|
"loss": 0.4291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3574790060520172,
|
|
"step": 780,
|
|
"valid_targets_mean": 3125.0,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 1.9823232323232323,
|
|
"grad_norm": 0.5836024549642513,
|
|
"learning_rate": 3.607305688317391e-05,
|
|
"loss": 0.4046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4132801294326782,
|
|
"step": 785,
|
|
"valid_targets_mean": 3194.9,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 1.9949494949494948,
|
|
"grad_norm": 0.7228511270619273,
|
|
"learning_rate": 3.5997776402278055e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42261964082717896,
|
|
"step": 790,
|
|
"valid_targets_mean": 3067.1,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 2.007575757575758,
|
|
"grad_norm": 0.6700499879024988,
|
|
"learning_rate": 3.592186131411288e-05,
|
|
"loss": 0.376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3675363063812256,
|
|
"step": 795,
|
|
"valid_targets_mean": 2926.6,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 2.0202020202020203,
|
|
"grad_norm": 0.6733433964569425,
|
|
"learning_rate": 3.584531463011356e-05,
|
|
"loss": 0.3648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3609054982662201,
|
|
"step": 800,
|
|
"valid_targets_mean": 3172.1,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 2.032828282828283,
|
|
"grad_norm": 0.6042940030317498,
|
|
"learning_rate": 3.576813938676973e-05,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3894050121307373,
|
|
"step": 805,
|
|
"valid_targets_mean": 3448.1,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 2.0454545454545454,
|
|
"grad_norm": 0.6749197830689422,
|
|
"learning_rate": 3.569033864550501e-05,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3814485967159271,
|
|
"step": 810,
|
|
"valid_targets_mean": 2783.8,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 2.058080808080808,
|
|
"grad_norm": 0.6483067354879916,
|
|
"learning_rate": 3.561191549255555e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3972342312335968,
|
|
"step": 815,
|
|
"valid_targets_mean": 3241.1,
|
|
"valid_targets_min": 1649
|
|
},
|
|
{
|
|
"epoch": 2.0707070707070705,
|
|
"grad_norm": 0.6353320580780115,
|
|
"learning_rate": 3.553287303884762e-05,
|
|
"loss": 0.3695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3591780662536621,
|
|
"step": 820,
|
|
"valid_targets_mean": 2904.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.0833333333333335,
|
|
"grad_norm": 0.6551388984986852,
|
|
"learning_rate": 3.545321441987422e-05,
|
|
"loss": 0.3704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29770687222480774,
|
|
"step": 825,
|
|
"valid_targets_mean": 2442.8,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 2.095959595959596,
|
|
"grad_norm": 0.6968910493636307,
|
|
"learning_rate": 3.5372942795570644e-05,
|
|
"loss": 0.358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3901800215244293,
|
|
"step": 830,
|
|
"valid_targets_mean": 2854.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 2.1085858585858586,
|
|
"grad_norm": 0.568367502641086,
|
|
"learning_rate": 3.529206135018922e-05,
|
|
"loss": 0.3625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4262069761753082,
|
|
"step": 835,
|
|
"valid_targets_mean": 4330.8,
|
|
"valid_targets_min": 2021
|
|
},
|
|
{
|
|
"epoch": 2.121212121212121,
|
|
"grad_norm": 0.6824608784442717,
|
|
"learning_rate": 3.521057329217289e-05,
|
|
"loss": 0.3845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3504842519760132,
|
|
"step": 840,
|
|
"valid_targets_mean": 2486.9,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 2.1338383838383836,
|
|
"grad_norm": 0.6810643162239562,
|
|
"learning_rate": 3.512848185402804e-05,
|
|
"loss": 0.3885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42560821771621704,
|
|
"step": 845,
|
|
"valid_targets_mean": 3205.6,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 2.1464646464646466,
|
|
"grad_norm": 0.7051006217015395,
|
|
"learning_rate": 3.504579029219618e-05,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47030025720596313,
|
|
"step": 850,
|
|
"valid_targets_mean": 4252.9,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 2.159090909090909,
|
|
"grad_norm": 0.6158184161250323,
|
|
"learning_rate": 3.4962501886924817e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3438853621482849,
|
|
"step": 855,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 1183
|
|
},
|
|
{
|
|
"epoch": 2.1717171717171717,
|
|
"grad_norm": 0.9033510849058328,
|
|
"learning_rate": 3.4878619942137346e-05,
|
|
"loss": 0.3566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36623990535736084,
|
|
"step": 860,
|
|
"valid_targets_mean": 3486.9,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 2.1843434343434343,
|
|
"grad_norm": 0.6336084994385605,
|
|
"learning_rate": 3.479414778530194e-05,
|
|
"loss": 0.3553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3282182812690735,
|
|
"step": 865,
|
|
"valid_targets_mean": 2926.6,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 2.196969696969697,
|
|
"grad_norm": 0.642030989385258,
|
|
"learning_rate": 3.470908876729958e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38485634326934814,
|
|
"step": 870,
|
|
"valid_targets_mean": 3797.9,
|
|
"valid_targets_min": 1802
|
|
},
|
|
{
|
|
"epoch": 2.20959595959596,
|
|
"grad_norm": 0.6859750653264282,
|
|
"learning_rate": 3.462344626229113e-05,
|
|
"loss": 0.3831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4057656526565552,
|
|
"step": 875,
|
|
"valid_targets_mean": 3077.2,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 0.7051975354184825,
|
|
"learning_rate": 3.4537223667583517e-05,
|
|
"loss": 0.3941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3670094311237335,
|
|
"step": 880,
|
|
"valid_targets_mean": 2480.9,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 2.234848484848485,
|
|
"grad_norm": 0.6263495715036997,
|
|
"learning_rate": 3.44504244034949e-05,
|
|
"loss": 0.3744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38415664434432983,
|
|
"step": 885,
|
|
"valid_targets_mean": 3738.8,
|
|
"valid_targets_min": 1905
|
|
},
|
|
{
|
|
"epoch": 2.2474747474747474,
|
|
"grad_norm": 0.6736583840874508,
|
|
"learning_rate": 3.4363051913219054e-05,
|
|
"loss": 0.3787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32438012957572937,
|
|
"step": 890,
|
|
"valid_targets_mean": 2544.9,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 2.26010101010101,
|
|
"grad_norm": 0.6358475233857454,
|
|
"learning_rate": 3.427510966268874e-05,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38413217663764954,
|
|
"step": 895,
|
|
"valid_targets_mean": 3256.4,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 2.2727272727272725,
|
|
"grad_norm": 0.5703544012357852,
|
|
"learning_rate": 3.418660114043824e-05,
|
|
"loss": 0.3628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34161847829818726,
|
|
"step": 900,
|
|
"valid_targets_mean": 3436.0,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 2.2853535353535355,
|
|
"grad_norm": 0.6756271018463891,
|
|
"learning_rate": 3.409752985746498e-05,
|
|
"loss": 0.3907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39379531145095825,
|
|
"step": 905,
|
|
"valid_targets_mean": 3228.2,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 2.297979797979798,
|
|
"grad_norm": 0.5750798297997253,
|
|
"learning_rate": 3.400789934709023e-05,
|
|
"loss": 0.4308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4697551727294922,
|
|
"step": 910,
|
|
"valid_targets_mean": 4887.2,
|
|
"valid_targets_min": 1320
|
|
},
|
|
{
|
|
"epoch": 2.3106060606060606,
|
|
"grad_norm": 0.6035695330159876,
|
|
"learning_rate": 3.391771316481895e-05,
|
|
"loss": 0.386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37602925300598145,
|
|
"step": 915,
|
|
"valid_targets_mean": 3335.6,
|
|
"valid_targets_min": 1865
|
|
},
|
|
{
|
|
"epoch": 2.323232323232323,
|
|
"grad_norm": 0.6231533529555526,
|
|
"learning_rate": 3.3826974888198764e-05,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46126362681388855,
|
|
"step": 920,
|
|
"valid_targets_mean": 4108.6,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 2.3358585858585856,
|
|
"grad_norm": 0.682022720271219,
|
|
"learning_rate": 3.373568811667802e-05,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39595192670822144,
|
|
"step": 925,
|
|
"valid_targets_mean": 2894.8,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 2.3484848484848486,
|
|
"grad_norm": 0.6797289333546708,
|
|
"learning_rate": 3.3643856471463036e-05,
|
|
"loss": 0.3785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38361161947250366,
|
|
"step": 930,
|
|
"valid_targets_mean": 3060.6,
|
|
"valid_targets_min": 1501
|
|
},
|
|
{
|
|
"epoch": 2.361111111111111,
|
|
"grad_norm": 0.6031465467740685,
|
|
"learning_rate": 3.355148359537441e-05,
|
|
"loss": 0.3961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44060710072517395,
|
|
"step": 935,
|
|
"valid_targets_mean": 4271.9,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 2.3737373737373737,
|
|
"grad_norm": 0.8364843828650894,
|
|
"learning_rate": 3.3458573152702556e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36398792266845703,
|
|
"step": 940,
|
|
"valid_targets_mean": 2913.2,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 2.3863636363636362,
|
|
"grad_norm": 0.5980417429474294,
|
|
"learning_rate": 3.336512882906234e-05,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307436466217041,
|
|
"step": 945,
|
|
"valid_targets_mean": 2891.1,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 2.398989898989899,
|
|
"grad_norm": 0.6020944292492557,
|
|
"learning_rate": 3.3271154331246825e-05,
|
|
"loss": 0.3548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3505295515060425,
|
|
"step": 950,
|
|
"valid_targets_mean": 4080.4,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 2.4116161616161618,
|
|
"grad_norm": 0.6948551306615971,
|
|
"learning_rate": 3.317665338708033e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38028645515441895,
|
|
"step": 955,
|
|
"valid_targets_mean": 3400.9,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 2.4242424242424243,
|
|
"grad_norm": 0.7054147906410959,
|
|
"learning_rate": 3.308162974527045e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3440144658088684,
|
|
"step": 960,
|
|
"valid_targets_mean": 2568.9,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 2.436868686868687,
|
|
"grad_norm": 0.6502674537729093,
|
|
"learning_rate": 3.298608717525941e-05,
|
|
"loss": 0.3966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4167352616786957,
|
|
"step": 965,
|
|
"valid_targets_mean": 3487.2,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 2.4494949494949494,
|
|
"grad_norm": 0.6945755822435844,
|
|
"learning_rate": 3.289002946707452e-05,
|
|
"loss": 0.3618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38510334491729736,
|
|
"step": 970,
|
|
"valid_targets_mean": 3070.3,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 2.462121212121212,
|
|
"grad_norm": 0.7008172093994913,
|
|
"learning_rate": 3.2793460431177827e-05,
|
|
"loss": 0.3892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34910762310028076,
|
|
"step": 975,
|
|
"valid_targets_mean": 2654.6,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 2.474747474747475,
|
|
"grad_norm": 0.6315928723707651,
|
|
"learning_rate": 3.269638389831498e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948976755142212,
|
|
"step": 980,
|
|
"valid_targets_mean": 2513.4,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 2.4873737373737375,
|
|
"grad_norm": 0.659399817304144,
|
|
"learning_rate": 3.2598803719363234e-05,
|
|
"loss": 0.3995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3890318274497986,
|
|
"step": 985,
|
|
"valid_targets_mean": 2942.6,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.6602751081844956,
|
|
"learning_rate": 3.250072376517873e-05,
|
|
"loss": 0.39,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31299781799316406,
|
|
"step": 990,
|
|
"valid_targets_mean": 2744.5,
|
|
"valid_targets_min": 1621
|
|
},
|
|
{
|
|
"epoch": 2.5126262626262625,
|
|
"grad_norm": 0.5849536829497557,
|
|
"learning_rate": 3.240214792644291e-05,
|
|
"loss": 0.3908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4854844808578491,
|
|
"step": 995,
|
|
"valid_targets_mean": 4775.6,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 2.525252525252525,
|
|
"grad_norm": 0.5606732760702411,
|
|
"learning_rate": 3.2303080113508206e-05,
|
|
"loss": 0.3851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3052321672439575,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3365.4,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 2.537878787878788,
|
|
"grad_norm": 0.6875908835706088,
|
|
"learning_rate": 3.220352425624292e-05,
|
|
"loss": 0.382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3734514117240906,
|
|
"step": 1005,
|
|
"valid_targets_mean": 2893.5,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 2.5505050505050506,
|
|
"grad_norm": 0.5931935199736621,
|
|
"learning_rate": 3.210348430387531e-05,
|
|
"loss": 0.404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3862113058567047,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3470.7,
|
|
"valid_targets_min": 2311
|
|
},
|
|
{
|
|
"epoch": 2.563131313131313,
|
|
"grad_norm": 0.5854966748703261,
|
|
"learning_rate": 3.200296422483694e-05,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3955685496330261,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3589.4,
|
|
"valid_targets_min": 1829
|
|
},
|
|
{
|
|
"epoch": 2.5757575757575757,
|
|
"grad_norm": 0.6466992426589206,
|
|
"learning_rate": 3.19019680066053e-05,
|
|
"loss": 0.3517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3543087840080261,
|
|
"step": 1020,
|
|
"valid_targets_mean": 2809.2,
|
|
"valid_targets_min": 1982
|
|
},
|
|
{
|
|
"epoch": 2.5883838383838382,
|
|
"grad_norm": 0.6437875698154034,
|
|
"learning_rate": 3.180049965554554e-05,
|
|
"loss": 0.4091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5216984748840332,
|
|
"step": 1025,
|
|
"valid_targets_mean": 4236.3,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 2.601010101010101,
|
|
"grad_norm": 0.6686276392769377,
|
|
"learning_rate": 3.169856319675163e-05,
|
|
"loss": 0.3711,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4543102979660034,
|
|
"step": 1030,
|
|
"valid_targets_mean": 3561.1,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 2.6136363636363638,
|
|
"grad_norm": 0.5951534964453324,
|
|
"learning_rate": 3.159616267388665e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3885546326637268,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3710.8,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 2.6262626262626263,
|
|
"grad_norm": 0.5998214906422664,
|
|
"learning_rate": 3.1493302149022394e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3495018482208252,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3388.9,
|
|
"valid_targets_min": 1017
|
|
},
|
|
{
|
|
"epoch": 2.638888888888889,
|
|
"grad_norm": 0.679459404989866,
|
|
"learning_rate": 3.1389985702478204e-05,
|
|
"loss": 0.3733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40621423721313477,
|
|
"step": 1045,
|
|
"valid_targets_mean": 2743.4,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.6515151515151514,
|
|
"grad_norm": 0.49821839728054024,
|
|
"learning_rate": 3.1286217432659176e-05,
|
|
"loss": 0.3848,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42677822709083557,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5945.1,
|
|
"valid_targets_min": 1605
|
|
},
|
|
{
|
|
"epoch": 2.6641414141414144,
|
|
"grad_norm": 0.6245151459734792,
|
|
"learning_rate": 3.118200145589351e-05,
|
|
"loss": 0.4174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3920952081680298,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3122.9,
|
|
"valid_targets_min": 1889
|
|
},
|
|
{
|
|
"epoch": 2.676767676767677,
|
|
"grad_norm": 0.6230627615382611,
|
|
"learning_rate": 3.1077341906269275e-05,
|
|
"loss": 0.3746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4046259820461273,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3375.1,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 2.6893939393939394,
|
|
"grad_norm": 0.5984044868920384,
|
|
"learning_rate": 3.097224293547039e-05,
|
|
"loss": 0.3761,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37177756428718567,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3157.3,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 2.702020202020202,
|
|
"grad_norm": 0.6200698875715712,
|
|
"learning_rate": 3.086670871261193e-05,
|
|
"loss": 0.3824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4072413146495819,
|
|
"step": 1070,
|
|
"valid_targets_mean": 3493.7,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 2.7146464646464645,
|
|
"grad_norm": 0.7099464467086446,
|
|
"learning_rate": 3.076074342407476e-05,
|
|
"loss": 0.3903,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3890324532985687,
|
|
"step": 1075,
|
|
"valid_targets_mean": 2398.0,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.7272727272727275,
|
|
"grad_norm": 0.5964382108688258,
|
|
"learning_rate": 3.0654351273339465e-05,
|
|
"loss": 0.3576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41678887605667114,
|
|
"step": 1080,
|
|
"valid_targets_mean": 4061.4,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 2.73989898989899,
|
|
"grad_norm": 0.6483267090943723,
|
|
"learning_rate": 3.0547536480819584e-05,
|
|
"loss": 0.4063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38063323497772217,
|
|
"step": 1085,
|
|
"valid_targets_mean": 2821.2,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 2.7525252525252526,
|
|
"grad_norm": 0.6126539646079129,
|
|
"learning_rate": 3.044030328369422e-05,
|
|
"loss": 0.3809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3242890238761902,
|
|
"step": 1090,
|
|
"valid_targets_mean": 2823.2,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 2.765151515151515,
|
|
"grad_norm": 0.666211099047787,
|
|
"learning_rate": 3.033265593573994e-05,
|
|
"loss": 0.3988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35766348242759705,
|
|
"step": 1095,
|
|
"valid_targets_mean": 2767.8,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 0.5893274171238146,
|
|
"learning_rate": 3.0224598707162037e-05,
|
|
"loss": 0.3812,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35240688920021057,
|
|
"step": 1100,
|
|
"valid_targets_mean": 3162.4,
|
|
"valid_targets_min": 855
|
|
},
|
|
{
|
|
"epoch": 2.7904040404040407,
|
|
"grad_norm": 0.6436336249871378,
|
|
"learning_rate": 3.0116135884425156e-05,
|
|
"loss": 0.377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4571429193019867,
|
|
"step": 1105,
|
|
"valid_targets_mean": 3306.6,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 2.8030303030303028,
|
|
"grad_norm": 0.6192587115834132,
|
|
"learning_rate": 3.0007271770083216e-05,
|
|
"loss": 0.3935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2913767397403717,
|
|
"step": 1110,
|
|
"valid_targets_mean": 2528.9,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 2.8156565656565657,
|
|
"grad_norm": 0.606484213579456,
|
|
"learning_rate": 2.989801068260878e-05,
|
|
"loss": 0.3754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29592519998550415,
|
|
"step": 1115,
|
|
"valid_targets_mean": 2622.3,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 2.8282828282828283,
|
|
"grad_norm": 0.645202232373261,
|
|
"learning_rate": 2.9788356956221712e-05,
|
|
"loss": 0.3682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3493780195713043,
|
|
"step": 1120,
|
|
"valid_targets_mean": 2796.6,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 2.840909090909091,
|
|
"grad_norm": 0.6382732894080507,
|
|
"learning_rate": 2.9678314940717276e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31767457723617554,
|
|
"step": 1125,
|
|
"valid_targets_mean": 2719.1,
|
|
"valid_targets_min": 1784
|
|
},
|
|
{
|
|
"epoch": 2.8535353535353534,
|
|
"grad_norm": 0.6864660560112232,
|
|
"learning_rate": 2.9567889001293553e-05,
|
|
"loss": 0.3885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3313673436641693,
|
|
"step": 1130,
|
|
"valid_targets_mean": 2460.8,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 2.866161616161616,
|
|
"grad_norm": 0.6023940706764531,
|
|
"learning_rate": 2.94570835183783e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34224456548690796,
|
|
"step": 1135,
|
|
"valid_targets_mean": 3116.4,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.878787878787879,
|
|
"grad_norm": 0.6067779962957488,
|
|
"learning_rate": 2.93459028874552e-05,
|
|
"loss": 0.3723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3636412024497986,
|
|
"step": 1140,
|
|
"valid_targets_mean": 3337.4,
|
|
"valid_targets_min": 571
|
|
},
|
|
{
|
|
"epoch": 2.8914141414141414,
|
|
"grad_norm": 0.6092698495443272,
|
|
"learning_rate": 2.9234351518889465e-05,
|
|
"loss": 0.4067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4876616597175598,
|
|
"step": 1145,
|
|
"valid_targets_mean": 4359.3,
|
|
"valid_targets_min": 1940
|
|
},
|
|
{
|
|
"epoch": 2.904040404040404,
|
|
"grad_norm": 0.6240475836405307,
|
|
"learning_rate": 2.9122433837752906e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31040680408477783,
|
|
"step": 1150,
|
|
"valid_targets_mean": 2761.3,
|
|
"valid_targets_min": 1568
|
|
},
|
|
{
|
|
"epoch": 2.9166666666666665,
|
|
"grad_norm": 0.5730069310790777,
|
|
"learning_rate": 2.9010154283648382e-05,
|
|
"loss": 0.403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3821036219596863,
|
|
"step": 1155,
|
|
"valid_targets_mean": 3784.9,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 2.929292929292929,
|
|
"grad_norm": 0.6572646648160343,
|
|
"learning_rate": 2.8897517310533733e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128521740436554,
|
|
"step": 1160,
|
|
"valid_targets_mean": 2357.7,
|
|
"valid_targets_min": 1131
|
|
},
|
|
{
|
|
"epoch": 2.941919191919192,
|
|
"grad_norm": 0.6115860029185797,
|
|
"learning_rate": 2.8784527386545006e-05,
|
|
"loss": 0.4476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49554532766342163,
|
|
"step": 1165,
|
|
"valid_targets_mean": 4678.3,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 2.9545454545454546,
|
|
"grad_norm": 0.6535005566432543,
|
|
"learning_rate": 2.8671188993819327e-05,
|
|
"loss": 0.4041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4300668239593506,
|
|
"step": 1170,
|
|
"valid_targets_mean": 3714.4,
|
|
"valid_targets_min": 2330
|
|
},
|
|
{
|
|
"epoch": 2.967171717171717,
|
|
"grad_norm": 0.6344464229430659,
|
|
"learning_rate": 2.8557506628317e-05,
|
|
"loss": 0.3867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4097946882247925,
|
|
"step": 1175,
|
|
"valid_targets_mean": 3480.2,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 2.9797979797979797,
|
|
"grad_norm": 0.5549040647946087,
|
|
"learning_rate": 2.844348479964323e-05,
|
|
"loss": 0.3997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36530980467796326,
|
|
"step": 1180,
|
|
"valid_targets_mean": 3680.7,
|
|
"valid_targets_min": 2068
|
|
},
|
|
{
|
|
"epoch": 2.992424242424242,
|
|
"grad_norm": 0.6350789264846295,
|
|
"learning_rate": 2.8329128030869183e-05,
|
|
"loss": 0.3765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36941784620285034,
|
|
"step": 1185,
|
|
"valid_targets_mean": 2964.5,
|
|
"valid_targets_min": 598
|
|
},
|
|
{
|
|
"epoch": 3.005050505050505,
|
|
"grad_norm": 0.6101230464628173,
|
|
"learning_rate": 2.8214440858352602e-05,
|
|
"loss": 0.3585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3312152624130249,
|
|
"step": 1190,
|
|
"valid_targets_mean": 3049.0,
|
|
"valid_targets_min": 1661
|
|
},
|
|
{
|
|
"epoch": 3.0176767676767677,
|
|
"grad_norm": 0.6717050734051245,
|
|
"learning_rate": 2.8099427831557836e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3820994794368744,
|
|
"step": 1195,
|
|
"valid_targets_mean": 3147.5,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 3.0303030303030303,
|
|
"grad_norm": 0.6855402222878124,
|
|
"learning_rate": 2.798409351287537e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3795431852340698,
|
|
"step": 1200,
|
|
"valid_targets_mean": 3020.4,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 3.042929292929293,
|
|
"grad_norm": 0.6290362160754815,
|
|
"learning_rate": 2.7868442477440845e-05,
|
|
"loss": 0.3716,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3297278881072998,
|
|
"step": 1205,
|
|
"valid_targets_mean": 3352.2,
|
|
"valid_targets_min": 1870
|
|
},
|
|
{
|
|
"epoch": 3.0555555555555554,
|
|
"grad_norm": 0.6542985881986048,
|
|
"learning_rate": 2.7752479312953576e-05,
|
|
"loss": 0.3242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37977492809295654,
|
|
"step": 1210,
|
|
"valid_targets_mean": 3504.4,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 3.0681818181818183,
|
|
"grad_norm": 0.6865766533109904,
|
|
"learning_rate": 2.763620861949456e-05,
|
|
"loss": 0.3148,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3136219382286072,
|
|
"step": 1215,
|
|
"valid_targets_mean": 2935.5,
|
|
"valid_targets_min": 671
|
|
},
|
|
{
|
|
"epoch": 3.080808080808081,
|
|
"grad_norm": 0.650463581157268,
|
|
"learning_rate": 2.7519635009343982e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3669140934944153,
|
|
"step": 1220,
|
|
"valid_targets_mean": 3443.1,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 3.0934343434343434,
|
|
"grad_norm": 0.6374974160183221,
|
|
"learning_rate": 2.7402763106798295e-05,
|
|
"loss": 0.385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3267042636871338,
|
|
"step": 1225,
|
|
"valid_targets_mean": 3336.6,
|
|
"valid_targets_min": 1490
|
|
},
|
|
{
|
|
"epoch": 3.106060606060606,
|
|
"grad_norm": 0.7164459430574219,
|
|
"learning_rate": 2.7285597547986715e-05,
|
|
"loss": 0.3452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3801751732826233,
|
|
"step": 1230,
|
|
"valid_targets_mean": 3270.2,
|
|
"valid_targets_min": 1810
|
|
},
|
|
{
|
|
"epoch": 3.1186868686868685,
|
|
"grad_norm": 0.6786289930271091,
|
|
"learning_rate": 2.7168142980687394e-05,
|
|
"loss": 0.3578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.343783438205719,
|
|
"step": 1235,
|
|
"valid_targets_mean": 2905.7,
|
|
"valid_targets_min": 1686
|
|
},
|
|
{
|
|
"epoch": 3.1313131313131315,
|
|
"grad_norm": 0.8228076465104819,
|
|
"learning_rate": 2.7050404064142985e-05,
|
|
"loss": 0.3593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3551754355430603,
|
|
"step": 1240,
|
|
"valid_targets_mean": 3203.6,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 3.143939393939394,
|
|
"grad_norm": 0.7057452290840084,
|
|
"learning_rate": 2.693238546887583e-05,
|
|
"loss": 0.3613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38851505517959595,
|
|
"step": 1245,
|
|
"valid_targets_mean": 2690.1,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 3.1565656565656566,
|
|
"grad_norm": 0.6516753493509879,
|
|
"learning_rate": 2.6814091876502738e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30876827239990234,
|
|
"step": 1250,
|
|
"valid_targets_mean": 2591.5,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 3.169191919191919,
|
|
"grad_norm": 0.5373908122046772,
|
|
"learning_rate": 2.669552797954917e-05,
|
|
"loss": 0.3621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4019007682800293,
|
|
"step": 1255,
|
|
"valid_targets_mean": 4778.8,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 3.1818181818181817,
|
|
"grad_norm": 0.656469897426696,
|
|
"learning_rate": 2.6576698481263216e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3878747522830963,
|
|
"step": 1260,
|
|
"valid_targets_mean": 3612.4,
|
|
"valid_targets_min": 1553
|
|
},
|
|
{
|
|
"epoch": 3.1944444444444446,
|
|
"grad_norm": 0.6890638135835326,
|
|
"learning_rate": 2.6457608095428925e-05,
|
|
"loss": 0.3607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37581291794776917,
|
|
"step": 1265,
|
|
"valid_targets_mean": 3227.2,
|
|
"valid_targets_min": 876
|
|
},
|
|
{
|
|
"epoch": 3.207070707070707,
|
|
"grad_norm": 0.6686732667303787,
|
|
"learning_rate": 2.633826154617938e-05,
|
|
"loss": 0.3687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32370471954345703,
|
|
"step": 1270,
|
|
"valid_targets_mean": 3021.6,
|
|
"valid_targets_min": 555
|
|
},
|
|
{
|
|
"epoch": 3.2196969696969697,
|
|
"grad_norm": 0.60906879769235,
|
|
"learning_rate": 2.621866356780924e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982814908027649,
|
|
"step": 1275,
|
|
"valid_targets_mean": 3329.8,
|
|
"valid_targets_min": 1659
|
|
},
|
|
{
|
|
"epoch": 3.2323232323232323,
|
|
"grad_norm": 0.6788697674939761,
|
|
"learning_rate": 2.6098818904587014e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3478686213493347,
|
|
"step": 1280,
|
|
"valid_targets_mean": 3108.2,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 3.244949494949495,
|
|
"grad_norm": 0.7418192989966806,
|
|
"learning_rate": 2.597873231056679e-05,
|
|
"loss": 0.342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3505740165710449,
|
|
"step": 1285,
|
|
"valid_targets_mean": 2378.4,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 3.257575757575758,
|
|
"grad_norm": 0.6743259771608354,
|
|
"learning_rate": 2.585840854939969e-05,
|
|
"loss": 0.3385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.337390661239624,
|
|
"step": 1290,
|
|
"valid_targets_mean": 2974.2,
|
|
"valid_targets_min": 869
|
|
},
|
|
{
|
|
"epoch": 3.2702020202020203,
|
|
"grad_norm": 0.6553868057444038,
|
|
"learning_rate": 2.5737852394144903e-05,
|
|
"loss": 0.3231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3390675187110901,
|
|
"step": 1295,
|
|
"valid_targets_mean": 2955.1,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 3.282828282828283,
|
|
"grad_norm": 0.6403703744617418,
|
|
"learning_rate": 2.5617068627080305e-05,
|
|
"loss": 0.3688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4630083739757538,
|
|
"step": 1300,
|
|
"valid_targets_mean": 4076.2,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 3.2954545454545454,
|
|
"grad_norm": 0.6023984043375903,
|
|
"learning_rate": 2.5496062039512834e-05,
|
|
"loss": 0.348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32059431076049805,
|
|
"step": 1305,
|
|
"valid_targets_mean": 3375.9,
|
|
"valid_targets_min": 1878
|
|
},
|
|
{
|
|
"epoch": 3.308080808080808,
|
|
"grad_norm": 0.6396425541446585,
|
|
"learning_rate": 2.5374837431588322e-05,
|
|
"loss": 0.3341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3704610764980316,
|
|
"step": 1310,
|
|
"valid_targets_mean": 3331.3,
|
|
"valid_targets_min": 1406
|
|
},
|
|
{
|
|
"epoch": 3.320707070707071,
|
|
"grad_norm": 0.6776583962480702,
|
|
"learning_rate": 2.525339961210117e-05,
|
|
"loss": 0.356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2946220636367798,
|
|
"step": 1315,
|
|
"valid_targets_mean": 2489.2,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.7512035560086019,
|
|
"learning_rate": 2.5131753398303546e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3226706385612488,
|
|
"step": 1320,
|
|
"valid_targets_mean": 2257.8,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 3.345959595959596,
|
|
"grad_norm": 0.6691736317428321,
|
|
"learning_rate": 2.5009903615714288e-05,
|
|
"loss": 0.3713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38169464468955994,
|
|
"step": 1325,
|
|
"valid_targets_mean": 3674.4,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 3.3585858585858586,
|
|
"grad_norm": 0.6529576389515854,
|
|
"learning_rate": 2.4887855097927515e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3396865725517273,
|
|
"step": 1330,
|
|
"valid_targets_mean": 3407.2,
|
|
"valid_targets_min": 755
|
|
},
|
|
{
|
|
"epoch": 3.371212121212121,
|
|
"grad_norm": 0.7154816898888791,
|
|
"learning_rate": 2.4765612686420855e-05,
|
|
"loss": 0.3367,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.343447744846344,
|
|
"step": 1335,
|
|
"valid_targets_mean": 2833.6,
|
|
"valid_targets_min": 506
|
|
},
|
|
{
|
|
"epoch": 3.3838383838383836,
|
|
"grad_norm": 0.6736312101027783,
|
|
"learning_rate": 2.46431812303634e-05,
|
|
"loss": 0.378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32376375794410706,
|
|
"step": 1340,
|
|
"valid_targets_mean": 2666.4,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 3.3964646464646466,
|
|
"grad_norm": 0.6475094233289387,
|
|
"learning_rate": 2.4520565586423364e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29635465145111084,
|
|
"step": 1345,
|
|
"valid_targets_mean": 2748.7,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 3.409090909090909,
|
|
"grad_norm": 0.5843996644681299,
|
|
"learning_rate": 2.4397770618575402e-05,
|
|
"loss": 0.3462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3405814468860626,
|
|
"step": 1350,
|
|
"valid_targets_mean": 3957.3,
|
|
"valid_targets_min": 1585
|
|
},
|
|
{
|
|
"epoch": 3.4217171717171717,
|
|
"grad_norm": 0.6569771648498826,
|
|
"learning_rate": 2.4274801197907665e-05,
|
|
"loss": 0.3674,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35267573595046997,
|
|
"step": 1355,
|
|
"valid_targets_mean": 3056.4,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 3.4343434343434343,
|
|
"grad_norm": 0.6278190545664051,
|
|
"learning_rate": 2.4151662202428613e-05,
|
|
"loss": 0.3628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3695211410522461,
|
|
"step": 1360,
|
|
"valid_targets_mean": 3506.5,
|
|
"valid_targets_min": 1884
|
|
},
|
|
{
|
|
"epoch": 3.446969696969697,
|
|
"grad_norm": 0.634074126688538,
|
|
"learning_rate": 2.4028358516873427e-05,
|
|
"loss": 0.3216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3146069645881653,
|
|
"step": 1365,
|
|
"valid_targets_mean": 3289.8,
|
|
"valid_targets_min": 1669
|
|
},
|
|
{
|
|
"epoch": 3.45959595959596,
|
|
"grad_norm": 0.6801530900407086,
|
|
"learning_rate": 2.390489503251034e-05,
|
|
"loss": 0.3597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4268019199371338,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3845.6,
|
|
"valid_targets_min": 1843
|
|
},
|
|
{
|
|
"epoch": 3.4722222222222223,
|
|
"grad_norm": 0.6302056512933344,
|
|
"learning_rate": 2.3781276646946526e-05,
|
|
"loss": 0.3483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35302233695983887,
|
|
"step": 1375,
|
|
"valid_targets_mean": 3840.2,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 3.484848484848485,
|
|
"grad_norm": 0.6377742108475873,
|
|
"learning_rate": 2.3657508263933874e-05,
|
|
"loss": 0.3612,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33783674240112305,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3263.4,
|
|
"valid_targets_min": 1149
|
|
},
|
|
{
|
|
"epoch": 3.4974747474747474,
|
|
"grad_norm": 0.7042898368764126,
|
|
"learning_rate": 2.3533594793174426e-05,
|
|
"loss": 0.3258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3383498191833496,
|
|
"step": 1385,
|
|
"valid_targets_mean": 2682.6,
|
|
"valid_targets_min": 1382
|
|
},
|
|
{
|
|
"epoch": 3.51010101010101,
|
|
"grad_norm": 0.7030408364050017,
|
|
"learning_rate": 2.3409541150125645e-05,
|
|
"loss": 0.3557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4075915217399597,
|
|
"step": 1390,
|
|
"valid_targets_mean": 3162.8,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 3.5227272727272725,
|
|
"grad_norm": 0.6274505021865276,
|
|
"learning_rate": 2.3285352255805398e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31670212745666504,
|
|
"step": 1395,
|
|
"valid_targets_mean": 3087.0,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 3.5353535353535355,
|
|
"grad_norm": 1.2486599391824142,
|
|
"learning_rate": 2.316103303659678e-05,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36197564005851746,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3082.6,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 3.547979797979798,
|
|
"grad_norm": 0.5371320718863393,
|
|
"learning_rate": 2.3036588424052672e-05,
|
|
"loss": 0.351,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3815935254096985,
|
|
"step": 1405,
|
|
"valid_targets_mean": 4752.9,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 3.5606060606060606,
|
|
"grad_norm": 0.705016854939806,
|
|
"learning_rate": 2.2912023354700105e-05,
|
|
"loss": 0.3665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3603851795196533,
|
|
"step": 1410,
|
|
"valid_targets_mean": 2926.8,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 3.573232323232323,
|
|
"grad_norm": 0.5984653768510367,
|
|
"learning_rate": 2.2787342769844444e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3309054374694824,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3481.1,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 3.5858585858585856,
|
|
"grad_norm": 0.8191525002477924,
|
|
"learning_rate": 2.2662551615373402e-05,
|
|
"loss": 0.3989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44843584299087524,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2668.3,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 3.5984848484848486,
|
|
"grad_norm": 0.6651567018600946,
|
|
"learning_rate": 2.25376548415608e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3173557221889496,
|
|
"step": 1425,
|
|
"valid_targets_mean": 2731.2,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 3.611111111111111,
|
|
"grad_norm": 0.6266431493382524,
|
|
"learning_rate": 2.241265740287021e-05,
|
|
"loss": 0.3443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29255497455596924,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2907.2,
|
|
"valid_targets_min": 1479
|
|
},
|
|
{
|
|
"epoch": 3.6237373737373737,
|
|
"grad_norm": 0.5693607197936612,
|
|
"learning_rate": 2.2287564257758446e-05,
|
|
"loss": 0.3977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41386911273002625,
|
|
"step": 1435,
|
|
"valid_targets_mean": 4703.8,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 3.6363636363636362,
|
|
"grad_norm": 0.7080853718994018,
|
|
"learning_rate": 2.2162380368478836e-05,
|
|
"loss": 0.3847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3499618172645569,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2786.1,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 3.648989898989899,
|
|
"grad_norm": 0.5548114926983784,
|
|
"learning_rate": 2.2037110700884395e-05,
|
|
"loss": 0.3565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25791436433792114,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3419.7,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 3.6616161616161618,
|
|
"grad_norm": 0.5977854910500919,
|
|
"learning_rate": 2.1911760224230824e-05,
|
|
"loss": 0.3493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33634889125823975,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3442.4,
|
|
"valid_targets_min": 1773
|
|
},
|
|
{
|
|
"epoch": 3.6742424242424243,
|
|
"grad_norm": 0.6796398481161333,
|
|
"learning_rate": 2.17863339109794e-05,
|
|
"loss": 0.3525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36308521032333374,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3190.7,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 3.686868686868687,
|
|
"grad_norm": 0.6479531096999324,
|
|
"learning_rate": 2.166083673659973e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35041192173957825,
|
|
"step": 1460,
|
|
"valid_targets_mean": 3109.9,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 3.6994949494949494,
|
|
"grad_norm": 0.6190656382920072,
|
|
"learning_rate": 2.153527367937236e-05,
|
|
"loss": 0.3489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3769229054450989,
|
|
"step": 1465,
|
|
"valid_targets_mean": 3737.9,
|
|
"valid_targets_min": 1786
|
|
},
|
|
{
|
|
"epoch": 3.712121212121212,
|
|
"grad_norm": 0.6732653753234384,
|
|
"learning_rate": 2.140964972019132e-05,
|
|
"loss": 0.3496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3730910122394562,
|
|
"step": 1470,
|
|
"valid_targets_mean": 3028.1,
|
|
"valid_targets_min": 786
|
|
},
|
|
{
|
|
"epoch": 3.724747474747475,
|
|
"grad_norm": 0.682620817181734,
|
|
"learning_rate": 2.128396984236651e-05,
|
|
"loss": 0.339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37931591272354126,
|
|
"step": 1475,
|
|
"valid_targets_mean": 3248.3,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 3.7373737373737375,
|
|
"grad_norm": 1.5193685144502576,
|
|
"learning_rate": 2.115823903142606e-05,
|
|
"loss": 0.3605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3566587567329407,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3345.8,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"grad_norm": 0.6092501389476551,
|
|
"learning_rate": 2.1032462274918517e-05,
|
|
"loss": 0.3393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30876415967941284,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3301.3,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 3.7626262626262625,
|
|
"grad_norm": 0.7276893961486488,
|
|
"learning_rate": 2.0906644562215037e-05,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3389100432395935,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2715.4,
|
|
"valid_targets_min": 753
|
|
},
|
|
{
|
|
"epoch": 3.775252525252525,
|
|
"grad_norm": 0.5858712774395105,
|
|
"learning_rate": 2.078079088431143e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3030000925064087,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3219.7,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 3.787878787878788,
|
|
"grad_norm": 0.6888688838778153,
|
|
"learning_rate": 2.0654906233630197e-05,
|
|
"loss": 0.3738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.416027694940567,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3128.2,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.516722408026756,
|
|
"grad_norm": 0.662986162649258,
|
|
"learning_rate": 3.23558282621135e-05,
|
|
"loss": 0.3218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29847097396850586,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3091.0,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 2.5250836120401337,
|
|
"grad_norm": 0.6783328257944375,
|
|
"learning_rate": 3.229014188825108e-05,
|
|
"loss": 0.3139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3512507975101471,
|
|
"step": 1510,
|
|
"valid_targets_mean": 3352.5,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 2.5334448160535117,
|
|
"grad_norm": 0.6677148404277962,
|
|
"learning_rate": 3.2224241814261216e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26659291982650757,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2794.4,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 2.5418060200668897,
|
|
"grad_norm": 0.7219771342415074,
|
|
"learning_rate": 3.215812918600978e-05,
|
|
"loss": 0.3324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3495546579360962,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2987.6,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 2.5501672240802673,
|
|
"grad_norm": 0.6560298605364091,
|
|
"learning_rate": 3.209180515305855e-05,
|
|
"loss": 0.3119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.331092894077301,
|
|
"step": 1525,
|
|
"valid_targets_mean": 3713.1,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 2.5585284280936453,
|
|
"grad_norm": 0.6919955457501047,
|
|
"learning_rate": 3.2025270868645146e-05,
|
|
"loss": 0.3497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3320353925228119,
|
|
"step": 1530,
|
|
"valid_targets_mean": 3199.7,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 2.5668896321070234,
|
|
"grad_norm": 0.708863656128023,
|
|
"learning_rate": 3.195852748966306e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36112624406814575,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3085.5,
|
|
"valid_targets_min": 588
|
|
},
|
|
{
|
|
"epoch": 2.5752508361204014,
|
|
"grad_norm": 0.7471046326039033,
|
|
"learning_rate": 3.189157617664151e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28388604521751404,
|
|
"step": 1540,
|
|
"valid_targets_mean": 2699.7,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 2.5836120401337794,
|
|
"grad_norm": 0.7471683542567666,
|
|
"learning_rate": 3.182441809372523e-05,
|
|
"loss": 0.317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33689889311790466,
|
|
"step": 1545,
|
|
"valid_targets_mean": 2698.6,
|
|
"valid_targets_min": 1551
|
|
},
|
|
{
|
|
"epoch": 2.591973244147157,
|
|
"grad_norm": 0.6750267862750765,
|
|
"learning_rate": 3.1757054408654266e-05,
|
|
"loss": 0.3833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2789178192615509,
|
|
"step": 1550,
|
|
"valid_targets_mean": 2798.8,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 2.600334448160535,
|
|
"grad_norm": 0.681930396951124,
|
|
"learning_rate": 3.168948629274367e-05,
|
|
"loss": 0.3283,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3054756820201874,
|
|
"step": 1555,
|
|
"valid_targets_mean": 2962.2,
|
|
"valid_targets_min": 1403
|
|
},
|
|
{
|
|
"epoch": 2.608695652173913,
|
|
"grad_norm": 0.6634403273333718,
|
|
"learning_rate": 3.1621714920863104e-05,
|
|
"loss": 0.3259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4073365032672882,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4461.2,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 2.617056856187291,
|
|
"grad_norm": 0.8238005424494197,
|
|
"learning_rate": 3.155374147141646e-05,
|
|
"loss": 0.3604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38494405150413513,
|
|
"step": 1565,
|
|
"valid_targets_mean": 2640.8,
|
|
"valid_targets_min": 863
|
|
},
|
|
{
|
|
"epoch": 2.625418060200669,
|
|
"grad_norm": 0.7052867973145739,
|
|
"learning_rate": 3.1485567126321295e-05,
|
|
"loss": 0.3451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3390483260154724,
|
|
"step": 1570,
|
|
"valid_targets_mean": 3285.6,
|
|
"valid_targets_min": 1434
|
|
},
|
|
{
|
|
"epoch": 2.6337792642140467,
|
|
"grad_norm": 0.6953845280826801,
|
|
"learning_rate": 3.1417193070988383e-05,
|
|
"loss": 0.3313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3528243601322174,
|
|
"step": 1575,
|
|
"valid_targets_mean": 3543.2,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 2.6421404682274248,
|
|
"grad_norm": 0.7552847898076053,
|
|
"learning_rate": 3.134862049430099e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39836952090263367,
|
|
"step": 1580,
|
|
"valid_targets_mean": 3433.9,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 2.650501672240803,
|
|
"grad_norm": 1.4277354366593005,
|
|
"learning_rate": 3.12798505885943e-05,
|
|
"loss": 0.36,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3085378408432007,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3400.1,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 2.6588628762541804,
|
|
"grad_norm": 0.7410833217029822,
|
|
"learning_rate": 3.1210884549634624e-05,
|
|
"loss": 0.3107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28592583537101746,
|
|
"step": 1590,
|
|
"valid_targets_mean": 2576.4,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 2.6672240802675584,
|
|
"grad_norm": 0.8296406296042037,
|
|
"learning_rate": 3.114172357659861e-05,
|
|
"loss": 0.3542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.370746374130249,
|
|
"step": 1595,
|
|
"valid_targets_mean": 2589.8,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 2.6755852842809364,
|
|
"grad_norm": 0.5928853152526612,
|
|
"learning_rate": 3.107236887205242e-05,
|
|
"loss": 0.3683,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33890628814697266,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4613.9,
|
|
"valid_targets_min": 1582
|
|
},
|
|
{
|
|
"epoch": 2.6839464882943145,
|
|
"grad_norm": 0.7237916757375374,
|
|
"learning_rate": 3.1002821641930815e-05,
|
|
"loss": 0.3467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3008784055709839,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2820.9,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 2.6923076923076925,
|
|
"grad_norm": 0.6958603576096208,
|
|
"learning_rate": 3.093308309551616e-05,
|
|
"loss": 0.3285,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26427051424980164,
|
|
"step": 1610,
|
|
"valid_targets_mean": 2401.6,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 2.70066889632107,
|
|
"grad_norm": 0.7246194414411228,
|
|
"learning_rate": 3.0863154445417426e-05,
|
|
"loss": 0.3112,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31494617462158203,
|
|
"step": 1615,
|
|
"valid_targets_mean": 2992.2,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 2.709030100334448,
|
|
"grad_norm": 0.6683077048389021,
|
|
"learning_rate": 3.079303690754908e-05,
|
|
"loss": 0.3588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46505263447761536,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4923.4,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 2.717391304347826,
|
|
"grad_norm": 0.6839193880655342,
|
|
"learning_rate": 3.072273170110998e-05,
|
|
"loss": 0.344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3480769693851471,
|
|
"step": 1625,
|
|
"valid_targets_mean": 3295.5,
|
|
"valid_targets_min": 698
|
|
},
|
|
{
|
|
"epoch": 2.7257525083612038,
|
|
"grad_norm": 0.6148350220041032,
|
|
"learning_rate": 3.0652240048562134e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3841598629951477,
|
|
"step": 1630,
|
|
"valid_targets_mean": 4531.8,
|
|
"valid_targets_min": 2092
|
|
},
|
|
{
|
|
"epoch": 2.734113712374582,
|
|
"grad_norm": 0.7604959335941064,
|
|
"learning_rate": 3.058156317560945e-05,
|
|
"loss": 0.3097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3188338279724121,
|
|
"step": 1635,
|
|
"valid_targets_mean": 2672.3,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 2.74247491638796,
|
|
"grad_norm": 0.7495694482080785,
|
|
"learning_rate": 3.0510702311176477e-05,
|
|
"loss": 0.3385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28397685289382935,
|
|
"step": 1640,
|
|
"valid_targets_mean": 2509.3,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 2.750836120401338,
|
|
"grad_norm": 0.5960363613154568,
|
|
"learning_rate": 3.043965868738695e-05,
|
|
"loss": 0.33,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26706498861312866,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3699.4,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 2.759197324414716,
|
|
"grad_norm": 0.6692357148144276,
|
|
"learning_rate": 3.0368433539542433e-05,
|
|
"loss": 0.3714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700284719467163,
|
|
"step": 1650,
|
|
"valid_targets_mean": 3072.4,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 2.7675585284280935,
|
|
"grad_norm": 0.7677279753834415,
|
|
"learning_rate": 3.029702810610082e-05,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3318488895893097,
|
|
"step": 1655,
|
|
"valid_targets_mean": 2859.8,
|
|
"valid_targets_min": 714
|
|
},
|
|
{
|
|
"epoch": 2.7759197324414715,
|
|
"grad_norm": 0.7231622567261902,
|
|
"learning_rate": 3.0225443628654787e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072254955768585,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3285.6,
|
|
"valid_targets_min": 1067
|
|
},
|
|
{
|
|
"epoch": 2.7842809364548495,
|
|
"grad_norm": 0.7351401519067079,
|
|
"learning_rate": 3.0153681351910226e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3622933626174927,
|
|
"step": 1665,
|
|
"valid_targets_mean": 3139.9,
|
|
"valid_targets_min": 1919
|
|
},
|
|
{
|
|
"epoch": 2.7926421404682276,
|
|
"grad_norm": 0.7218998613996677,
|
|
"learning_rate": 3.0081742523664576e-05,
|
|
"loss": 0.305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2836865782737732,
|
|
"step": 1670,
|
|
"valid_targets_mean": 2875.6,
|
|
"valid_targets_min": 908
|
|
},
|
|
{
|
|
"epoch": 2.8010033444816056,
|
|
"grad_norm": 0.7498483662923942,
|
|
"learning_rate": 3.0009628394785158e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725030481815338,
|
|
"step": 1675,
|
|
"valid_targets_mean": 2490.9,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 2.809364548494983,
|
|
"grad_norm": 0.7901928997172852,
|
|
"learning_rate": 2.9937340219187402e-05,
|
|
"loss": 0.3407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3297320008277893,
|
|
"step": 1680,
|
|
"valid_targets_mean": 2595.1,
|
|
"valid_targets_min": 1608
|
|
},
|
|
{
|
|
"epoch": 2.817725752508361,
|
|
"grad_norm": 0.6829285663175374,
|
|
"learning_rate": 2.986487925381304e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28002995252609253,
|
|
"step": 1685,
|
|
"valid_targets_mean": 2900.0,
|
|
"valid_targets_min": 810
|
|
},
|
|
{
|
|
"epoch": 2.8260869565217392,
|
|
"grad_norm": 0.7868893655628152,
|
|
"learning_rate": 2.9792246758608283e-05,
|
|
"loss": 0.3628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3678167164325714,
|
|
"step": 1690,
|
|
"valid_targets_mean": 3206.1,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 2.834448160535117,
|
|
"grad_norm": 0.6961974009885317,
|
|
"learning_rate": 2.9719443996501858e-05,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3555162250995636,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3276.0,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 2.842809364548495,
|
|
"grad_norm": 0.6616306549579406,
|
|
"learning_rate": 2.9646472233383118e-05,
|
|
"loss": 0.3263,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307451993227005,
|
|
"step": 1700,
|
|
"valid_targets_mean": 3025.1,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 2.851170568561873,
|
|
"grad_norm": 0.699054155600686,
|
|
"learning_rate": 2.9573332738079964e-05,
|
|
"loss": 0.3428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325960248708725,
|
|
"step": 1705,
|
|
"valid_targets_mean": 3397.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 2.859531772575251,
|
|
"grad_norm": 0.5618995876602438,
|
|
"learning_rate": 2.9500026782336828e-05,
|
|
"loss": 0.3321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3432704210281372,
|
|
"step": 1710,
|
|
"valid_targets_mean": 5129.1,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 2.867892976588629,
|
|
"grad_norm": 0.7396609388739099,
|
|
"learning_rate": 2.942655564079254e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30100107192993164,
|
|
"step": 1715,
|
|
"valid_targets_mean": 2780.6,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 2.8762541806020065,
|
|
"grad_norm": 0.6882252256679291,
|
|
"learning_rate": 2.9352920590958173e-05,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3655456006526947,
|
|
"step": 1720,
|
|
"valid_targets_mean": 3660.6,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 2.8846153846153846,
|
|
"grad_norm": 0.6954294304011069,
|
|
"learning_rate": 2.927912291319482e-05,
|
|
"loss": 0.2649,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2898782789707184,
|
|
"step": 1725,
|
|
"valid_targets_mean": 2949.5,
|
|
"valid_targets_min": 1417
|
|
},
|
|
{
|
|
"epoch": 2.8929765886287626,
|
|
"grad_norm": 0.7685790812651909,
|
|
"learning_rate": 2.9205163890691338e-05,
|
|
"loss": 0.3633,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.351645827293396,
|
|
"step": 1730,
|
|
"valid_targets_mean": 3392.4,
|
|
"valid_targets_min": 1028
|
|
},
|
|
{
|
|
"epoch": 2.90133779264214,
|
|
"grad_norm": 0.6514952351452494,
|
|
"learning_rate": 2.9131044809442038e-05,
|
|
"loss": 0.3061,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27277517318725586,
|
|
"step": 1735,
|
|
"valid_targets_mean": 3230.4,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 2.9096989966555182,
|
|
"grad_norm": 0.6336596359886507,
|
|
"learning_rate": 2.9056766958224324e-05,
|
|
"loss": 0.3282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31338661909103394,
|
|
"step": 1740,
|
|
"valid_targets_mean": 3871.8,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 2.9180602006688963,
|
|
"grad_norm": 0.7074870527746576,
|
|
"learning_rate": 2.898233162857627e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31244152784347534,
|
|
"step": 1745,
|
|
"valid_targets_mean": 3252.4,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 2.9264214046822743,
|
|
"grad_norm": 0.7317476390005567,
|
|
"learning_rate": 2.8907740114774185e-05,
|
|
"loss": 0.3326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3308480381965637,
|
|
"step": 1750,
|
|
"valid_targets_mean": 2940.5,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 2.9347826086956523,
|
|
"grad_norm": 0.669207925359114,
|
|
"learning_rate": 2.8832993713810095e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34635135531425476,
|
|
"step": 1755,
|
|
"valid_targets_mean": 3485.1,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 2.94314381270903,
|
|
"grad_norm": 0.7081348378617047,
|
|
"learning_rate": 2.8758093725369193e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41460931301116943,
|
|
"step": 1760,
|
|
"valid_targets_mean": 3348.2,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 2.951505016722408,
|
|
"grad_norm": 0.6563436808159772,
|
|
"learning_rate": 2.868304145180722e-05,
|
|
"loss": 0.363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43143677711486816,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5450.4,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 2.959866220735786,
|
|
"grad_norm": 1.0813836021947982,
|
|
"learning_rate": 2.8607838198127886e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3358667194843292,
|
|
"step": 1770,
|
|
"valid_targets_mean": 2942.9,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 2.968227424749164,
|
|
"grad_norm": 0.6535458920771104,
|
|
"learning_rate": 2.8532485271960088e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30923616886138916,
|
|
"step": 1775,
|
|
"valid_targets_mean": 3134.3,
|
|
"valid_targets_min": 1443
|
|
},
|
|
{
|
|
"epoch": 2.976588628762542,
|
|
"grad_norm": 0.7051449951363025,
|
|
"learning_rate": 2.8456983983535243e-05,
|
|
"loss": 0.3081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3197246789932251,
|
|
"step": 1780,
|
|
"valid_targets_mean": 3088.7,
|
|
"valid_targets_min": 722
|
|
},
|
|
{
|
|
"epoch": 2.9849498327759196,
|
|
"grad_norm": 0.7514550832469282,
|
|
"learning_rate": 2.838133564566447e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33178478479385376,
|
|
"step": 1785,
|
|
"valid_targets_mean": 2903.9,
|
|
"valid_targets_min": 1051
|
|
},
|
|
{
|
|
"epoch": 2.9933110367892977,
|
|
"grad_norm": 0.7487764007212119,
|
|
"learning_rate": 2.8305541573715775e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32397782802581787,
|
|
"step": 1790,
|
|
"valid_targets_mean": 2655.2,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 3.0016722408026757,
|
|
"grad_norm": 0.697866012772672,
|
|
"learning_rate": 2.8229603085591178e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24206683039665222,
|
|
"step": 1795,
|
|
"valid_targets_mean": 2628.6,
|
|
"valid_targets_min": 1232
|
|
},
|
|
{
|
|
"epoch": 3.0100334448160537,
|
|
"grad_norm": 0.7292123243657525,
|
|
"learning_rate": 2.8153521501703803e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32684776186943054,
|
|
"step": 1800,
|
|
"valid_targets_mean": 3399.4,
|
|
"valid_targets_min": 1615
|
|
},
|
|
{
|
|
"epoch": 3.0183946488294313,
|
|
"grad_norm": 0.7473811261850504,
|
|
"learning_rate": 2.8077298144954904e-05,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2628287672996521,
|
|
"step": 1805,
|
|
"valid_targets_mean": 2887.2,
|
|
"valid_targets_min": 807
|
|
},
|
|
{
|
|
"epoch": 3.0267558528428093,
|
|
"grad_norm": 0.7650083502129927,
|
|
"learning_rate": 2.8000934340710883e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30963340401649475,
|
|
"step": 1810,
|
|
"valid_targets_mean": 3063.6,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 3.0351170568561874,
|
|
"grad_norm": 0.7397530221539242,
|
|
"learning_rate": 2.792443141678022e-05,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27429091930389404,
|
|
"step": 1815,
|
|
"valid_targets_mean": 2718.4,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 3.0434782608695654,
|
|
"grad_norm": 0.889361629882369,
|
|
"learning_rate": 2.784779070339041e-05,
|
|
"loss": 0.316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3256457448005676,
|
|
"step": 1820,
|
|
"valid_targets_mean": 2798.4,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 3.051839464882943,
|
|
"grad_norm": 0.6809205141917194,
|
|
"learning_rate": 2.7771013533164805e-05,
|
|
"loss": 0.3188,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3276858329772949,
|
|
"step": 1825,
|
|
"valid_targets_mean": 3351.5,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 3.060200668896321,
|
|
"grad_norm": 0.6744548172485136,
|
|
"learning_rate": 2.7694101241099484e-05,
|
|
"loss": 0.3659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4031035006046295,
|
|
"step": 1830,
|
|
"valid_targets_mean": 4916.2,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 3.068561872909699,
|
|
"grad_norm": 0.7582120508573842,
|
|
"learning_rate": 2.7617055164539993e-05,
|
|
"loss": 0.3349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31827855110168457,
|
|
"step": 1835,
|
|
"valid_targets_mean": 2926.5,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 3.076923076923077,
|
|
"grad_norm": 0.8315396768017139,
|
|
"learning_rate": 2.753987664315813e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35661745071411133,
|
|
"step": 1840,
|
|
"valid_targets_mean": 2858.8,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 3.0852842809364547,
|
|
"grad_norm": 0.7155190185107607,
|
|
"learning_rate": 2.746256701892861e-05,
|
|
"loss": 0.3741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3801868259906769,
|
|
"step": 1845,
|
|
"valid_targets_mean": 4064.2,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 3.0936454849498327,
|
|
"grad_norm": 0.6741071495612008,
|
|
"learning_rate": 2.738512763610579e-05,
|
|
"loss": 0.308,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37737980484962463,
|
|
"step": 1850,
|
|
"valid_targets_mean": 4971.6,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 3.1020066889632107,
|
|
"grad_norm": 0.7590694014485702,
|
|
"learning_rate": 2.7307559841200238e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3324778079986572,
|
|
"step": 1855,
|
|
"valid_targets_mean": 2946.1,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 3.1103678929765888,
|
|
"grad_norm": 0.7215558419615479,
|
|
"learning_rate": 2.7229864982955328e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3469077944755554,
|
|
"step": 1860,
|
|
"valid_targets_mean": 3110.1,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 3.1187290969899664,
|
|
"grad_norm": 0.6897412184844943,
|
|
"learning_rate": 2.7152044412323842e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28832149505615234,
|
|
"step": 1865,
|
|
"valid_targets_mean": 3266.5,
|
|
"valid_targets_min": 1272
|
|
},
|
|
{
|
|
"epoch": 3.1270903010033444,
|
|
"grad_norm": 0.8950983106159963,
|
|
"learning_rate": 2.7074099482444406e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3346301317214966,
|
|
"step": 1870,
|
|
"valid_targets_mean": 2962.5,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 3.1354515050167224,
|
|
"grad_norm": 0.6829362728624939,
|
|
"learning_rate": 2.699603154861801e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2530469298362732,
|
|
"step": 1875,
|
|
"valid_targets_mean": 3057.2,
|
|
"valid_targets_min": 1148
|
|
},
|
|
{
|
|
"epoch": 3.1438127090301005,
|
|
"grad_norm": 0.7233648289737619,
|
|
"learning_rate": 2.6917841968284433e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3072529733181,
|
|
"step": 1880,
|
|
"valid_targets_mean": 2967.2,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 3.1521739130434785,
|
|
"grad_norm": 0.7416457175298982,
|
|
"learning_rate": 2.6839532100998623e-05,
|
|
"loss": 0.3185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3872751295566559,
|
|
"step": 1885,
|
|
"valid_targets_mean": 3862.7,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 3.160535117056856,
|
|
"grad_norm": 0.7812518570866998,
|
|
"learning_rate": 2.6761103308407076e-05,
|
|
"loss": 0.3277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25746119022369385,
|
|
"step": 1890,
|
|
"valid_targets_mean": 2487.5,
|
|
"valid_targets_min": 844
|
|
},
|
|
{
|
|
"epoch": 3.168896321070234,
|
|
"grad_norm": 0.7436986096190392,
|
|
"learning_rate": 2.668255695422415e-05,
|
|
"loss": 0.3709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37360888719558716,
|
|
"step": 1895,
|
|
"valid_targets_mean": 3813.4,
|
|
"valid_targets_min": 1554
|
|
},
|
|
{
|
|
"epoch": 3.177257525083612,
|
|
"grad_norm": 0.7142319442876787,
|
|
"learning_rate": 2.660389440420836e-05,
|
|
"loss": 0.3068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.272394061088562,
|
|
"step": 1900,
|
|
"valid_targets_mean": 2995.9,
|
|
"valid_targets_min": 1008
|
|
},
|
|
{
|
|
"epoch": 3.1856187290969897,
|
|
"grad_norm": 0.7424793152458009,
|
|
"learning_rate": 2.6525117026138614e-05,
|
|
"loss": 0.3392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34922927618026733,
|
|
"step": 1905,
|
|
"valid_targets_mean": 3819.7,
|
|
"valid_targets_min": 1601
|
|
},
|
|
{
|
|
"epoch": 3.1939799331103678,
|
|
"grad_norm": 0.7607562321366768,
|
|
"learning_rate": 2.644622618979047e-05,
|
|
"loss": 0.2896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24981051683425903,
|
|
"step": 1910,
|
|
"valid_targets_mean": 2548.7,
|
|
"valid_targets_min": 923
|
|
},
|
|
{
|
|
"epoch": 3.202341137123746,
|
|
"grad_norm": 0.6904862318660174,
|
|
"learning_rate": 2.6367223266912252e-05,
|
|
"loss": 0.3317,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37764304876327515,
|
|
"step": 1915,
|
|
"valid_targets_mean": 3910.4,
|
|
"valid_targets_min": 1349
|
|
},
|
|
{
|
|
"epoch": 3.210702341137124,
|
|
"grad_norm": 0.8961099343679048,
|
|
"learning_rate": 2.6288109631201266e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3819909393787384,
|
|
"step": 1920,
|
|
"valid_targets_mean": 2620.2,
|
|
"valid_targets_min": 942
|
|
},
|
|
{
|
|
"epoch": 3.219063545150502,
|
|
"grad_norm": 0.7566386982431123,
|
|
"learning_rate": 2.6208886658279875e-05,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3507644534111023,
|
|
"step": 1925,
|
|
"valid_targets_mean": 3339.7,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 3.2274247491638794,
|
|
"grad_norm": 0.8144566736238488,
|
|
"learning_rate": 2.6129555725671586e-05,
|
|
"loss": 0.3041,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3747752010822296,
|
|
"step": 1930,
|
|
"valid_targets_mean": 2877.8,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 3.2357859531772575,
|
|
"grad_norm": 0.8094873874087346,
|
|
"learning_rate": 2.605011821277712e-05,
|
|
"loss": 0.306,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3171420693397522,
|
|
"step": 1935,
|
|
"valid_targets_mean": 2959.7,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 3.2441471571906355,
|
|
"grad_norm": 0.8547392925559153,
|
|
"learning_rate": 2.597057550085037e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27529996633529663,
|
|
"step": 1940,
|
|
"valid_targets_mean": 2264.8,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 3.2525083612040135,
|
|
"grad_norm": 0.6817740682466431,
|
|
"learning_rate": 2.589092897297447e-05,
|
|
"loss": 0.3122,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33298245072364807,
|
|
"step": 1945,
|
|
"valid_targets_mean": 3641.5,
|
|
"valid_targets_min": 2028
|
|
},
|
|
{
|
|
"epoch": 3.260869565217391,
|
|
"grad_norm": 0.7178992056677376,
|
|
"learning_rate": 2.581118001403767e-05,
|
|
"loss": 0.3346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.298176109790802,
|
|
"step": 1950,
|
|
"valid_targets_mean": 2812.1,
|
|
"valid_targets_min": 1666
|
|
},
|
|
{
|
|
"epoch": 3.269230769230769,
|
|
"grad_norm": 0.821395268384391,
|
|
"learning_rate": 2.573133001070928e-05,
|
|
"loss": 0.3032,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32294681668281555,
|
|
"step": 1955,
|
|
"valid_targets_mean": 2872.3,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 3.277591973244147,
|
|
"grad_norm": 0.7018121583116861,
|
|
"learning_rate": 2.565138035141558e-05,
|
|
"loss": 0.3199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32964542508125305,
|
|
"step": 1960,
|
|
"valid_targets_mean": 3175.1,
|
|
"valid_targets_min": 1682
|
|
},
|
|
{
|
|
"epoch": 3.2859531772575252,
|
|
"grad_norm": 1.1803702361445025,
|
|
"learning_rate": 2.557133242631565e-05,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3171846866607666,
|
|
"step": 1965,
|
|
"valid_targets_mean": 3355.6,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 3.294314381270903,
|
|
"grad_norm": 0.6610850683002595,
|
|
"learning_rate": 2.549118762727721e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24194839596748352,
|
|
"step": 1970,
|
|
"valid_targets_mean": 3080.6,
|
|
"valid_targets_min": 1573
|
|
},
|
|
{
|
|
"epoch": 3.302675585284281,
|
|
"grad_norm": 0.7255555656749366,
|
|
"learning_rate": 2.5410947347852436e-05,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4482927620410919,
|
|
"step": 1975,
|
|
"valid_targets_mean": 4331.0,
|
|
"valid_targets_min": 1168
|
|
},
|
|
{
|
|
"epoch": 3.311036789297659,
|
|
"grad_norm": 0.7459931964816805,
|
|
"learning_rate": 2.5330612983253667e-05,
|
|
"loss": 0.3369,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28639882802963257,
|
|
"step": 1980,
|
|
"valid_targets_mean": 3048.4,
|
|
"valid_targets_min": 1599
|
|
},
|
|
{
|
|
"epoch": 3.319397993311037,
|
|
"grad_norm": 0.7132568663511498,
|
|
"learning_rate": 2.5250185930329235e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3665810227394104,
|
|
"step": 1985,
|
|
"valid_targets_mean": 3538.0,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 3.327759197324415,
|
|
"grad_norm": 0.8008208379218488,
|
|
"learning_rate": 2.5169667587539105e-05,
|
|
"loss": 0.3226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30878740549087524,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2999.9,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 3.3361204013377925,
|
|
"grad_norm": 0.7114926198475543,
|
|
"learning_rate": 2.5089059354930584e-05,
|
|
"loss": 0.3253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3151680827140808,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3198.5,
|
|
"valid_targets_min": 1876
|
|
},
|
|
{
|
|
"epoch": 3.3444816053511706,
|
|
"grad_norm": 0.6910899592340489,
|
|
"learning_rate": 2.5008362634113986e-05,
|
|
"loss": 0.3158,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27248960733413696,
|
|
"step": 2000,
|
|
"valid_targets_mean": 3219.5,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 3.3528428093645486,
|
|
"grad_norm": 0.9194983641690248,
|
|
"learning_rate": 2.4927578828238253e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30196303129196167,
|
|
"step": 2005,
|
|
"valid_targets_mean": 2675.1,
|
|
"valid_targets_min": 1560
|
|
},
|
|
{
|
|
"epoch": 3.361204013377926,
|
|
"grad_norm": 0.7413625053509546,
|
|
"learning_rate": 2.484670934196654e-05,
|
|
"loss": 0.3028,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2616788148880005,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2792.3,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 3.369565217391304,
|
|
"grad_norm": 0.7183291854874223,
|
|
"learning_rate": 2.476575558145183e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3487267792224884,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3637.6,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 3.3779264214046822,
|
|
"grad_norm": 0.7490283521521003,
|
|
"learning_rate": 2.468471895431243e-05,
|
|
"loss": 0.3021,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325644314289093,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3182.8,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 3.3862876254180603,
|
|
"grad_norm": 0.7101402332669482,
|
|
"learning_rate": 2.4603600869607564e-05,
|
|
"loss": 0.3267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660444378852844,
|
|
"step": 2025,
|
|
"valid_targets_mean": 2812.0,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 3.3946488294314383,
|
|
"grad_norm": 0.730576453198831,
|
|
"learning_rate": 2.452240273781281e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26417773962020874,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3180.6,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 3.403010033444816,
|
|
"grad_norm": 0.7181814679922088,
|
|
"learning_rate": 2.444112597079558e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2812889814376831,
|
|
"step": 2035,
|
|
"valid_targets_mean": 3133.4,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 3.411371237458194,
|
|
"grad_norm": 0.7924112516136512,
|
|
"learning_rate": 2.435977198179065e-05,
|
|
"loss": 0.3287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27810171246528625,
|
|
"step": 2040,
|
|
"valid_targets_mean": 2717.6,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 3.419732441471572,
|
|
"grad_norm": 0.707141761893294,
|
|
"learning_rate": 2.4278342185375467e-05,
|
|
"loss": 0.3362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31040269136428833,
|
|
"step": 2045,
|
|
"valid_targets_mean": 3125.6,
|
|
"valid_targets_min": 1931
|
|
},
|
|
{
|
|
"epoch": 3.42809364548495,
|
|
"grad_norm": 0.7561619308472627,
|
|
"learning_rate": 2.4196837997445636e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3123628795146942,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3224.0,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 3.4364548494983276,
|
|
"grad_norm": 0.7289170075667627,
|
|
"learning_rate": 2.4115260835190285e-05,
|
|
"loss": 0.2989,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2535760998725891,
|
|
"step": 2055,
|
|
"valid_targets_mean": 3353.6,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 3.4448160535117056,
|
|
"grad_norm": 0.945847336384475,
|
|
"learning_rate": 2.4033612117067396e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2784852981567383,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3911.6,
|
|
"valid_targets_min": 1064
|
|
},
|
|
{
|
|
"epoch": 3.4531772575250836,
|
|
"grad_norm": 0.7817559218890252,
|
|
"learning_rate": 2.395189326277918e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3658587336540222,
|
|
"step": 2065,
|
|
"valid_targets_mean": 3038.2,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 3.4615384615384617,
|
|
"grad_norm": 0.7908249845730267,
|
|
"learning_rate": 2.3870105693247347e-05,
|
|
"loss": 0.3314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40112197399139404,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3198.5,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 3.4698996655518393,
|
|
"grad_norm": 0.7869243206602189,
|
|
"learning_rate": 2.3788250830588437e-05,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704690992832184,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2424.8,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 3.4782608695652173,
|
|
"grad_norm": 0.7080463514373667,
|
|
"learning_rate": 2.3706330098089077e-05,
|
|
"loss": 0.314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3315350413322449,
|
|
"step": 2080,
|
|
"valid_targets_mean": 4324.7,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 3.4866220735785953,
|
|
"grad_norm": 0.8218808851621214,
|
|
"learning_rate": 2.3624344920181243e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3174235224723816,
|
|
"step": 2085,
|
|
"valid_targets_mean": 2845.2,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 3.4949832775919734,
|
|
"grad_norm": 0.7054886370767576,
|
|
"learning_rate": 2.3542296722417452e-05,
|
|
"loss": 0.3309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3576347529888153,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3957.9,
|
|
"valid_targets_min": 1892
|
|
},
|
|
{
|
|
"epoch": 3.5033444816053514,
|
|
"grad_norm": 0.7737550873720773,
|
|
"learning_rate": 2.346018693144605e-05,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3562350869178772,
|
|
"step": 2095,
|
|
"valid_targets_mean": 3385.3,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 3.511705685618729,
|
|
"grad_norm": 0.793659888489234,
|
|
"learning_rate": 2.3378016974986326e-05,
|
|
"loss": 0.3287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3222711384296417,
|
|
"step": 2100,
|
|
"valid_targets_mean": 2910.6,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 3.520066889632107,
|
|
"grad_norm": 0.8708230054218504,
|
|
"learning_rate": 2.3295788281803733e-05,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33255574107170105,
|
|
"step": 2105,
|
|
"valid_targets_mean": 3600.3,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 3.528428093645485,
|
|
"grad_norm": 0.7022641595914034,
|
|
"learning_rate": 2.321350228168505e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3107645511627197,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3758.8,
|
|
"valid_targets_min": 1562
|
|
},
|
|
{
|
|
"epoch": 3.5367892976588626,
|
|
"grad_norm": 0.8652735159983888,
|
|
"learning_rate": 2.3131160405413472e-05,
|
|
"loss": 0.2842,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26982659101486206,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2795.1,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 3.5451505016722407,
|
|
"grad_norm": 0.656569555739008,
|
|
"learning_rate": 2.30487640847438e-05,
|
|
"loss": 0.3267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3019533157348633,
|
|
"step": 2120,
|
|
"valid_targets_mean": 3843.1,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 3.5535117056856187,
|
|
"grad_norm": 0.721412813412654,
|
|
"learning_rate": 2.296631475237749e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.326846718788147,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3144.4,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 3.5618729096989967,
|
|
"grad_norm": 0.8698536209082071,
|
|
"learning_rate": 2.2883813841937754e-05,
|
|
"loss": 0.3396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32312431931495667,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2314.3,
|
|
"valid_targets_min": 467
|
|
},
|
|
{
|
|
"epoch": 3.5702341137123748,
|
|
"grad_norm": 0.8109874750171677,
|
|
"learning_rate": 2.2801262787944668e-05,
|
|
"loss": 0.3159,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.298412948846817,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3258.5,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 3.5785953177257523,
|
|
"grad_norm": 0.8162989082018804,
|
|
"learning_rate": 2.2718663025790183e-05,
|
|
"loss": 0.3138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2839236259460449,
|
|
"step": 2140,
|
|
"valid_targets_mean": 2840.1,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 3.5869565217391304,
|
|
"grad_norm": 0.8038116768717268,
|
|
"learning_rate": 2.2636015991713167e-05,
|
|
"loss": 0.3237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3331753611564636,
|
|
"step": 2145,
|
|
"valid_targets_mean": 2935.8,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 3.5953177257525084,
|
|
"grad_norm": 0.7739334985662348,
|
|
"learning_rate": 2.2553323122774487e-05,
|
|
"loss": 0.2988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3323972821235657,
|
|
"step": 2150,
|
|
"valid_targets_mean": 3164.0,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 3.6036789297658864,
|
|
"grad_norm": 0.7740161292149046,
|
|
"learning_rate": 2.2470585856831953e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25347644090652466,
|
|
"step": 2155,
|
|
"valid_targets_mean": 2834.1,
|
|
"valid_targets_min": 1341
|
|
},
|
|
{
|
|
"epoch": 3.6120401337792645,
|
|
"grad_norm": 0.6771441745861378,
|
|
"learning_rate": 2.2387805632515365e-05,
|
|
"loss": 0.3432,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31761401891708374,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4497.9,
|
|
"valid_targets_min": 699
|
|
},
|
|
{
|
|
"epoch": 3.620401337792642,
|
|
"grad_norm": 0.7521566633081039,
|
|
"learning_rate": 2.2304983889201467e-05,
|
|
"loss": 0.3186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3334948420524597,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3627.6,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 3.62876254180602,
|
|
"grad_norm": 0.7031338023674866,
|
|
"learning_rate": 2.222212206698894e-05,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24446901679039001,
|
|
"step": 2170,
|
|
"valid_targets_mean": 2985.4,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 3.637123745819398,
|
|
"grad_norm": 0.6866912372395072,
|
|
"learning_rate": 2.2139221606673353e-05,
|
|
"loss": 0.3618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3053354024887085,
|
|
"step": 2175,
|
|
"valid_targets_mean": 3322.4,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 3.6454849498327757,
|
|
"grad_norm": 0.7043379955622305,
|
|
"learning_rate": 2.2056283949722114e-05,
|
|
"loss": 0.3045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29806143045425415,
|
|
"step": 2180,
|
|
"valid_targets_mean": 3245.2,
|
|
"valid_targets_min": 1284
|
|
},
|
|
{
|
|
"epoch": 3.6538461538461537,
|
|
"grad_norm": 0.7223387295053122,
|
|
"learning_rate": 2.197331053824939e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2715410888195038,
|
|
"step": 2185,
|
|
"valid_targets_mean": 3403.3,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 3.6622073578595318,
|
|
"grad_norm": 0.7923524210124755,
|
|
"learning_rate": 2.1890302814991075e-05,
|
|
"loss": 0.293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27055805921554565,
|
|
"step": 2190,
|
|
"valid_targets_mean": 2505.2,
|
|
"valid_targets_min": 808
|
|
},
|
|
{
|
|
"epoch": 3.67056856187291,
|
|
"grad_norm": 0.9333927510801652,
|
|
"learning_rate": 2.1807262223279633e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33472126722335815,
|
|
"step": 2195,
|
|
"valid_targets_mean": 2667.6,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 3.678929765886288,
|
|
"grad_norm": 0.7418078632890535,
|
|
"learning_rate": 2.172419020701907e-05,
|
|
"loss": 0.3113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2958923280239105,
|
|
"step": 2200,
|
|
"valid_targets_mean": 3098.1,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 3.6872909698996654,
|
|
"grad_norm": 0.8027987445224076,
|
|
"learning_rate": 2.1641088210659804e-05,
|
|
"loss": 0.3027,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.294519305229187,
|
|
"step": 2205,
|
|
"valid_targets_mean": 2970.6,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 3.6956521739130435,
|
|
"grad_norm": 0.8206281052811005,
|
|
"learning_rate": 2.155795767917352e-05,
|
|
"loss": 0.2961,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31719639897346497,
|
|
"step": 2210,
|
|
"valid_targets_mean": 3145.6,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 3.7040133779264215,
|
|
"grad_norm": 0.7960144879970568,
|
|
"learning_rate": 2.14748000580281e-05,
|
|
"loss": 0.3162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3015314042568207,
|
|
"step": 2215,
|
|
"valid_targets_mean": 2424.9,
|
|
"valid_targets_min": 1020
|
|
},
|
|
{
|
|
"epoch": 3.712374581939799,
|
|
"grad_norm": 0.7242043132987579,
|
|
"learning_rate": 2.1391616793162435e-05,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29711389541625977,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3244.1,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 3.720735785953177,
|
|
"grad_norm": 0.8514530826111597,
|
|
"learning_rate": 2.1308409330961308e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3484026789665222,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3235.6,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 3.729096989966555,
|
|
"grad_norm": 0.7964054451384952,
|
|
"learning_rate": 2.122517911823027e-05,
|
|
"loss": 0.3255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2610142230987549,
|
|
"step": 2230,
|
|
"valid_targets_mean": 2320.4,
|
|
"valid_targets_min": 779
|
|
},
|
|
{
|
|
"epoch": 3.737458193979933,
|
|
"grad_norm": 0.8055361034248192,
|
|
"learning_rate": 2.114192760217042e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3181573450565338,
|
|
"step": 2235,
|
|
"valid_targets_mean": 3128.3,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 3.745819397993311,
|
|
"grad_norm": 0.7412318158314767,
|
|
"learning_rate": 2.10586562303533e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2673335671424866,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2812.3,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 3.754180602006689,
|
|
"grad_norm": 0.806735595661349,
|
|
"learning_rate": 2.0975366450695707e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34919291734695435,
|
|
"step": 2245,
|
|
"valid_targets_mean": 2936.5,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 3.762541806020067,
|
|
"grad_norm": 0.7799986710841482,
|
|
"learning_rate": 2.0892059711434496e-05,
|
|
"loss": 0.3019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30157431960105896,
|
|
"step": 2250,
|
|
"valid_targets_mean": 2891.5,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 3.770903010033445,
|
|
"grad_norm": 0.6961704800642899,
|
|
"learning_rate": 2.0808737461101417e-05,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2948909401893616,
|
|
"step": 2255,
|
|
"valid_targets_mean": 3901.8,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 3.779264214046823,
|
|
"grad_norm": 0.6430882576013616,
|
|
"learning_rate": 2.0725401148497946e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36942294239997864,
|
|
"step": 2260,
|
|
"valid_targets_mean": 4432.6,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 3.787625418060201,
|
|
"grad_norm": 0.9476578148171562,
|
|
"learning_rate": 2.0642052222670043e-05,
|
|
"loss": 0.3016,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34764429926872253,
|
|
"step": 2265,
|
|
"valid_targets_mean": 2577.2,
|
|
"valid_targets_min": 586
|
|
},
|
|
{
|
|
"epoch": 3.7959866220735785,
|
|
"grad_norm": 0.8377190542394216,
|
|
"learning_rate": 2.0558692132883008e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3148440420627594,
|
|
"step": 2270,
|
|
"valid_targets_mean": 2928.6,
|
|
"valid_targets_min": 1524
|
|
},
|
|
{
|
|
"epoch": 3.8043478260869565,
|
|
"grad_norm": 0.7674746381928171,
|
|
"learning_rate": 2.047532232859625e-05,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2882377505302429,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3211.1,
|
|
"valid_targets_min": 666
|
|
},
|
|
{
|
|
"epoch": 3.8127090301003346,
|
|
"grad_norm": 0.7603539777572138,
|
|
"learning_rate": 2.039194425943808e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28924140334129333,
|
|
"step": 2280,
|
|
"valid_targets_mean": 2899.9,
|
|
"valid_targets_min": 1333
|
|
},
|
|
{
|
|
"epoch": 3.821070234113712,
|
|
"grad_norm": 0.6686475071499299,
|
|
"learning_rate": 2.0308559375180557e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22952501475811005,
|
|
"step": 2285,
|
|
"valid_targets_mean": 2880.6,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 3.82943143812709,
|
|
"grad_norm": 0.9368254198322394,
|
|
"learning_rate": 2.0225169125714193e-05,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3129611909389496,
|
|
"step": 2290,
|
|
"valid_targets_mean": 3224.8,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 3.8377926421404682,
|
|
"grad_norm": 0.8093927362873512,
|
|
"learning_rate": 2.0141774961022826e-05,
|
|
"loss": 0.2892,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30943459272384644,
|
|
"step": 2295,
|
|
"valid_targets_mean": 2797.4,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 3.8461538461538463,
|
|
"grad_norm": 0.8283893332898696,
|
|
"learning_rate": 2.0058378331158357e-05,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3456169366836548,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3146.9,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 3.8545150501672243,
|
|
"grad_norm": 0.8137074950993536,
|
|
"learning_rate": 1.9974980686215546e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24720898270606995,
|
|
"step": 2305,
|
|
"valid_targets_mean": 2561.4,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 3.862876254180602,
|
|
"grad_norm": 0.7732084599764113,
|
|
"learning_rate": 1.9891583476306814e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31134891510009766,
|
|
"step": 2310,
|
|
"valid_targets_mean": 2994.1,
|
|
"valid_targets_min": 377
|
|
},
|
|
{
|
|
"epoch": 3.87123745819398,
|
|
"grad_norm": 0.7919123584972602,
|
|
"learning_rate": 1.9808188151537008e-05,
|
|
"loss": 0.2996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25116395950317383,
|
|
"step": 2315,
|
|
"valid_targets_mean": 2886.5,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 3.879598662207358,
|
|
"grad_norm": 0.7406481098660109,
|
|
"learning_rate": 1.972479616197821e-05,
|
|
"loss": 0.3043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27166545391082764,
|
|
"step": 2320,
|
|
"valid_targets_mean": 3462.1,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 3.8879598662207355,
|
|
"grad_norm": 0.8731416317069042,
|
|
"learning_rate": 1.96414089576445e-05,
|
|
"loss": 0.2973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35053551197052,
|
|
"step": 2325,
|
|
"valid_targets_mean": 2947.4,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 3.8963210702341136,
|
|
"grad_norm": 0.7355424472980551,
|
|
"learning_rate": 1.9558027988466743e-05,
|
|
"loss": 0.3077,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31646502017974854,
|
|
"step": 2330,
|
|
"valid_targets_mean": 3953.5,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 3.9046822742474916,
|
|
"grad_norm": 0.7933779157508104,
|
|
"learning_rate": 1.947465470426741e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2982478141784668,
|
|
"step": 2335,
|
|
"valid_targets_mean": 3171.0,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 3.9130434782608696,
|
|
"grad_norm": 0.8102903523307262,
|
|
"learning_rate": 1.9391290554735326e-05,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3341084122657776,
|
|
"step": 2340,
|
|
"valid_targets_mean": 2779.8,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 3.9214046822742477,
|
|
"grad_norm": 0.7472511964464619,
|
|
"learning_rate": 1.93079369894005e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30373987555503845,
|
|
"step": 2345,
|
|
"valid_targets_mean": 3103.1,
|
|
"valid_targets_min": 1099
|
|
},
|
|
{
|
|
"epoch": 3.9297658862876252,
|
|
"grad_norm": 0.7947702151134352,
|
|
"learning_rate": 1.922459545760889e-05,
|
|
"loss": 0.3175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.363625705242157,
|
|
"step": 2350,
|
|
"valid_targets_mean": 3638.0,
|
|
"valid_targets_min": 1698
|
|
},
|
|
{
|
|
"epoch": 3.9381270903010033,
|
|
"grad_norm": 0.8068716471695595,
|
|
"learning_rate": 1.914126740849723e-05,
|
|
"loss": 0.3014,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2704453766345978,
|
|
"step": 2355,
|
|
"valid_targets_mean": 2755.2,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 3.9464882943143813,
|
|
"grad_norm": 0.8320104780946185,
|
|
"learning_rate": 1.9057954290967795e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38438886404037476,
|
|
"step": 2360,
|
|
"valid_targets_mean": 4250.0,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 3.9548494983277593,
|
|
"grad_norm": 0.7539305683119704,
|
|
"learning_rate": 1.897465755366325e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3422359228134155,
|
|
"step": 2365,
|
|
"valid_targets_mean": 3382.0,
|
|
"valid_targets_min": 692
|
|
},
|
|
{
|
|
"epoch": 3.9632107023411374,
|
|
"grad_norm": 0.7840574192641151,
|
|
"learning_rate": 1.8891378644941437e-05,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31690046191215515,
|
|
"step": 2370,
|
|
"valid_targets_mean": 3137.6,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 3.971571906354515,
|
|
"grad_norm": 0.7175563191342146,
|
|
"learning_rate": 1.88081190128502e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30410125851631165,
|
|
"step": 2375,
|
|
"valid_targets_mean": 3167.8,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 3.979933110367893,
|
|
"grad_norm": 0.7773926485050648,
|
|
"learning_rate": 1.8724880105102196e-05,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2675161361694336,
|
|
"step": 2380,
|
|
"valid_targets_mean": 3070.1,
|
|
"valid_targets_min": 1741
|
|
},
|
|
{
|
|
"epoch": 3.988294314381271,
|
|
"grad_norm": 0.7560734554908822,
|
|
"learning_rate": 1.8641663369049724e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2527911365032196,
|
|
"step": 2385,
|
|
"valid_targets_mean": 2798.2,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 3.9966555183946486,
|
|
"grad_norm": 0.9060459955316458,
|
|
"learning_rate": 1.8558470251659574e-05,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27911093831062317,
|
|
"step": 2390,
|
|
"valid_targets_mean": 2562.9,
|
|
"valid_targets_min": 633
|
|
},
|
|
{
|
|
"epoch": 4.005016722408027,
|
|
"grad_norm": 0.8594449711299057,
|
|
"learning_rate": 1.8475302199487848e-05,
|
|
"loss": 0.309,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2793564200401306,
|
|
"step": 2395,
|
|
"valid_targets_mean": 3010.3,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 4.013377926421405,
|
|
"grad_norm": 0.8838950374531943,
|
|
"learning_rate": 1.8392160658654826e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3361824154853821,
|
|
"step": 2400,
|
|
"valid_targets_mean": 3615.7,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 4.021739130434782,
|
|
"grad_norm": 0.7437793364772181,
|
|
"learning_rate": 1.8309047074819805e-05,
|
|
"loss": 0.3187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2967781126499176,
|
|
"step": 2405,
|
|
"valid_targets_mean": 3316.2,
|
|
"valid_targets_min": 508
|
|
},
|
|
{
|
|
"epoch": 4.030100334448161,
|
|
"grad_norm": 0.8335679273374376,
|
|
"learning_rate": 1.822596289315596e-05,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29241418838500977,
|
|
"step": 2410,
|
|
"valid_targets_mean": 3000.0,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 4.038461538461538,
|
|
"grad_norm": 0.9450863777551641,
|
|
"learning_rate": 1.814290955832523e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27432459592819214,
|
|
"step": 2415,
|
|
"valid_targets_mean": 2766.5,
|
|
"valid_targets_min": 701
|
|
},
|
|
{
|
|
"epoch": 4.046822742474917,
|
|
"grad_norm": 0.6892365494886082,
|
|
"learning_rate": 1.8059888514453196e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35358893871307373,
|
|
"step": 2420,
|
|
"valid_targets_mean": 4687.2,
|
|
"valid_targets_min": 707
|
|
},
|
|
{
|
|
"epoch": 4.055183946488294,
|
|
"grad_norm": 0.8546396818192133,
|
|
"learning_rate": 1.7976901205103953e-05,
|
|
"loss": 0.2409,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2732149660587311,
|
|
"step": 2425,
|
|
"valid_targets_mean": 2872.1,
|
|
"valid_targets_min": 1565
|
|
},
|
|
{
|
|
"epoch": 4.063545150501672,
|
|
"grad_norm": 0.9074538701365267,
|
|
"learning_rate": 1.789394907325504e-05,
|
|
"loss": 0.2829,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27739718556404114,
|
|
"step": 2430,
|
|
"valid_targets_mean": 2549.9,
|
|
"valid_targets_min": 412
|
|
},
|
|
{
|
|
"epoch": 4.0719063545150505,
|
|
"grad_norm": 0.7010499122198948,
|
|
"learning_rate": 1.7811033561272328e-05,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28559303283691406,
|
|
"step": 2435,
|
|
"valid_targets_mean": 4280.4,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 4.080267558528428,
|
|
"grad_norm": 0.7783072544646484,
|
|
"learning_rate": 1.7728156110884924e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27712807059288025,
|
|
"step": 2440,
|
|
"valid_targets_mean": 3133.4,
|
|
"valid_targets_min": 474
|
|
},
|
|
{
|
|
"epoch": 4.088628762541806,
|
|
"grad_norm": 0.8270554308257784,
|
|
"learning_rate": 1.7645318163160146e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2960726022720337,
|
|
"step": 2445,
|
|
"valid_targets_mean": 3287.0,
|
|
"valid_targets_min": 1298
|
|
},
|
|
{
|
|
"epoch": 4.096989966555184,
|
|
"grad_norm": 0.8001563378250733,
|
|
"learning_rate": 1.7562521158478432e-05,
|
|
"loss": 0.279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23369301855564117,
|
|
"step": 2450,
|
|
"valid_targets_mean": 3446.6,
|
|
"valid_targets_min": 924
|
|
},
|
|
{
|
|
"epoch": 4.105351170568562,
|
|
"grad_norm": 0.8436159622655983,
|
|
"learning_rate": 1.7479766536508313e-05,
|
|
"loss": 0.292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28451332449913025,
|
|
"step": 2455,
|
|
"valid_targets_mean": 3320.6,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 4.11371237458194,
|
|
"grad_norm": 0.8511630672848115,
|
|
"learning_rate": 1.7397055736181366e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644152343273163,
|
|
"step": 2460,
|
|
"valid_targets_mean": 2837.6,
|
|
"valid_targets_min": 1265
|
|
},
|
|
{
|
|
"epoch": 4.122073578595318,
|
|
"grad_norm": 0.8329359927809505,
|
|
"learning_rate": 1.7314390195667193e-05,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27568697929382324,
|
|
"step": 2465,
|
|
"valid_targets_mean": 3675.1,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 4.130434782608695,
|
|
"grad_norm": 0.8456711766375431,
|
|
"learning_rate": 1.723177135234844e-05,
|
|
"loss": 0.2939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2696060538291931,
|
|
"step": 2470,
|
|
"valid_targets_mean": 3198.6,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 4.138795986622074,
|
|
"grad_norm": 0.8660274510460868,
|
|
"learning_rate": 1.7149200642795765e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39124855399131775,
|
|
"step": 2475,
|
|
"valid_targets_mean": 3618.2,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 4.147157190635451,
|
|
"grad_norm": 0.8805366064387657,
|
|
"learning_rate": 1.70666795027429e-05,
|
|
"loss": 0.2943,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3205997943878174,
|
|
"step": 2480,
|
|
"valid_targets_mean": 3357.6,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 4.15551839464883,
|
|
"grad_norm": 0.8831645923939866,
|
|
"learning_rate": 1.6984209367061657e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3723400831222534,
|
|
"step": 2485,
|
|
"valid_targets_mean": 4439.7,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 4.1638795986622075,
|
|
"grad_norm": 0.8033626392233867,
|
|
"learning_rate": 1.6901791669736974e-05,
|
|
"loss": 0.2835,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23729437589645386,
|
|
"step": 2490,
|
|
"valid_targets_mean": 3279.5,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 4.172240802675585,
|
|
"grad_norm": 0.8529045414447846,
|
|
"learning_rate": 1.6819427843842016e-05,
|
|
"loss": 0.2724,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24402417242527008,
|
|
"step": 2495,
|
|
"valid_targets_mean": 2829.3,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 4.1806020066889635,
|
|
"grad_norm": 0.7772728028989128,
|
|
"learning_rate": 1.6737119321513224e-05,
|
|
"loss": 0.3132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3176288604736328,
|
|
"step": 2500,
|
|
"valid_targets_mean": 3672.4,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 4.188963210702341,
|
|
"grad_norm": 0.8338000934996834,
|
|
"learning_rate": 1.6654867533925418e-05,
|
|
"loss": 0.2758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721063494682312,
|
|
"step": 2505,
|
|
"valid_targets_mean": 3436.9,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 4.197324414715719,
|
|
"grad_norm": 0.8267713909712678,
|
|
"learning_rate": 1.6572673911266943e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28024280071258545,
|
|
"step": 2510,
|
|
"valid_targets_mean": 3187.4,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 4.205685618729097,
|
|
"grad_norm": 0.9157546033073007,
|
|
"learning_rate": 1.6490539882714756e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2485034167766571,
|
|
"step": 2515,
|
|
"valid_targets_mean": 2835.7,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 4.214046822742475,
|
|
"grad_norm": 0.9249610301165988,
|
|
"learning_rate": 1.6408466876409596e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23990267515182495,
|
|
"step": 2520,
|
|
"valid_targets_mean": 2692.9,
|
|
"valid_targets_min": 490
|
|
},
|
|
{
|
|
"epoch": 4.222408026755853,
|
|
"grad_norm": 0.8518340628069135,
|
|
"learning_rate": 1.6326456319431154e-05,
|
|
"loss": 0.2999,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31082072854042053,
|
|
"step": 2525,
|
|
"valid_targets_mean": 3444.6,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 4.230769230769231,
|
|
"grad_norm": 0.9837882931443535,
|
|
"learning_rate": 1.6244509637773256e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29353979229927063,
|
|
"step": 2530,
|
|
"valid_targets_mean": 3000.7,
|
|
"valid_targets_min": 1243
|
|
},
|
|
{
|
|
"epoch": 4.239130434782608,
|
|
"grad_norm": 0.8579109041612042,
|
|
"learning_rate": 1.6162628256319078e-05,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2931848168373108,
|
|
"step": 2535,
|
|
"valid_targets_mean": 2974.6,
|
|
"valid_targets_min": 1303
|
|
},
|
|
{
|
|
"epoch": 4.247491638795987,
|
|
"grad_norm": 0.7372703580470882,
|
|
"learning_rate": 1.6080813598816355e-05,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35204175114631653,
|
|
"step": 2540,
|
|
"valid_targets_mean": 4623.1,
|
|
"valid_targets_min": 1208
|
|
},
|
|
{
|
|
"epoch": 4.2558528428093645,
|
|
"grad_norm": 0.9167357129141266,
|
|
"learning_rate": 1.599906708785262e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23422318696975708,
|
|
"step": 2545,
|
|
"valid_targets_mean": 2499.6,
|
|
"valid_targets_min": 1144
|
|
},
|
|
{
|
|
"epoch": 4.264214046822742,
|
|
"grad_norm": 0.8898102183876689,
|
|
"learning_rate": 1.5917390144830488e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.295245885848999,
|
|
"step": 2550,
|
|
"valid_targets_mean": 3242.8,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 4.2725752508361206,
|
|
"grad_norm": 0.9416423066048137,
|
|
"learning_rate": 1.583578418994294e-05,
|
|
"loss": 0.2993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23591409623622894,
|
|
"step": 2555,
|
|
"valid_targets_mean": 2772.4,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 4.280936454849498,
|
|
"grad_norm": 0.8396792312207702,
|
|
"learning_rate": 1.5754250642148592e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2691681385040283,
|
|
"step": 2560,
|
|
"valid_targets_mean": 3140.4,
|
|
"valid_targets_min": 1822
|
|
},
|
|
{
|
|
"epoch": 4.289297658862877,
|
|
"grad_norm": 0.7552376746174522,
|
|
"learning_rate": 1.5672790919147096e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30901944637298584,
|
|
"step": 2565,
|
|
"valid_targets_mean": 3934.4,
|
|
"valid_targets_min": 1525
|
|
},
|
|
{
|
|
"epoch": 4.297658862876254,
|
|
"grad_norm": 0.9282959640580136,
|
|
"learning_rate": 1.5591406437354394e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2631564736366272,
|
|
"step": 2570,
|
|
"valid_targets_mean": 3684.6,
|
|
"valid_targets_min": 603
|
|
},
|
|
{
|
|
"epoch": 4.306020066889632,
|
|
"grad_norm": 0.8408763401027246,
|
|
"learning_rate": 1.5510098611878177e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1855039745569229,
|
|
"step": 2575,
|
|
"valid_targets_mean": 2278.2,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 4.31438127090301,
|
|
"grad_norm": 0.9661559358244203,
|
|
"learning_rate": 1.542886885649322e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20213311910629272,
|
|
"step": 2580,
|
|
"valid_targets_mean": 2565.0,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 4.322742474916388,
|
|
"grad_norm": 0.8691049252102782,
|
|
"learning_rate": 1.534771858361683e-05,
|
|
"loss": 0.2705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2871206998825073,
|
|
"step": 2585,
|
|
"valid_targets_mean": 3261.2,
|
|
"valid_targets_min": 757
|
|
},
|
|
{
|
|
"epoch": 4.331103678929766,
|
|
"grad_norm": 0.8392260775398249,
|
|
"learning_rate": 1.5266649204284273e-05,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738720774650574,
|
|
"step": 2590,
|
|
"valid_targets_mean": 3433.5,
|
|
"valid_targets_min": 1852
|
|
},
|
|
{
|
|
"epoch": 4.339464882943144,
|
|
"grad_norm": 0.8840473177758481,
|
|
"learning_rate": 1.5185662128124254e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2708375155925751,
|
|
"step": 2595,
|
|
"valid_targets_mean": 3278.4,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 4.3478260869565215,
|
|
"grad_norm": 0.8804892196325519,
|
|
"learning_rate": 1.510475876333438e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23639748990535736,
|
|
"step": 2600,
|
|
"valid_targets_mean": 2654.6,
|
|
"valid_targets_min": 528
|
|
},
|
|
{
|
|
"epoch": 4.3561872909699,
|
|
"grad_norm": 0.8094688600381953,
|
|
"learning_rate": 1.5023940516656697e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887652516365051,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3643.9,
|
|
"valid_targets_min": 2109
|
|
},
|
|
{
|
|
"epoch": 4.364548494983278,
|
|
"grad_norm": 0.9244051200125512,
|
|
"learning_rate": 1.4943208793353235e-05,
|
|
"loss": 0.2653,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2914060056209564,
|
|
"step": 2610,
|
|
"valid_targets_mean": 3336.8,
|
|
"valid_targets_min": 550
|
|
},
|
|
{
|
|
"epoch": 4.372909698996655,
|
|
"grad_norm": 0.7461063611424056,
|
|
"learning_rate": 1.4862564997181528e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36067837476730347,
|
|
"step": 2615,
|
|
"valid_targets_mean": 5280.1,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 4.381270903010034,
|
|
"grad_norm": 0.9652367742763335,
|
|
"learning_rate": 1.4782010530370294e-05,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25113189220428467,
|
|
"step": 2620,
|
|
"valid_targets_mean": 2949.8,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 4.389632107023411,
|
|
"grad_norm": 0.8992134459024913,
|
|
"learning_rate": 1.470154679359495e-05,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24360308051109314,
|
|
"step": 2625,
|
|
"valid_targets_mean": 2608.5,
|
|
"valid_targets_min": 935
|
|
},
|
|
{
|
|
"epoch": 4.39799331103679,
|
|
"grad_norm": 0.8220794903907007,
|
|
"learning_rate": 1.4621175185953322e-05,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3220457136631012,
|
|
"step": 2630,
|
|
"valid_targets_mean": 4098.1,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 4.406354515050167,
|
|
"grad_norm": 0.8521683178016516,
|
|
"learning_rate": 1.4540897104941307e-05,
|
|
"loss": 0.2753,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3359491527080536,
|
|
"step": 2635,
|
|
"valid_targets_mean": 3451.1,
|
|
"valid_targets_min": 794
|
|
},
|
|
{
|
|
"epoch": 4.414715719063545,
|
|
"grad_norm": 0.8796923872304094,
|
|
"learning_rate": 1.4460713946428553e-05,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24371229112148285,
|
|
"step": 2640,
|
|
"valid_targets_mean": 2400.8,
|
|
"valid_targets_min": 1000
|
|
},
|
|
{
|
|
"epoch": 4.423076923076923,
|
|
"grad_norm": 1.0131860235742853,
|
|
"learning_rate": 1.4380627104634224e-05,
|
|
"loss": 0.2554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3007010817527771,
|
|
"step": 2645,
|
|
"valid_targets_mean": 2731.2,
|
|
"valid_targets_min": 1006
|
|
},
|
|
{
|
|
"epoch": 4.431438127090301,
|
|
"grad_norm": 0.7392458747612428,
|
|
"learning_rate": 1.4300637972102721e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3089587688446045,
|
|
"step": 2650,
|
|
"valid_targets_mean": 4641.4,
|
|
"valid_targets_min": 1954
|
|
},
|
|
{
|
|
"epoch": 4.4397993311036785,
|
|
"grad_norm": 0.8818880175089461,
|
|
"learning_rate": 1.4220747939679478e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2619357407093048,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3080.8,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 4.448160535117057,
|
|
"grad_norm": 0.9056508130411561,
|
|
"learning_rate": 1.414095839648679e-05,
|
|
"loss": 0.2827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25068965554237366,
|
|
"step": 2660,
|
|
"valid_targets_mean": 2617.4,
|
|
"valid_targets_min": 1481
|
|
},
|
|
{
|
|
"epoch": 4.456521739130435,
|
|
"grad_norm": 0.8935220051791342,
|
|
"learning_rate": 1.4061270729899663e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26286935806274414,
|
|
"step": 2665,
|
|
"valid_targets_mean": 2883.9,
|
|
"valid_targets_min": 928
|
|
},
|
|
{
|
|
"epoch": 4.464882943143813,
|
|
"grad_norm": 0.8961547416267401,
|
|
"learning_rate": 1.3981686325521647e-05,
|
|
"loss": 0.2768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29072946310043335,
|
|
"step": 2670,
|
|
"valid_targets_mean": 2993.4,
|
|
"valid_targets_min": 1894
|
|
},
|
|
{
|
|
"epoch": 4.473244147157191,
|
|
"grad_norm": 0.9599671002081342,
|
|
"learning_rate": 1.3902206567160827e-05,
|
|
"loss": 0.2455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28398674726486206,
|
|
"step": 2675,
|
|
"valid_targets_mean": 2923.8,
|
|
"valid_targets_min": 534
|
|
},
|
|
{
|
|
"epoch": 4.481605351170568,
|
|
"grad_norm": 0.8341406674593823,
|
|
"learning_rate": 1.3822832836805667e-05,
|
|
"loss": 0.3141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3429492115974426,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3562.8,
|
|
"valid_targets_min": 949
|
|
},
|
|
{
|
|
"epoch": 4.489966555183947,
|
|
"grad_norm": 0.8660387274121687,
|
|
"learning_rate": 1.3743566514601037e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28438010811805725,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3383.6,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 4.498327759197324,
|
|
"grad_norm": 0.8240982742485928,
|
|
"learning_rate": 1.3664408978824209e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728864550590515,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3949.8,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 4.506688963210703,
|
|
"grad_norm": 0.9047049810271199,
|
|
"learning_rate": 1.3585361605860863e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2605753540992737,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2817.6,
|
|
"valid_targets_min": 952
|
|
},
|
|
{
|
|
"epoch": 4.51505016722408,
|
|
"grad_norm": 0.8397078739499565,
|
|
"learning_rate": 1.3506425770181211e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24365462362766266,
|
|
"step": 2700,
|
|
"valid_targets_mean": 2901.8,
|
|
"valid_targets_min": 2041
|
|
},
|
|
{
|
|
"epoch": 4.523411371237458,
|
|
"grad_norm": 0.8582678085193816,
|
|
"learning_rate": 1.342760284431603e-05,
|
|
"loss": 0.3192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986323833465576,
|
|
"step": 2705,
|
|
"valid_targets_mean": 3100.2,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 4.531772575250836,
|
|
"grad_norm": 0.7889072695692275,
|
|
"learning_rate": 1.3348894198832845e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26328298449516296,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3108.0,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 4.540133779264214,
|
|
"grad_norm": 0.742736182328933,
|
|
"learning_rate": 1.3270301202312075e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37916791439056396,
|
|
"step": 2715,
|
|
"valid_targets_mean": 5153.8,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 4.548494983277592,
|
|
"grad_norm": 0.9919759799813364,
|
|
"learning_rate": 1.3191825221323246e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30372777581214905,
|
|
"step": 2720,
|
|
"valid_targets_mean": 2784.2,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 4.55685618729097,
|
|
"grad_norm": 0.794724468575154,
|
|
"learning_rate": 1.311346762040123e-05,
|
|
"loss": 0.276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651178538799286,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3715.1,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 4.565217391304348,
|
|
"grad_norm": 0.9806561211664588,
|
|
"learning_rate": 1.3035229762022513e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600407600402832,
|
|
"step": 2730,
|
|
"valid_targets_mean": 2778.1,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 4.573578595317725,
|
|
"grad_norm": 0.8400790069869569,
|
|
"learning_rate": 1.2957113006581494e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30648618936538696,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3071.6,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 4.581939799331104,
|
|
"grad_norm": 0.912124026132302,
|
|
"learning_rate": 1.2879118712366858e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3389705717563629,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3479.6,
|
|
"valid_targets_min": 1737
|
|
},
|
|
{
|
|
"epoch": 4.590301003344481,
|
|
"grad_norm": 0.7136031625832305,
|
|
"learning_rate": 1.280124823553794e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26324355602264404,
|
|
"step": 2745,
|
|
"valid_targets_mean": 4402.9,
|
|
"valid_targets_min": 1958
|
|
},
|
|
{
|
|
"epoch": 4.59866220735786,
|
|
"grad_norm": 0.8876231778745343,
|
|
"learning_rate": 1.2723502930101126e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3144656717777252,
|
|
"step": 2750,
|
|
"valid_targets_mean": 4292.6,
|
|
"valid_targets_min": 568
|
|
},
|
|
{
|
|
"epoch": 4.607023411371237,
|
|
"grad_norm": 0.9123329157042492,
|
|
"learning_rate": 1.2645884147886376e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22034093737602234,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3341.9,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 4.615384615384615,
|
|
"grad_norm": 0.8627497975800223,
|
|
"learning_rate": 1.2568393238523627e-05,
|
|
"loss": 0.2905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28283166885375977,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3059.2,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 4.6237458193979935,
|
|
"grad_norm": 0.8879285250035446,
|
|
"learning_rate": 1.2491031549419396e-05,
|
|
"loss": 0.2594,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28905028104782104,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3173.9,
|
|
"valid_targets_min": 1069
|
|
},
|
|
{
|
|
"epoch": 4.632107023411371,
|
|
"grad_norm": 0.7869539440700758,
|
|
"learning_rate": 1.2413800425733324e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23459485173225403,
|
|
"step": 2770,
|
|
"valid_targets_mean": 3455.7,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 4.6404682274247495,
|
|
"grad_norm": 0.874081153668911,
|
|
"learning_rate": 1.2336701210354774e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26058483123779297,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3799.4,
|
|
"valid_targets_min": 2140
|
|
},
|
|
{
|
|
"epoch": 4.648829431438127,
|
|
"grad_norm": 0.8505418138060274,
|
|
"learning_rate": 1.2259735243879533e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2382025420665741,
|
|
"step": 2780,
|
|
"valid_targets_mean": 2949.2,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 4.657190635451505,
|
|
"grad_norm": 0.8408719538127842,
|
|
"learning_rate": 1.2182903864586424e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3271663188934326,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3384.8,
|
|
"valid_targets_min": 1772
|
|
},
|
|
{
|
|
"epoch": 4.665551839464883,
|
|
"grad_norm": 0.7906487246754133,
|
|
"learning_rate": 1.2106208408414101e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23760370910167694,
|
|
"step": 2790,
|
|
"valid_targets_mean": 3003.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 4.673913043478261,
|
|
"grad_norm": 0.7560469434433705,
|
|
"learning_rate": 1.202965020893779e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3065562844276428,
|
|
"step": 2795,
|
|
"valid_targets_mean": 4305.3,
|
|
"valid_targets_min": 1436
|
|
},
|
|
{
|
|
"epoch": 4.682274247491639,
|
|
"grad_norm": 0.931404543547185,
|
|
"learning_rate": 1.1953230597346116e-05,
|
|
"loss": 0.2814,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30321380496025085,
|
|
"step": 2800,
|
|
"valid_targets_mean": 3184.8,
|
|
"valid_targets_min": 1215
|
|
},
|
|
{
|
|
"epoch": 4.690635451505017,
|
|
"grad_norm": 0.8785086713293649,
|
|
"learning_rate": 1.1876950902417921e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2660791277885437,
|
|
"step": 2805,
|
|
"valid_targets_mean": 2884.8,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 4.698996655518394,
|
|
"grad_norm": 0.8658447278693211,
|
|
"learning_rate": 1.1800812450499227e-05,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24090614914894104,
|
|
"step": 2810,
|
|
"valid_targets_mean": 2715.7,
|
|
"valid_targets_min": 764
|
|
},
|
|
{
|
|
"epoch": 4.707357859531773,
|
|
"grad_norm": 1.0374938153628415,
|
|
"learning_rate": 1.1724816565480092e-05,
|
|
"loss": 0.2597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20534390211105347,
|
|
"step": 2815,
|
|
"valid_targets_mean": 2147.6,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 4.7157190635451505,
|
|
"grad_norm": 0.7920774633218928,
|
|
"learning_rate": 1.1648964568771661e-05,
|
|
"loss": 0.2632,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18782053887844086,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3404.8,
|
|
"valid_targets_min": 2009
|
|
},
|
|
{
|
|
"epoch": 4.724080267558528,
|
|
"grad_norm": 0.8633417845835815,
|
|
"learning_rate": 1.157325777928314e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30495747923851013,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3513.8,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 4.7324414715719065,
|
|
"grad_norm": 0.9344722457587161,
|
|
"learning_rate": 1.149769751339889e-05,
|
|
"loss": 0.2654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2682046592235565,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3535.4,
|
|
"valid_targets_min": 1574
|
|
},
|
|
{
|
|
"epoch": 4.740802675585284,
|
|
"grad_norm": 0.9085942238263078,
|
|
"learning_rate": 1.142228508495553e-05,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27757948637008667,
|
|
"step": 2835,
|
|
"valid_targets_mean": 2801.4,
|
|
"valid_targets_min": 1637
|
|
},
|
|
{
|
|
"epoch": 4.749163879598662,
|
|
"grad_norm": 0.9016528116196614,
|
|
"learning_rate": 1.1347021805219092e-05,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24009665846824646,
|
|
"step": 2840,
|
|
"valid_targets_mean": 2991.9,
|
|
"valid_targets_min": 962
|
|
},
|
|
{
|
|
"epoch": 4.75752508361204,
|
|
"grad_norm": 0.8216538528179507,
|
|
"learning_rate": 1.1271908982862214e-05,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2618323862552643,
|
|
"step": 2845,
|
|
"valid_targets_mean": 4103.2,
|
|
"valid_targets_min": 1949
|
|
},
|
|
{
|
|
"epoch": 4.765886287625418,
|
|
"grad_norm": 1.0231544478271972,
|
|
"learning_rate": 1.11969479239414e-05,
|
|
"loss": 0.2577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30647069215774536,
|
|
"step": 2850,
|
|
"valid_targets_mean": 2558.7,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 4.774247491638796,
|
|
"grad_norm": 2.596885204828478,
|
|
"learning_rate": 1.1122139931874303e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3939549922943115,
|
|
"step": 2855,
|
|
"valid_targets_mean": 4341.0,
|
|
"valid_targets_min": 821
|
|
},
|
|
{
|
|
"epoch": 4.782608695652174,
|
|
"grad_norm": 0.9290067746793299,
|
|
"learning_rate": 1.104748630741705e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30313026905059814,
|
|
"step": 2860,
|
|
"valid_targets_mean": 2781.0,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 4.790969899665551,
|
|
"grad_norm": 1.7683165858974776,
|
|
"learning_rate": 1.0972988348641643e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22824634611606598,
|
|
"step": 2865,
|
|
"valid_targets_mean": 2794.6,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 4.79933110367893,
|
|
"grad_norm": 0.8612735757395678,
|
|
"learning_rate": 1.0898647350913376e-05,
|
|
"loss": 0.2637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24694600701332092,
|
|
"step": 2870,
|
|
"valid_targets_mean": 3144.4,
|
|
"valid_targets_min": 1359
|
|
},
|
|
{
|
|
"epoch": 4.8076923076923075,
|
|
"grad_norm": 0.883437344029108,
|
|
"learning_rate": 1.0824464606868323e-05,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24586498737335205,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3431.4,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 4.816053511705686,
|
|
"grad_norm": 1.0330995770490803,
|
|
"learning_rate": 1.0750441406390841e-05,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2311995029449463,
|
|
"step": 2880,
|
|
"valid_targets_mean": 2488.1,
|
|
"valid_targets_min": 1242
|
|
},
|
|
{
|
|
"epoch": 4.8244147157190636,
|
|
"grad_norm": 0.958681058026221,
|
|
"learning_rate": 1.0676579036591167e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2965131998062134,
|
|
"step": 2885,
|
|
"valid_targets_mean": 3851.2,
|
|
"valid_targets_min": 1182
|
|
},
|
|
{
|
|
"epoch": 4.832775919732441,
|
|
"grad_norm": 0.788119350116846,
|
|
"learning_rate": 1.0602878781783019e-05,
|
|
"loss": 0.2932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.328529953956604,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3779.6,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 4.84113712374582,
|
|
"grad_norm": 0.8696431488480126,
|
|
"learning_rate": 1.0529341923461272e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26618334650993347,
|
|
"step": 2895,
|
|
"valid_targets_mean": 3219.2,
|
|
"valid_targets_min": 1480
|
|
},
|
|
{
|
|
"epoch": 4.849498327759197,
|
|
"grad_norm": 0.9950302873697183,
|
|
"learning_rate": 1.0455969740279675e-05,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28443658351898193,
|
|
"step": 2900,
|
|
"valid_targets_mean": 2625.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 4.857859531772576,
|
|
"grad_norm": 0.9363541770349809,
|
|
"learning_rate": 1.0382763508028615e-05,
|
|
"loss": 0.256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23824138939380646,
|
|
"step": 2905,
|
|
"valid_targets_mean": 2914.6,
|
|
"valid_targets_min": 580
|
|
},
|
|
{
|
|
"epoch": 4.866220735785953,
|
|
"grad_norm": 0.9212980221737899,
|
|
"learning_rate": 1.0309724499612939e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30664288997650146,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3167.0,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 4.874581939799331,
|
|
"grad_norm": 0.9658354985211547,
|
|
"learning_rate": 1.0236853985029815e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2978072762489319,
|
|
"step": 2915,
|
|
"valid_targets_mean": 4677.4,
|
|
"valid_targets_min": 1257
|
|
},
|
|
{
|
|
"epoch": 4.882943143812709,
|
|
"grad_norm": 0.8858319243242195,
|
|
"learning_rate": 1.0164153231346656e-05,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24526861310005188,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3126.1,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 4.891304347826087,
|
|
"grad_norm": 1.0526204864364126,
|
|
"learning_rate": 1.0091623502679075e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3539399802684784,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3289.9,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 4.8996655518394645,
|
|
"grad_norm": 0.885379966379974,
|
|
"learning_rate": 1.0019266060168929e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30143141746520996,
|
|
"step": 2930,
|
|
"valid_targets_mean": 3651.1,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 4.908026755852843,
|
|
"grad_norm": 1.1625828671998966,
|
|
"learning_rate": 9.947082161962363e-06,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24516814947128296,
|
|
"step": 2935,
|
|
"valid_targets_mean": 2769.2,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 4.916387959866221,
|
|
"grad_norm": 0.8652972290357003,
|
|
"learning_rate": 9.875073063187947e-06,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26153671741485596,
|
|
"step": 2940,
|
|
"valid_targets_mean": 3214.2,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 4.924749163879599,
|
|
"grad_norm": 0.9078402224774373,
|
|
"learning_rate": 9.803240015934859e-06,
|
|
"loss": 0.3063,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2811887264251709,
|
|
"step": 2945,
|
|
"valid_targets_mean": 3040.2,
|
|
"valid_targets_min": 918
|
|
},
|
|
{
|
|
"epoch": 4.933110367892977,
|
|
"grad_norm": 0.855619293705552,
|
|
"learning_rate": 9.731584269231094e-06,
|
|
"loss": 0.2411,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24628743529319763,
|
|
"step": 2950,
|
|
"valid_targets_mean": 3259.8,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 4.941471571906354,
|
|
"grad_norm": 0.9213152242653578,
|
|
"learning_rate": 9.660107069021767e-06,
|
|
"loss": 0.2783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33864572644233704,
|
|
"step": 2955,
|
|
"valid_targets_mean": 3462.7,
|
|
"valid_targets_min": 527
|
|
},
|
|
{
|
|
"epoch": 4.949832775919733,
|
|
"grad_norm": 0.8302602925046146,
|
|
"learning_rate": 9.588809658147433e-06,
|
|
"loss": 0.2748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28112542629241943,
|
|
"step": 2960,
|
|
"valid_targets_mean": 3587.3,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 4.95819397993311,
|
|
"grad_norm": 0.9191160601073972,
|
|
"learning_rate": 9.517693276322488e-06,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26543906331062317,
|
|
"step": 2965,
|
|
"valid_targets_mean": 2759.7,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 4.966555183946488,
|
|
"grad_norm": 0.9353550508600107,
|
|
"learning_rate": 9.446759160113602e-06,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2775583863258362,
|
|
"step": 2970,
|
|
"valid_targets_mean": 2901.1,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 4.974916387959866,
|
|
"grad_norm": 0.9139877304453574,
|
|
"learning_rate": 9.376008542918227e-06,
|
|
"loss": 0.2281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19975979626178741,
|
|
"step": 2975,
|
|
"valid_targets_mean": 2663.2,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 4.983277591973244,
|
|
"grad_norm": 0.922883713258425,
|
|
"learning_rate": 9.305442654943145e-06,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2600483000278473,
|
|
"step": 2980,
|
|
"valid_targets_mean": 2795.2,
|
|
"valid_targets_min": 1070
|
|
},
|
|
{
|
|
"epoch": 4.991638795986622,
|
|
"grad_norm": 0.9322875420888264,
|
|
"learning_rate": 9.235062723183076e-06,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2565094232559204,
|
|
"step": 2985,
|
|
"valid_targets_mean": 2820.6,
|
|
"valid_targets_min": 1321
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.7643560224911901,
|
|
"learning_rate": 9.164869971399359e-06,
|
|
"loss": 0.2743,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3641391396522522,
|
|
"step": 2990,
|
|
"valid_targets_mean": 4868.6,
|
|
"valid_targets_min": 1329
|
|
},
|
|
{
|
|
"epoch": 5.008361204013378,
|
|
"grad_norm": 0.8481755315514735,
|
|
"learning_rate": 9.094865620098646e-06,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22899624705314636,
|
|
"step": 2995,
|
|
"valid_targets_mean": 3174.0,
|
|
"valid_targets_min": 1965
|
|
},
|
|
{
|
|
"epoch": 5.016722408026756,
|
|
"grad_norm": 0.8188076252143365,
|
|
"learning_rate": 9.025050886511702e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3067629039287567,
|
|
"step": 3000,
|
|
"valid_targets_mean": 4291.9,
|
|
"valid_targets_min": 1704
|
|
},
|
|
{
|
|
"epoch": 5.025083612040134,
|
|
"grad_norm": 0.8029644241788463,
|
|
"learning_rate": 8.955426984572228e-06,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3037480115890503,
|
|
"step": 3005,
|
|
"valid_targets_mean": 4106.2,
|
|
"valid_targets_min": 973
|
|
},
|
|
{
|
|
"epoch": 5.033444816053512,
|
|
"grad_norm": 0.8661738149420555,
|
|
"learning_rate": 8.885995124895768e-06,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2674838900566101,
|
|
"step": 3010,
|
|
"valid_targets_mean": 3359.9,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 5.04180602006689,
|
|
"grad_norm": 0.9620964614380081,
|
|
"learning_rate": 8.816756514758634e-06,
|
|
"loss": 0.2832,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2533591389656067,
|
|
"step": 3015,
|
|
"valid_targets_mean": 3118.5,
|
|
"valid_targets_min": 1356
|
|
},
|
|
{
|
|
"epoch": 5.050167224080267,
|
|
"grad_norm": 0.7541256752914219,
|
|
"learning_rate": 8.747712358076936e-06,
|
|
"loss": 0.2311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24237793684005737,
|
|
"step": 3020,
|
|
"valid_targets_mean": 4191.6,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 5.058528428093646,
|
|
"grad_norm": 0.9025605491692672,
|
|
"learning_rate": 8.678863855385646e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33202728629112244,
|
|
"step": 3025,
|
|
"valid_targets_mean": 4466.5,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 5.066889632107023,
|
|
"grad_norm": 0.8425388830294137,
|
|
"learning_rate": 8.61021220381771e-06,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3084254860877991,
|
|
"step": 3030,
|
|
"valid_targets_mean": 4306.9,
|
|
"valid_targets_min": 968
|
|
},
|
|
{
|
|
"epoch": 5.075250836120401,
|
|
"grad_norm": 0.8389860612401873,
|
|
"learning_rate": 8.54175859708324e-06,
|
|
"loss": 0.2322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22211232781410217,
|
|
"step": 3035,
|
|
"valid_targets_mean": 3588.2,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 5.083612040133779,
|
|
"grad_norm": 0.9514036120737082,
|
|
"learning_rate": 8.473504225448765e-06,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2409243881702423,
|
|
"step": 3040,
|
|
"valid_targets_mean": 2856.7,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 5.091973244147157,
|
|
"grad_norm": 0.9560659512366602,
|
|
"learning_rate": 8.405450275716525e-06,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601991891860962,
|
|
"step": 3045,
|
|
"valid_targets_mean": 3723.9,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 5.1003344481605355,
|
|
"grad_norm": 0.9864743563923629,
|
|
"learning_rate": 8.337597931203836e-06,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3188202381134033,
|
|
"step": 3050,
|
|
"valid_targets_mean": 3242.7,
|
|
"valid_targets_min": 1154
|
|
},
|
|
{
|
|
"epoch": 5.108695652173913,
|
|
"grad_norm": 1.1490912764067835,
|
|
"learning_rate": 8.269948371722518e-06,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21089932322502136,
|
|
"step": 3055,
|
|
"valid_targets_mean": 2516.8,
|
|
"valid_targets_min": 988
|
|
},
|
|
{
|
|
"epoch": 5.117056856187291,
|
|
"grad_norm": 0.8194749944721579,
|
|
"learning_rate": 8.20250277355838e-06,
|
|
"loss": 0.2787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3658130168914795,
|
|
"step": 3060,
|
|
"valid_targets_mean": 4739.9,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 5.125418060200669,
|
|
"grad_norm": 0.9728259830553914,
|
|
"learning_rate": 8.135262309450764e-06,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27498018741607666,
|
|
"step": 3065,
|
|
"valid_targets_mean": 3150.4,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 5.133779264214047,
|
|
"grad_norm": 1.0179078265962471,
|
|
"learning_rate": 8.068228148572157e-06,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19528654217720032,
|
|
"step": 3070,
|
|
"valid_targets_mean": 2590.3,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 5.142140468227424,
|
|
"grad_norm": 1.0414753424178975,
|
|
"learning_rate": 8.001401456507858e-06,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24913433194160461,
|
|
"step": 3075,
|
|
"valid_targets_mean": 2828.6,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 5.150501672240803,
|
|
"grad_norm": 0.9673371238498945,
|
|
"learning_rate": 7.934783395235716e-06,
|
|
"loss": 0.253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2619754672050476,
|
|
"step": 3080,
|
|
"valid_targets_mean": 3273.0,
|
|
"valid_targets_min": 1547
|
|
},
|
|
{
|
|
"epoch": 5.15886287625418,
|
|
"grad_norm": 1.0777145808692163,
|
|
"learning_rate": 7.868375123105921e-06,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.241988867521286,
|
|
"step": 3085,
|
|
"valid_targets_mean": 2760.0,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 5.167224080267559,
|
|
"grad_norm": 0.9357681236144693,
|
|
"learning_rate": 7.802177794820857e-06,
|
|
"loss": 0.2851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4071310758590698,
|
|
"step": 3090,
|
|
"valid_targets_mean": 3665.1,
|
|
"valid_targets_min": 638
|
|
},
|
|
{
|
|
"epoch": 5.1755852842809364,
|
|
"grad_norm": 0.8810724848053256,
|
|
"learning_rate": 7.736192561415045e-06,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3404349386692047,
|
|
"step": 3095,
|
|
"valid_targets_mean": 3941.1,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 5.183946488294314,
|
|
"grad_norm": 0.9345756552103464,
|
|
"learning_rate": 7.670420570235113e-06,
|
|
"loss": 0.2718,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20666499435901642,
|
|
"step": 3100,
|
|
"valid_targets_mean": 2741.7,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 5.1923076923076925,
|
|
"grad_norm": 0.89831218286986,
|
|
"learning_rate": 7.604862964919819e-06,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26516643166542053,
|
|
"step": 3105,
|
|
"valid_targets_mean": 3194.3,
|
|
"valid_targets_min": 1893
|
|
},
|
|
{
|
|
"epoch": 5.20066889632107,
|
|
"grad_norm": 1.0235151109065963,
|
|
"learning_rate": 7.539520885380242e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23447315394878387,
|
|
"step": 3110,
|
|
"valid_targets_mean": 2732.0,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 5.209030100334449,
|
|
"grad_norm": 1.0527575176171846,
|
|
"learning_rate": 7.474395467779885e-06,
|
|
"loss": 0.2386,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24746760725975037,
|
|
"step": 3115,
|
|
"valid_targets_mean": 3079.3,
|
|
"valid_targets_min": 1647
|
|
},
|
|
{
|
|
"epoch": 5.217391304347826,
|
|
"grad_norm": 0.8412479852328654,
|
|
"learning_rate": 7.409487844514946e-06,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2139432430267334,
|
|
"step": 3120,
|
|
"valid_targets_mean": 3529.4,
|
|
"valid_targets_min": 2262
|
|
},
|
|
{
|
|
"epoch": 5.225752508361204,
|
|
"grad_norm": 0.8859243166629239,
|
|
"learning_rate": 7.344799144194647e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30506086349487305,
|
|
"step": 3125,
|
|
"valid_targets_mean": 4511.6,
|
|
"valid_targets_min": 1709
|
|
},
|
|
{
|
|
"epoch": 5.234113712374582,
|
|
"grad_norm": 0.928318267325103,
|
|
"learning_rate": 7.280330491621579e-06,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25192520022392273,
|
|
"step": 3130,
|
|
"valid_targets_mean": 3102.3,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 5.24247491638796,
|
|
"grad_norm": 0.8514180338596942,
|
|
"learning_rate": 7.2160830077721655e-06,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42115187644958496,
|
|
"step": 3135,
|
|
"valid_targets_mean": 4393.4,
|
|
"valid_targets_min": 2044
|
|
},
|
|
{
|
|
"epoch": 5.250836120401337,
|
|
"grad_norm": 0.8701754054847802,
|
|
"learning_rate": 7.15205780977716e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23187309503555298,
|
|
"step": 3140,
|
|
"valid_targets_mean": 3508.4,
|
|
"valid_targets_min": 1806
|
|
},
|
|
{
|
|
"epoch": 5.259197324414716,
|
|
"grad_norm": 1.0414461577784981,
|
|
"learning_rate": 7.0882560109022255e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2434636354446411,
|
|
"step": 3145,
|
|
"valid_targets_mean": 2689.4,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 5.2675585284280935,
|
|
"grad_norm": 0.9992819578511724,
|
|
"learning_rate": 7.02467872052858e-06,
|
|
"loss": 0.2108,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20966418087482452,
|
|
"step": 3150,
|
|
"valid_targets_mean": 3109.2,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 5.275919732441472,
|
|
"grad_norm": 0.9478720751528437,
|
|
"learning_rate": 6.9613270441337075e-06,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22975382208824158,
|
|
"step": 3155,
|
|
"valid_targets_mean": 3196.7,
|
|
"valid_targets_min": 1774
|
|
},
|
|
{
|
|
"epoch": 5.2842809364548495,
|
|
"grad_norm": 0.8672829908504149,
|
|
"learning_rate": 6.8982020832721054e-06,
|
|
"loss": 0.2426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22695282101631165,
|
|
"step": 3160,
|
|
"valid_targets_mean": 3513.6,
|
|
"valid_targets_min": 723
|
|
},
|
|
{
|
|
"epoch": 5.292642140468227,
|
|
"grad_norm": 1.022405240131088,
|
|
"learning_rate": 6.835304935556198e-06,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2423592507839203,
|
|
"step": 3165,
|
|
"valid_targets_mean": 3192.9,
|
|
"valid_targets_min": 1355
|
|
},
|
|
{
|
|
"epoch": 5.301003344481606,
|
|
"grad_norm": 1.081711172053293,
|
|
"learning_rate": 6.772636694637183e-06,
|
|
"loss": 0.2441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25607234239578247,
|
|
"step": 3170,
|
|
"valid_targets_mean": 2742.1,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 5.309364548494983,
|
|
"grad_norm": 0.959669265454484,
|
|
"learning_rate": 6.710198450186047e-06,
|
|
"loss": 0.2657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2445649951696396,
|
|
"step": 3175,
|
|
"valid_targets_mean": 2531.1,
|
|
"valid_targets_min": 741
|
|
},
|
|
{
|
|
"epoch": 5.317725752508361,
|
|
"grad_norm": 1.0556452222632704,
|
|
"learning_rate": 6.6479912878746225e-06,
|
|
"loss": 0.2698,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32292306423187256,
|
|
"step": 3180,
|
|
"valid_targets_mean": 3753.7,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 5.326086956521739,
|
|
"grad_norm": 1.110335544316918,
|
|
"learning_rate": 6.586016289356692e-06,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20516835153102875,
|
|
"step": 3185,
|
|
"valid_targets_mean": 2823.1,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 5.334448160535117,
|
|
"grad_norm": 0.9443496373098136,
|
|
"learning_rate": 6.524274532249195e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2695116698741913,
|
|
"step": 3190,
|
|
"valid_targets_mean": 2830.7,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 5.342809364548495,
|
|
"grad_norm": 1.2883459095067469,
|
|
"learning_rate": 6.462767090113486e-06,
|
|
"loss": 0.2514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21128371357917786,
|
|
"step": 3195,
|
|
"valid_targets_mean": 2801.6,
|
|
"valid_targets_min": 1972
|
|
},
|
|
{
|
|
"epoch": 5.351170568561873,
|
|
"grad_norm": 0.7305670443838644,
|
|
"learning_rate": 6.401495032436667e-06,
|
|
"loss": 0.2613,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34377604722976685,
|
|
"step": 3200,
|
|
"valid_targets_mean": 5868.9,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 5.3595317725752505,
|
|
"grad_norm": 0.9618080510519665,
|
|
"learning_rate": 6.34045942461299e-06,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20367145538330078,
|
|
"step": 3205,
|
|
"valid_targets_mean": 2934.6,
|
|
"valid_targets_min": 1274
|
|
},
|
|
{
|
|
"epoch": 5.367892976588629,
|
|
"grad_norm": 0.9183980494977025,
|
|
"learning_rate": 6.279661327925333e-06,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24812433123588562,
|
|
"step": 3210,
|
|
"valid_targets_mean": 3023.1,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 5.3762541806020065,
|
|
"grad_norm": 0.9596558209841023,
|
|
"learning_rate": 6.219101799526753e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33065247535705566,
|
|
"step": 3215,
|
|
"valid_targets_mean": 4098.1,
|
|
"valid_targets_min": 1177
|
|
},
|
|
{
|
|
"epoch": 5.384615384615385,
|
|
"grad_norm": 1.065383634447373,
|
|
"learning_rate": 6.158781892422085e-06,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2475639134645462,
|
|
"step": 3220,
|
|
"valid_targets_mean": 2772.1,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 5.392976588628763,
|
|
"grad_norm": 1.044424208460433,
|
|
"learning_rate": 6.098702655449664e-06,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2278965413570404,
|
|
"step": 3225,
|
|
"valid_targets_mean": 2502.4,
|
|
"valid_targets_min": 721
|
|
},
|
|
{
|
|
"epoch": 5.40133779264214,
|
|
"grad_norm": 0.9665704851257386,
|
|
"learning_rate": 6.038865133263054e-06,
|
|
"loss": 0.2448,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31926289200782776,
|
|
"step": 3230,
|
|
"valid_targets_mean": 3628.5,
|
|
"valid_targets_min": 1750
|
|
},
|
|
{
|
|
"epoch": 5.409698996655519,
|
|
"grad_norm": 0.964435362243508,
|
|
"learning_rate": 5.9792703663129125e-06,
|
|
"loss": 0.2289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21948401629924774,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2733.2,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 5.418060200668896,
|
|
"grad_norm": 0.9794403625903487,
|
|
"learning_rate": 5.919919390828859e-06,
|
|
"loss": 0.2556,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22015729546546936,
|
|
"step": 3240,
|
|
"valid_targets_mean": 2922.4,
|
|
"valid_targets_min": 1575
|
|
},
|
|
{
|
|
"epoch": 5.426421404682274,
|
|
"grad_norm": 0.9982047586124657,
|
|
"learning_rate": 5.860813238801523e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21148087084293365,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3052.9,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 5.434782608695652,
|
|
"grad_norm": 0.8716785695051501,
|
|
"learning_rate": 5.801952937964537e-06,
|
|
"loss": 0.2537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23328334093093872,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3625.8,
|
|
"valid_targets_min": 648
|
|
},
|
|
{
|
|
"epoch": 5.44314381270903,
|
|
"grad_norm": 1.0317002727776252,
|
|
"learning_rate": 5.743339511776693e-06,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26123955845832825,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3316.4,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 5.451505016722408,
|
|
"grad_norm": 0.8933485431020612,
|
|
"learning_rate": 5.684973979404144e-06,
|
|
"loss": 0.2154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23536235094070435,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3289.2,
|
|
"valid_targets_min": 1411
|
|
},
|
|
{
|
|
"epoch": 5.459866220735786,
|
|
"grad_norm": 0.9330309774110559,
|
|
"learning_rate": 5.6268573557026865e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25734925270080566,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3473.9,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 5.468227424749164,
|
|
"grad_norm": 1.0119464981075554,
|
|
"learning_rate": 5.568990651200108e-06,
|
|
"loss": 0.2646,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2644900977611542,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3406.8,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 5.476588628762542,
|
|
"grad_norm": 1.0468454697016878,
|
|
"learning_rate": 5.511374872078616e-06,
|
|
"loss": 0.2538,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29436013102531433,
|
|
"step": 3275,
|
|
"valid_targets_mean": 3085.1,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 5.48494983277592,
|
|
"grad_norm": 0.9588401461605252,
|
|
"learning_rate": 5.454011020157348e-06,
|
|
"loss": 0.2243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21803942322731018,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3367.6,
|
|
"valid_targets_min": 1776
|
|
},
|
|
{
|
|
"epoch": 5.493311036789297,
|
|
"grad_norm": 0.8920342971378608,
|
|
"learning_rate": 5.396900092874953e-06,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20749205350875854,
|
|
"step": 3285,
|
|
"valid_targets_mean": 3192.9,
|
|
"valid_targets_min": 1668
|
|
},
|
|
{
|
|
"epoch": 5.501672240802676,
|
|
"grad_norm": 0.8025673440389024,
|
|
"learning_rate": 5.340043083272239e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25130224227905273,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3616.1,
|
|
"valid_targets_min": 1456
|
|
},
|
|
{
|
|
"epoch": 5.510033444816053,
|
|
"grad_norm": 1.0290460488107447,
|
|
"learning_rate": 5.283440979974901e-06,
|
|
"loss": 0.2353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255595326423645,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3070.9,
|
|
"valid_targets_min": 858
|
|
},
|
|
{
|
|
"epoch": 5.518394648829432,
|
|
"grad_norm": 0.9036243917362351,
|
|
"learning_rate": 5.227094767176364e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2398861050605774,
|
|
"step": 3300,
|
|
"valid_targets_mean": 3360.8,
|
|
"valid_targets_min": 1448
|
|
},
|
|
{
|
|
"epoch": 5.526755852842809,
|
|
"grad_norm": 0.9707296599929639,
|
|
"learning_rate": 5.17100542462063e-06,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24867475032806396,
|
|
"step": 3305,
|
|
"valid_targets_mean": 3409.8,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 5.535117056856187,
|
|
"grad_norm": 1.0059190414101475,
|
|
"learning_rate": 5.115173927585264e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20245328545570374,
|
|
"step": 3310,
|
|
"valid_targets_mean": 2579.6,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 5.543478260869565,
|
|
"grad_norm": 1.150440106449695,
|
|
"learning_rate": 5.059601246864438e-06,
|
|
"loss": 0.2338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2849067747592926,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3054.2,
|
|
"valid_targets_min": 1511
|
|
},
|
|
{
|
|
"epoch": 5.551839464882943,
|
|
"grad_norm": 1.082246481751666,
|
|
"learning_rate": 5.004288348752018e-06,
|
|
"loss": 0.2211,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24570798873901367,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3292.1,
|
|
"valid_targets_min": 1374
|
|
},
|
|
{
|
|
"epoch": 5.5602006688963215,
|
|
"grad_norm": 0.9677466519631982,
|
|
"learning_rate": 4.949236195024825e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21175232529640198,
|
|
"step": 3325,
|
|
"valid_targets_mean": 2859.6,
|
|
"valid_targets_min": 452
|
|
},
|
|
{
|
|
"epoch": 5.568561872909699,
|
|
"grad_norm": 0.9670337251548189,
|
|
"learning_rate": 4.894445742925853e-06,
|
|
"loss": 0.2623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23612819612026215,
|
|
"step": 3330,
|
|
"valid_targets_mean": 2778.9,
|
|
"valid_targets_min": 479
|
|
},
|
|
{
|
|
"epoch": 5.576923076923077,
|
|
"grad_norm": 0.9399114769292228,
|
|
"learning_rate": 4.839917945147647e-06,
|
|
"loss": 0.2502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24552688002586365,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2997.2,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 5.585284280936455,
|
|
"grad_norm": 0.8414887814395099,
|
|
"learning_rate": 4.785653749815744e-06,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29480239748954773,
|
|
"step": 3340,
|
|
"valid_targets_mean": 4310.2,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 5.593645484949833,
|
|
"grad_norm": 0.7931418317297482,
|
|
"learning_rate": 4.731654100472178e-06,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3128657341003418,
|
|
"step": 3345,
|
|
"valid_targets_mean": 4607.0,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 5.602006688963211,
|
|
"grad_norm": 0.8840163267997618,
|
|
"learning_rate": 4.677919936059064e-06,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24446120858192444,
|
|
"step": 3350,
|
|
"valid_targets_mean": 3667.1,
|
|
"valid_targets_min": 1631
|
|
},
|
|
{
|
|
"epoch": 5.610367892976589,
|
|
"grad_norm": 0.9191791952149919,
|
|
"learning_rate": 4.624452190902304e-06,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2651020884513855,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3420.4,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 5.618729096989966,
|
|
"grad_norm": 0.9355892323378777,
|
|
"learning_rate": 4.571251794695308e-06,
|
|
"loss": 0.2397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29256555438041687,
|
|
"step": 3360,
|
|
"valid_targets_mean": 4212.4,
|
|
"valid_targets_min": 1120
|
|
},
|
|
{
|
|
"epoch": 5.627090301003345,
|
|
"grad_norm": 1.0090759936821445,
|
|
"learning_rate": 4.518319672482845e-06,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2667285203933716,
|
|
"step": 3365,
|
|
"valid_targets_mean": 2737.7,
|
|
"valid_targets_min": 729
|
|
},
|
|
{
|
|
"epoch": 5.635451505016722,
|
|
"grad_norm": 1.0171835292538327,
|
|
"learning_rate": 4.465656744644957e-06,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30735355615615845,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3446.8,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 5.6438127090301,
|
|
"grad_norm": 0.9819290282194414,
|
|
"learning_rate": 4.413263926880935e-06,
|
|
"loss": 0.2608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20418116450309753,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3223.5,
|
|
"valid_targets_min": 726
|
|
},
|
|
{
|
|
"epoch": 5.6521739130434785,
|
|
"grad_norm": 0.9512457595996446,
|
|
"learning_rate": 4.3611421301934435e-06,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2649402320384979,
|
|
"step": 3380,
|
|
"valid_targets_mean": 2930.5,
|
|
"valid_targets_min": 1139
|
|
},
|
|
{
|
|
"epoch": 5.660535117056856,
|
|
"grad_norm": 0.9658417094141116,
|
|
"learning_rate": 4.309292260872633e-06,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21503464877605438,
|
|
"step": 3385,
|
|
"valid_targets_mean": 2954.3,
|
|
"valid_targets_min": 1684
|
|
},
|
|
{
|
|
"epoch": 5.668896321070234,
|
|
"grad_norm": 1.0062110489738594,
|
|
"learning_rate": 4.257715220480405e-06,
|
|
"loss": 0.2721,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2543056011199951,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3383.0,
|
|
"valid_targets_min": 1705
|
|
},
|
|
{
|
|
"epoch": 5.677257525083612,
|
|
"grad_norm": 1.0054767456103721,
|
|
"learning_rate": 4.206411905834733e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3248503506183624,
|
|
"step": 3395,
|
|
"valid_targets_mean": 3018.8,
|
|
"valid_targets_min": 399
|
|
},
|
|
{
|
|
"epoch": 5.68561872909699,
|
|
"grad_norm": 1.085787300697935,
|
|
"learning_rate": 4.155383208994055e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24291780591011047,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3004.7,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 5.693979933110368,
|
|
"grad_norm": 1.0345601696467057,
|
|
"learning_rate": 4.10463001724178e-06,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24932274222373962,
|
|
"step": 3405,
|
|
"valid_targets_mean": 2949.5,
|
|
"valid_targets_min": 1622
|
|
},
|
|
{
|
|
"epoch": 5.702341137123746,
|
|
"grad_norm": 0.9908894862017844,
|
|
"learning_rate": 4.054153213070868e-06,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2516738772392273,
|
|
"step": 3410,
|
|
"valid_targets_mean": 3543.2,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 5.710702341137123,
|
|
"grad_norm": 1.0855149671023934,
|
|
"learning_rate": 4.003953674168455e-06,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2057594358921051,
|
|
"step": 3415,
|
|
"valid_targets_mean": 3033.4,
|
|
"valid_targets_min": 1026
|
|
},
|
|
{
|
|
"epoch": 5.719063545150502,
|
|
"grad_norm": 0.9852747879835021,
|
|
"learning_rate": 3.954032273400608e-06,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.266365110874176,
|
|
"step": 3420,
|
|
"valid_targets_mean": 3131.4,
|
|
"valid_targets_min": 1591
|
|
},
|
|
{
|
|
"epoch": 5.7274247491638794,
|
|
"grad_norm": 0.8893474256351308,
|
|
"learning_rate": 3.904389878797159e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2710364758968353,
|
|
"step": 3425,
|
|
"valid_targets_mean": 3534.0,
|
|
"valid_targets_min": 1096
|
|
},
|
|
{
|
|
"epoch": 5.735785953177258,
|
|
"grad_norm": 0.8715257939471994,
|
|
"learning_rate": 3.85502735353658e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2547582983970642,
|
|
"step": 3430,
|
|
"valid_targets_mean": 3161.2,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 5.7441471571906355,
|
|
"grad_norm": 0.9676813409941313,
|
|
"learning_rate": 3.8059455559310167e-06,
|
|
"loss": 0.2423,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2646714746952057,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3319.5,
|
|
"valid_targets_min": 1710
|
|
},
|
|
{
|
|
"epoch": 5.752508361204013,
|
|
"grad_norm": 0.8470578522309716,
|
|
"learning_rate": 3.757145339411332e-06,
|
|
"loss": 0.229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22734609246253967,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4302.4,
|
|
"valid_targets_min": 667
|
|
},
|
|
{
|
|
"epoch": 5.760869565217392,
|
|
"grad_norm": 0.853636864130374,
|
|
"learning_rate": 3.708627552512276e-06,
|
|
"loss": 0.2301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20801186561584473,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3614.8,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 5.769230769230769,
|
|
"grad_norm": 0.8787913423829194,
|
|
"learning_rate": 3.660393038857739e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22825166583061218,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3887.0,
|
|
"valid_targets_min": 1180
|
|
},
|
|
{
|
|
"epoch": 5.777591973244148,
|
|
"grad_norm": 1.1205253969747466,
|
|
"learning_rate": 3.6124426371460542e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.272126704454422,
|
|
"step": 3455,
|
|
"valid_targets_mean": 2778.0,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 5.785953177257525,
|
|
"grad_norm": 1.0498337049709496,
|
|
"learning_rate": 3.564777181135466e-06,
|
|
"loss": 0.2762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2422400861978531,
|
|
"step": 3460,
|
|
"valid_targets_mean": 2545.4,
|
|
"valid_targets_min": 640
|
|
},
|
|
{
|
|
"epoch": 5.794314381270903,
|
|
"grad_norm": 1.1993568136310828,
|
|
"learning_rate": 3.517397499629589e-06,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2053757905960083,
|
|
"step": 3465,
|
|
"valid_targets_mean": 3302.1,
|
|
"valid_targets_min": 1127
|
|
},
|
|
{
|
|
"epoch": 5.802675585284281,
|
|
"grad_norm": 0.843797327600791,
|
|
"learning_rate": 3.4703044164630064e-06,
|
|
"loss": 0.1986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2291642725467682,
|
|
"step": 3470,
|
|
"valid_targets_mean": 4161.9,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 5.811036789297659,
|
|
"grad_norm": 0.8755458954346165,
|
|
"learning_rate": 3.4234987504869553e-06,
|
|
"loss": 0.2483,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3180461823940277,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4406.1,
|
|
"valid_targets_min": 2243
|
|
},
|
|
{
|
|
"epoch": 5.8193979933110365,
|
|
"grad_norm": 1.0301442813101,
|
|
"learning_rate": 3.376981315555086e-06,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27760040760040283,
|
|
"step": 3480,
|
|
"valid_targets_mean": 2683.6,
|
|
"valid_targets_min": 591
|
|
},
|
|
{
|
|
"epoch": 5.827759197324415,
|
|
"grad_norm": 0.9573217341539905,
|
|
"learning_rate": 3.3307529205092903e-06,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.368574321269989,
|
|
"step": 3485,
|
|
"valid_targets_mean": 4355.6,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 5.8361204013377925,
|
|
"grad_norm": 1.029121606843474,
|
|
"learning_rate": 3.2848143691656807e-06,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23345233500003815,
|
|
"step": 3490,
|
|
"valid_targets_mean": 2955.6,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 5.84448160535117,
|
|
"grad_norm": 1.0526837100245612,
|
|
"learning_rate": 3.239166460300571e-06,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23312462866306305,
|
|
"step": 3495,
|
|
"valid_targets_mean": 2799.9,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 5.852842809364549,
|
|
"grad_norm": 0.9555127758551365,
|
|
"learning_rate": 3.1938099876366047e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2574623227119446,
|
|
"step": 3500,
|
|
"valid_targets_mean": 2801.6,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 5.861204013377926,
|
|
"grad_norm": 1.0434914071493264,
|
|
"learning_rate": 3.1487457398289645e-06,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2559327781200409,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3459.7,
|
|
"valid_targets_min": 788
|
|
},
|
|
{
|
|
"epoch": 5.869565217391305,
|
|
"grad_norm": 1.012926148789505,
|
|
"learning_rate": 3.1039745004516207e-06,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2865094840526581,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3056.8,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 5.877926421404682,
|
|
"grad_norm": 0.9712574615035141,
|
|
"learning_rate": 3.0594970479837683e-06,
|
|
"loss": 0.2177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21085739135742188,
|
|
"step": 3515,
|
|
"valid_targets_mean": 2824.9,
|
|
"valid_targets_min": 599
|
|
},
|
|
{
|
|
"epoch": 5.88628762541806,
|
|
"grad_norm": 1.0473259954733953,
|
|
"learning_rate": 3.015314155796234e-06,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2044457346200943,
|
|
"step": 3520,
|
|
"valid_targets_mean": 2760.9,
|
|
"valid_targets_min": 715
|
|
},
|
|
{
|
|
"epoch": 5.894648829431438,
|
|
"grad_norm": 0.9843115195011657,
|
|
"learning_rate": 2.9714265921380557e-06,
|
|
"loss": 0.2371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20010721683502197,
|
|
"step": 3525,
|
|
"valid_targets_mean": 2771.8,
|
|
"valid_targets_min": 963
|
|
},
|
|
{
|
|
"epoch": 5.903010033444816,
|
|
"grad_norm": 0.8585558006225893,
|
|
"learning_rate": 2.927835120123128e-06,
|
|
"loss": 0.2512,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3017348051071167,
|
|
"step": 3530,
|
|
"valid_targets_mean": 3708.9,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 5.911371237458194,
|
|
"grad_norm": 1.0365669573233212,
|
|
"learning_rate": 2.8845404977169057e-06,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24910643696784973,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3256.1,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 5.919732441471572,
|
|
"grad_norm": 1.1977783758414982,
|
|
"learning_rate": 2.841543477723254e-06,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3304036557674408,
|
|
"step": 3540,
|
|
"valid_targets_mean": 4009.1,
|
|
"valid_targets_min": 1801
|
|
},
|
|
{
|
|
"epoch": 5.9280936454849495,
|
|
"grad_norm": 1.0475636986470853,
|
|
"learning_rate": 2.7988448077713592e-06,
|
|
"loss": 0.2254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23120662569999695,
|
|
"step": 3545,
|
|
"valid_targets_mean": 2633.4,
|
|
"valid_targets_min": 1628
|
|
},
|
|
{
|
|
"epoch": 5.936454849498328,
|
|
"grad_norm": 0.8986613684835274,
|
|
"learning_rate": 2.7564452303027024e-06,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25718823075294495,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3136.1,
|
|
"valid_targets_min": 1648
|
|
},
|
|
{
|
|
"epoch": 5.944816053511706,
|
|
"grad_norm": 0.8828527508561764,
|
|
"learning_rate": 2.7143454825581714e-06,
|
|
"loss": 0.2043,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.189050555229187,
|
|
"step": 3555,
|
|
"valid_targets_mean": 3121.1,
|
|
"valid_targets_min": 1683
|
|
},
|
|
{
|
|
"epoch": 5.953177257525084,
|
|
"grad_norm": 1.117960588110681,
|
|
"learning_rate": 2.672546296565237e-06,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24358290433883667,
|
|
"step": 3560,
|
|
"valid_targets_mean": 3175.4,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 5.961538461538462,
|
|
"grad_norm": 1.0074867797130804,
|
|
"learning_rate": 2.6310483991252133e-06,
|
|
"loss": 0.2442,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2374524474143982,
|
|
"step": 3565,
|
|
"valid_targets_mean": 2834.6,
|
|
"valid_targets_min": 1201
|
|
},
|
|
{
|
|
"epoch": 5.969899665551839,
|
|
"grad_norm": 1.0690795488474825,
|
|
"learning_rate": 2.589852511800646e-06,
|
|
"loss": 0.2652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17189118266105652,
|
|
"step": 3570,
|
|
"valid_targets_mean": 2602.5,
|
|
"valid_targets_min": 1331
|
|
},
|
|
{
|
|
"epoch": 5.978260869565218,
|
|
"grad_norm": 0.9971267052649938,
|
|
"learning_rate": 2.54895935090274e-06,
|
|
"loss": 0.2497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21394936740398407,
|
|
"step": 3575,
|
|
"valid_targets_mean": 2751.8,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 5.986622073578595,
|
|
"grad_norm": 0.9537557291572413,
|
|
"learning_rate": 2.508369627478917e-06,
|
|
"loss": 0.2331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22841092944145203,
|
|
"step": 3580,
|
|
"valid_targets_mean": 3169.6,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 5.994983277591973,
|
|
"grad_norm": 0.9343814378914798,
|
|
"learning_rate": 2.468084047300452e-06,
|
|
"loss": 0.2185,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2370833456516266,
|
|
"step": 3585,
|
|
"valid_targets_mean": 3215.1,
|
|
"valid_targets_min": 998
|
|
},
|
|
{
|
|
"epoch": 6.003344481605351,
|
|
"grad_norm": 0.8238052983578956,
|
|
"learning_rate": 2.4281033108501873e-06,
|
|
"loss": 0.2314,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2642517685890198,
|
|
"step": 3590,
|
|
"valid_targets_mean": 4032.8,
|
|
"valid_targets_min": 1390
|
|
},
|
|
{
|
|
"epoch": 6.011705685618729,
|
|
"grad_norm": 0.8851781452831197,
|
|
"learning_rate": 2.3884281133103725e-06,
|
|
"loss": 0.2232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2074108123779297,
|
|
"step": 3595,
|
|
"valid_targets_mean": 3117.4,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 6.0200668896321075,
|
|
"grad_norm": 1.0191065552656804,
|
|
"learning_rate": 2.3490591445505715e-06,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20292872190475464,
|
|
"step": 3600,
|
|
"valid_targets_mean": 2607.1,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 6.028428093645485,
|
|
"grad_norm": 0.8274990269440808,
|
|
"learning_rate": 2.309997089115659e-06,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27467474341392517,
|
|
"step": 3605,
|
|
"valid_targets_mean": 4511.8,
|
|
"valid_targets_min": 1261
|
|
},
|
|
{
|
|
"epoch": 6.036789297658863,
|
|
"grad_norm": 0.9829693732867127,
|
|
"learning_rate": 2.271242626213925e-06,
|
|
"loss": 0.2406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2838166654109955,
|
|
"step": 3610,
|
|
"valid_targets_mean": 3051.6,
|
|
"valid_targets_min": 848
|
|
},
|
|
{
|
|
"epoch": 6.045150501672241,
|
|
"grad_norm": 0.9430424383463232,
|
|
"learning_rate": 2.232796429705253e-06,
|
|
"loss": 0.2751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2801671028137207,
|
|
"step": 3615,
|
|
"valid_targets_mean": 3661.6,
|
|
"valid_targets_min": 783
|
|
},
|
|
{
|
|
"epoch": 6.053511705685619,
|
|
"grad_norm": 0.9799256756829944,
|
|
"learning_rate": 2.1946591680894145e-06,
|
|
"loss": 0.2252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21253493428230286,
|
|
"step": 3620,
|
|
"valid_targets_mean": 3080.5,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 6.061872909698996,
|
|
"grad_norm": 0.890449059990666,
|
|
"learning_rate": 2.1568315044944586e-06,
|
|
"loss": 0.2279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23232722282409668,
|
|
"step": 3625,
|
|
"valid_targets_mean": 3467.1,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 6.070234113712375,
|
|
"grad_norm": 1.553508951062491,
|
|
"learning_rate": 2.1193140966651484e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2310488224029541,
|
|
"step": 3630,
|
|
"valid_targets_mean": 3239.2,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 6.078595317725752,
|
|
"grad_norm": 1.0015740592300846,
|
|
"learning_rate": 2.082107596951548e-06,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2560243308544159,
|
|
"step": 3635,
|
|
"valid_targets_mean": 3611.4,
|
|
"valid_targets_min": 1662
|
|
},
|
|
{
|
|
"epoch": 6.086956521739131,
|
|
"grad_norm": 0.9610763495680839,
|
|
"learning_rate": 2.0452126522976746e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23907433450222015,
|
|
"step": 3640,
|
|
"valid_targets_mean": 3443.9,
|
|
"valid_targets_min": 971
|
|
},
|
|
{
|
|
"epoch": 6.095317725752508,
|
|
"grad_norm": 1.0195087469479902,
|
|
"learning_rate": 2.008629904230237e-06,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21630674600601196,
|
|
"step": 3645,
|
|
"valid_targets_mean": 3096.0,
|
|
"valid_targets_min": 1263
|
|
},
|
|
{
|
|
"epoch": 6.103678929765886,
|
|
"grad_norm": 1.1781421008430892,
|
|
"learning_rate": 1.972359988847499e-06,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22949744760990143,
|
|
"step": 3650,
|
|
"valid_targets_mean": 2490.5,
|
|
"valid_targets_min": 1214
|
|
},
|
|
{
|
|
"epoch": 6.1120401337792645,
|
|
"grad_norm": 1.0009259653669014,
|
|
"learning_rate": 1.9364035368082222e-06,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17091065645217896,
|
|
"step": 3655,
|
|
"valid_targets_mean": 2323.1,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 6.120401337792642,
|
|
"grad_norm": 0.9973471406872797,
|
|
"learning_rate": 1.9007611733206733e-06,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29807907342910767,
|
|
"step": 3660,
|
|
"valid_targets_mean": 3280.1,
|
|
"valid_targets_min": 919
|
|
},
|
|
{
|
|
"epoch": 6.12876254180602,
|
|
"grad_norm": 0.9458454127635654,
|
|
"learning_rate": 1.8654335181317784e-06,
|
|
"loss": 0.2427,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26122528314590454,
|
|
"step": 3665,
|
|
"valid_targets_mean": 3286.4,
|
|
"valid_targets_min": 1471
|
|
},
|
|
{
|
|
"epoch": 6.137123745819398,
|
|
"grad_norm": 0.9302638864213695,
|
|
"learning_rate": 1.8304211855163311e-06,
|
|
"loss": 0.2551,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30081790685653687,
|
|
"step": 3670,
|
|
"valid_targets_mean": 3793.2,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 6.145484949832776,
|
|
"grad_norm": 0.9293216572645322,
|
|
"learning_rate": 1.7957247842663194e-06,
|
|
"loss": 0.2067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2717227041721344,
|
|
"step": 3675,
|
|
"valid_targets_mean": 3530.5,
|
|
"valid_targets_min": 1072
|
|
},
|
|
{
|
|
"epoch": 6.153846153846154,
|
|
"grad_norm": 1.0642259780797343,
|
|
"learning_rate": 1.7613449176803476e-06,
|
|
"loss": 0.191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18526840209960938,
|
|
"step": 3680,
|
|
"valid_targets_mean": 3227.3,
|
|
"valid_targets_min": 1819
|
|
},
|
|
{
|
|
"epoch": 6.162207357859532,
|
|
"grad_norm": 1.0950639668119169,
|
|
"learning_rate": 1.7272821835531295e-06,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23934021592140198,
|
|
"step": 3685,
|
|
"valid_targets_mean": 2936.0,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 6.170568561872909,
|
|
"grad_norm": 0.8320781108074271,
|
|
"learning_rate": 1.693537174165103e-06,
|
|
"loss": 0.2569,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2670717239379883,
|
|
"step": 3690,
|
|
"valid_targets_mean": 4768.5,
|
|
"valid_targets_min": 754
|
|
},
|
|
{
|
|
"epoch": 6.178929765886288,
|
|
"grad_norm": 1.0190310935542606,
|
|
"learning_rate": 1.660110476272132e-06,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1878700852394104,
|
|
"step": 3695,
|
|
"valid_targets_mean": 2822.3,
|
|
"valid_targets_min": 1513
|
|
},
|
|
{
|
|
"epoch": 6.187290969899665,
|
|
"grad_norm": 0.946671940766795,
|
|
"learning_rate": 1.6270026710952924e-06,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18372738361358643,
|
|
"step": 3700,
|
|
"valid_targets_mean": 3202.5,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 6.195652173913044,
|
|
"grad_norm": 1.0311761236131074,
|
|
"learning_rate": 1.5942143343107953e-06,
|
|
"loss": 0.2366,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1737433671951294,
|
|
"step": 3705,
|
|
"valid_targets_mean": 2330.4,
|
|
"valid_targets_min": 983
|
|
},
|
|
{
|
|
"epoch": 6.2040133779264215,
|
|
"grad_norm": 1.7693463573078112,
|
|
"learning_rate": 1.5617460360399439e-06,
|
|
"loss": 0.2312,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24771828949451447,
|
|
"step": 3710,
|
|
"valid_targets_mean": 2783.2,
|
|
"valid_targets_min": 674
|
|
},
|
|
{
|
|
"epoch": 6.212374581939799,
|
|
"grad_norm": 1.1913402227691237,
|
|
"learning_rate": 1.529598340839238e-06,
|
|
"loss": 0.2247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20294560492038727,
|
|
"step": 3715,
|
|
"valid_targets_mean": 3321.5,
|
|
"valid_targets_min": 1623
|
|
},
|
|
{
|
|
"epoch": 6.2207357859531776,
|
|
"grad_norm": 1.1205061236211051,
|
|
"learning_rate": 1.4977718076905533e-06,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20099762082099915,
|
|
"step": 3720,
|
|
"valid_targets_mean": 2455.0,
|
|
"valid_targets_min": 682
|
|
},
|
|
{
|
|
"epoch": 6.229096989966555,
|
|
"grad_norm": 0.872701348205632,
|
|
"learning_rate": 1.4662669899914161e-06,
|
|
"loss": 0.2251,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.240260511636734,
|
|
"step": 3725,
|
|
"valid_targets_mean": 4170.6,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 6.237458193979933,
|
|
"grad_norm": 1.149500218715332,
|
|
"learning_rate": 1.4350844355453952e-06,
|
|
"loss": 0.2167,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24577274918556213,
|
|
"step": 3730,
|
|
"valid_targets_mean": 2776.9,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 6.245819397993311,
|
|
"grad_norm": 0.9744589071128174,
|
|
"learning_rate": 1.404224686552571e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2977054715156555,
|
|
"step": 3735,
|
|
"valid_targets_mean": 3161.8,
|
|
"valid_targets_min": 1420
|
|
},
|
|
{
|
|
"epoch": 6.254180602006689,
|
|
"grad_norm": 1.0622602777455052,
|
|
"learning_rate": 1.3736882796000983e-06,
|
|
"loss": 0.2217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2422657608985901,
|
|
"step": 3740,
|
|
"valid_targets_mean": 3276.9,
|
|
"valid_targets_min": 2083
|
|
},
|
|
{
|
|
"epoch": 6.262541806020067,
|
|
"grad_norm": 0.9192780596090956,
|
|
"learning_rate": 1.3434757456528868e-06,
|
|
"loss": 0.2165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23593655228614807,
|
|
"step": 3745,
|
|
"valid_targets_mean": 3154.8,
|
|
"valid_targets_min": 1058
|
|
},
|
|
{
|
|
"epoch": 6.270903010033445,
|
|
"grad_norm": 1.0794767225936577,
|
|
"learning_rate": 1.3135876100443557e-06,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19183969497680664,
|
|
"step": 3750,
|
|
"valid_targets_mean": 2641.8,
|
|
"valid_targets_min": 1122
|
|
},
|
|
{
|
|
"epoch": 6.2792642140468224,
|
|
"grad_norm": 1.1069267787089752,
|
|
"learning_rate": 1.2840243924673202e-06,
|
|
"loss": 0.2327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19526013731956482,
|
|
"step": 3755,
|
|
"valid_targets_mean": 2651.9,
|
|
"valid_targets_min": 1307
|
|
},
|
|
{
|
|
"epoch": 6.287625418060201,
|
|
"grad_norm": 0.9530346118359216,
|
|
"learning_rate": 1.2547866069649418e-06,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2443237155675888,
|
|
"step": 3760,
|
|
"valid_targets_mean": 2864.8,
|
|
"valid_targets_min": 1587
|
|
},
|
|
{
|
|
"epoch": 6.2959866220735785,
|
|
"grad_norm": 0.9997194895255284,
|
|
"learning_rate": 1.225874761921788e-06,
|
|
"loss": 0.2238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22411714494228363,
|
|
"step": 3765,
|
|
"valid_targets_mean": 2974.7,
|
|
"valid_targets_min": 1770
|
|
},
|
|
{
|
|
"epoch": 6.304347826086957,
|
|
"grad_norm": 0.9257546461579786,
|
|
"learning_rate": 1.1972893600550007e-06,
|
|
"loss": 0.271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20396903157234192,
|
|
"step": 3770,
|
|
"valid_targets_mean": 3238.4,
|
|
"valid_targets_min": 696
|
|
},
|
|
{
|
|
"epoch": 6.312709030100335,
|
|
"grad_norm": 0.9526741297223604,
|
|
"learning_rate": 1.1690308984055454e-06,
|
|
"loss": 0.2326,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31394025683403015,
|
|
"step": 3775,
|
|
"valid_targets_mean": 3941.2,
|
|
"valid_targets_min": 1904
|
|
},
|
|
{
|
|
"epoch": 6.321070234113712,
|
|
"grad_norm": 1.0127893431246604,
|
|
"learning_rate": 1.141099868329576e-06,
|
|
"loss": 0.2697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2128710150718689,
|
|
"step": 3780,
|
|
"valid_targets_mean": 3173.2,
|
|
"valid_targets_min": 2167
|
|
},
|
|
{
|
|
"epoch": 6.329431438127091,
|
|
"grad_norm": 0.8562910539058333,
|
|
"learning_rate": 1.1134967554898868e-06,
|
|
"loss": 0.2487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3077230155467987,
|
|
"step": 3785,
|
|
"valid_targets_mean": 3871.0,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 6.337792642140468,
|
|
"grad_norm": 1.0570481175477418,
|
|
"learning_rate": 1.0862220398474798e-06,
|
|
"loss": 0.202,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24611452221870422,
|
|
"step": 3790,
|
|
"valid_targets_mean": 2720.8,
|
|
"valid_targets_min": 1018
|
|
},
|
|
{
|
|
"epoch": 6.346153846153846,
|
|
"grad_norm": 0.9974643589859099,
|
|
"learning_rate": 1.0592761956531983e-06,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15949031710624695,
|
|
"step": 3795,
|
|
"valid_targets_mean": 2890.8,
|
|
"valid_targets_min": 693
|
|
},
|
|
{
|
|
"epoch": 6.354515050167224,
|
|
"grad_norm": 1.0285315175075678,
|
|
"learning_rate": 1.0326596914395015e-06,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.218702495098114,
|
|
"step": 3800,
|
|
"valid_targets_mean": 2983.0,
|
|
"valid_targets_min": 1307
|
|
},
|
|
{
|
|
"epoch": 6.362876254180602,
|
|
"grad_norm": 1.0391314709729795,
|
|
"learning_rate": 1.0063729900122943e-06,
|
|
"loss": 0.2349,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22132647037506104,
|
|
"step": 3805,
|
|
"valid_targets_mean": 3014.7,
|
|
"valid_targets_min": 1600
|
|
},
|
|
{
|
|
"epoch": 6.3712374581939795,
|
|
"grad_norm": 1.0732291866174521,
|
|
"learning_rate": 9.80416548442904e-07,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16350066661834717,
|
|
"step": 3810,
|
|
"valid_targets_mean": 2471.2,
|
|
"valid_targets_min": 1252
|
|
},
|
|
{
|
|
"epoch": 6.379598662207358,
|
|
"grad_norm": 0.9478207894233418,
|
|
"learning_rate": 9.547908180601274e-07,
|
|
"loss": 0.218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2005515992641449,
|
|
"step": 3815,
|
|
"valid_targets_mean": 3266.4,
|
|
"valid_targets_min": 1219
|
|
},
|
|
{
|
|
"epoch": 6.3879598662207355,
|
|
"grad_norm": 0.9884148175012485,
|
|
"learning_rate": 9.294962444423672e-07,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18229839205741882,
|
|
"step": 3820,
|
|
"valid_targets_mean": 2692.9,
|
|
"valid_targets_min": 1731
|
|
},
|
|
{
|
|
"epoch": 6.396321070234114,
|
|
"grad_norm": 1.0689420786608823,
|
|
"learning_rate": 9.045332674099039e-07,
|
|
"loss": 0.2429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2374996840953827,
|
|
"step": 3825,
|
|
"valid_targets_mean": 3234.9,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 6.404682274247492,
|
|
"grad_norm": 0.9999053556423455,
|
|
"learning_rate": 8.799023210172319e-07,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577441334724426,
|
|
"step": 3830,
|
|
"valid_targets_mean": 3541.5,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 6.413043478260869,
|
|
"grad_norm": 1.0162923003110396,
|
|
"learning_rate": 8.556038335455241e-07,
|
|
"loss": 0.2354,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17862260341644287,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3000.1,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 6.421404682274248,
|
|
"grad_norm": 0.9436887696227507,
|
|
"learning_rate": 8.316382274951773e-07,
|
|
"loss": 0.2596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3100453317165375,
|
|
"step": 3840,
|
|
"valid_targets_mean": 4021.4,
|
|
"valid_targets_min": 556
|
|
},
|
|
{
|
|
"epoch": 6.429765886287625,
|
|
"grad_norm": 0.9165404614671304,
|
|
"learning_rate": 8.080059195784829e-07,
|
|
"loss": 0.2715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29584699869155884,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3990.2,
|
|
"valid_targets_min": 1509
|
|
},
|
|
{
|
|
"epoch": 6.438127090301004,
|
|
"grad_norm": 1.1633047424989935,
|
|
"learning_rate": 7.847073207123523e-07,
|
|
"loss": 0.2805,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2624181807041168,
|
|
"step": 3850,
|
|
"valid_targets_mean": 2811.7,
|
|
"valid_targets_min": 1472
|
|
},
|
|
{
|
|
"epoch": 6.446488294314381,
|
|
"grad_norm": 0.9983233707225826,
|
|
"learning_rate": 7.617428360111945e-07,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.281194269657135,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3405.9,
|
|
"valid_targets_min": 738
|
|
},
|
|
{
|
|
"epoch": 6.454849498327759,
|
|
"grad_norm": 1.1246524097930575,
|
|
"learning_rate": 7.391128647798607e-07,
|
|
"loss": 0.2389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22077451646327972,
|
|
"step": 3860,
|
|
"valid_targets_mean": 2505.6,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 6.463210702341137,
|
|
"grad_norm": 0.886456015080147,
|
|
"learning_rate": 7.168178005067062e-07,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29799652099609375,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3983.9,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 6.471571906354515,
|
|
"grad_norm": 1.103332362483573,
|
|
"learning_rate": 6.948580308567532e-07,
|
|
"loss": 0.2222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571655511856079,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2698.6,
|
|
"valid_targets_min": 710
|
|
},
|
|
{
|
|
"epoch": 6.479933110367893,
|
|
"grad_norm": 1.0813420856710951,
|
|
"learning_rate": 6.732339376649388e-07,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22849063575267792,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2950.5,
|
|
"valid_targets_min": 1156
|
|
},
|
|
{
|
|
"epoch": 6.488294314381271,
|
|
"grad_norm": 1.0707482467762934,
|
|
"learning_rate": 6.519458969294845e-07,
|
|
"loss": 0.2237,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18700650334358215,
|
|
"step": 3880,
|
|
"valid_targets_mean": 2505.4,
|
|
"valid_targets_min": 1004
|
|
},
|
|
{
|
|
"epoch": 6.496655518394649,
|
|
"grad_norm": 0.9136997187616676,
|
|
"learning_rate": 6.309942788053502e-07,
|
|
"loss": 0.2166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18983229994773865,
|
|
"step": 3885,
|
|
"valid_targets_mean": 2847.4,
|
|
"valid_targets_min": 1011
|
|
},
|
|
{
|
|
"epoch": 6.505016722408027,
|
|
"grad_norm": 0.9836787164297042,
|
|
"learning_rate": 6.103794475978086e-07,
|
|
"loss": 0.2476,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27802348136901855,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3571.6,
|
|
"valid_targets_min": 1766
|
|
},
|
|
{
|
|
"epoch": 6.513377926421405,
|
|
"grad_norm": 1.0040587188078438,
|
|
"learning_rate": 5.901017617560989e-07,
|
|
"loss": 0.2256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2178075611591339,
|
|
"step": 3895,
|
|
"valid_targets_mean": 3485.3,
|
|
"valid_targets_min": 1748
|
|
},
|
|
{
|
|
"epoch": 6.521739130434782,
|
|
"grad_norm": 0.9972334739609068,
|
|
"learning_rate": 5.701615738672073e-07,
|
|
"loss": 0.2391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2519034743309021,
|
|
"step": 3900,
|
|
"valid_targets_mean": 3683.1,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 6.530100334448161,
|
|
"grad_norm": 1.0048200677550907,
|
|
"learning_rate": 5.505592306497298e-07,
|
|
"loss": 0.2199,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2037794291973114,
|
|
"step": 3905,
|
|
"valid_targets_mean": 2815.6,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 6.538461538461538,
|
|
"grad_norm": 0.9738058489579007,
|
|
"learning_rate": 5.312950729478327e-07,
|
|
"loss": 0.2571,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1917770355939865,
|
|
"step": 3910,
|
|
"valid_targets_mean": 2968.3,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 6.546822742474916,
|
|
"grad_norm": 0.9699331976466884,
|
|
"learning_rate": 5.123694357253439e-07,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19296962022781372,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3587.3,
|
|
"valid_targets_min": 1059
|
|
},
|
|
{
|
|
"epoch": 6.555183946488294,
|
|
"grad_norm": 0.9210291981550992,
|
|
"learning_rate": 4.937826480599195e-07,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25981834530830383,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3616.3,
|
|
"valid_targets_min": 772
|
|
},
|
|
{
|
|
"epoch": 6.563545150501672,
|
|
"grad_norm": 0.9639041354225487,
|
|
"learning_rate": 4.755350331373243e-07,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22324317693710327,
|
|
"step": 3925,
|
|
"valid_targets_mean": 3426.1,
|
|
"valid_targets_min": 1477
|
|
},
|
|
{
|
|
"epoch": 6.5719063545150505,
|
|
"grad_norm": 1.0060449285500996,
|
|
"learning_rate": 4.576269082458118e-07,
|
|
"loss": 0.2155,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18587031960487366,
|
|
"step": 3930,
|
|
"valid_targets_mean": 2988.8,
|
|
"valid_targets_min": 1536
|
|
},
|
|
{
|
|
"epoch": 6.580267558528428,
|
|
"grad_norm": 1.125813362538308,
|
|
"learning_rate": 4.4005858477060404e-07,
|
|
"loss": 0.2445,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21866926550865173,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3366.3,
|
|
"valid_targets_min": 1060
|
|
},
|
|
{
|
|
"epoch": 6.588628762541806,
|
|
"grad_norm": 1.0628904813820763,
|
|
"learning_rate": 4.228303681884782e-07,
|
|
"loss": 0.2324,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25543057918548584,
|
|
"step": 3940,
|
|
"valid_targets_mean": 3191.6,
|
|
"valid_targets_min": 1800
|
|
},
|
|
{
|
|
"epoch": 6.596989966555184,
|
|
"grad_norm": 0.9915725733061522,
|
|
"learning_rate": 4.059425580624576e-07,
|
|
"loss": 0.2374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28621232509613037,
|
|
"step": 3945,
|
|
"valid_targets_mean": 3603.1,
|
|
"valid_targets_min": 1548
|
|
},
|
|
{
|
|
"epoch": 6.605351170568562,
|
|
"grad_norm": 0.9949146897868569,
|
|
"learning_rate": 3.893954480366091e-07,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1640351116657257,
|
|
"step": 3950,
|
|
"valid_targets_mean": 2460.5,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 6.61371237458194,
|
|
"grad_norm": 1.111214711835855,
|
|
"learning_rate": 3.731893258309227e-07,
|
|
"loss": 0.2271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22030051052570343,
|
|
"step": 3955,
|
|
"valid_targets_mean": 2509.4,
|
|
"valid_targets_min": 553
|
|
},
|
|
{
|
|
"epoch": 6.622073578595318,
|
|
"grad_norm": 1.1676078726615438,
|
|
"learning_rate": 3.573244732363179e-07,
|
|
"loss": 0.2437,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.243607297539711,
|
|
"step": 3960,
|
|
"valid_targets_mean": 3134.6,
|
|
"valid_targets_min": 689
|
|
},
|
|
{
|
|
"epoch": 6.630434782608695,
|
|
"grad_norm": 0.9596071900127525,
|
|
"learning_rate": 3.4180116610973645e-07,
|
|
"loss": 0.2194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.280606210231781,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3487.9,
|
|
"valid_targets_min": 1539
|
|
},
|
|
{
|
|
"epoch": 6.638795986622074,
|
|
"grad_norm": 1.0307311782355484,
|
|
"learning_rate": 3.2661967436936394e-07,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2129300832748413,
|
|
"step": 3970,
|
|
"valid_targets_mean": 3150.9,
|
|
"valid_targets_min": 364
|
|
},
|
|
{
|
|
"epoch": 6.647157190635451,
|
|
"grad_norm": 0.9029900853777432,
|
|
"learning_rate": 3.117802619899113e-07,
|
|
"loss": 0.2266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2022523730993271,
|
|
"step": 3975,
|
|
"valid_targets_mean": 3131.2,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 6.65551839464883,
|
|
"grad_norm": 0.9353250337198755,
|
|
"learning_rate": 2.9728318699804525e-07,
|
|
"loss": 0.2183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26376834511756897,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3456.8,
|
|
"valid_targets_min": 1636
|
|
},
|
|
{
|
|
"epoch": 6.6638795986622075,
|
|
"grad_norm": 1.1365720683226703,
|
|
"learning_rate": 2.831287014678941e-07,
|
|
"loss": 0.2141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19698631763458252,
|
|
"step": 3985,
|
|
"valid_targets_mean": 2634.6,
|
|
"valid_targets_min": 569
|
|
},
|
|
{
|
|
"epoch": 6.672240802675585,
|
|
"grad_norm": 0.9037444665225245,
|
|
"learning_rate": 2.693170515166599e-07,
|
|
"loss": 0.2162,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17331139743328094,
|
|
"step": 3990,
|
|
"valid_targets_mean": 3423.4,
|
|
"valid_targets_min": 2070
|
|
},
|
|
{
|
|
"epoch": 6.6806020066889635,
|
|
"grad_norm": 1.012894148288379,
|
|
"learning_rate": 2.558484773003445e-07,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2655693292617798,
|
|
"step": 3995,
|
|
"valid_targets_mean": 3323.4,
|
|
"valid_targets_min": 1844
|
|
},
|
|
{
|
|
"epoch": 6.688963210702341,
|
|
"grad_norm": 0.9548388318374232,
|
|
"learning_rate": 2.427232130095747e-07,
|
|
"loss": 0.2373,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21322210133075714,
|
|
"step": 4000,
|
|
"valid_targets_mean": 2801.6,
|
|
"valid_targets_min": 1264
|
|
},
|
|
{
|
|
"epoch": 6.697324414715719,
|
|
"grad_norm": 1.0832658905494663,
|
|
"learning_rate": 2.299414868655281e-07,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2207055389881134,
|
|
"step": 4005,
|
|
"valid_targets_mean": 2772.9,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 6.705685618729097,
|
|
"grad_norm": 0.9005094142183284,
|
|
"learning_rate": 2.1750352111596707e-07,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24203582108020782,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3876.8,
|
|
"valid_targets_min": 2273
|
|
},
|
|
{
|
|
"epoch": 6.714046822742475,
|
|
"grad_norm": 0.9978940251893587,
|
|
"learning_rate": 2.0540953203137093e-07,
|
|
"loss": 0.225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1879318505525589,
|
|
"step": 4015,
|
|
"valid_targets_mean": 3497.7,
|
|
"valid_targets_min": 1234
|
|
},
|
|
{
|
|
"epoch": 6.722408026755852,
|
|
"grad_norm": 0.9909059278250834,
|
|
"learning_rate": 1.9365972990117e-07,
|
|
"loss": 0.2323,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1991642862558365,
|
|
"step": 4020,
|
|
"valid_targets_mean": 2930.1,
|
|
"valid_targets_min": 1353
|
|
},
|
|
{
|
|
"epoch": 6.730769230769231,
|
|
"grad_norm": 0.9925673396527694,
|
|
"learning_rate": 1.8225431903010403e-07,
|
|
"loss": 0.2172,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23669245839118958,
|
|
"step": 4025,
|
|
"valid_targets_mean": 2976.0,
|
|
"valid_targets_min": 978
|
|
},
|
|
{
|
|
"epoch": 6.739130434782608,
|
|
"grad_norm": 0.97958659307702,
|
|
"learning_rate": 1.7119349773466076e-07,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18920472264289856,
|
|
"step": 4030,
|
|
"valid_targets_mean": 2922.8,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 6.747491638795987,
|
|
"grad_norm": 0.9895938981944002,
|
|
"learning_rate": 1.6047745833962735e-07,
|
|
"loss": 0.2638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4012299180030823,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4044.7,
|
|
"valid_targets_min": 750
|
|
},
|
|
{
|
|
"epoch": 6.7558528428093645,
|
|
"grad_norm": 1.005865202853038,
|
|
"learning_rate": 1.5010638717474878e-07,
|
|
"loss": 0.2231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2346489280462265,
|
|
"step": 4040,
|
|
"valid_targets_mean": 3234.9,
|
|
"valid_targets_min": 1001
|
|
},
|
|
{
|
|
"epoch": 6.764214046822742,
|
|
"grad_norm": 0.9665878862524135,
|
|
"learning_rate": 1.400804645714815e-07,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20860449969768524,
|
|
"step": 4045,
|
|
"valid_targets_mean": 2963.2,
|
|
"valid_targets_min": 1512
|
|
},
|
|
{
|
|
"epoch": 6.7725752508361206,
|
|
"grad_norm": 0.9073442199596257,
|
|
"learning_rate": 1.30399864859867e-07,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21425005793571472,
|
|
"step": 4050,
|
|
"valid_targets_mean": 4054.4,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 6.780936454849498,
|
|
"grad_norm": 1.1395884258364646,
|
|
"learning_rate": 1.2106475636549654e-07,
|
|
"loss": 0.2503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2343166172504425,
|
|
"step": 4055,
|
|
"valid_targets_mean": 2424.9,
|
|
"valid_targets_min": 511
|
|
},
|
|
{
|
|
"epoch": 6.789297658862877,
|
|
"grad_norm": 1.2355832602043204,
|
|
"learning_rate": 1.1207530140658452e-07,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1832336038351059,
|
|
"step": 4060,
|
|
"valid_targets_mean": 2785.1,
|
|
"valid_targets_min": 1439
|
|
},
|
|
{
|
|
"epoch": 6.797658862876254,
|
|
"grad_norm": 1.0756162811102452,
|
|
"learning_rate": 1.0343165629114416e-07,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2562386989593506,
|
|
"step": 4065,
|
|
"valid_targets_mean": 2926.1,
|
|
"valid_targets_min": 1110
|
|
},
|
|
{
|
|
"epoch": 6.806020066889632,
|
|
"grad_norm": 1.12479348737203,
|
|
"learning_rate": 9.513397131427404e-08,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19680562615394592,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3050.8,
|
|
"valid_targets_min": 1505
|
|
},
|
|
{
|
|
"epoch": 6.81438127090301,
|
|
"grad_norm": 1.3027817427113138,
|
|
"learning_rate": 8.71823907555358e-08,
|
|
"loss": 0.2794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2448652684688568,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3234.3,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 6.822742474916388,
|
|
"grad_norm": 1.0120756120555998,
|
|
"learning_rate": 7.957705287645834e-08,
|
|
"loss": 0.2318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.236515074968338,
|
|
"step": 4080,
|
|
"valid_targets_mean": 3107.5,
|
|
"valid_targets_min": 1469
|
|
},
|
|
{
|
|
"epoch": 6.831103678929766,
|
|
"grad_norm": 1.1643671752495577,
|
|
"learning_rate": 7.231808991812639e-08,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24128901958465576,
|
|
"step": 4085,
|
|
"valid_targets_mean": 2651.4,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 6.839464882943144,
|
|
"grad_norm": 1.0224383394242054,
|
|
"learning_rate": 6.540562809887574e-08,
|
|
"loss": 0.2128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2086195945739746,
|
|
"step": 4090,
|
|
"valid_targets_mean": 2881.9,
|
|
"valid_targets_min": 594
|
|
},
|
|
{
|
|
"epoch": 6.8478260869565215,
|
|
"grad_norm": 0.8984874112503329,
|
|
"learning_rate": 5.8839787612114955e-08,
|
|
"loss": 0.2302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19981077313423157,
|
|
"step": 4095,
|
|
"valid_targets_mean": 3368.8,
|
|
"valid_targets_min": 1532
|
|
},
|
|
{
|
|
"epoch": 6.8561872909699,
|
|
"grad_norm": 1.1889036672332234,
|
|
"learning_rate": 5.2620682624213714e-08,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3057703673839569,
|
|
"step": 4100,
|
|
"valid_targets_mean": 2838.0,
|
|
"valid_targets_min": 520
|
|
},
|
|
{
|
|
"epoch": 6.864548494983278,
|
|
"grad_norm": 0.9722791686736607,
|
|
"learning_rate": 4.6748421272537756e-08,
|
|
"loss": 0.2356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3012353777885437,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3410.2,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 6.872909698996655,
|
|
"grad_norm": 0.8439698339063494,
|
|
"learning_rate": 4.1223105663554806e-08,
|
|
"loss": 0.2274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22458210587501526,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4013.0,
|
|
"valid_targets_min": 967
|
|
},
|
|
{
|
|
"epoch": 6.881270903010034,
|
|
"grad_norm": 0.9937076155434867,
|
|
"learning_rate": 3.604483187106711e-08,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21647757291793823,
|
|
"step": 4115,
|
|
"valid_targets_mean": 2891.7,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 6.889632107023411,
|
|
"grad_norm": 0.9554683145250049,
|
|
"learning_rate": 3.1213689934537215e-08,
|
|
"loss": 0.2428,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.227905735373497,
|
|
"step": 4120,
|
|
"valid_targets_mean": 3056.9,
|
|
"valid_targets_min": 1091
|
|
},
|
|
{
|
|
"epoch": 6.897993311036789,
|
|
"grad_norm": 0.9851841831055532,
|
|
"learning_rate": 2.6729763857522573e-08,
|
|
"loss": 0.2184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20012471079826355,
|
|
"step": 4125,
|
|
"valid_targets_mean": 2812.2,
|
|
"valid_targets_min": 959
|
|
},
|
|
{
|
|
"epoch": 6.906354515050167,
|
|
"grad_norm": 0.9102139042294596,
|
|
"learning_rate": 2.2593131606216677e-08,
|
|
"loss": 0.2404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22855457663536072,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3411.4,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 6.914715719063545,
|
|
"grad_norm": 0.9968709409763933,
|
|
"learning_rate": 1.880386510809018e-08,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24055655300617218,
|
|
"step": 4135,
|
|
"valid_targets_mean": 2944.4,
|
|
"valid_targets_min": 1447
|
|
},
|
|
{
|
|
"epoch": 6.923076923076923,
|
|
"grad_norm": 1.0089101681026145,
|
|
"learning_rate": 1.536203025064742e-08,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23049476742744446,
|
|
"step": 4140,
|
|
"valid_targets_mean": 3192.5,
|
|
"valid_targets_min": 1745
|
|
},
|
|
{
|
|
"epoch": 6.931438127090301,
|
|
"grad_norm": 1.0054169631158152,
|
|
"learning_rate": 1.226768688026736e-08,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21784156560897827,
|
|
"step": 4145,
|
|
"valid_targets_mean": 2982.2,
|
|
"valid_targets_min": 663
|
|
},
|
|
{
|
|
"epoch": 6.9397993311036785,
|
|
"grad_norm": 1.0500654956007207,
|
|
"learning_rate": 9.520888801182182e-09,
|
|
"loss": 0.2275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21303492784500122,
|
|
"step": 4150,
|
|
"valid_targets_mean": 2794.8,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 6.948160535117057,
|
|
"grad_norm": 1.054419064540089,
|
|
"learning_rate": 7.121683774518051e-09,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17308101058006287,
|
|
"step": 4155,
|
|
"valid_targets_mean": 2646.4,
|
|
"valid_targets_min": 1295
|
|
},
|
|
{
|
|
"epoch": 6.956521739130435,
|
|
"grad_norm": 1.00371154489669,
|
|
"learning_rate": 5.0701135174890944e-09,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29931163787841797,
|
|
"step": 4160,
|
|
"valid_targets_mean": 3849.4,
|
|
"valid_targets_min": 403
|
|
},
|
|
{
|
|
"epoch": 6.964882943143813,
|
|
"grad_norm": 0.930732845194307,
|
|
"learning_rate": 3.3662137026535537e-09,
|
|
"loss": 0.2348,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22841259837150574,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3192.3,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 6.973244147157191,
|
|
"grad_norm": 0.9699847666900223,
|
|
"learning_rate": 2.0100139573031584e-09,
|
|
"loss": 0.2398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246263176202774,
|
|
"step": 4170,
|
|
"valid_targets_mean": 2845.4,
|
|
"valid_targets_min": 1337
|
|
},
|
|
{
|
|
"epoch": 6.981605351170568,
|
|
"grad_norm": 0.9378995458223955,
|
|
"learning_rate": 1.0015378629413265e-09,
|
|
"loss": 0.2268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21635910868644714,
|
|
"step": 4175,
|
|
"valid_targets_mean": 2931.9,
|
|
"valid_targets_min": 780
|
|
},
|
|
{
|
|
"epoch": 6.989966555183947,
|
|
"grad_norm": 1.0220155583231438,
|
|
"learning_rate": 3.4080295488347903e-10,
|
|
"loss": 0.2187,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2119850516319275,
|
|
"step": 4180,
|
|
"valid_targets_mean": 2728.2,
|
|
"valid_targets_min": 1313
|
|
},
|
|
{
|
|
"epoch": 6.998327759197324,
|
|
"grad_norm": 0.9751477515928492,
|
|
"learning_rate": 2.7820721939519902e-11,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21303658187389374,
|
|
"step": 4185,
|
|
"valid_targets_mean": 3151.9,
|
|
"valid_targets_min": 1522
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22936706244945526,
|
|
"step": 4186,
|
|
"total_flos": 791537577689088.0,
|
|
"train_loss": 0.17696809689205084,
|
|
"train_runtime": 13311.7439,
|
|
"train_samples_per_second": 5.027,
|
|
"train_steps_per_second": 0.314,
|
|
"valid_targets_mean": 3387.6,
|
|
"valid_targets_min": 1204
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4186,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 791537577689088.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|