Files
100k_epochs3__Qwen3-8B/trainer_state.json

4301 lines
119 KiB
JSON
Raw Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1938,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007739938080495356,
"grad_norm": 27.524375797654567,
"learning_rate": 8.247422680412372e-07,
"loss": 0.8916,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.8891476392745972,
"step": 5,
"valid_targets_mean": 3787.4,
"valid_targets_min": 443
},
{
"epoch": 0.015479876160990712,
"grad_norm": 24.288144237324385,
"learning_rate": 1.8556701030927837e-06,
"loss": 0.8643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.8326998353004456,
"step": 10,
"valid_targets_mean": 3744.1,
"valid_targets_min": 445
},
{
"epoch": 0.02321981424148607,
"grad_norm": 15.387565688754766,
"learning_rate": 2.8865979381443297e-06,
"loss": 0.7836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7326061725616455,
"step": 15,
"valid_targets_mean": 3806.1,
"valid_targets_min": 364
},
{
"epoch": 0.030959752321981424,
"grad_norm": 6.234776596005629,
"learning_rate": 3.917525773195877e-06,
"loss": 0.6587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6209056377410889,
"step": 20,
"valid_targets_mean": 3883.8,
"valid_targets_min": 423
},
{
"epoch": 0.03869969040247678,
"grad_norm": 2.5389947376944724,
"learning_rate": 4.948453608247423e-06,
"loss": 0.575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5564368963241577,
"step": 25,
"valid_targets_mean": 3872.5,
"valid_targets_min": 249
},
{
"epoch": 0.04643962848297214,
"grad_norm": 1.5288498389148235,
"learning_rate": 5.979381443298969e-06,
"loss": 0.5363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.529486358165741,
"step": 30,
"valid_targets_mean": 3780.5,
"valid_targets_min": 372
},
{
"epoch": 0.05417956656346749,
"grad_norm": 1.3523989809328827,
"learning_rate": 7.010309278350515e-06,
"loss": 0.5143,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5047813057899475,
"step": 35,
"valid_targets_mean": 3663.5,
"valid_targets_min": 460
},
{
"epoch": 0.06191950464396285,
"grad_norm": 0.9302783185308883,
"learning_rate": 8.041237113402063e-06,
"loss": 0.4863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4762948751449585,
"step": 40,
"valid_targets_mean": 3721.1,
"valid_targets_min": 448
},
{
"epoch": 0.0696594427244582,
"grad_norm": 0.6719299997500342,
"learning_rate": 9.072164948453609e-06,
"loss": 0.4628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4557168483734131,
"step": 45,
"valid_targets_mean": 3723.2,
"valid_targets_min": 391
},
{
"epoch": 0.07739938080495357,
"grad_norm": 0.521938817483369,
"learning_rate": 1.0103092783505156e-05,
"loss": 0.4424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4353238344192505,
"step": 50,
"valid_targets_mean": 3877.2,
"valid_targets_min": 462
},
{
"epoch": 0.08513931888544891,
"grad_norm": 0.4866090083622629,
"learning_rate": 1.1134020618556703e-05,
"loss": 0.4273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4267203211784363,
"step": 55,
"valid_targets_mean": 3845.5,
"valid_targets_min": 497
},
{
"epoch": 0.09287925696594428,
"grad_norm": 0.7414739260025202,
"learning_rate": 1.2164948453608248e-05,
"loss": 0.4995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49669697880744934,
"step": 60,
"valid_targets_mean": 4920.5,
"valid_targets_min": 2219
},
{
"epoch": 0.10061919504643962,
"grad_norm": 2.4546549920454144,
"learning_rate": 1.3195876288659795e-05,
"loss": 0.5912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7627507448196411,
"step": 65,
"valid_targets_mean": 965.5,
"valid_targets_min": 456
},
{
"epoch": 0.10835913312693499,
"grad_norm": 0.6107036683741611,
"learning_rate": 1.4226804123711342e-05,
"loss": 0.5298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4461647868156433,
"step": 70,
"valid_targets_mean": 4698.2,
"valid_targets_min": 3199
},
{
"epoch": 0.11609907120743033,
"grad_norm": 1.3370797632992233,
"learning_rate": 1.5257731958762888e-05,
"loss": 0.5807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7594808340072632,
"step": 75,
"valid_targets_mean": 877.1,
"valid_targets_min": 427
},
{
"epoch": 0.1238390092879257,
"grad_norm": 0.40163470209773594,
"learning_rate": 1.6288659793814433e-05,
"loss": 0.4163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40786969661712646,
"step": 80,
"valid_targets_mean": 4635.7,
"valid_targets_min": 3134
},
{
"epoch": 0.13157894736842105,
"grad_norm": 1.1010196281594,
"learning_rate": 1.731958762886598e-05,
"loss": 0.5356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7283442616462708,
"step": 85,
"valid_targets_mean": 855.6,
"valid_targets_min": 374
},
{
"epoch": 0.1393188854489164,
"grad_norm": 0.34863970907424235,
"learning_rate": 1.8350515463917527e-05,
"loss": 0.4733,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.396062433719635,
"step": 90,
"valid_targets_mean": 4777.3,
"valid_targets_min": 2671
},
{
"epoch": 0.14705882352941177,
"grad_norm": 1.0895786453008987,
"learning_rate": 1.9381443298969072e-05,
"loss": 0.4539,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7016916871070862,
"step": 95,
"valid_targets_mean": 898.8,
"valid_targets_min": 299
},
{
"epoch": 0.15479876160990713,
"grad_norm": 0.35015283959800747,
"learning_rate": 2.0412371134020618e-05,
"loss": 0.4793,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.35827910900115967,
"step": 100,
"valid_targets_mean": 4618.4,
"valid_targets_min": 2204
},
{
"epoch": 0.16253869969040247,
"grad_norm": 0.39223389296050876,
"learning_rate": 2.1443298969072166e-05,
"loss": 0.3739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3823808431625366,
"step": 105,
"valid_targets_mean": 3491.6,
"valid_targets_min": 313
},
{
"epoch": 0.17027863777089783,
"grad_norm": 0.3959500777293298,
"learning_rate": 2.2474226804123712e-05,
"loss": 0.5074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34726065397262573,
"step": 110,
"valid_targets_mean": 4612.6,
"valid_targets_min": 3000
},
{
"epoch": 0.1780185758513932,
"grad_norm": 0.34302377655422733,
"learning_rate": 2.350515463917526e-05,
"loss": 0.3577,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3575766682624817,
"step": 115,
"valid_targets_mean": 3892.6,
"valid_targets_min": 238
},
{
"epoch": 0.18575851393188855,
"grad_norm": 0.6569398422255293,
"learning_rate": 2.453608247422681e-05,
"loss": 0.5422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3516051173210144,
"step": 120,
"valid_targets_mean": 4844.6,
"valid_targets_min": 2731
},
{
"epoch": 0.19349845201238391,
"grad_norm": 1.9719516136254733,
"learning_rate": 2.556701030927835e-05,
"loss": 0.4012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.621773362159729,
"step": 125,
"valid_targets_mean": 984.1,
"valid_targets_min": 428
},
{
"epoch": 0.20123839009287925,
"grad_norm": 0.5040116006755825,
"learning_rate": 2.6597938144329897e-05,
"loss": 0.4853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3492981493473053,
"step": 130,
"valid_targets_mean": 4991.7,
"valid_targets_min": 3133
},
{
"epoch": 0.2089783281733746,
"grad_norm": 0.36060008121628434,
"learning_rate": 2.7628865979381445e-05,
"loss": 0.3517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3732391595840454,
"step": 135,
"valid_targets_mean": 3466.8,
"valid_targets_min": 300
},
{
"epoch": 0.21671826625386997,
"grad_norm": 0.6220433976621038,
"learning_rate": 2.865979381443299e-05,
"loss": 0.5567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.42027992010116577,
"step": 140,
"valid_targets_mean": 5756.4,
"valid_targets_min": 472
},
{
"epoch": 0.22445820433436534,
"grad_norm": 0.3538953150206175,
"learning_rate": 2.969072164948454e-05,
"loss": 0.4102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.39266014099121094,
"step": 145,
"valid_targets_mean": 5488.7,
"valid_targets_min": 504
},
{
"epoch": 0.23219814241486067,
"grad_norm": 0.306525611774786,
"learning_rate": 3.0721649484536085e-05,
"loss": 0.4184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4043034315109253,
"step": 150,
"valid_targets_mean": 5880.8,
"valid_targets_min": 492
},
{
"epoch": 0.23993808049535603,
"grad_norm": 0.27265867164938057,
"learning_rate": 3.175257731958763e-05,
"loss": 0.3954,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3888610899448395,
"step": 155,
"valid_targets_mean": 5631.9,
"valid_targets_min": 1034
},
{
"epoch": 0.2476780185758514,
"grad_norm": 0.26867129331556494,
"learning_rate": 3.2783505154639176e-05,
"loss": 0.4007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3766745924949646,
"step": 160,
"valid_targets_mean": 5617.9,
"valid_targets_min": 391
},
{
"epoch": 0.25541795665634676,
"grad_norm": 0.23778752661886557,
"learning_rate": 3.3814432989690724e-05,
"loss": 0.3873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3770267367362976,
"step": 165,
"valid_targets_mean": 6436.5,
"valid_targets_min": 560
},
{
"epoch": 0.2631578947368421,
"grad_norm": 0.2718071722062838,
"learning_rate": 3.484536082474227e-05,
"loss": 0.3853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3977334499359131,
"step": 170,
"valid_targets_mean": 5309.5,
"valid_targets_min": 482
},
{
"epoch": 0.2708978328173375,
"grad_norm": 0.40911648384963295,
"learning_rate": 3.587628865979382e-05,
"loss": 0.3747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3614729642868042,
"step": 175,
"valid_targets_mean": 5691.1,
"valid_targets_min": 844
},
{
"epoch": 0.2786377708978328,
"grad_norm": 0.22354807753985279,
"learning_rate": 3.6907216494845364e-05,
"loss": 0.3698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36764293909072876,
"step": 180,
"valid_targets_mean": 5925.6,
"valid_targets_min": 500
},
{
"epoch": 0.28637770897832815,
"grad_norm": 0.21693850181727353,
"learning_rate": 3.7938144329896906e-05,
"loss": 0.3676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3595467209815979,
"step": 185,
"valid_targets_mean": 5658.9,
"valid_targets_min": 491
},
{
"epoch": 0.29411764705882354,
"grad_norm": 0.23965212827073076,
"learning_rate": 3.8969072164948455e-05,
"loss": 0.3632,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38485220074653625,
"step": 190,
"valid_targets_mean": 5660.4,
"valid_targets_min": 422
},
{
"epoch": 0.3018575851393189,
"grad_norm": 0.2149011814927512,
"learning_rate": 4e-05,
"loss": 0.3527,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33452799916267395,
"step": 195,
"valid_targets_mean": 6256.5,
"valid_targets_min": 590
},
{
"epoch": 0.30959752321981426,
"grad_norm": 0.23156965525376894,
"learning_rate": 3.999918877027267e-05,
"loss": 0.358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3639640212059021,
"step": 200,
"valid_targets_mean": 5626.4,
"valid_targets_min": 524
},
{
"epoch": 0.3173374613003096,
"grad_norm": 0.27544480267453636,
"learning_rate": 3.999675514690003e-05,
"loss": 0.3512,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3505948483943939,
"step": 205,
"valid_targets_mean": 5776.4,
"valid_targets_min": 921
},
{
"epoch": 0.32507739938080493,
"grad_norm": 0.2252464636729443,
"learning_rate": 3.9992699327304845e-05,
"loss": 0.3502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.348880410194397,
"step": 210,
"valid_targets_mean": 5494.7,
"valid_targets_min": 1092
},
{
"epoch": 0.3328173374613003,
"grad_norm": 0.23153168043305264,
"learning_rate": 3.998702164050726e-05,
"loss": 0.3478,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3480554521083832,
"step": 215,
"valid_targets_mean": 6014.0,
"valid_targets_min": 234
},
{
"epoch": 0.34055727554179566,
"grad_norm": 0.22019666186278802,
"learning_rate": 3.997972254709811e-05,
"loss": 0.3623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3661324381828308,
"step": 220,
"valid_targets_mean": 6489.6,
"valid_targets_min": 487
},
{
"epoch": 0.34829721362229105,
"grad_norm": 0.3505889989673578,
"learning_rate": 3.997080263920155e-05,
"loss": 0.4905,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3960522413253784,
"step": 225,
"valid_targets_mean": 3702.6,
"valid_targets_min": 466
},
{
"epoch": 0.3560371517027864,
"grad_norm": 0.23666618818592255,
"learning_rate": 3.9960262640427016e-05,
"loss": 0.3451,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34150058031082153,
"step": 230,
"valid_targets_mean": 5997.3,
"valid_targets_min": 240
},
{
"epoch": 0.3637770897832817,
"grad_norm": 0.388486054978801,
"learning_rate": 3.994810340581056e-05,
"loss": 0.4567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3778737783432007,
"step": 235,
"valid_targets_mean": 4160.1,
"valid_targets_min": 473
},
{
"epoch": 0.3715170278637771,
"grad_norm": 0.29026315145068443,
"learning_rate": 3.993432592174541e-05,
"loss": 0.3426,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3361317813396454,
"step": 240,
"valid_targets_mean": 6533.9,
"valid_targets_min": 1876
},
{
"epoch": 0.37925696594427244,
"grad_norm": 0.24143050901851154,
"learning_rate": 3.991893130590206e-05,
"loss": 0.4457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33998745679855347,
"step": 245,
"valid_targets_mean": 6688.6,
"valid_targets_min": 298
},
{
"epoch": 0.38699690402476783,
"grad_norm": 0.24364975307971998,
"learning_rate": 3.990192080713749e-05,
"loss": 0.346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3610474467277527,
"step": 250,
"valid_targets_mean": 6724.0,
"valid_targets_min": 549
},
{
"epoch": 0.39473684210526316,
"grad_norm": 0.35938926126684523,
"learning_rate": 3.988329580539395e-05,
"loss": 0.4906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3444874882698059,
"step": 255,
"valid_targets_mean": 6117.0,
"valid_targets_min": 219
},
{
"epoch": 0.4024767801857585,
"grad_norm": 0.2276644524642497,
"learning_rate": 3.9863057811586926e-05,
"loss": 0.3347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3267607092857361,
"step": 260,
"valid_targets_mean": 6469.6,
"valid_targets_min": 209
},
{
"epoch": 0.4102167182662539,
"grad_norm": 0.28676870840884927,
"learning_rate": 3.984120846748264e-05,
"loss": 0.4497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3532854914665222,
"step": 265,
"valid_targets_mean": 6510.8,
"valid_targets_min": 206
},
{
"epoch": 0.4179566563467492,
"grad_norm": 0.2453432922502153,
"learning_rate": 3.9817749545564866e-05,
"loss": 0.3306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3410989046096802,
"step": 270,
"valid_targets_mean": 5183.3,
"valid_targets_min": 399
},
{
"epoch": 0.42569659442724456,
"grad_norm": 0.31219155957134104,
"learning_rate": 3.979268294889105e-05,
"loss": 0.4835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3323940634727478,
"step": 275,
"valid_targets_mean": 6249.5,
"valid_targets_min": 284
},
{
"epoch": 0.43343653250773995,
"grad_norm": 0.2294409069053472,
"learning_rate": 3.9766010710938055e-05,
"loss": 0.3305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3406042456626892,
"step": 280,
"valid_targets_mean": 5410.6,
"valid_targets_min": 265
},
{
"epoch": 0.4411764705882353,
"grad_norm": 0.30083907414157973,
"learning_rate": 3.97377349954371e-05,
"loss": 0.4778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32756131887435913,
"step": 285,
"valid_targets_mean": 6375.5,
"valid_targets_min": 401
},
{
"epoch": 0.44891640866873067,
"grad_norm": 0.35727630671749366,
"learning_rate": 3.970785809619829e-05,
"loss": 0.3369,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37338879704475403,
"step": 290,
"valid_targets_mean": 3327.1,
"valid_targets_min": 428
},
{
"epoch": 0.456656346749226,
"grad_norm": 1.1550687449863875,
"learning_rate": 3.96763824369245e-05,
"loss": 0.6867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6693388223648071,
"step": 295,
"valid_targets_mean": 3046.5,
"valid_targets_min": 249
},
{
"epoch": 0.46439628482972134,
"grad_norm": 0.5924681699568595,
"learning_rate": 3.964331057101479e-05,
"loss": 0.6378,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.614995002746582,
"step": 300,
"valid_targets_mean": 3748.6,
"valid_targets_min": 339
},
{
"epoch": 0.47213622291021673,
"grad_norm": 0.4378860108802947,
"learning_rate": 3.9608645181357223e-05,
"loss": 0.6239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5896746516227722,
"step": 305,
"valid_targets_mean": 3423.9,
"valid_targets_min": 455
},
{
"epoch": 0.47987616099071206,
"grad_norm": 0.33365115943254425,
"learning_rate": 3.9572389080111276e-05,
"loss": 0.6103,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5521975159645081,
"step": 310,
"valid_targets_mean": 4002.1,
"valid_targets_min": 292
},
{
"epoch": 0.48761609907120745,
"grad_norm": 0.3969354765077491,
"learning_rate": 3.9534545208479645e-05,
"loss": 0.6133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6419351696968079,
"step": 315,
"valid_targets_mean": 3102.1,
"valid_targets_min": 317
},
{
"epoch": 0.4953560371517028,
"grad_norm": 0.39504133651165185,
"learning_rate": 3.949511663646971e-05,
"loss": 0.5843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5931770205497742,
"step": 320,
"valid_targets_mean": 2004.2,
"valid_targets_min": 192
},
{
"epoch": 0.5030959752321982,
"grad_norm": 0.3599916096569783,
"learning_rate": 3.945410656264443e-05,
"loss": 0.6022,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6142541170120239,
"step": 325,
"valid_targets_mean": 2664.8,
"valid_targets_min": 247
},
{
"epoch": 0.5108359133126935,
"grad_norm": 0.3956398459205433,
"learning_rate": 3.941151831386291e-05,
"loss": 0.5839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6046449542045593,
"step": 330,
"valid_targets_mean": 3044.0,
"valid_targets_min": 298
},
{
"epoch": 0.5185758513931888,
"grad_norm": 0.28683489229087217,
"learning_rate": 3.936735534501051e-05,
"loss": 0.5715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5844941139221191,
"step": 335,
"valid_targets_mean": 3938.2,
"valid_targets_min": 941
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.3050152661525686,
"learning_rate": 3.932162123871853e-05,
"loss": 0.5821,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5821244120597839,
"step": 340,
"valid_targets_mean": 3717.0,
"valid_targets_min": 403
},
{
"epoch": 0.5340557275541795,
"grad_norm": 0.25159114526764875,
"learning_rate": 3.927431970507362e-05,
"loss": 0.5573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5323772430419922,
"step": 345,
"valid_targets_mean": 4406.2,
"valid_targets_min": 387
},
{
"epoch": 0.541795665634675,
"grad_norm": 0.2887704499367723,
"learning_rate": 3.922545458131683e-05,
"loss": 0.583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6194786429405212,
"step": 350,
"valid_targets_mean": 3612.3,
"valid_targets_min": 371
},
{
"epoch": 0.5495356037151703,
"grad_norm": 0.3280016267710637,
"learning_rate": 3.917502983153225e-05,
"loss": 0.5538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5695074796676636,
"step": 355,
"valid_targets_mean": 2256.9,
"valid_targets_min": 283
},
{
"epoch": 0.5572755417956656,
"grad_norm": 0.3341165803200255,
"learning_rate": 3.9123049546325475e-05,
"loss": 0.5677,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.581693708896637,
"step": 360,
"valid_targets_mean": 2972.2,
"valid_targets_min": 318
},
{
"epoch": 0.565015479876161,
"grad_norm": 0.467347373374131,
"learning_rate": 3.9069517942491774e-05,
"loss": 0.6032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.671524703502655,
"step": 365,
"valid_targets_mean": 3760.0,
"valid_targets_min": 1056
},
{
"epoch": 0.5727554179566563,
"grad_norm": 0.3513800021014567,
"learning_rate": 3.901443936267398e-05,
"loss": 0.6833,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6314958333969116,
"step": 370,
"valid_targets_mean": 4103.2,
"valid_targets_min": 797
},
{
"epoch": 0.5804953560371517,
"grad_norm": 0.38650302362396954,
"learning_rate": 3.895781827501022e-05,
"loss": 0.7054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7055186033248901,
"step": 375,
"valid_targets_mean": 3301.9,
"valid_targets_min": 791
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.2946682880232837,
"learning_rate": 3.8899659272771454e-05,
"loss": 0.684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6608676314353943,
"step": 380,
"valid_targets_mean": 4432.9,
"valid_targets_min": 877
},
{
"epoch": 0.5959752321981424,
"grad_norm": 0.29230040304138466,
"learning_rate": 3.883996707398883e-05,
"loss": 0.6764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6655077338218689,
"step": 385,
"valid_targets_mean": 4270.7,
"valid_targets_min": 907
},
{
"epoch": 0.6037151702786377,
"grad_norm": 0.27606014724941075,
"learning_rate": 3.877874652107096e-05,
"loss": 0.6741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6826651096343994,
"step": 390,
"valid_targets_mean": 4288.7,
"valid_targets_min": 758
},
{
"epoch": 0.6114551083591331,
"grad_norm": 0.2968942052399876,
"learning_rate": 3.8716002580411084e-05,
"loss": 0.6787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6491446495056152,
"step": 395,
"valid_targets_mean": 4493.8,
"valid_targets_min": 1317
},
{
"epoch": 0.6191950464396285,
"grad_norm": 0.24106023474079094,
"learning_rate": 3.86517403419842e-05,
"loss": 0.6573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6111230850219727,
"step": 400,
"valid_targets_mean": 4781.4,
"valid_targets_min": 671
},
{
"epoch": 0.6269349845201239,
"grad_norm": 0.28063476540629106,
"learning_rate": 3.858596501893413e-05,
"loss": 0.6734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6427568793296814,
"step": 405,
"valid_targets_mean": 4028.6,
"valid_targets_min": 1025
},
{
"epoch": 0.6346749226006192,
"grad_norm": 0.2963922038322634,
"learning_rate": 3.8518681947150584e-05,
"loss": 0.6584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6942857503890991,
"step": 410,
"valid_targets_mean": 3116.2,
"valid_targets_min": 442
},
{
"epoch": 0.6424148606811145,
"grad_norm": 0.2800249645770306,
"learning_rate": 3.844989658483639e-05,
"loss": 0.6743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6975204348564148,
"step": 415,
"valid_targets_mean": 4106.4,
"valid_targets_min": 854
},
{
"epoch": 0.6501547987616099,
"grad_norm": 0.2796558531297951,
"learning_rate": 3.83796145120646e-05,
"loss": 0.6699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6599092483520508,
"step": 420,
"valid_targets_mean": 3609.7,
"valid_targets_min": 1029
},
{
"epoch": 0.6578947368421053,
"grad_norm": 0.2697840591750541,
"learning_rate": 3.83078414303259e-05,
"loss": 0.6755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6651268005371094,
"step": 425,
"valid_targets_mean": 4175.3,
"valid_targets_min": 1010
},
{
"epoch": 0.6656346749226006,
"grad_norm": 0.3266528302290397,
"learning_rate": 3.823458316206603e-05,
"loss": 0.6534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6431168913841248,
"step": 430,
"valid_targets_mean": 3942.0,
"valid_targets_min": 1104
},
{
"epoch": 0.673374613003096,
"grad_norm": 0.2887225491658185,
"learning_rate": 3.81598456502135e-05,
"loss": 0.6716,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6703712940216064,
"step": 435,
"valid_targets_mean": 3787.9,
"valid_targets_min": 776
},
{
"epoch": 0.6811145510835913,
"grad_norm": 0.45784545814125294,
"learning_rate": 3.8083634957697445e-05,
"loss": 0.6219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5753607153892517,
"step": 440,
"valid_targets_mean": 3564.7,
"valid_targets_min": 900
},
{
"epoch": 0.6888544891640866,
"grad_norm": 0.30242995797318084,
"learning_rate": 3.800595726695578e-05,
"loss": 0.5599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5282618999481201,
"step": 445,
"valid_targets_mean": 4363.4,
"valid_targets_min": 750
},
{
"epoch": 0.6965944272445821,
"grad_norm": 0.43946569956807485,
"learning_rate": 3.792681887943372e-05,
"loss": 0.5555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.530036449432373,
"step": 450,
"valid_targets_mean": 3516.3,
"valid_targets_min": 767
},
{
"epoch": 0.7043343653250774,
"grad_norm": 0.27208065178106133,
"learning_rate": 3.7846226215072495e-05,
"loss": 0.5548,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.528791069984436,
"step": 455,
"valid_targets_mean": 4421.9,
"valid_targets_min": 880
},
{
"epoch": 0.7120743034055728,
"grad_norm": 0.31540867569770586,
"learning_rate": 3.776418581178863e-05,
"loss": 0.5628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5669809579849243,
"step": 460,
"valid_targets_mean": 3265.3,
"valid_targets_min": 906
},
{
"epoch": 0.7198142414860681,
"grad_norm": 0.26388751482498035,
"learning_rate": 3.768070432494353e-05,
"loss": 0.5334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49619990587234497,
"step": 465,
"valid_targets_mean": 4266.4,
"valid_targets_min": 1047
},
{
"epoch": 0.7275541795665634,
"grad_norm": 0.30165161522894157,
"learning_rate": 3.759578852680355e-05,
"loss": 0.5617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5646347403526306,
"step": 470,
"valid_targets_mean": 3498.2,
"valid_targets_min": 1036
},
{
"epoch": 0.7352941176470589,
"grad_norm": 0.26399437889537725,
"learning_rate": 3.750944530599069e-05,
"loss": 0.5394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5367434620857239,
"step": 475,
"valid_targets_mean": 4387.9,
"valid_targets_min": 635
},
{
"epoch": 0.7430340557275542,
"grad_norm": 0.2711483804191905,
"learning_rate": 3.74216816669237e-05,
"loss": 0.5464,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5406283736228943,
"step": 480,
"valid_targets_mean": 3467.2,
"valid_targets_min": 695
},
{
"epoch": 0.7507739938080495,
"grad_norm": 0.2476637519734938,
"learning_rate": 3.7332504729249865e-05,
"loss": 0.5308,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5285801887512207,
"step": 485,
"valid_targets_mean": 4467.1,
"valid_targets_min": 613
},
{
"epoch": 0.7585139318885449,
"grad_norm": 0.2818961864587132,
"learning_rate": 3.724192172726747e-05,
"loss": 0.5523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5752028226852417,
"step": 490,
"valid_targets_mean": 3536.6,
"valid_targets_min": 834
},
{
"epoch": 0.7662538699690402,
"grad_norm": 0.25597440316250303,
"learning_rate": 3.714994000933893e-05,
"loss": 0.5405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5525496602058411,
"step": 495,
"valid_targets_mean": 4088.9,
"valid_targets_min": 837
},
{
"epoch": 0.7739938080495357,
"grad_norm": 0.28994428627605706,
"learning_rate": 3.705656703729463e-05,
"loss": 0.5456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5487348437309265,
"step": 500,
"valid_targets_mean": 3560.9,
"valid_targets_min": 1032
},
{
"epoch": 0.781733746130031,
"grad_norm": 0.24479348444572127,
"learning_rate": 3.6961810385827624e-05,
"loss": 0.5361,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5247283577919006,
"step": 505,
"valid_targets_mean": 4477.0,
"valid_targets_min": 502
},
{
"epoch": 0.7894736842105263,
"grad_norm": 0.31803943373773413,
"learning_rate": 3.6865677741879186e-05,
"loss": 0.5387,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5490365028381348,
"step": 510,
"valid_targets_mean": 3193.1,
"valid_targets_min": 883
},
{
"epoch": 0.7972136222910217,
"grad_norm": 0.24052703407469328,
"learning_rate": 3.676817690401516e-05,
"loss": 0.546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5295630693435669,
"step": 515,
"valid_targets_mean": 4601.6,
"valid_targets_min": 602
},
{
"epoch": 0.804953560371517,
"grad_norm": 0.3322812041628695,
"learning_rate": 3.666931578179335e-05,
"loss": 0.5393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5549291372299194,
"step": 520,
"valid_targets_mean": 3430.9,
"valid_targets_min": 1017
},
{
"epoch": 0.8126934984520123,
"grad_norm": 0.26830168196964743,
"learning_rate": 3.656910239512189e-05,
"loss": 0.5253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5216836929321289,
"step": 525,
"valid_targets_mean": 3828.6,
"valid_targets_min": 1071
},
{
"epoch": 0.8204334365325078,
"grad_norm": 0.29777403207690095,
"learning_rate": 3.646754487360861e-05,
"loss": 0.628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5326442718505859,
"step": 530,
"valid_targets_mean": 3390.7,
"valid_targets_min": 996
},
{
"epoch": 0.8281733746130031,
"grad_norm": 0.29573945098254656,
"learning_rate": 3.636465145590156e-05,
"loss": 0.5329,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5153015851974487,
"step": 535,
"valid_targets_mean": 3718.5,
"valid_targets_min": 819
},
{
"epoch": 0.8359133126934984,
"grad_norm": 0.30897425354025815,
"learning_rate": 3.626043048902066e-05,
"loss": 0.6416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.521770715713501,
"step": 540,
"valid_targets_mean": 3193.4,
"valid_targets_min": 558
},
{
"epoch": 0.8436532507739938,
"grad_norm": 0.26854975778232826,
"learning_rate": 3.615489042768056e-05,
"loss": 0.5259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.514534592628479,
"step": 545,
"valid_targets_mean": 3690.3,
"valid_targets_min": 434
},
{
"epoch": 0.8513931888544891,
"grad_norm": 0.3337704457340356,
"learning_rate": 3.604803983360478e-05,
"loss": 0.5894,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5469678044319153,
"step": 550,
"valid_targets_mean": 3144.1,
"valid_targets_min": 742
},
{
"epoch": 0.8591331269349846,
"grad_norm": 0.30925056109154825,
"learning_rate": 3.593988737483115e-05,
"loss": 0.5232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5279290676116943,
"step": 555,
"valid_targets_mean": 3871.4,
"valid_targets_min": 502
},
{
"epoch": 0.8668730650154799,
"grad_norm": 0.27741337408365946,
"learning_rate": 3.583044182500865e-05,
"loss": 0.5833,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5350396633148193,
"step": 560,
"valid_targets_mean": 3256.5,
"valid_targets_min": 999
},
{
"epoch": 0.8746130030959752,
"grad_norm": 0.2682135862875639,
"learning_rate": 3.5719712062685604e-05,
"loss": 0.5252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5352612733840942,
"step": 565,
"valid_targets_mean": 3665.0,
"valid_targets_min": 640
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.32747585544444807,
"learning_rate": 3.560770707058952e-05,
"loss": 0.6347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5486376881599426,
"step": 570,
"valid_targets_mean": 3457.3,
"valid_targets_min": 1110
},
{
"epoch": 0.8900928792569659,
"grad_norm": 0.33314798459658457,
"learning_rate": 3.549443593489832e-05,
"loss": 0.5166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5069097280502319,
"step": 575,
"valid_targets_mean": 3141.0,
"valid_targets_min": 566
},
{
"epoch": 0.8978328173374613,
"grad_norm": 0.3232997965070111,
"learning_rate": 3.537990784450324e-05,
"loss": 0.6174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5290309190750122,
"step": 580,
"valid_targets_mean": 3509.7,
"valid_targets_min": 780
},
{
"epoch": 0.9055727554179567,
"grad_norm": 0.31911406686125754,
"learning_rate": 3.526413209026346e-05,
"loss": 0.5301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.51148521900177,
"step": 585,
"valid_targets_mean": 3106.4,
"valid_targets_min": 794
},
{
"epoch": 0.913312693498452,
"grad_norm": 0.37107762634385655,
"learning_rate": 3.514711806425231e-05,
"loss": 0.5652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5133883953094482,
"step": 590,
"valid_targets_mean": 3530.0,
"valid_targets_min": 744
},
{
"epoch": 0.9210526315789473,
"grad_norm": 0.3457780498003014,
"learning_rate": 3.502887525899544e-05,
"loss": 0.5268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.540345311164856,
"step": 595,
"valid_targets_mean": 3219.6,
"valid_targets_min": 435
},
{
"epoch": 0.9287925696594427,
"grad_norm": 1.7120255019264785,
"learning_rate": 3.490941326670073e-05,
"loss": 0.5271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.39785030484199524,
"step": 600,
"valid_targets_mean": 4948.2,
"valid_targets_min": 1383
},
{
"epoch": 0.9365325077399381,
"grad_norm": 0.554237503586826,
"learning_rate": 3.47887417784801e-05,
"loss": 0.3514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3161848187446594,
"step": 605,
"valid_targets_mean": 4927.9,
"valid_targets_min": 1102
},
{
"epoch": 0.9442724458204335,
"grad_norm": 0.5340419975249076,
"learning_rate": 3.466687058356341e-05,
"loss": 0.3172,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3274293839931488,
"step": 610,
"valid_targets_mean": 4602.5,
"valid_targets_min": 1297
},
{
"epoch": 0.9520123839009288,
"grad_norm": 0.38335606187666377,
"learning_rate": 3.4543809568504286e-05,
"loss": 0.2737,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2700973153114319,
"step": 615,
"valid_targets_mean": 4983.0,
"valid_targets_min": 938
},
{
"epoch": 0.9597523219814241,
"grad_norm": 0.36164087059504424,
"learning_rate": 3.44195687163781e-05,
"loss": 0.2788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2856438159942627,
"step": 620,
"valid_targets_mean": 4184.5,
"valid_targets_min": 1158
},
{
"epoch": 0.9674922600619195,
"grad_norm": 0.33384191107630423,
"learning_rate": 3.4294158105972095e-05,
"loss": 0.2834,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27378416061401367,
"step": 625,
"valid_targets_mean": 4258.2,
"valid_targets_min": 662
},
{
"epoch": 0.9752321981424149,
"grad_norm": 0.3499790559422126,
"learning_rate": 3.416758791096782e-05,
"loss": 0.2751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2694745659828186,
"step": 630,
"valid_targets_mean": 4959.4,
"valid_targets_min": 2049
},
{
"epoch": 0.9829721362229102,
"grad_norm": 0.5343904741585188,
"learning_rate": 3.4039868399115736e-05,
"loss": 0.2845,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2808476686477661,
"step": 635,
"valid_targets_mean": 3119.1,
"valid_targets_min": 1116
},
{
"epoch": 0.9907120743034056,
"grad_norm": 0.551789004570227,
"learning_rate": 3.391100993140233e-05,
"loss": 0.2764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2603238523006439,
"step": 640,
"valid_targets_mean": 2866.9,
"valid_targets_min": 1189
},
{
"epoch": 0.9984520123839009,
"grad_norm": 0.47251144663554534,
"learning_rate": 3.378102296120956e-05,
"loss": 0.2624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2715701758861542,
"step": 645,
"valid_targets_mean": 3248.7,
"valid_targets_min": 1746
},
{
"epoch": 1.0061919504643964,
"grad_norm": 0.48139061749920886,
"learning_rate": 3.364991803346687e-05,
"loss": 0.3172,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3139479458332062,
"step": 650,
"valid_targets_mean": 3799.7,
"valid_targets_min": 470
},
{
"epoch": 1.0139318885448916,
"grad_norm": 0.4002801120318358,
"learning_rate": 3.351770578379573e-05,
"loss": 0.3011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3000005781650543,
"step": 655,
"valid_targets_mean": 3723.8,
"valid_targets_min": 388
},
{
"epoch": 1.021671826625387,
"grad_norm": 0.32300020372074756,
"learning_rate": 3.338439693764688e-05,
"loss": 0.2914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29472923278808594,
"step": 660,
"valid_targets_mean": 3488.4,
"valid_targets_min": 338
},
{
"epoch": 1.0294117647058822,
"grad_norm": 0.2534602174594846,
"learning_rate": 3.325000230943019e-05,
"loss": 0.2858,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27545419335365295,
"step": 665,
"valid_targets_mean": 3832.2,
"valid_targets_min": 506
},
{
"epoch": 1.0371517027863777,
"grad_norm": 0.24788434112652252,
"learning_rate": 3.311453280163744e-05,
"loss": 0.2799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27271491289138794,
"step": 670,
"valid_targets_mean": 3689.5,
"valid_targets_min": 397
},
{
"epoch": 1.0448916408668731,
"grad_norm": 0.2596056148039042,
"learning_rate": 3.297799940395781e-05,
"loss": 0.2765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26611781120300293,
"step": 675,
"valid_targets_mean": 3592.6,
"valid_targets_min": 330
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.2609799822219866,
"learning_rate": 3.28404131923864e-05,
"loss": 0.274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2731304168701172,
"step": 680,
"valid_targets_mean": 3646.3,
"valid_targets_min": 428
},
{
"epoch": 1.0603715170278638,
"grad_norm": 0.23086080112783416,
"learning_rate": 3.270178532832568e-05,
"loss": 0.2707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2716541588306427,
"step": 685,
"valid_targets_mean": 3811.8,
"valid_targets_min": 430
},
{
"epoch": 1.068111455108359,
"grad_norm": 0.22901327274684632,
"learning_rate": 3.2562127057680116e-05,
"loss": 0.2667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27574634552001953,
"step": 690,
"valid_targets_mean": 3824.1,
"valid_targets_min": 349
},
{
"epoch": 1.0758513931888545,
"grad_norm": 0.2274846230773506,
"learning_rate": 3.242144970994377e-05,
"loss": 0.2658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25684624910354614,
"step": 695,
"valid_targets_mean": 3658.0,
"valid_targets_min": 444
},
{
"epoch": 1.08359133126935,
"grad_norm": 0.23504790034851347,
"learning_rate": 3.22797646972813e-05,
"loss": 0.2673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2675577402114868,
"step": 700,
"valid_targets_mean": 3583.1,
"valid_targets_min": 457
},
{
"epoch": 1.0913312693498451,
"grad_norm": 0.37928507653308724,
"learning_rate": 3.2137083513602115e-05,
"loss": 0.3088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32227909564971924,
"step": 705,
"valid_targets_mean": 4880.6,
"valid_targets_min": 3320
},
{
"epoch": 1.0990712074303406,
"grad_norm": 0.7946890508526923,
"learning_rate": 3.1993417733627986e-05,
"loss": 0.3525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5340238213539124,
"step": 710,
"valid_targets_mean": 978.9,
"valid_targets_min": 200
},
{
"epoch": 1.1068111455108358,
"grad_norm": 0.38660722110507,
"learning_rate": 3.1848779011954076e-05,
"loss": 0.4102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3029667139053345,
"step": 715,
"valid_targets_mean": 4807.5,
"valid_targets_min": 2887
},
{
"epoch": 1.1145510835913313,
"grad_norm": 1.276053464706334,
"learning_rate": 3.1703179082103447e-05,
"loss": 0.365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5683897733688354,
"step": 720,
"valid_targets_mean": 870.8,
"valid_targets_min": 239
},
{
"epoch": 1.1222910216718267,
"grad_norm": 0.35511704990994863,
"learning_rate": 3.155662975557525e-05,
"loss": 0.3521,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2964516282081604,
"step": 725,
"valid_targets_mean": 4695.3,
"valid_targets_min": 3169
},
{
"epoch": 1.130030959752322,
"grad_norm": 1.1027701592450079,
"learning_rate": 3.140914292088649e-05,
"loss": 0.3494,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5393781661987305,
"step": 730,
"valid_targets_mean": 994.2,
"valid_targets_min": 414
},
{
"epoch": 1.1377708978328174,
"grad_norm": 0.300490486853147,
"learning_rate": 3.126073054260765e-05,
"loss": 0.4132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2866157293319702,
"step": 735,
"valid_targets_mean": 4788.1,
"valid_targets_min": 2297
},
{
"epoch": 1.1455108359133126,
"grad_norm": 0.30468939031761166,
"learning_rate": 3.111140466039205e-05,
"loss": 0.2988,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3231448233127594,
"step": 740,
"valid_targets_mean": 3206.7,
"valid_targets_min": 447
},
{
"epoch": 1.153250773993808,
"grad_norm": 0.4185461221093088,
"learning_rate": 3.0961177387999143e-05,
"loss": 0.4374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30215156078338623,
"step": 745,
"valid_targets_mean": 4860.4,
"valid_targets_min": 3021
},
{
"epoch": 1.1609907120743035,
"grad_norm": 0.3246362159115541,
"learning_rate": 3.081006091231187e-05,
"loss": 0.2904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2925494909286499,
"step": 750,
"valid_targets_mean": 4770.2,
"valid_targets_min": 2896
},
{
"epoch": 1.1687306501547987,
"grad_norm": 0.5388796431924842,
"learning_rate": 3.065806749234795e-05,
"loss": 0.4219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.297951340675354,
"step": 755,
"valid_targets_mean": 4724.9,
"valid_targets_min": 3054
},
{
"epoch": 1.1764705882352942,
"grad_norm": 0.41506798440532944,
"learning_rate": 3.0505209458265463e-05,
"loss": 0.2873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2955588698387146,
"step": 760,
"valid_targets_mean": 4654.0,
"valid_targets_min": 1994
},
{
"epoch": 1.1842105263157894,
"grad_norm": 0.47126780281343006,
"learning_rate": 3.0351499210362526e-05,
"loss": 0.4456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2873014211654663,
"step": 765,
"valid_targets_mean": 4692.8,
"valid_targets_min": 742
},
{
"epoch": 1.1919504643962848,
"grad_norm": 0.34901444949701144,
"learning_rate": 3.01969492180714e-05,
"loss": 0.2844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29014477133750916,
"step": 770,
"valid_targets_mean": 5052.9,
"valid_targets_min": 2772
},
{
"epoch": 1.1996904024767803,
"grad_norm": 0.5180698881182623,
"learning_rate": 3.004157201894689e-05,
"loss": 0.4448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2949589788913727,
"step": 775,
"valid_targets_mean": 5030.0,
"valid_targets_min": 3232
},
{
"epoch": 1.2074303405572755,
"grad_norm": 0.3407530260823277,
"learning_rate": 2.9885380217649285e-05,
"loss": 0.2888,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2843036651611328,
"step": 780,
"valid_targets_mean": 5112.5,
"valid_targets_min": 3319
},
{
"epoch": 1.215170278637771,
"grad_norm": 0.33606213636554133,
"learning_rate": 2.972838648492182e-05,
"loss": 0.4564,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32512155175209045,
"step": 785,
"valid_targets_mean": 5787.9,
"valid_targets_min": 628
},
{
"epoch": 1.2229102167182662,
"grad_norm": 0.26150380566158515,
"learning_rate": 2.95706035565628e-05,
"loss": 0.3279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3358706533908844,
"step": 790,
"valid_targets_mean": 6091.0,
"valid_targets_min": 590
},
{
"epoch": 1.2306501547987616,
"grad_norm": 0.26424156107594843,
"learning_rate": 2.9412044232392416e-05,
"loss": 0.3331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3356711268424988,
"step": 795,
"valid_targets_mean": 5457.9,
"valid_targets_min": 537
},
{
"epoch": 1.238390092879257,
"grad_norm": 0.24610103045052448,
"learning_rate": 2.92527213752144e-05,
"loss": 0.3233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32360732555389404,
"step": 800,
"valid_targets_mean": 5283.4,
"valid_targets_min": 556
},
{
"epoch": 1.2461300309597523,
"grad_norm": 0.20473845910198676,
"learning_rate": 2.9092647909772547e-05,
"loss": 0.3322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3107759952545166,
"step": 805,
"valid_targets_mean": 6079.6,
"valid_targets_min": 531
},
{
"epoch": 1.2538699690402477,
"grad_norm": 0.1982417032561459,
"learning_rate": 2.893183682170224e-05,
"loss": 0.323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34520620107650757,
"step": 810,
"valid_targets_mean": 5938.4,
"valid_targets_min": 588
},
{
"epoch": 1.261609907120743,
"grad_norm": 0.19813643799333697,
"learning_rate": 2.8770301156476985e-05,
"loss": 0.3215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31860482692718506,
"step": 815,
"valid_targets_mean": 5991.7,
"valid_targets_min": 605
},
{
"epoch": 1.2693498452012384,
"grad_norm": 0.21223208558324866,
"learning_rate": 2.8608054018350144e-05,
"loss": 0.3252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3146929144859314,
"step": 820,
"valid_targets_mean": 5768.7,
"valid_targets_min": 589
},
{
"epoch": 1.2770897832817338,
"grad_norm": 0.21668883079476361,
"learning_rate": 2.8445108569291882e-05,
"loss": 0.3174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31380555033683777,
"step": 825,
"valid_targets_mean": 5973.1,
"valid_targets_min": 598
},
{
"epoch": 1.284829721362229,
"grad_norm": 0.2138530658257456,
"learning_rate": 2.8281478027921428e-05,
"loss": 0.3203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3224177658557892,
"step": 830,
"valid_targets_mean": 5782.9,
"valid_targets_min": 427
},
{
"epoch": 1.2925696594427245,
"grad_norm": 0.22678360029970596,
"learning_rate": 2.8117175668434713e-05,
"loss": 0.3125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3132099509239197,
"step": 835,
"valid_targets_mean": 5733.2,
"valid_targets_min": 768
},
{
"epoch": 1.3003095975232197,
"grad_norm": 0.21256605493714836,
"learning_rate": 2.795221481952758e-05,
"loss": 0.3183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32242241501808167,
"step": 840,
"valid_targets_mean": 5674.5,
"valid_targets_min": 597
},
{
"epoch": 1.3080495356037152,
"grad_norm": 0.23958348168020002,
"learning_rate": 2.778660886331447e-05,
"loss": 0.3105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.324482798576355,
"step": 845,
"valid_targets_mean": 5429.0,
"valid_targets_min": 755
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.20703439180417177,
"learning_rate": 2.7620371234242857e-05,
"loss": 0.3138,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3022202253341675,
"step": 850,
"valid_targets_mean": 5852.8,
"valid_targets_min": 1304
},
{
"epoch": 1.3235294117647058,
"grad_norm": 0.24125893912340335,
"learning_rate": 2.7453515418003385e-05,
"loss": 0.313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3281027674674988,
"step": 855,
"valid_targets_mean": 5466.6,
"valid_targets_min": 1012
},
{
"epoch": 1.3312693498452013,
"grad_norm": 0.2157557221470851,
"learning_rate": 2.728605495043589e-05,
"loss": 0.3115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3008950352668762,
"step": 860,
"valid_targets_mean": 5432.9,
"valid_targets_min": 495
},
{
"epoch": 1.3390092879256965,
"grad_norm": 0.21720447478539381,
"learning_rate": 2.7118003416431312e-05,
"loss": 0.3153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32623517513275146,
"step": 865,
"valid_targets_mean": 6355.9,
"valid_targets_min": 629
},
{
"epoch": 1.346749226006192,
"grad_norm": 1.4436049900618573,
"learning_rate": 2.6949374448829666e-05,
"loss": 0.4247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6241855621337891,
"step": 870,
"valid_targets_mean": 805.4,
"valid_targets_min": 438
},
{
"epoch": 1.3544891640866874,
"grad_norm": 0.24132631335087232,
"learning_rate": 2.6780181727314096e-05,
"loss": 0.3162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3145143389701843,
"step": 875,
"valid_targets_mean": 6133.8,
"valid_targets_min": 229
},
{
"epoch": 1.3622291021671826,
"grad_norm": 0.9705810504068956,
"learning_rate": 2.6610438977301128e-05,
"loss": 0.3938,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6266766786575317,
"step": 880,
"valid_targets_mean": 817.7,
"valid_targets_min": 373
},
{
"epoch": 1.369969040247678,
"grad_norm": 0.21906903305378964,
"learning_rate": 2.6440159968827255e-05,
"loss": 0.3142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3028450608253479,
"step": 885,
"valid_targets_mean": 6547.3,
"valid_targets_min": 1808
},
{
"epoch": 1.3777089783281733,
"grad_norm": 0.27954069223468464,
"learning_rate": 2.6269358515431825e-05,
"loss": 0.3977,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31099915504455566,
"step": 890,
"valid_targets_mean": 6934.5,
"valid_targets_min": 376
},
{
"epoch": 1.3854489164086687,
"grad_norm": 0.2282751162688024,
"learning_rate": 2.6098048473036487e-05,
"loss": 0.3092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3102579116821289,
"step": 895,
"valid_targets_mean": 6416.0,
"valid_targets_min": 451
},
{
"epoch": 1.3931888544891642,
"grad_norm": 0.29347218348748927,
"learning_rate": 2.5926243738821148e-05,
"loss": 0.4439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3172456920146942,
"step": 900,
"valid_targets_mean": 6050.4,
"valid_targets_min": 298
},
{
"epoch": 1.4009287925696594,
"grad_norm": 0.23302656493339524,
"learning_rate": 2.575395825009657e-05,
"loss": 0.3069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2993898391723633,
"step": 905,
"valid_targets_mean": 5863.9,
"valid_targets_min": 212
},
{
"epoch": 1.4086687306501549,
"grad_norm": 0.380205064262657,
"learning_rate": 2.5581205983173763e-05,
"loss": 0.4018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31587034463882446,
"step": 910,
"valid_targets_mean": 6129.1,
"valid_targets_min": 324
},
{
"epoch": 1.41640866873065,
"grad_norm": 0.2714599791425628,
"learning_rate": 2.5408000952230158e-05,
"loss": 0.3035,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3035046458244324,
"step": 915,
"valid_targets_mean": 6139.4,
"valid_targets_min": 478
},
{
"epoch": 1.4241486068111455,
"grad_norm": 0.2967636848805332,
"learning_rate": 2.523435720817277e-05,
"loss": 0.4405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3169952630996704,
"step": 920,
"valid_targets_mean": 6069.3,
"valid_targets_min": 230
},
{
"epoch": 1.431888544891641,
"grad_norm": 0.21154727850987246,
"learning_rate": 2.5060288837498296e-05,
"loss": 0.3011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30303075909614563,
"step": 925,
"valid_targets_mean": 6344.3,
"valid_targets_min": 600
},
{
"epoch": 1.4396284829721362,
"grad_norm": 0.2666594031964154,
"learning_rate": 2.4885809961150436e-05,
"loss": 0.4366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3026265501976013,
"step": 930,
"valid_targets_mean": 6771.4,
"valid_targets_min": 486
},
{
"epoch": 1.4473684210526316,
"grad_norm": 0.21624632300338756,
"learning_rate": 2.4710934733374313e-05,
"loss": 0.3007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3007712960243225,
"step": 935,
"valid_targets_mean": 6569.9,
"valid_targets_min": 353
},
{
"epoch": 1.4551083591331269,
"grad_norm": 1.4857134613281382,
"learning_rate": 2.4535677340568275e-05,
"loss": 0.5409,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5613664388656616,
"step": 940,
"valid_targets_mean": 2007.4,
"valid_targets_min": 246
},
{
"epoch": 1.4628482972136223,
"grad_norm": 0.38140244794748396,
"learning_rate": 2.4360052000133004e-05,
"loss": 0.5363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5474293231964111,
"step": 945,
"valid_targets_mean": 3351.7,
"valid_targets_min": 267
},
{
"epoch": 1.4705882352941178,
"grad_norm": 0.3603442528537929,
"learning_rate": 2.4184072959318208e-05,
"loss": 0.5287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5367631912231445,
"step": 950,
"valid_targets_mean": 3195.7,
"valid_targets_min": 296
},
{
"epoch": 1.478328173374613,
"grad_norm": 0.3343077768972529,
"learning_rate": 2.4007754494066822e-05,
"loss": 0.5251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5016438961029053,
"step": 955,
"valid_targets_mean": 3282.1,
"valid_targets_min": 296
},
{
"epoch": 1.4860681114551084,
"grad_norm": 0.3376798618230589,
"learning_rate": 2.3831110907856886e-05,
"loss": 0.51,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4855489432811737,
"step": 960,
"valid_targets_mean": 3273.5,
"valid_targets_min": 426
},
{
"epoch": 1.4938080495356036,
"grad_norm": 0.2782369122581895,
"learning_rate": 2.3654156530541236e-05,
"loss": 0.5156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.45853185653686523,
"step": 965,
"valid_targets_mean": 3904.3,
"valid_targets_min": 286
},
{
"epoch": 1.501547987616099,
"grad_norm": 0.3049436859222503,
"learning_rate": 2.3476905717184994e-05,
"loss": 0.5222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5525015592575073,
"step": 970,
"valid_targets_mean": 3417.9,
"valid_targets_min": 431
},
{
"epoch": 1.5092879256965945,
"grad_norm": 0.4337294221477672,
"learning_rate": 2.329937284690106e-05,
"loss": 0.5152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.546398401260376,
"step": 975,
"valid_targets_mean": 2151.9,
"valid_targets_min": 306
},
{
"epoch": 1.5170278637770898,
"grad_norm": 0.2864428701765129,
"learning_rate": 2.3121572321683624e-05,
"loss": 0.5074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.48784539103507996,
"step": 980,
"valid_targets_mean": 2757.0,
"valid_targets_min": 366
},
{
"epoch": 1.524767801857585,
"grad_norm": 0.3218727885494719,
"learning_rate": 2.2943518565239855e-05,
"loss": 0.5163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5275843739509583,
"step": 985,
"valid_targets_mean": 2699.5,
"valid_targets_min": 297
},
{
"epoch": 1.5325077399380804,
"grad_norm": 0.27972239430276774,
"learning_rate": 2.2765226021819773e-05,
"loss": 0.5063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49300703406333923,
"step": 990,
"valid_targets_mean": 3419.5,
"valid_targets_min": 281
},
{
"epoch": 1.5402476780185759,
"grad_norm": 0.28727899220649333,
"learning_rate": 2.258670915504453e-05,
"loss": 0.506,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5013266205787659,
"step": 995,
"valid_targets_mean": 3438.5,
"valid_targets_min": 301
},
{
"epoch": 1.5479876160990713,
"grad_norm": 0.24156375169268698,
"learning_rate": 2.2407982446733027e-05,
"loss": 0.5075,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4864124059677124,
"step": 1000,
"valid_targets_mean": 4330.6,
"valid_targets_min": 247
},
{
"epoch": 1.5557275541795665,
"grad_norm": 0.4115069627169929,
"learning_rate": 2.222906039572715e-05,
"loss": 0.5068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5431771278381348,
"step": 1005,
"valid_targets_mean": 2712.3,
"valid_targets_min": 262
},
{
"epoch": 1.5634674922600618,
"grad_norm": 0.6349890726818864,
"learning_rate": 2.2049957516715573e-05,
"loss": 0.5322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6668592095375061,
"step": 1010,
"valid_targets_mean": 3619.0,
"valid_targets_min": 1099
},
{
"epoch": 1.5712074303405572,
"grad_norm": 0.3761640092661745,
"learning_rate": 2.1870688339056265e-05,
"loss": 0.6381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6248250603675842,
"step": 1015,
"valid_targets_mean": 3834.9,
"valid_targets_min": 1026
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.35008924670124053,
"learning_rate": 2.1691267405597834e-05,
"loss": 0.6383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6214076280593872,
"step": 1020,
"valid_targets_mean": 3756.7,
"valid_targets_min": 1083
},
{
"epoch": 1.586687306501548,
"grad_norm": 0.2935122332602019,
"learning_rate": 2.151170927149977e-05,
"loss": 0.6413,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6318605542182922,
"step": 1025,
"valid_targets_mean": 3851.6,
"valid_targets_min": 1063
},
{
"epoch": 1.5944272445820433,
"grad_norm": 0.27413481584559085,
"learning_rate": 2.1332028503051693e-05,
"loss": 0.6252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6500615477561951,
"step": 1030,
"valid_targets_mean": 3610.4,
"valid_targets_min": 1092
},
{
"epoch": 1.6021671826625385,
"grad_norm": 0.4147553577122656,
"learning_rate": 2.1152239676491687e-05,
"loss": 0.6219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5811175107955933,
"step": 1035,
"valid_targets_mean": 4324.1,
"valid_targets_min": 791
},
{
"epoch": 1.609907120743034,
"grad_norm": 0.29937825889380415,
"learning_rate": 2.097235737682382e-05,
"loss": 0.6371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6158422231674194,
"step": 1040,
"valid_targets_mean": 3782.9,
"valid_targets_min": 1167
},
{
"epoch": 1.6176470588235294,
"grad_norm": 0.2695054440616614,
"learning_rate": 2.079239619663499e-05,
"loss": 0.6183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.595893144607544,
"step": 1045,
"valid_targets_mean": 4237.8,
"valid_targets_min": 636
},
{
"epoch": 1.6253869969040249,
"grad_norm": 0.24701393198924002,
"learning_rate": 2.0612370734911095e-05,
"loss": 0.6213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6063376069068909,
"step": 1050,
"valid_targets_mean": 4379.6,
"valid_targets_min": 417
},
{
"epoch": 1.63312693498452,
"grad_norm": 0.23072601464589526,
"learning_rate": 2.0432295595852774e-05,
"loss": 0.6054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5472590923309326,
"step": 1055,
"valid_targets_mean": 4701.0,
"valid_targets_min": 826
},
{
"epoch": 1.6408668730650153,
"grad_norm": 0.23494698490345117,
"learning_rate": 2.0252185387690627e-05,
"loss": 0.6292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.591805636882782,
"step": 1060,
"valid_targets_mean": 4621.8,
"valid_targets_min": 983
},
{
"epoch": 1.6486068111455108,
"grad_norm": 0.286574606641743,
"learning_rate": 2.007205472150014e-05,
"loss": 0.6349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6813613176345825,
"step": 1065,
"valid_targets_mean": 3181.1,
"valid_targets_min": 492
},
{
"epoch": 1.6563467492260062,
"grad_norm": 0.2678271671127905,
"learning_rate": 1.9891918210016453e-05,
"loss": 0.6333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6792501211166382,
"step": 1070,
"valid_targets_mean": 4021.6,
"valid_targets_min": 665
},
{
"epoch": 1.6640866873065017,
"grad_norm": 0.287461265170696,
"learning_rate": 1.9711790466448863e-05,
"loss": 0.6183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6828348636627197,
"step": 1075,
"valid_targets_mean": 3290.7,
"valid_targets_min": 280
},
{
"epoch": 1.671826625386997,
"grad_norm": 0.23540874788494098,
"learning_rate": 1.95316861032954e-05,
"loss": 0.6277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6578258275985718,
"step": 1080,
"valid_targets_mean": 4297.4,
"valid_targets_min": 1038
},
{
"epoch": 1.6795665634674921,
"grad_norm": 0.5245084769694451,
"learning_rate": 1.9351619731157415e-05,
"loss": 0.6037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5338871479034424,
"step": 1085,
"valid_targets_mean": 3676.8,
"valid_targets_min": 1023
},
{
"epoch": 1.6873065015479876,
"grad_norm": 0.35757608120176726,
"learning_rate": 1.91716059575543e-05,
"loss": 0.5365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5506007671356201,
"step": 1090,
"valid_targets_mean": 3877.6,
"valid_targets_min": 891
},
{
"epoch": 1.695046439628483,
"grad_norm": 0.29617547242231923,
"learning_rate": 1.899165938573851e-05,
"loss": 0.5212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5574710369110107,
"step": 1095,
"valid_targets_mean": 3730.8,
"valid_targets_min": 921
},
{
"epoch": 1.7027863777089784,
"grad_norm": 0.2663718259319401,
"learning_rate": 1.881179461351087e-05,
"loss": 0.5219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5038564205169678,
"step": 1100,
"valid_targets_mean": 3764.0,
"valid_targets_min": 771
},
{
"epoch": 1.7105263157894737,
"grad_norm": 0.2728449458525243,
"learning_rate": 1.8632026232036397e-05,
"loss": 0.5219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5010745525360107,
"step": 1105,
"valid_targets_mean": 3619.3,
"valid_targets_min": 981
},
{
"epoch": 1.718266253869969,
"grad_norm": 0.2660611928357808,
"learning_rate": 1.8452368824660604e-05,
"loss": 0.5147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5162074565887451,
"step": 1110,
"valid_targets_mean": 3383.6,
"valid_targets_min": 747
},
{
"epoch": 1.7260061919504643,
"grad_norm": 0.26724477754654813,
"learning_rate": 1.827283696572646e-05,
"loss": 0.5167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5333441495895386,
"step": 1115,
"valid_targets_mean": 3617.2,
"valid_targets_min": 877
},
{
"epoch": 1.7337461300309598,
"grad_norm": 0.24922101516229775,
"learning_rate": 1.8093445219392038e-05,
"loss": 0.5135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5010273456573486,
"step": 1120,
"valid_targets_mean": 3628.0,
"valid_targets_min": 1027
},
{
"epoch": 1.7414860681114552,
"grad_norm": 0.2544991655030091,
"learning_rate": 1.7914208138449108e-05,
"loss": 0.5147,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5082110166549683,
"step": 1125,
"valid_targets_mean": 3666.4,
"valid_targets_min": 1044
},
{
"epoch": 1.7492260061919505,
"grad_norm": 0.2586854297067809,
"learning_rate": 1.7735140263142483e-05,
"loss": 0.5024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.498982697725296,
"step": 1130,
"valid_targets_mean": 3281.1,
"valid_targets_min": 851
},
{
"epoch": 1.7569659442724457,
"grad_norm": 0.265351851103295,
"learning_rate": 1.7556256119990538e-05,
"loss": 0.513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5036240220069885,
"step": 1135,
"valid_targets_mean": 3577.5,
"valid_targets_min": 1157
},
{
"epoch": 1.7647058823529411,
"grad_norm": 0.25556536651758294,
"learning_rate": 1.7377570220606745e-05,
"loss": 0.5155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47923219203948975,
"step": 1140,
"valid_targets_mean": 3314.5,
"valid_targets_min": 706
},
{
"epoch": 1.7724458204334366,
"grad_norm": 0.2659278252988841,
"learning_rate": 1.7199097060522437e-05,
"loss": 0.5173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5065821409225464,
"step": 1145,
"valid_targets_mean": 3562.3,
"valid_targets_min": 1014
},
{
"epoch": 1.780185758513932,
"grad_norm": 0.2570609206552128,
"learning_rate": 1.7020851118010918e-05,
"loss": 0.5125,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5099133253097534,
"step": 1150,
"valid_targets_mean": 3536.9,
"valid_targets_min": 792
},
{
"epoch": 1.7879256965944272,
"grad_norm": 0.2508651381096074,
"learning_rate": 1.684284685291292e-05,
"loss": 0.5056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.514965832233429,
"step": 1155,
"valid_targets_mean": 3570.4,
"valid_targets_min": 1059
},
{
"epoch": 1.7956656346749225,
"grad_norm": 0.24698558245827082,
"learning_rate": 1.666509870546359e-05,
"loss": 0.5225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5093916654586792,
"step": 1160,
"valid_targets_mean": 3647.4,
"valid_targets_min": 784
},
{
"epoch": 1.803405572755418,
"grad_norm": 0.26466317261621247,
"learning_rate": 1.648762109512105e-05,
"loss": 0.508,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5089800357818604,
"step": 1165,
"valid_targets_mean": 3439.6,
"valid_targets_min": 1012
},
{
"epoch": 1.8111455108359134,
"grad_norm": 0.24987520286912196,
"learning_rate": 1.6310428419396636e-05,
"loss": 0.505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4624229669570923,
"step": 1170,
"valid_targets_mean": 3415.4,
"valid_targets_min": 555
},
{
"epoch": 1.8188854489164088,
"grad_norm": 0.2727113667131823,
"learning_rate": 1.6133535052686953e-05,
"loss": 0.5947,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49991852045059204,
"step": 1175,
"valid_targets_mean": 3413.9,
"valid_targets_min": 973
},
{
"epoch": 1.826625386996904,
"grad_norm": 0.2619566476966724,
"learning_rate": 1.595695534510777e-05,
"loss": 0.5103,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4921343922615051,
"step": 1180,
"valid_targets_mean": 3740.7,
"valid_targets_min": 849
},
{
"epoch": 1.8343653250773992,
"grad_norm": 0.3089259820699402,
"learning_rate": 1.5780703621329893e-05,
"loss": 0.6108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5340328216552734,
"step": 1185,
"valid_targets_mean": 3384.0,
"valid_targets_min": 870
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.2690108692070922,
"learning_rate": 1.5604794179417083e-05,
"loss": 0.5024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4995267689228058,
"step": 1190,
"valid_targets_mean": 3341.3,
"valid_targets_min": 1113
},
{
"epoch": 1.8498452012383901,
"grad_norm": 0.2652646205794999,
"learning_rate": 1.542924128966622e-05,
"loss": 0.5553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5052015781402588,
"step": 1195,
"valid_targets_mean": 3684.2,
"valid_targets_min": 1026
},
{
"epoch": 1.8575851393188856,
"grad_norm": 0.2762577600723093,
"learning_rate": 1.5254059193449585e-05,
"loss": 0.5025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49244827032089233,
"step": 1200,
"valid_targets_mean": 3157.4,
"valid_targets_min": 525
},
{
"epoch": 1.8653250773993808,
"grad_norm": 0.27400155947690535,
"learning_rate": 1.5079262102059602e-05,
"loss": 0.5555,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5354343056678772,
"step": 1205,
"valid_targets_mean": 3240.1,
"valid_targets_min": 984
},
{
"epoch": 1.873065015479876,
"grad_norm": 0.2508418984472677,
"learning_rate": 1.4904864195555942e-05,
"loss": 0.5006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5009927153587341,
"step": 1210,
"valid_targets_mean": 3223.1,
"valid_targets_min": 328
},
{
"epoch": 1.8808049535603715,
"grad_norm": 0.25556621332680995,
"learning_rate": 1.4730879621615217e-05,
"loss": 0.6041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5159991383552551,
"step": 1215,
"valid_targets_mean": 3842.8,
"valid_targets_min": 811
},
{
"epoch": 1.888544891640867,
"grad_norm": 0.27756109590528844,
"learning_rate": 1.4557322494383274e-05,
"loss": 0.5018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5096072554588318,
"step": 1220,
"valid_targets_mean": 3401.7,
"valid_targets_min": 358
},
{
"epoch": 1.8962848297213624,
"grad_norm": 0.29072340907542393,
"learning_rate": 1.4384206893330216e-05,
"loss": 0.5866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5704678297042847,
"step": 1225,
"valid_targets_mean": 3138.4,
"valid_targets_min": 426
},
{
"epoch": 1.9040247678018576,
"grad_norm": 0.25114676744419256,
"learning_rate": 1.4211546862108242e-05,
"loss": 0.5104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5284104347229004,
"step": 1230,
"valid_targets_mean": 3528.3,
"valid_targets_min": 285
},
{
"epoch": 1.9117647058823528,
"grad_norm": 0.3049014526259051,
"learning_rate": 1.4039356407412329e-05,
"loss": 0.5403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6094015836715698,
"step": 1235,
"valid_targets_mean": 2719.7,
"valid_targets_min": 533
},
{
"epoch": 1.9195046439628483,
"grad_norm": 0.2521098064388986,
"learning_rate": 1.3867649497844058e-05,
"loss": 0.4999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5124248266220093,
"step": 1240,
"valid_targets_mean": 3570.4,
"valid_targets_min": 564
},
{
"epoch": 1.9272445820433437,
"grad_norm": 2.8985727416760403,
"learning_rate": 1.3696440062778363e-05,
"loss": 0.5295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5617307424545288,
"step": 1245,
"valid_targets_mean": 2341.9,
"valid_targets_min": 417
},
{
"epoch": 1.9349845201238391,
"grad_norm": 0.9839086679727692,
"learning_rate": 1.3525741991233576e-05,
"loss": 0.3357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31935644149780273,
"step": 1250,
"valid_targets_mean": 4556.8,
"valid_targets_min": 1546
},
{
"epoch": 1.9427244582043344,
"grad_norm": 0.5367112686535409,
"learning_rate": 1.3355569130744717e-05,
"loss": 0.2842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2978261113166809,
"step": 1255,
"valid_targets_mean": 4626.2,
"valid_targets_min": 925
},
{
"epoch": 1.9504643962848296,
"grad_norm": 0.4051027506035485,
"learning_rate": 1.3185935286240081e-05,
"loss": 0.2583,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25202229619026184,
"step": 1260,
"valid_targets_mean": 4938.1,
"valid_targets_min": 1451
},
{
"epoch": 1.958204334365325,
"grad_norm": 0.4350351782057793,
"learning_rate": 1.3016854218921432e-05,
"loss": 0.255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26276537775993347,
"step": 1265,
"valid_targets_mean": 4161.5,
"valid_targets_min": 1249
},
{
"epoch": 1.9659442724458205,
"grad_norm": 0.49748555632911934,
"learning_rate": 1.2848339645147574e-05,
"loss": 0.2664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2641948461532593,
"step": 1270,
"valid_targets_mean": 4068.4,
"valid_targets_min": 1239
},
{
"epoch": 1.973684210526316,
"grad_norm": 0.4256957837723888,
"learning_rate": 1.2680405235321684e-05,
"loss": 0.2618,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25480371713638306,
"step": 1275,
"valid_targets_mean": 4988.2,
"valid_targets_min": 1593
},
{
"epoch": 1.9814241486068112,
"grad_norm": 0.750944747452251,
"learning_rate": 1.2513064612782308e-05,
"loss": 0.2587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24965959787368774,
"step": 1280,
"valid_targets_mean": 2636.5,
"valid_targets_min": 274
},
{
"epoch": 1.9891640866873064,
"grad_norm": 0.430852624660782,
"learning_rate": 1.2346331352698206e-05,
"loss": 0.2567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2593318819999695,
"step": 1285,
"valid_targets_mean": 2712.8,
"valid_targets_min": 982
},
{
"epoch": 1.9969040247678018,
"grad_norm": 0.3450106479763493,
"learning_rate": 1.2180218980967091e-05,
"loss": 0.2409,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24146604537963867,
"step": 1290,
"valid_targets_mean": 3597.4,
"valid_targets_min": 1869
},
{
"epoch": 2.0046439628482973,
"grad_norm": 0.7137021285572581,
"learning_rate": 1.201474097311837e-05,
"loss": 0.2746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2817814350128174,
"step": 1295,
"valid_targets_mean": 3714.5,
"valid_targets_min": 313
},
{
"epoch": 2.0123839009287927,
"grad_norm": 0.4074614051315701,
"learning_rate": 1.1849910753219956e-05,
"loss": 0.276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2710038125514984,
"step": 1300,
"valid_targets_mean": 3769.2,
"valid_targets_min": 428
},
{
"epoch": 2.0201238390092877,
"grad_norm": 0.3552531242519296,
"learning_rate": 1.1685741692789284e-05,
"loss": 0.2673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2690993547439575,
"step": 1305,
"valid_targets_mean": 3845.8,
"valid_targets_min": 444
},
{
"epoch": 2.027863777089783,
"grad_norm": 0.3087607772738728,
"learning_rate": 1.1522247109708564e-05,
"loss": 0.2664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2663906216621399,
"step": 1310,
"valid_targets_mean": 3735.0,
"valid_targets_min": 259
},
{
"epoch": 2.0356037151702786,
"grad_norm": 0.2667932220570936,
"learning_rate": 1.1359440267144413e-05,
"loss": 0.2595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25116756558418274,
"step": 1315,
"valid_targets_mean": 3725.4,
"valid_targets_min": 386
},
{
"epoch": 2.043343653250774,
"grad_norm": 0.23773274178550025,
"learning_rate": 1.119733437247187e-05,
"loss": 0.2588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25612616539001465,
"step": 1320,
"valid_targets_mean": 3718.9,
"valid_targets_min": 463
},
{
"epoch": 2.0510835913312695,
"grad_norm": 0.22262330851695253,
"learning_rate": 1.103594257620301e-05,
"loss": 0.2554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.256261944770813,
"step": 1325,
"valid_targets_mean": 3712.7,
"valid_targets_min": 434
},
{
"epoch": 2.0588235294117645,
"grad_norm": 0.21229515407480232,
"learning_rate": 1.0875277970920118e-05,
"loss": 0.254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25342199206352234,
"step": 1330,
"valid_targets_mean": 3745.0,
"valid_targets_min": 433
},
{
"epoch": 2.06656346749226,
"grad_norm": 0.22333151724909178,
"learning_rate": 1.0715353590213597e-05,
"loss": 0.2501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24361343681812286,
"step": 1335,
"valid_targets_mean": 3624.5,
"valid_targets_min": 397
},
{
"epoch": 2.0743034055727554,
"grad_norm": 0.2172013859447262,
"learning_rate": 1.0556182407624616e-05,
"loss": 0.2546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2626274526119232,
"step": 1340,
"valid_targets_mean": 3754.9,
"valid_targets_min": 269
},
{
"epoch": 2.082043343653251,
"grad_norm": 0.20904050399109872,
"learning_rate": 1.0397777335592693e-05,
"loss": 0.2516,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25204020738601685,
"step": 1345,
"valid_targets_mean": 3859.0,
"valid_targets_min": 463
},
{
"epoch": 2.0897832817337463,
"grad_norm": 0.46665012190374583,
"learning_rate": 1.024015122440815e-05,
"loss": 0.2769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2907443046569824,
"step": 1350,
"valid_targets_mean": 4864.0,
"valid_targets_min": 2389
},
{
"epoch": 2.0975232198142413,
"grad_norm": 0.30910992282947736,
"learning_rate": 1.0083316861169704e-05,
"loss": 0.2873,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2828359007835388,
"step": 1355,
"valid_targets_mean": 4532.4,
"valid_targets_min": 468
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.3522274973128065,
"learning_rate": 9.927286968747147e-06,
"loss": 0.4222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2904053330421448,
"step": 1360,
"valid_targets_mean": 5055.2,
"valid_targets_min": 2995
},
{
"epoch": 2.113003095975232,
"grad_norm": 0.3236448171441497,
"learning_rate": 9.772074204749178e-06,
"loss": 0.2911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31897926330566406,
"step": 1365,
"valid_targets_mean": 2780.0,
"valid_targets_min": 448
},
{
"epoch": 2.1207430340557276,
"grad_norm": 0.2580480567815723,
"learning_rate": 9.617691160496616e-06,
"loss": 0.3777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2766563296318054,
"step": 1370,
"valid_targets_mean": 4765.8,
"valid_targets_min": 2211
},
{
"epoch": 2.128482972136223,
"grad_norm": 0.2519211562935606,
"learning_rate": 9.464150360000946e-06,
"loss": 0.2836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3104143738746643,
"step": 1375,
"valid_targets_mean": 3243.5,
"valid_targets_min": 412
},
{
"epoch": 2.136222910216718,
"grad_norm": 0.315089516820891,
"learning_rate": 9.311464258948354e-06,
"loss": 0.4343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2775019109249115,
"step": 1380,
"valid_targets_mean": 4668.5,
"valid_targets_min": 2456
},
{
"epoch": 2.1439628482972135,
"grad_norm": 0.23754548575984485,
"learning_rate": 9.159645243689245e-06,
"loss": 0.2765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2711362838745117,
"step": 1385,
"valid_targets_mean": 4707.3,
"valid_targets_min": 2483
},
{
"epoch": 2.151702786377709,
"grad_norm": 0.31059351838823335,
"learning_rate": 9.008705630233454e-06,
"loss": 0.4127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2804102897644043,
"step": 1390,
"valid_targets_mean": 4803.9,
"valid_targets_min": 3123
},
{
"epoch": 2.1594427244582044,
"grad_norm": 0.24069835959534366,
"learning_rate": 8.85865766325113e-06,
"loss": 0.2782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2760240435600281,
"step": 1395,
"valid_targets_mean": 4720.2,
"valid_targets_min": 2652
},
{
"epoch": 2.1671826625387,
"grad_norm": 0.28907425488238114,
"learning_rate": 8.709513515079398e-06,
"loss": 0.3943,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29097089171409607,
"step": 1400,
"valid_targets_mean": 3505.7,
"valid_targets_min": 429
},
{
"epoch": 2.174922600619195,
"grad_norm": 0.22017513910048814,
"learning_rate": 8.561285284734938e-06,
"loss": 0.2745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2772256135940552,
"step": 1405,
"valid_targets_mean": 4646.0,
"valid_targets_min": 2402
},
{
"epoch": 2.1826625386996903,
"grad_norm": 0.5785356850860002,
"learning_rate": 8.413984996932431e-06,
"loss": 0.4228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5103334188461304,
"step": 1410,
"valid_targets_mean": 973.6,
"valid_targets_min": 281
},
{
"epoch": 2.1904024767801857,
"grad_norm": 0.2184008661733165,
"learning_rate": 8.26762460110911e-06,
"loss": 0.2718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26770395040512085,
"step": 1415,
"valid_targets_mean": 4744.8,
"valid_targets_min": 2671
},
{
"epoch": 2.198142414860681,
"grad_norm": 0.253779582176502,
"learning_rate": 8.122215970455371e-06,
"loss": 0.4197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3024511933326721,
"step": 1420,
"valid_targets_mean": 3931.4,
"valid_targets_min": 474
},
{
"epoch": 2.2058823529411766,
"grad_norm": 0.20325660357569308,
"learning_rate": 7.977770900951592e-06,
"loss": 0.2788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27750131487846375,
"step": 1425,
"valid_targets_mean": 5010.2,
"valid_targets_min": 2642
},
{
"epoch": 2.2136222910216716,
"grad_norm": 0.6308558213826081,
"learning_rate": 7.834301110411216e-06,
"loss": 0.4236,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5459995269775391,
"step": 1430,
"valid_targets_mean": 812.3,
"valid_targets_min": 129
},
{
"epoch": 2.221362229102167,
"grad_norm": 0.22091190358730112,
"learning_rate": 7.691818237530147e-06,
"loss": 0.3065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3073071837425232,
"step": 1435,
"valid_targets_mean": 5991.4,
"valid_targets_min": 913
},
{
"epoch": 2.2291021671826625,
"grad_norm": 0.2400662413354753,
"learning_rate": 7.550333840942594e-06,
"loss": 0.3157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3117232918739319,
"step": 1440,
"valid_targets_mean": 5973.2,
"valid_targets_min": 542
},
{
"epoch": 2.236842105263158,
"grad_norm": 0.24596319963688767,
"learning_rate": 7.409859398283406e-06,
"loss": 0.3097,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30473989248275757,
"step": 1445,
"valid_targets_mean": 5461.6,
"valid_targets_min": 360
},
{
"epoch": 2.2445820433436534,
"grad_norm": 0.21303500837963765,
"learning_rate": 7.270406305256954e-06,
"loss": 0.3191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3208760619163513,
"step": 1450,
"valid_targets_mean": 5406.7,
"valid_targets_min": 310
},
{
"epoch": 2.2523219814241484,
"grad_norm": 0.22282067837975372,
"learning_rate": 7.131985874712717e-06,
"loss": 0.3019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29536110162734985,
"step": 1455,
"valid_targets_mean": 5525.3,
"valid_targets_min": 987
},
{
"epoch": 2.260061919504644,
"grad_norm": 0.2088956316251687,
"learning_rate": 6.994609335727503e-06,
"loss": 0.3132,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3129228353500366,
"step": 1460,
"valid_targets_mean": 5494.2,
"valid_targets_min": 803
},
{
"epoch": 2.2678018575851393,
"grad_norm": 0.19600857595943563,
"learning_rate": 6.858287832694535e-06,
"loss": 0.3128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3101985454559326,
"step": 1465,
"valid_targets_mean": 5719.7,
"valid_targets_min": 988
},
{
"epoch": 2.2755417956656347,
"grad_norm": 0.22277262871201828,
"learning_rate": 6.723032424419387e-06,
"loss": 0.3052,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.324319064617157,
"step": 1470,
"valid_targets_mean": 5339.7,
"valid_targets_min": 596
},
{
"epoch": 2.28328173374613,
"grad_norm": 0.21247652547555076,
"learning_rate": 6.588854083222851e-06,
"loss": 0.3068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32305365800857544,
"step": 1475,
"valid_targets_mean": 5534.7,
"valid_targets_min": 575
},
{
"epoch": 2.291021671826625,
"grad_norm": 0.19804812009122108,
"learning_rate": 6.4557636940508625e-06,
"loss": 0.3031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3071126937866211,
"step": 1480,
"valid_targets_mean": 5558.2,
"valid_targets_min": 256
},
{
"epoch": 2.2987616099071206,
"grad_norm": 0.21231476361570778,
"learning_rate": 6.323772053591404e-06,
"loss": 0.3057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30539828538894653,
"step": 1485,
"valid_targets_mean": 5328.4,
"valid_targets_min": 524
},
{
"epoch": 2.306501547987616,
"grad_norm": 0.21260199451840608,
"learning_rate": 6.192889869398748e-06,
"loss": 0.2997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3049812912940979,
"step": 1490,
"valid_targets_mean": 5696.6,
"valid_targets_min": 614
},
{
"epoch": 2.3142414860681115,
"grad_norm": 0.23564604671250605,
"learning_rate": 6.063127759024745e-06,
"loss": 0.3085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30037930607795715,
"step": 1495,
"valid_targets_mean": 5787.3,
"valid_targets_min": 496
},
{
"epoch": 2.321981424148607,
"grad_norm": 0.19791326643477766,
"learning_rate": 5.934496249157533e-06,
"loss": 0.2982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3089606761932373,
"step": 1500,
"valid_targets_mean": 5574.5,
"valid_targets_min": 736
},
{
"epoch": 2.329721362229102,
"grad_norm": 0.20289524095960015,
"learning_rate": 5.807005774767598e-06,
"loss": 0.3077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30494993925094604,
"step": 1505,
"valid_targets_mean": 5946.5,
"valid_targets_min": 670
},
{
"epoch": 2.3374613003095974,
"grad_norm": 0.18304855157538058,
"learning_rate": 5.68066667826119e-06,
"loss": 0.2999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.302049845457077,
"step": 1510,
"valid_targets_mean": 6433.6,
"valid_targets_min": 353
},
{
"epoch": 2.345201238390093,
"grad_norm": 0.5493322314615379,
"learning_rate": 5.555489208641412e-06,
"loss": 0.349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.509677529335022,
"step": 1515,
"valid_targets_mean": 1094.5,
"valid_targets_min": 286
},
{
"epoch": 2.3529411764705883,
"grad_norm": 0.1805140863690906,
"learning_rate": 5.431483520676704e-06,
"loss": 0.3602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2936132848262787,
"step": 1520,
"valid_targets_mean": 6291.4,
"valid_targets_min": 338
},
{
"epoch": 2.3606811145510838,
"grad_norm": 0.42641728494351316,
"learning_rate": 5.308659674077128e-06,
"loss": 0.3197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.42394477128982544,
"step": 1525,
"valid_targets_mean": 1675.7,
"valid_targets_min": 527
},
{
"epoch": 2.3684210526315788,
"grad_norm": 0.19803200180071323,
"learning_rate": 5.187027632678228e-06,
"loss": 0.3644,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29865461587905884,
"step": 1530,
"valid_targets_mean": 6662.4,
"valid_targets_min": 1694
},
{
"epoch": 2.376160990712074,
"grad_norm": 0.3570559506169205,
"learning_rate": 5.0665972636327775e-06,
"loss": 0.3813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40023019909858704,
"step": 1535,
"valid_targets_mean": 2726.5,
"valid_targets_min": 474
},
{
"epoch": 2.3839009287925697,
"grad_norm": 0.17326643727386593,
"learning_rate": 4.947378336610336e-06,
"loss": 0.3006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2955681085586548,
"step": 1540,
"valid_targets_mean": 6219.8,
"valid_targets_min": 602
},
{
"epoch": 2.391640866873065,
"grad_norm": 0.5644311898078787,
"learning_rate": 4.829380523004657e-06,
"loss": 0.4264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5778234601020813,
"step": 1545,
"valid_targets_mean": 844.2,
"valid_targets_min": 290
},
{
"epoch": 2.3993808049535605,
"grad_norm": 0.18134571299636928,
"learning_rate": 4.712613395149173e-06,
"loss": 0.302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31143641471862793,
"step": 1550,
"valid_targets_mean": 5912.4,
"valid_targets_min": 277
},
{
"epoch": 2.4071207430340555,
"grad_norm": 0.40570041188391864,
"learning_rate": 4.597086425540389e-06,
"loss": 0.3851,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4236239492893219,
"step": 1555,
"valid_targets_mean": 1662.7,
"valid_targets_min": 313
},
{
"epoch": 2.414860681114551,
"grad_norm": 0.1720233461064192,
"learning_rate": 4.482808986069531e-06,
"loss": 0.2983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2866983711719513,
"step": 1560,
"valid_targets_mean": 6104.8,
"valid_targets_min": 435
},
{
"epoch": 2.4226006191950464,
"grad_norm": 0.25170741905375205,
"learning_rate": 4.369790347262197e-06,
"loss": 0.4231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32215455174446106,
"step": 1565,
"valid_targets_mean": 2831.9,
"valid_targets_min": 265
},
{
"epoch": 2.430340557275542,
"grad_norm": 0.17795832859887645,
"learning_rate": 4.258039677526344e-06,
"loss": 0.2967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29880303144454956,
"step": 1570,
"valid_targets_mean": 6694.6,
"valid_targets_min": 250
},
{
"epoch": 2.4380804953560373,
"grad_norm": 0.2494150992717737,
"learning_rate": 4.147566042408502e-06,
"loss": 0.424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3349652886390686,
"step": 1575,
"valid_targets_mean": 3286.7,
"valid_targets_min": 441
},
{
"epoch": 2.4458204334365323,
"grad_norm": 0.16923708951207625,
"learning_rate": 4.0383784038583585e-06,
"loss": 0.2942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3039069175720215,
"step": 1580,
"valid_targets_mean": 6532.3,
"valid_targets_min": 694
},
{
"epoch": 2.4535603715170278,
"grad_norm": 0.8694258992837159,
"learning_rate": 3.930485619501747e-06,
"loss": 0.4719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.530645489692688,
"step": 1585,
"valid_targets_mean": 1694.4,
"valid_targets_min": 257
},
{
"epoch": 2.461300309597523,
"grad_norm": 0.6861559520309628,
"learning_rate": 3.823896441922066e-06,
"loss": 0.5183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5340226888656616,
"step": 1590,
"valid_targets_mean": 2654.1,
"valid_targets_min": 261
},
{
"epoch": 2.4690402476780187,
"grad_norm": 0.41883498638420535,
"learning_rate": 3.7186195179502636e-06,
"loss": 0.512,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.496014267206192,
"step": 1595,
"valid_targets_mean": 2251.2,
"valid_targets_min": 337
},
{
"epoch": 2.476780185758514,
"grad_norm": 0.28975030266636936,
"learning_rate": 3.614663387963371e-06,
"loss": 0.5135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.512344241142273,
"step": 1600,
"valid_targets_mean": 3427.8,
"valid_targets_min": 298
},
{
"epoch": 2.484520123839009,
"grad_norm": 0.26418682453433606,
"learning_rate": 3.5120364851916832e-06,
"loss": 0.4949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49718883633613586,
"step": 1605,
"valid_targets_mean": 3676.3,
"valid_targets_min": 415
},
{
"epoch": 2.4922600619195046,
"grad_norm": 0.281710151730984,
"learning_rate": 3.410747135034642e-06,
"loss": 0.5037,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.48265427350997925,
"step": 1610,
"valid_targets_mean": 3749.4,
"valid_targets_min": 319
},
{
"epoch": 2.5,
"grad_norm": 0.2577956784865143,
"learning_rate": 3.310803554385438e-06,
"loss": 0.4872,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4795178174972534,
"step": 1615,
"valid_targets_mean": 3321.1,
"valid_targets_min": 266
},
{
"epoch": 2.5077399380804954,
"grad_norm": 0.35159723334083737,
"learning_rate": 3.2122138509644364e-06,
"loss": 0.4999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47505080699920654,
"step": 1620,
"valid_targets_mean": 3494.8,
"valid_targets_min": 260
},
{
"epoch": 2.515479876160991,
"grad_norm": 0.2512223834877677,
"learning_rate": 3.1149860226614613e-06,
"loss": 0.5031,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5007380843162537,
"step": 1625,
"valid_targets_mean": 3470.5,
"valid_targets_min": 217
},
{
"epoch": 2.523219814241486,
"grad_norm": 0.3141754515576957,
"learning_rate": 3.019127956886969e-06,
"loss": 0.4917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5128790736198425,
"step": 1630,
"valid_targets_mean": 2027.0,
"valid_targets_min": 249
},
{
"epoch": 2.5309597523219813,
"grad_norm": 0.2635355892638946,
"learning_rate": 2.924647429932228e-06,
"loss": 0.4976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.48266178369522095,
"step": 1635,
"valid_targets_mean": 2672.5,
"valid_targets_min": 243
},
{
"epoch": 2.538699690402477,
"grad_norm": 0.2539204593977298,
"learning_rate": 2.8315521063384467e-06,
"loss": 0.4887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.49488547444343567,
"step": 1640,
"valid_targets_mean": 3140.0,
"valid_targets_min": 302
},
{
"epoch": 2.5464396284829722,
"grad_norm": 0.217726211895958,
"learning_rate": 2.739849538275019e-06,
"loss": 0.4956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4866185188293457,
"step": 1645,
"valid_targets_mean": 3984.5,
"valid_targets_min": 258
},
{
"epoch": 2.5541795665634677,
"grad_norm": 0.2320001335465348,
"learning_rate": 2.6495471649268757e-06,
"loss": 0.4801,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.463797390460968,
"step": 1650,
"valid_targets_mean": 3525.9,
"valid_targets_min": 242
},
{
"epoch": 2.5619195046439627,
"grad_norm": 0.2266217849063618,
"learning_rate": 2.560652311890981e-06,
"loss": 0.4933,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47365492582321167,
"step": 1655,
"valid_targets_mean": 4537.2,
"valid_targets_min": 394
},
{
"epoch": 2.569659442724458,
"grad_norm": 0.5535731219467451,
"learning_rate": 2.4731721905820916e-06,
"loss": 0.6353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6892400979995728,
"step": 1660,
"valid_targets_mean": 3944.9,
"valid_targets_min": 626
},
{
"epoch": 2.5773993808049536,
"grad_norm": 0.41074838934685265,
"learning_rate": 2.3871138976476815e-06,
"loss": 0.6281,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.7635179162025452,
"step": 1665,
"valid_targets_mean": 3327.8,
"valid_targets_min": 423
},
{
"epoch": 2.585139318885449,
"grad_norm": 0.2695103381259381,
"learning_rate": 2.3024844143923164e-06,
"loss": 0.6276,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6791110038757324,
"step": 1670,
"valid_targets_mean": 4198.4,
"valid_targets_min": 570
},
{
"epoch": 2.5928792569659445,
"grad_norm": 0.29028639276044116,
"learning_rate": 2.219290606211253e-06,
"loss": 0.61,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6073037981987,
"step": 1675,
"valid_targets_mean": 3893.0,
"valid_targets_min": 885
},
{
"epoch": 2.6006191950464395,
"grad_norm": 0.26808292030451497,
"learning_rate": 2.137539222033527e-06,
"loss": 0.624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6242520809173584,
"step": 1680,
"valid_targets_mean": 3704.1,
"valid_targets_min": 891
},
{
"epoch": 2.608359133126935,
"grad_norm": 0.25579873742778253,
"learning_rate": 2.05723689377445e-06,
"loss": 0.6184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5944548845291138,
"step": 1685,
"valid_targets_mean": 3793.6,
"valid_targets_min": 1124
},
{
"epoch": 2.6160990712074303,
"grad_norm": 0.2680775147249504,
"learning_rate": 1.9783901357975987e-06,
"loss": 0.6108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5884455442428589,
"step": 1690,
"valid_targets_mean": 3647.5,
"valid_targets_min": 775
},
{
"epoch": 2.623839009287926,
"grad_norm": 0.2648780300863295,
"learning_rate": 1.9010053443863796e-06,
"loss": 0.6077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6278525590896606,
"step": 1695,
"valid_targets_mean": 3801.7,
"valid_targets_min": 835
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.23985387358779475,
"learning_rate": 1.8250887972251096e-06,
"loss": 0.6065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5884929895401001,
"step": 1700,
"valid_targets_mean": 4259.5,
"valid_targets_min": 886
},
{
"epoch": 2.6393188854489162,
"grad_norm": 0.231419199707839,
"learning_rate": 1.7506466528897802e-06,
"loss": 0.6093,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6190042495727539,
"step": 1705,
"valid_targets_mean": 4118.3,
"valid_targets_min": 1310
},
{
"epoch": 2.6470588235294117,
"grad_norm": 0.22779000268320077,
"learning_rate": 1.677684950348435e-06,
"loss": 0.6067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5958472490310669,
"step": 1710,
"valid_targets_mean": 4160.2,
"valid_targets_min": 795
},
{
"epoch": 2.654798761609907,
"grad_norm": 0.22402609358806513,
"learning_rate": 1.6062096084712786e-06,
"loss": 0.623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5853190422058105,
"step": 1715,
"valid_targets_mean": 4790.0,
"valid_targets_min": 1104
},
{
"epoch": 2.6625386996904026,
"grad_norm": 0.21928463378918323,
"learning_rate": 1.53622642555052e-06,
"loss": 0.608,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.573377251625061,
"step": 1720,
"valid_targets_mean": 4677.6,
"valid_targets_min": 628
},
{
"epoch": 2.670278637770898,
"grad_norm": 0.22990181538010174,
"learning_rate": 1.4677410788299984e-06,
"loss": 0.6225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5744858980178833,
"step": 1725,
"valid_targets_mean": 3947.1,
"valid_targets_min": 1088
},
{
"epoch": 2.678018575851393,
"grad_norm": 0.4076358392577911,
"learning_rate": 1.400759124044637e-06,
"loss": 0.6184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6449670791625977,
"step": 1730,
"valid_targets_mean": 2494.4,
"valid_targets_min": 400
},
{
"epoch": 2.6857585139318885,
"grad_norm": 0.5280765848335666,
"learning_rate": 1.3352859949697127e-06,
"loss": 0.5258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5440828800201416,
"step": 1735,
"valid_targets_mean": 3803.9,
"valid_targets_min": 341
},
{
"epoch": 2.693498452012384,
"grad_norm": 0.4788888153411866,
"learning_rate": 1.2713270029801028e-06,
"loss": 0.513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5939984321594238,
"step": 1740,
"valid_targets_mean": 2460.7,
"valid_targets_min": 507
},
{
"epoch": 2.7012383900928794,
"grad_norm": 0.3173889002826398,
"learning_rate": 1.2088873366193687e-06,
"loss": 0.5251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5380067229270935,
"step": 1745,
"valid_targets_mean": 3748.4,
"valid_targets_min": 754
},
{
"epoch": 2.708978328173375,
"grad_norm": 0.3705774365156222,
"learning_rate": 1.1479720611788525e-06,
"loss": 0.5139,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6330641508102417,
"step": 1750,
"valid_targets_mean": 2391.1,
"valid_targets_min": 521
},
{
"epoch": 2.71671826625387,
"grad_norm": 0.25829421667909225,
"learning_rate": 1.0885861182867985e-06,
"loss": 0.5036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.514169454574585,
"step": 1755,
"valid_targets_mean": 3659.7,
"valid_targets_min": 806
},
{
"epoch": 2.7244582043343653,
"grad_norm": 0.31912077406959755,
"learning_rate": 1.0307343255074187e-06,
"loss": 0.505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6068185567855835,
"step": 1760,
"valid_targets_mean": 3227.0,
"valid_targets_min": 450
},
{
"epoch": 2.7321981424148607,
"grad_norm": 0.25975497335621966,
"learning_rate": 9.744213759501275e-07,
"loss": 0.5117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5231520533561707,
"step": 1765,
"valid_targets_mean": 3305.4,
"valid_targets_min": 621
},
{
"epoch": 2.739938080495356,
"grad_norm": 0.26497384483537273,
"learning_rate": 9.196518378887975e-07,
"loss": 0.505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5685962438583374,
"step": 1770,
"valid_targets_mean": 3149.7,
"valid_targets_min": 462
},
{
"epoch": 2.7476780185758516,
"grad_norm": 0.2467240160425324,
"learning_rate": 8.66430154391169e-07,
"loss": 0.4962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5243191123008728,
"step": 1775,
"valid_targets_mean": 3709.8,
"valid_targets_min": 263
},
{
"epoch": 2.7554179566563466,
"grad_norm": 0.3035163036989275,
"learning_rate": 8.147606429584232e-07,
"loss": 0.504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5350388288497925,
"step": 1780,
"valid_targets_mean": 3436.5,
"valid_targets_min": 523
},
{
"epoch": 2.763157894736842,
"grad_norm": 0.21982119897667,
"learning_rate": 7.646474951749217e-07,
"loss": 0.5127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.544601559638977,
"step": 1785,
"valid_targets_mean": 4309.8,
"valid_targets_min": 269
},
{
"epoch": 2.7708978328173375,
"grad_norm": 0.23378731204836112,
"learning_rate": 7.160947763681924e-07,
"loss": 0.5041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5182209014892578,
"step": 1790,
"valid_targets_mean": 3643.2,
"valid_targets_min": 373
},
{
"epoch": 2.778637770897833,
"grad_norm": 0.23122266832381014,
"learning_rate": 6.691064252791158e-07,
"loss": 0.5044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5215298533439636,
"step": 1795,
"valid_targets_mean": 3911.8,
"valid_targets_min": 1193
},
{
"epoch": 2.7863777089783284,
"grad_norm": 0.2351100926605,
"learning_rate": 6.236862537424194e-07,
"loss": 0.497,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5257998704910278,
"step": 1800,
"valid_targets_mean": 3986.0,
"valid_targets_min": 1074
},
{
"epoch": 2.7941176470588234,
"grad_norm": 0.2283488540535019,
"learning_rate": 5.798379463774373e-07,
"loss": 0.5165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5136227607727051,
"step": 1805,
"valid_targets_mean": 4058.6,
"valid_targets_min": 964
},
{
"epoch": 2.801857585139319,
"grad_norm": 0.2288831862161014,
"learning_rate": 5.375650602892091e-07,
"loss": 0.5009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4868626594543457,
"step": 1810,
"valid_targets_mean": 3589.4,
"valid_targets_min": 1066
},
{
"epoch": 2.8095975232198143,
"grad_norm": 0.23150890542263308,
"learning_rate": 4.96871024779928e-07,
"loss": 0.5075,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5141134262084961,
"step": 1815,
"valid_targets_mean": 3394.9,
"valid_targets_min": 311
},
{
"epoch": 2.8173374613003097,
"grad_norm": 0.23789582679013196,
"learning_rate": 4.5775914107072164e-07,
"loss": 0.5791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5422489047050476,
"step": 1820,
"valid_targets_mean": 3667.1,
"valid_targets_min": 855
},
{
"epoch": 2.825077399380805,
"grad_norm": 0.2424143922969789,
"learning_rate": 4.202325820338682e-07,
"loss": 0.5051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5248996615409851,
"step": 1825,
"valid_targets_mean": 3378.0,
"valid_targets_min": 549
},
{
"epoch": 2.8328173374613,
"grad_norm": 0.2641392304157764,
"learning_rate": 3.842943919353914e-07,
"loss": 0.5944,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5383692979812622,
"step": 1830,
"valid_targets_mean": 2999.8,
"valid_targets_min": 301
},
{
"epoch": 2.8405572755417956,
"grad_norm": 0.23456221052117204,
"learning_rate": 3.499474861881069e-07,
"loss": 0.5028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5162132978439331,
"step": 1835,
"valid_targets_mean": 3465.6,
"valid_targets_min": 544
},
{
"epoch": 2.848297213622291,
"grad_norm": 0.3055562941134727,
"learning_rate": 3.1719465111511583e-07,
"loss": 0.5469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6021124124526978,
"step": 1840,
"valid_targets_mean": 2587.6,
"valid_targets_min": 369
},
{
"epoch": 2.8560371517027865,
"grad_norm": 0.2502192285084054,
"learning_rate": 2.8603854372376117e-07,
"loss": 0.4991,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5194599032402039,
"step": 1845,
"valid_targets_mean": 3274.3,
"valid_targets_min": 346
},
{
"epoch": 2.863777089783282,
"grad_norm": 0.3862187176497177,
"learning_rate": 2.5648169149009583e-07,
"loss": 0.5405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6468001008033752,
"step": 1850,
"valid_targets_mean": 2213.1,
"valid_targets_min": 520
},
{
"epoch": 2.871517027863777,
"grad_norm": 0.24642548117518157,
"learning_rate": 2.2852649215383548e-07,
"loss": 0.5021,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5029493570327759,
"step": 1855,
"valid_targets_mean": 3187.1,
"valid_targets_min": 454
},
{
"epoch": 2.8792569659442724,
"grad_norm": 0.6393738585673495,
"learning_rate": 2.021752135238564e-07,
"loss": 0.5939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.9357088804244995,
"step": 1860,
"valid_targets_mean": 1165.0,
"valid_targets_min": 257
},
{
"epoch": 2.886996904024768,
"grad_norm": 0.24473998134972677,
"learning_rate": 1.774299932942136e-07,
"loss": 0.4976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4848713278770447,
"step": 1865,
"valid_targets_mean": 3309.6,
"valid_targets_min": 624
},
{
"epoch": 2.8947368421052633,
"grad_norm": 0.811216041248401,
"learning_rate": 1.5429283887073543e-07,
"loss": 0.5679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.9271347522735596,
"step": 1870,
"valid_targets_mean": 1005.5,
"valid_targets_min": 350
},
{
"epoch": 2.9024767801857587,
"grad_norm": 0.2348557107715619,
"learning_rate": 1.3276562720816677e-07,
"loss": 0.5134,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4838997721672058,
"step": 1875,
"valid_targets_mean": 3382.6,
"valid_targets_min": 693
},
{
"epoch": 2.9102167182662537,
"grad_norm": 0.39825490103557576,
"learning_rate": 1.1285010465791335e-07,
"loss": 0.5182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6444357633590698,
"step": 1880,
"valid_targets_mean": 1660.4,
"valid_targets_min": 441
},
{
"epoch": 2.917956656346749,
"grad_norm": 0.23174085543587303,
"learning_rate": 9.454788682637051e-08,
"loss": 0.514,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.47774291038513184,
"step": 1885,
"valid_targets_mean": 3356.6,
"valid_targets_min": 296
},
{
"epoch": 2.9256965944272446,
"grad_norm": 0.36941506963780263,
"learning_rate": 7.786045844385248e-08,
"loss": 0.5142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6007277965545654,
"step": 1890,
"valid_targets_mean": 2242.9,
"valid_targets_min": 530
},
{
"epoch": 2.93343653250774,
"grad_norm": 1.179204248521706,
"learning_rate": 6.27891732441599e-08,
"loss": 0.356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3209402561187744,
"step": 1895,
"valid_targets_mean": 4536.9,
"valid_targets_min": 1128
},
{
"epoch": 2.9411764705882355,
"grad_norm": 1.2129510713833693,
"learning_rate": 4.933525385474758e-08,
"loss": 0.2926,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2939125597476959,
"step": 1900,
"valid_targets_mean": 4706.6,
"valid_targets_min": 957
},
{
"epoch": 2.9489164086687305,
"grad_norm": 1.1863291012650041,
"learning_rate": 3.749979169755502e-08,
"loss": 0.2913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26739662885665894,
"step": 1905,
"valid_targets_mean": 5179.4,
"valid_targets_min": 1060
},
{
"epoch": 2.956656346749226,
"grad_norm": 1.197256521892218,
"learning_rate": 2.7283746900454987e-08,
"loss": 0.2818,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29562872648239136,
"step": 1910,
"valid_targets_mean": 4519.4,
"valid_targets_min": 1164
},
{
"epoch": 2.9643962848297214,
"grad_norm": 1.3498867410906616,
"learning_rate": 1.8687948219371367e-08,
"loss": 0.3047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3049085736274719,
"step": 1915,
"valid_targets_mean": 4008.1,
"valid_targets_min": 1280
},
{
"epoch": 2.972136222910217,
"grad_norm": 1.40473659835793,
"learning_rate": 1.171309297104406e-08,
"loss": 0.3081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3065996468067169,
"step": 1920,
"valid_targets_mean": 4820.1,
"valid_targets_min": 1337
},
{
"epoch": 2.9798761609907123,
"grad_norm": 1.3394815925622965,
"learning_rate": 6.3597469764675735e-09,
"loss": 0.287,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27871161699295044,
"step": 1925,
"valid_targets_mean": 2952.3,
"valid_targets_min": 187
},
{
"epoch": 2.9876160990712073,
"grad_norm": 1.3180578947124486,
"learning_rate": 2.6283445149810625e-09,
"loss": 0.2763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28097784519195557,
"step": 1930,
"valid_targets_mean": 2867.5,
"valid_targets_min": 1216
},
{
"epoch": 2.9953560371517027,
"grad_norm": 1.2405589554919676,
"learning_rate": 5.191882890454025e-10,
"loss": 0.2741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25938713550567627,
"step": 1935,
"valid_targets_mean": 3687.5,
"valid_targets_min": 1785
},
{
"epoch": 3.0,
"step": 1938,
"total_flos": 5686933939290112.0,
"train_loss": 0.0,
"train_runtime": 5.6192,
"train_samples_per_second": 44118.376,
"train_steps_per_second": 344.888
}
],
"logging_steps": 5,
"max_steps": 1938,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5686933939290112.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}