{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1938, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007739938080495356, "grad_norm": 27.524375797654567, "learning_rate": 8.247422680412372e-07, "loss": 0.8916, "loss_nan_ranks": 0, "loss_rank_avg": 0.8891476392745972, "step": 5, "valid_targets_mean": 3787.4, "valid_targets_min": 443 }, { "epoch": 0.015479876160990712, "grad_norm": 24.288144237324385, "learning_rate": 1.8556701030927837e-06, "loss": 0.8643, "loss_nan_ranks": 0, "loss_rank_avg": 0.8326998353004456, "step": 10, "valid_targets_mean": 3744.1, "valid_targets_min": 445 }, { "epoch": 0.02321981424148607, "grad_norm": 15.387565688754766, "learning_rate": 2.8865979381443297e-06, "loss": 0.7836, "loss_nan_ranks": 0, "loss_rank_avg": 0.7326061725616455, "step": 15, "valid_targets_mean": 3806.1, "valid_targets_min": 364 }, { "epoch": 0.030959752321981424, "grad_norm": 6.234776596005629, "learning_rate": 3.917525773195877e-06, "loss": 0.6587, "loss_nan_ranks": 0, "loss_rank_avg": 0.6209056377410889, "step": 20, "valid_targets_mean": 3883.8, "valid_targets_min": 423 }, { "epoch": 0.03869969040247678, "grad_norm": 2.5389947376944724, "learning_rate": 4.948453608247423e-06, "loss": 0.575, "loss_nan_ranks": 0, "loss_rank_avg": 0.5564368963241577, "step": 25, "valid_targets_mean": 3872.5, "valid_targets_min": 249 }, { "epoch": 0.04643962848297214, "grad_norm": 1.5288498389148235, "learning_rate": 5.979381443298969e-06, "loss": 0.5363, "loss_nan_ranks": 0, "loss_rank_avg": 0.529486358165741, "step": 30, "valid_targets_mean": 3780.5, "valid_targets_min": 372 }, { "epoch": 0.05417956656346749, "grad_norm": 1.3523989809328827, "learning_rate": 7.010309278350515e-06, "loss": 0.5143, "loss_nan_ranks": 0, "loss_rank_avg": 0.5047813057899475, "step": 35, "valid_targets_mean": 3663.5, "valid_targets_min": 460 }, { "epoch": 0.06191950464396285, "grad_norm": 0.9302783185308883, "learning_rate": 8.041237113402063e-06, "loss": 0.4863, "loss_nan_ranks": 0, "loss_rank_avg": 0.4762948751449585, "step": 40, "valid_targets_mean": 3721.1, "valid_targets_min": 448 }, { "epoch": 0.0696594427244582, "grad_norm": 0.6719299997500342, "learning_rate": 9.072164948453609e-06, "loss": 0.4628, "loss_nan_ranks": 0, "loss_rank_avg": 0.4557168483734131, "step": 45, "valid_targets_mean": 3723.2, "valid_targets_min": 391 }, { "epoch": 0.07739938080495357, "grad_norm": 0.521938817483369, "learning_rate": 1.0103092783505156e-05, "loss": 0.4424, "loss_nan_ranks": 0, "loss_rank_avg": 0.4353238344192505, "step": 50, "valid_targets_mean": 3877.2, "valid_targets_min": 462 }, { "epoch": 0.08513931888544891, "grad_norm": 0.4866090083622629, "learning_rate": 1.1134020618556703e-05, "loss": 0.4273, "loss_nan_ranks": 0, "loss_rank_avg": 0.4267203211784363, "step": 55, "valid_targets_mean": 3845.5, "valid_targets_min": 497 }, { "epoch": 0.09287925696594428, "grad_norm": 0.7414739260025202, "learning_rate": 1.2164948453608248e-05, "loss": 0.4995, "loss_nan_ranks": 0, "loss_rank_avg": 0.49669697880744934, "step": 60, "valid_targets_mean": 4920.5, "valid_targets_min": 2219 }, { "epoch": 0.10061919504643962, "grad_norm": 2.4546549920454144, "learning_rate": 1.3195876288659795e-05, "loss": 0.5912, "loss_nan_ranks": 0, "loss_rank_avg": 0.7627507448196411, "step": 65, "valid_targets_mean": 965.5, "valid_targets_min": 456 }, { "epoch": 0.10835913312693499, "grad_norm": 0.6107036683741611, "learning_rate": 1.4226804123711342e-05, "loss": 0.5298, "loss_nan_ranks": 0, "loss_rank_avg": 0.4461647868156433, "step": 70, "valid_targets_mean": 4698.2, "valid_targets_min": 3199 }, { "epoch": 0.11609907120743033, "grad_norm": 1.3370797632992233, "learning_rate": 1.5257731958762888e-05, "loss": 0.5807, "loss_nan_ranks": 0, "loss_rank_avg": 0.7594808340072632, "step": 75, "valid_targets_mean": 877.1, "valid_targets_min": 427 }, { "epoch": 0.1238390092879257, "grad_norm": 0.40163470209773594, "learning_rate": 1.6288659793814433e-05, "loss": 0.4163, "loss_nan_ranks": 0, "loss_rank_avg": 0.40786969661712646, "step": 80, "valid_targets_mean": 4635.7, "valid_targets_min": 3134 }, { "epoch": 0.13157894736842105, "grad_norm": 1.1010196281594, "learning_rate": 1.731958762886598e-05, "loss": 0.5356, "loss_nan_ranks": 0, "loss_rank_avg": 0.7283442616462708, "step": 85, "valid_targets_mean": 855.6, "valid_targets_min": 374 }, { "epoch": 0.1393188854489164, "grad_norm": 0.34863970907424235, "learning_rate": 1.8350515463917527e-05, "loss": 0.4733, "loss_nan_ranks": 0, "loss_rank_avg": 0.396062433719635, "step": 90, "valid_targets_mean": 4777.3, "valid_targets_min": 2671 }, { "epoch": 0.14705882352941177, "grad_norm": 1.0895786453008987, "learning_rate": 1.9381443298969072e-05, "loss": 0.4539, "loss_nan_ranks": 0, "loss_rank_avg": 0.7016916871070862, "step": 95, "valid_targets_mean": 898.8, "valid_targets_min": 299 }, { "epoch": 0.15479876160990713, "grad_norm": 0.35015283959800747, "learning_rate": 2.0412371134020618e-05, "loss": 0.4793, "loss_nan_ranks": 0, "loss_rank_avg": 0.35827910900115967, "step": 100, "valid_targets_mean": 4618.4, "valid_targets_min": 2204 }, { "epoch": 0.16253869969040247, "grad_norm": 0.39223389296050876, "learning_rate": 2.1443298969072166e-05, "loss": 0.3739, "loss_nan_ranks": 0, "loss_rank_avg": 0.3823808431625366, "step": 105, "valid_targets_mean": 3491.6, "valid_targets_min": 313 }, { "epoch": 0.17027863777089783, "grad_norm": 0.3959500777293298, "learning_rate": 2.2474226804123712e-05, "loss": 0.5074, "loss_nan_ranks": 0, "loss_rank_avg": 0.34726065397262573, "step": 110, "valid_targets_mean": 4612.6, "valid_targets_min": 3000 }, { "epoch": 0.1780185758513932, "grad_norm": 0.34302377655422733, "learning_rate": 2.350515463917526e-05, "loss": 0.3577, "loss_nan_ranks": 0, "loss_rank_avg": 0.3575766682624817, "step": 115, "valid_targets_mean": 3892.6, "valid_targets_min": 238 }, { "epoch": 0.18575851393188855, "grad_norm": 0.6569398422255293, "learning_rate": 2.453608247422681e-05, "loss": 0.5422, "loss_nan_ranks": 0, "loss_rank_avg": 0.3516051173210144, "step": 120, "valid_targets_mean": 4844.6, "valid_targets_min": 2731 }, { "epoch": 0.19349845201238391, "grad_norm": 1.9719516136254733, "learning_rate": 2.556701030927835e-05, "loss": 0.4012, "loss_nan_ranks": 0, "loss_rank_avg": 0.621773362159729, "step": 125, "valid_targets_mean": 984.1, "valid_targets_min": 428 }, { "epoch": 0.20123839009287925, "grad_norm": 0.5040116006755825, "learning_rate": 2.6597938144329897e-05, "loss": 0.4853, "loss_nan_ranks": 0, "loss_rank_avg": 0.3492981493473053, "step": 130, "valid_targets_mean": 4991.7, "valid_targets_min": 3133 }, { "epoch": 0.2089783281733746, "grad_norm": 0.36060008121628434, "learning_rate": 2.7628865979381445e-05, "loss": 0.3517, "loss_nan_ranks": 0, "loss_rank_avg": 0.3732391595840454, "step": 135, "valid_targets_mean": 3466.8, "valid_targets_min": 300 }, { "epoch": 0.21671826625386997, "grad_norm": 0.6220433976621038, "learning_rate": 2.865979381443299e-05, "loss": 0.5567, "loss_nan_ranks": 0, "loss_rank_avg": 0.42027992010116577, "step": 140, "valid_targets_mean": 5756.4, "valid_targets_min": 472 }, { "epoch": 0.22445820433436534, "grad_norm": 0.3538953150206175, "learning_rate": 2.969072164948454e-05, "loss": 0.4102, "loss_nan_ranks": 0, "loss_rank_avg": 0.39266014099121094, "step": 145, "valid_targets_mean": 5488.7, "valid_targets_min": 504 }, { "epoch": 0.23219814241486067, "grad_norm": 0.306525611774786, "learning_rate": 3.0721649484536085e-05, "loss": 0.4184, "loss_nan_ranks": 0, "loss_rank_avg": 0.4043034315109253, "step": 150, "valid_targets_mean": 5880.8, "valid_targets_min": 492 }, { "epoch": 0.23993808049535603, "grad_norm": 0.27265867164938057, "learning_rate": 3.175257731958763e-05, "loss": 0.3954, "loss_nan_ranks": 0, "loss_rank_avg": 0.3888610899448395, "step": 155, "valid_targets_mean": 5631.9, "valid_targets_min": 1034 }, { "epoch": 0.2476780185758514, "grad_norm": 0.26867129331556494, "learning_rate": 3.2783505154639176e-05, "loss": 0.4007, "loss_nan_ranks": 0, "loss_rank_avg": 0.3766745924949646, "step": 160, "valid_targets_mean": 5617.9, "valid_targets_min": 391 }, { "epoch": 0.25541795665634676, "grad_norm": 0.23778752661886557, "learning_rate": 3.3814432989690724e-05, "loss": 0.3873, "loss_nan_ranks": 0, "loss_rank_avg": 0.3770267367362976, "step": 165, "valid_targets_mean": 6436.5, "valid_targets_min": 560 }, { "epoch": 0.2631578947368421, "grad_norm": 0.2718071722062838, "learning_rate": 3.484536082474227e-05, "loss": 0.3853, "loss_nan_ranks": 0, "loss_rank_avg": 0.3977334499359131, "step": 170, "valid_targets_mean": 5309.5, "valid_targets_min": 482 }, { "epoch": 0.2708978328173375, "grad_norm": 0.40911648384963295, "learning_rate": 3.587628865979382e-05, "loss": 0.3747, "loss_nan_ranks": 0, "loss_rank_avg": 0.3614729642868042, "step": 175, "valid_targets_mean": 5691.1, "valid_targets_min": 844 }, { "epoch": 0.2786377708978328, "grad_norm": 0.22354807753985279, "learning_rate": 3.6907216494845364e-05, "loss": 0.3698, "loss_nan_ranks": 0, "loss_rank_avg": 0.36764293909072876, "step": 180, "valid_targets_mean": 5925.6, "valid_targets_min": 500 }, { "epoch": 0.28637770897832815, "grad_norm": 0.21693850181727353, "learning_rate": 3.7938144329896906e-05, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.3595467209815979, "step": 185, "valid_targets_mean": 5658.9, "valid_targets_min": 491 }, { "epoch": 0.29411764705882354, "grad_norm": 0.23965212827073076, "learning_rate": 3.8969072164948455e-05, "loss": 0.3632, "loss_nan_ranks": 0, "loss_rank_avg": 0.38485220074653625, "step": 190, "valid_targets_mean": 5660.4, "valid_targets_min": 422 }, { "epoch": 0.3018575851393189, "grad_norm": 0.2149011814927512, "learning_rate": 4e-05, "loss": 0.3527, "loss_nan_ranks": 0, "loss_rank_avg": 0.33452799916267395, "step": 195, "valid_targets_mean": 6256.5, "valid_targets_min": 590 }, { "epoch": 0.30959752321981426, "grad_norm": 0.23156965525376894, "learning_rate": 3.999918877027267e-05, "loss": 0.358, "loss_nan_ranks": 0, "loss_rank_avg": 0.3639640212059021, "step": 200, "valid_targets_mean": 5626.4, "valid_targets_min": 524 }, { "epoch": 0.3173374613003096, "grad_norm": 0.27544480267453636, "learning_rate": 3.999675514690003e-05, "loss": 0.3512, "loss_nan_ranks": 0, "loss_rank_avg": 0.3505948483943939, "step": 205, "valid_targets_mean": 5776.4, "valid_targets_min": 921 }, { "epoch": 0.32507739938080493, "grad_norm": 0.2252464636729443, "learning_rate": 3.9992699327304845e-05, "loss": 0.3502, "loss_nan_ranks": 0, "loss_rank_avg": 0.348880410194397, "step": 210, "valid_targets_mean": 5494.7, "valid_targets_min": 1092 }, { "epoch": 0.3328173374613003, "grad_norm": 0.23153168043305264, "learning_rate": 3.998702164050726e-05, "loss": 0.3478, "loss_nan_ranks": 0, "loss_rank_avg": 0.3480554521083832, "step": 215, "valid_targets_mean": 6014.0, "valid_targets_min": 234 }, { "epoch": 0.34055727554179566, "grad_norm": 0.22019666186278802, "learning_rate": 3.997972254709811e-05, "loss": 0.3623, "loss_nan_ranks": 0, "loss_rank_avg": 0.3661324381828308, "step": 220, "valid_targets_mean": 6489.6, "valid_targets_min": 487 }, { "epoch": 0.34829721362229105, "grad_norm": 0.3505889989673578, "learning_rate": 3.997080263920155e-05, "loss": 0.4905, "loss_nan_ranks": 0, "loss_rank_avg": 0.3960522413253784, "step": 225, "valid_targets_mean": 3702.6, "valid_targets_min": 466 }, { "epoch": 0.3560371517027864, "grad_norm": 0.23666618818592255, "learning_rate": 3.9960262640427016e-05, "loss": 0.3451, "loss_nan_ranks": 0, "loss_rank_avg": 0.34150058031082153, "step": 230, "valid_targets_mean": 5997.3, "valid_targets_min": 240 }, { "epoch": 0.3637770897832817, "grad_norm": 0.388486054978801, "learning_rate": 3.994810340581056e-05, "loss": 0.4567, "loss_nan_ranks": 0, "loss_rank_avg": 0.3778737783432007, "step": 235, "valid_targets_mean": 4160.1, "valid_targets_min": 473 }, { "epoch": 0.3715170278637771, "grad_norm": 0.29026315145068443, "learning_rate": 3.993432592174541e-05, "loss": 0.3426, "loss_nan_ranks": 0, "loss_rank_avg": 0.3361317813396454, "step": 240, "valid_targets_mean": 6533.9, "valid_targets_min": 1876 }, { "epoch": 0.37925696594427244, "grad_norm": 0.24143050901851154, "learning_rate": 3.991893130590206e-05, "loss": 0.4457, "loss_nan_ranks": 0, "loss_rank_avg": 0.33998745679855347, "step": 245, "valid_targets_mean": 6688.6, "valid_targets_min": 298 }, { "epoch": 0.38699690402476783, "grad_norm": 0.24364975307971998, "learning_rate": 3.990192080713749e-05, "loss": 0.346, "loss_nan_ranks": 0, "loss_rank_avg": 0.3610474467277527, "step": 250, "valid_targets_mean": 6724.0, "valid_targets_min": 549 }, { "epoch": 0.39473684210526316, "grad_norm": 0.35938926126684523, "learning_rate": 3.988329580539395e-05, "loss": 0.4906, "loss_nan_ranks": 0, "loss_rank_avg": 0.3444874882698059, "step": 255, "valid_targets_mean": 6117.0, "valid_targets_min": 219 }, { "epoch": 0.4024767801857585, "grad_norm": 0.2276644524642497, "learning_rate": 3.9863057811586926e-05, "loss": 0.3347, "loss_nan_ranks": 0, "loss_rank_avg": 0.3267607092857361, "step": 260, "valid_targets_mean": 6469.6, "valid_targets_min": 209 }, { "epoch": 0.4102167182662539, "grad_norm": 0.28676870840884927, "learning_rate": 3.984120846748264e-05, "loss": 0.4497, "loss_nan_ranks": 0, "loss_rank_avg": 0.3532854914665222, "step": 265, "valid_targets_mean": 6510.8, "valid_targets_min": 206 }, { "epoch": 0.4179566563467492, "grad_norm": 0.2453432922502153, "learning_rate": 3.9817749545564866e-05, "loss": 0.3306, "loss_nan_ranks": 0, "loss_rank_avg": 0.3410989046096802, "step": 270, "valid_targets_mean": 5183.3, "valid_targets_min": 399 }, { "epoch": 0.42569659442724456, "grad_norm": 0.31219155957134104, "learning_rate": 3.979268294889105e-05, "loss": 0.4835, "loss_nan_ranks": 0, "loss_rank_avg": 0.3323940634727478, "step": 275, "valid_targets_mean": 6249.5, "valid_targets_min": 284 }, { "epoch": 0.43343653250773995, "grad_norm": 0.2294409069053472, "learning_rate": 3.9766010710938055e-05, "loss": 0.3305, "loss_nan_ranks": 0, "loss_rank_avg": 0.3406042456626892, "step": 280, "valid_targets_mean": 5410.6, "valid_targets_min": 265 }, { "epoch": 0.4411764705882353, "grad_norm": 0.30083907414157973, "learning_rate": 3.97377349954371e-05, "loss": 0.4778, "loss_nan_ranks": 0, "loss_rank_avg": 0.32756131887435913, "step": 285, "valid_targets_mean": 6375.5, "valid_targets_min": 401 }, { "epoch": 0.44891640866873067, "grad_norm": 0.35727630671749366, "learning_rate": 3.970785809619829e-05, "loss": 0.3369, "loss_nan_ranks": 0, "loss_rank_avg": 0.37338879704475403, "step": 290, "valid_targets_mean": 3327.1, "valid_targets_min": 428 }, { "epoch": 0.456656346749226, "grad_norm": 1.1550687449863875, "learning_rate": 3.96763824369245e-05, "loss": 0.6867, "loss_nan_ranks": 0, "loss_rank_avg": 0.6693388223648071, "step": 295, "valid_targets_mean": 3046.5, "valid_targets_min": 249 }, { "epoch": 0.46439628482972134, "grad_norm": 0.5924681699568595, "learning_rate": 3.964331057101479e-05, "loss": 0.6378, "loss_nan_ranks": 0, "loss_rank_avg": 0.614995002746582, "step": 300, "valid_targets_mean": 3748.6, "valid_targets_min": 339 }, { "epoch": 0.47213622291021673, "grad_norm": 0.4378860108802947, "learning_rate": 3.9608645181357223e-05, "loss": 0.6239, "loss_nan_ranks": 0, "loss_rank_avg": 0.5896746516227722, "step": 305, "valid_targets_mean": 3423.9, "valid_targets_min": 455 }, { "epoch": 0.47987616099071206, "grad_norm": 0.33365115943254425, "learning_rate": 3.9572389080111276e-05, "loss": 0.6103, "loss_nan_ranks": 0, "loss_rank_avg": 0.5521975159645081, "step": 310, "valid_targets_mean": 4002.1, "valid_targets_min": 292 }, { "epoch": 0.48761609907120745, "grad_norm": 0.3969354765077491, "learning_rate": 3.9534545208479645e-05, "loss": 0.6133, "loss_nan_ranks": 0, "loss_rank_avg": 0.6419351696968079, "step": 315, "valid_targets_mean": 3102.1, "valid_targets_min": 317 }, { "epoch": 0.4953560371517028, "grad_norm": 0.39504133651165185, "learning_rate": 3.949511663646971e-05, "loss": 0.5843, "loss_nan_ranks": 0, "loss_rank_avg": 0.5931770205497742, "step": 320, "valid_targets_mean": 2004.2, "valid_targets_min": 192 }, { "epoch": 0.5030959752321982, "grad_norm": 0.3599916096569783, "learning_rate": 3.945410656264443e-05, "loss": 0.6022, "loss_nan_ranks": 0, "loss_rank_avg": 0.6142541170120239, "step": 325, "valid_targets_mean": 2664.8, "valid_targets_min": 247 }, { "epoch": 0.5108359133126935, "grad_norm": 0.3956398459205433, "learning_rate": 3.941151831386291e-05, "loss": 0.5839, "loss_nan_ranks": 0, "loss_rank_avg": 0.6046449542045593, "step": 330, "valid_targets_mean": 3044.0, "valid_targets_min": 298 }, { "epoch": 0.5185758513931888, "grad_norm": 0.28683489229087217, "learning_rate": 3.936735534501051e-05, "loss": 0.5715, "loss_nan_ranks": 0, "loss_rank_avg": 0.5844941139221191, "step": 335, "valid_targets_mean": 3938.2, "valid_targets_min": 941 }, { "epoch": 0.5263157894736842, "grad_norm": 0.3050152661525686, "learning_rate": 3.932162123871853e-05, "loss": 0.5821, "loss_nan_ranks": 0, "loss_rank_avg": 0.5821244120597839, "step": 340, "valid_targets_mean": 3717.0, "valid_targets_min": 403 }, { "epoch": 0.5340557275541795, "grad_norm": 0.25159114526764875, "learning_rate": 3.927431970507362e-05, "loss": 0.5573, "loss_nan_ranks": 0, "loss_rank_avg": 0.5323772430419922, "step": 345, "valid_targets_mean": 4406.2, "valid_targets_min": 387 }, { "epoch": 0.541795665634675, "grad_norm": 0.2887704499367723, "learning_rate": 3.922545458131683e-05, "loss": 0.583, "loss_nan_ranks": 0, "loss_rank_avg": 0.6194786429405212, "step": 350, "valid_targets_mean": 3612.3, "valid_targets_min": 371 }, { "epoch": 0.5495356037151703, "grad_norm": 0.3280016267710637, "learning_rate": 3.917502983153225e-05, "loss": 0.5538, "loss_nan_ranks": 0, "loss_rank_avg": 0.5695074796676636, "step": 355, "valid_targets_mean": 2256.9, "valid_targets_min": 283 }, { "epoch": 0.5572755417956656, "grad_norm": 0.3341165803200255, "learning_rate": 3.9123049546325475e-05, "loss": 0.5677, "loss_nan_ranks": 0, "loss_rank_avg": 0.581693708896637, "step": 360, "valid_targets_mean": 2972.2, "valid_targets_min": 318 }, { "epoch": 0.565015479876161, "grad_norm": 0.467347373374131, "learning_rate": 3.9069517942491774e-05, "loss": 0.6032, "loss_nan_ranks": 0, "loss_rank_avg": 0.671524703502655, "step": 365, "valid_targets_mean": 3760.0, "valid_targets_min": 1056 }, { "epoch": 0.5727554179566563, "grad_norm": 0.3513800021014567, "learning_rate": 3.901443936267398e-05, "loss": 0.6833, "loss_nan_ranks": 0, "loss_rank_avg": 0.6314958333969116, "step": 370, "valid_targets_mean": 4103.2, "valid_targets_min": 797 }, { "epoch": 0.5804953560371517, "grad_norm": 0.38650302362396954, "learning_rate": 3.895781827501022e-05, "loss": 0.7054, "loss_nan_ranks": 0, "loss_rank_avg": 0.7055186033248901, "step": 375, "valid_targets_mean": 3301.9, "valid_targets_min": 791 }, { "epoch": 0.5882352941176471, "grad_norm": 0.2946682880232837, "learning_rate": 3.8899659272771454e-05, "loss": 0.684, "loss_nan_ranks": 0, "loss_rank_avg": 0.6608676314353943, "step": 380, "valid_targets_mean": 4432.9, "valid_targets_min": 877 }, { "epoch": 0.5959752321981424, "grad_norm": 0.29230040304138466, "learning_rate": 3.883996707398883e-05, "loss": 0.6764, "loss_nan_ranks": 0, "loss_rank_avg": 0.6655077338218689, "step": 385, "valid_targets_mean": 4270.7, "valid_targets_min": 907 }, { "epoch": 0.6037151702786377, "grad_norm": 0.27606014724941075, "learning_rate": 3.877874652107096e-05, "loss": 0.6741, "loss_nan_ranks": 0, "loss_rank_avg": 0.6826651096343994, "step": 390, "valid_targets_mean": 4288.7, "valid_targets_min": 758 }, { "epoch": 0.6114551083591331, "grad_norm": 0.2968942052399876, "learning_rate": 3.8716002580411084e-05, "loss": 0.6787, "loss_nan_ranks": 0, "loss_rank_avg": 0.6491446495056152, "step": 395, "valid_targets_mean": 4493.8, "valid_targets_min": 1317 }, { "epoch": 0.6191950464396285, "grad_norm": 0.24106023474079094, "learning_rate": 3.86517403419842e-05, "loss": 0.6573, "loss_nan_ranks": 0, "loss_rank_avg": 0.6111230850219727, "step": 400, "valid_targets_mean": 4781.4, "valid_targets_min": 671 }, { "epoch": 0.6269349845201239, "grad_norm": 0.28063476540629106, "learning_rate": 3.858596501893413e-05, "loss": 0.6734, "loss_nan_ranks": 0, "loss_rank_avg": 0.6427568793296814, "step": 405, "valid_targets_mean": 4028.6, "valid_targets_min": 1025 }, { "epoch": 0.6346749226006192, "grad_norm": 0.2963922038322634, "learning_rate": 3.8518681947150584e-05, "loss": 0.6584, "loss_nan_ranks": 0, "loss_rank_avg": 0.6942857503890991, "step": 410, "valid_targets_mean": 3116.2, "valid_targets_min": 442 }, { "epoch": 0.6424148606811145, "grad_norm": 0.2800249645770306, "learning_rate": 3.844989658483639e-05, "loss": 0.6743, "loss_nan_ranks": 0, "loss_rank_avg": 0.6975204348564148, "step": 415, "valid_targets_mean": 4106.4, "valid_targets_min": 854 }, { "epoch": 0.6501547987616099, "grad_norm": 0.2796558531297951, "learning_rate": 3.83796145120646e-05, "loss": 0.6699, "loss_nan_ranks": 0, "loss_rank_avg": 0.6599092483520508, "step": 420, "valid_targets_mean": 3609.7, "valid_targets_min": 1029 }, { "epoch": 0.6578947368421053, "grad_norm": 0.2697840591750541, "learning_rate": 3.83078414303259e-05, "loss": 0.6755, "loss_nan_ranks": 0, "loss_rank_avg": 0.6651268005371094, "step": 425, "valid_targets_mean": 4175.3, "valid_targets_min": 1010 }, { "epoch": 0.6656346749226006, "grad_norm": 0.3266528302290397, "learning_rate": 3.823458316206603e-05, "loss": 0.6534, "loss_nan_ranks": 0, "loss_rank_avg": 0.6431168913841248, "step": 430, "valid_targets_mean": 3942.0, "valid_targets_min": 1104 }, { "epoch": 0.673374613003096, "grad_norm": 0.2887225491658185, "learning_rate": 3.81598456502135e-05, "loss": 0.6716, "loss_nan_ranks": 0, "loss_rank_avg": 0.6703712940216064, "step": 435, "valid_targets_mean": 3787.9, "valid_targets_min": 776 }, { "epoch": 0.6811145510835913, "grad_norm": 0.45784545814125294, "learning_rate": 3.8083634957697445e-05, "loss": 0.6219, "loss_nan_ranks": 0, "loss_rank_avg": 0.5753607153892517, "step": 440, "valid_targets_mean": 3564.7, "valid_targets_min": 900 }, { "epoch": 0.6888544891640866, "grad_norm": 0.30242995797318084, "learning_rate": 3.800595726695578e-05, "loss": 0.5599, "loss_nan_ranks": 0, "loss_rank_avg": 0.5282618999481201, "step": 445, "valid_targets_mean": 4363.4, "valid_targets_min": 750 }, { "epoch": 0.6965944272445821, "grad_norm": 0.43946569956807485, "learning_rate": 3.792681887943372e-05, "loss": 0.5555, "loss_nan_ranks": 0, "loss_rank_avg": 0.530036449432373, "step": 450, "valid_targets_mean": 3516.3, "valid_targets_min": 767 }, { "epoch": 0.7043343653250774, "grad_norm": 0.27208065178106133, "learning_rate": 3.7846226215072495e-05, "loss": 0.5548, "loss_nan_ranks": 0, "loss_rank_avg": 0.528791069984436, "step": 455, "valid_targets_mean": 4421.9, "valid_targets_min": 880 }, { "epoch": 0.7120743034055728, "grad_norm": 0.31540867569770586, "learning_rate": 3.776418581178863e-05, "loss": 0.5628, "loss_nan_ranks": 0, "loss_rank_avg": 0.5669809579849243, "step": 460, "valid_targets_mean": 3265.3, "valid_targets_min": 906 }, { "epoch": 0.7198142414860681, "grad_norm": 0.26388751482498035, "learning_rate": 3.768070432494353e-05, "loss": 0.5334, "loss_nan_ranks": 0, "loss_rank_avg": 0.49619990587234497, "step": 465, "valid_targets_mean": 4266.4, "valid_targets_min": 1047 }, { "epoch": 0.7275541795665634, "grad_norm": 0.30165161522894157, "learning_rate": 3.759578852680355e-05, "loss": 0.5617, "loss_nan_ranks": 0, "loss_rank_avg": 0.5646347403526306, "step": 470, "valid_targets_mean": 3498.2, "valid_targets_min": 1036 }, { "epoch": 0.7352941176470589, "grad_norm": 0.26399437889537725, "learning_rate": 3.750944530599069e-05, "loss": 0.5394, "loss_nan_ranks": 0, "loss_rank_avg": 0.5367434620857239, "step": 475, "valid_targets_mean": 4387.9, "valid_targets_min": 635 }, { "epoch": 0.7430340557275542, "grad_norm": 0.2711483804191905, "learning_rate": 3.74216816669237e-05, "loss": 0.5464, "loss_nan_ranks": 0, "loss_rank_avg": 0.5406283736228943, "step": 480, "valid_targets_mean": 3467.2, "valid_targets_min": 695 }, { "epoch": 0.7507739938080495, "grad_norm": 0.2476637519734938, "learning_rate": 3.7332504729249865e-05, "loss": 0.5308, "loss_nan_ranks": 0, "loss_rank_avg": 0.5285801887512207, "step": 485, "valid_targets_mean": 4467.1, "valid_targets_min": 613 }, { "epoch": 0.7585139318885449, "grad_norm": 0.2818961864587132, "learning_rate": 3.724192172726747e-05, "loss": 0.5523, "loss_nan_ranks": 0, "loss_rank_avg": 0.5752028226852417, "step": 490, "valid_targets_mean": 3536.6, "valid_targets_min": 834 }, { "epoch": 0.7662538699690402, "grad_norm": 0.25597440316250303, "learning_rate": 3.714994000933893e-05, "loss": 0.5405, "loss_nan_ranks": 0, "loss_rank_avg": 0.5525496602058411, "step": 495, "valid_targets_mean": 4088.9, "valid_targets_min": 837 }, { "epoch": 0.7739938080495357, "grad_norm": 0.28994428627605706, "learning_rate": 3.705656703729463e-05, "loss": 0.5456, "loss_nan_ranks": 0, "loss_rank_avg": 0.5487348437309265, "step": 500, "valid_targets_mean": 3560.9, "valid_targets_min": 1032 }, { "epoch": 0.781733746130031, "grad_norm": 0.24479348444572127, "learning_rate": 3.6961810385827624e-05, "loss": 0.5361, "loss_nan_ranks": 0, "loss_rank_avg": 0.5247283577919006, "step": 505, "valid_targets_mean": 4477.0, "valid_targets_min": 502 }, { "epoch": 0.7894736842105263, "grad_norm": 0.31803943373773413, "learning_rate": 3.6865677741879186e-05, "loss": 0.5387, "loss_nan_ranks": 0, "loss_rank_avg": 0.5490365028381348, "step": 510, "valid_targets_mean": 3193.1, "valid_targets_min": 883 }, { "epoch": 0.7972136222910217, "grad_norm": 0.24052703407469328, "learning_rate": 3.676817690401516e-05, "loss": 0.546, "loss_nan_ranks": 0, "loss_rank_avg": 0.5295630693435669, "step": 515, "valid_targets_mean": 4601.6, "valid_targets_min": 602 }, { "epoch": 0.804953560371517, "grad_norm": 0.3322812041628695, "learning_rate": 3.666931578179335e-05, "loss": 0.5393, "loss_nan_ranks": 0, "loss_rank_avg": 0.5549291372299194, "step": 520, "valid_targets_mean": 3430.9, "valid_targets_min": 1017 }, { "epoch": 0.8126934984520123, "grad_norm": 0.26830168196964743, "learning_rate": 3.656910239512189e-05, "loss": 0.5253, "loss_nan_ranks": 0, "loss_rank_avg": 0.5216836929321289, "step": 525, "valid_targets_mean": 3828.6, "valid_targets_min": 1071 }, { "epoch": 0.8204334365325078, "grad_norm": 0.29777403207690095, "learning_rate": 3.646754487360861e-05, "loss": 0.628, "loss_nan_ranks": 0, "loss_rank_avg": 0.5326442718505859, "step": 530, "valid_targets_mean": 3390.7, "valid_targets_min": 996 }, { "epoch": 0.8281733746130031, "grad_norm": 0.29573945098254656, "learning_rate": 3.636465145590156e-05, "loss": 0.5329, "loss_nan_ranks": 0, "loss_rank_avg": 0.5153015851974487, "step": 535, "valid_targets_mean": 3718.5, "valid_targets_min": 819 }, { "epoch": 0.8359133126934984, "grad_norm": 0.30897425354025815, "learning_rate": 3.626043048902066e-05, "loss": 0.6416, "loss_nan_ranks": 0, "loss_rank_avg": 0.521770715713501, "step": 540, "valid_targets_mean": 3193.4, "valid_targets_min": 558 }, { "epoch": 0.8436532507739938, "grad_norm": 0.26854975778232826, "learning_rate": 3.615489042768056e-05, "loss": 0.5259, "loss_nan_ranks": 0, "loss_rank_avg": 0.514534592628479, "step": 545, "valid_targets_mean": 3690.3, "valid_targets_min": 434 }, { "epoch": 0.8513931888544891, "grad_norm": 0.3337704457340356, "learning_rate": 3.604803983360478e-05, "loss": 0.5894, "loss_nan_ranks": 0, "loss_rank_avg": 0.5469678044319153, "step": 550, "valid_targets_mean": 3144.1, "valid_targets_min": 742 }, { "epoch": 0.8591331269349846, "grad_norm": 0.30925056109154825, "learning_rate": 3.593988737483115e-05, "loss": 0.5232, "loss_nan_ranks": 0, "loss_rank_avg": 0.5279290676116943, "step": 555, "valid_targets_mean": 3871.4, "valid_targets_min": 502 }, { "epoch": 0.8668730650154799, "grad_norm": 0.27741337408365946, "learning_rate": 3.583044182500865e-05, "loss": 0.5833, "loss_nan_ranks": 0, "loss_rank_avg": 0.5350396633148193, "step": 560, "valid_targets_mean": 3256.5, "valid_targets_min": 999 }, { "epoch": 0.8746130030959752, "grad_norm": 0.2682135862875639, "learning_rate": 3.5719712062685604e-05, "loss": 0.5252, "loss_nan_ranks": 0, "loss_rank_avg": 0.5352612733840942, "step": 565, "valid_targets_mean": 3665.0, "valid_targets_min": 640 }, { "epoch": 0.8823529411764706, "grad_norm": 0.32747585544444807, "learning_rate": 3.560770707058952e-05, "loss": 0.6347, "loss_nan_ranks": 0, "loss_rank_avg": 0.5486376881599426, "step": 570, "valid_targets_mean": 3457.3, "valid_targets_min": 1110 }, { "epoch": 0.8900928792569659, "grad_norm": 0.33314798459658457, "learning_rate": 3.549443593489832e-05, "loss": 0.5166, "loss_nan_ranks": 0, "loss_rank_avg": 0.5069097280502319, "step": 575, "valid_targets_mean": 3141.0, "valid_targets_min": 566 }, { "epoch": 0.8978328173374613, "grad_norm": 0.3232997965070111, "learning_rate": 3.537990784450324e-05, "loss": 0.6174, "loss_nan_ranks": 0, "loss_rank_avg": 0.5290309190750122, "step": 580, "valid_targets_mean": 3509.7, "valid_targets_min": 780 }, { "epoch": 0.9055727554179567, "grad_norm": 0.31911406686125754, "learning_rate": 3.526413209026346e-05, "loss": 0.5301, "loss_nan_ranks": 0, "loss_rank_avg": 0.51148521900177, "step": 585, "valid_targets_mean": 3106.4, "valid_targets_min": 794 }, { "epoch": 0.913312693498452, "grad_norm": 0.37107762634385655, "learning_rate": 3.514711806425231e-05, "loss": 0.5652, "loss_nan_ranks": 0, "loss_rank_avg": 0.5133883953094482, "step": 590, "valid_targets_mean": 3530.0, "valid_targets_min": 744 }, { "epoch": 0.9210526315789473, "grad_norm": 0.3457780498003014, "learning_rate": 3.502887525899544e-05, "loss": 0.5268, "loss_nan_ranks": 0, "loss_rank_avg": 0.540345311164856, "step": 595, "valid_targets_mean": 3219.6, "valid_targets_min": 435 }, { "epoch": 0.9287925696594427, "grad_norm": 1.7120255019264785, "learning_rate": 3.490941326670073e-05, "loss": 0.5271, "loss_nan_ranks": 0, "loss_rank_avg": 0.39785030484199524, "step": 600, "valid_targets_mean": 4948.2, "valid_targets_min": 1383 }, { "epoch": 0.9365325077399381, "grad_norm": 0.554237503586826, "learning_rate": 3.47887417784801e-05, "loss": 0.3514, "loss_nan_ranks": 0, "loss_rank_avg": 0.3161848187446594, "step": 605, "valid_targets_mean": 4927.9, "valid_targets_min": 1102 }, { "epoch": 0.9442724458204335, "grad_norm": 0.5340419975249076, "learning_rate": 3.466687058356341e-05, "loss": 0.3172, "loss_nan_ranks": 0, "loss_rank_avg": 0.3274293839931488, "step": 610, "valid_targets_mean": 4602.5, "valid_targets_min": 1297 }, { "epoch": 0.9520123839009288, "grad_norm": 0.38335606187666377, "learning_rate": 3.4543809568504286e-05, "loss": 0.2737, "loss_nan_ranks": 0, "loss_rank_avg": 0.2700973153114319, "step": 615, "valid_targets_mean": 4983.0, "valid_targets_min": 938 }, { "epoch": 0.9597523219814241, "grad_norm": 0.36164087059504424, "learning_rate": 3.44195687163781e-05, "loss": 0.2788, "loss_nan_ranks": 0, "loss_rank_avg": 0.2856438159942627, "step": 620, "valid_targets_mean": 4184.5, "valid_targets_min": 1158 }, { "epoch": 0.9674922600619195, "grad_norm": 0.33384191107630423, "learning_rate": 3.4294158105972095e-05, "loss": 0.2834, "loss_nan_ranks": 0, "loss_rank_avg": 0.27378416061401367, "step": 625, "valid_targets_mean": 4258.2, "valid_targets_min": 662 }, { "epoch": 0.9752321981424149, "grad_norm": 0.3499790559422126, "learning_rate": 3.416758791096782e-05, "loss": 0.2751, "loss_nan_ranks": 0, "loss_rank_avg": 0.2694745659828186, "step": 630, "valid_targets_mean": 4959.4, "valid_targets_min": 2049 }, { "epoch": 0.9829721362229102, "grad_norm": 0.5343904741585188, "learning_rate": 3.4039868399115736e-05, "loss": 0.2845, "loss_nan_ranks": 0, "loss_rank_avg": 0.2808476686477661, "step": 635, "valid_targets_mean": 3119.1, "valid_targets_min": 1116 }, { "epoch": 0.9907120743034056, "grad_norm": 0.551789004570227, "learning_rate": 3.391100993140233e-05, "loss": 0.2764, "loss_nan_ranks": 0, "loss_rank_avg": 0.2603238523006439, "step": 640, "valid_targets_mean": 2866.9, "valid_targets_min": 1189 }, { "epoch": 0.9984520123839009, "grad_norm": 0.47251144663554534, "learning_rate": 3.378102296120956e-05, "loss": 0.2624, "loss_nan_ranks": 0, "loss_rank_avg": 0.2715701758861542, "step": 645, "valid_targets_mean": 3248.7, "valid_targets_min": 1746 }, { "epoch": 1.0061919504643964, "grad_norm": 0.48139061749920886, "learning_rate": 3.364991803346687e-05, "loss": 0.3172, "loss_nan_ranks": 0, "loss_rank_avg": 0.3139479458332062, "step": 650, "valid_targets_mean": 3799.7, "valid_targets_min": 470 }, { "epoch": 1.0139318885448916, "grad_norm": 0.4002801120318358, "learning_rate": 3.351770578379573e-05, "loss": 0.3011, "loss_nan_ranks": 0, "loss_rank_avg": 0.3000005781650543, "step": 655, "valid_targets_mean": 3723.8, "valid_targets_min": 388 }, { "epoch": 1.021671826625387, "grad_norm": 0.32300020372074756, "learning_rate": 3.338439693764688e-05, "loss": 0.2914, "loss_nan_ranks": 0, "loss_rank_avg": 0.29472923278808594, "step": 660, "valid_targets_mean": 3488.4, "valid_targets_min": 338 }, { "epoch": 1.0294117647058822, "grad_norm": 0.2534602174594846, "learning_rate": 3.325000230943019e-05, "loss": 0.2858, "loss_nan_ranks": 0, "loss_rank_avg": 0.27545419335365295, "step": 665, "valid_targets_mean": 3832.2, "valid_targets_min": 506 }, { "epoch": 1.0371517027863777, "grad_norm": 0.24788434112652252, "learning_rate": 3.311453280163744e-05, "loss": 0.2799, "loss_nan_ranks": 0, "loss_rank_avg": 0.27271491289138794, "step": 670, "valid_targets_mean": 3689.5, "valid_targets_min": 397 }, { "epoch": 1.0448916408668731, "grad_norm": 0.2596056148039042, "learning_rate": 3.297799940395781e-05, "loss": 0.2765, "loss_nan_ranks": 0, "loss_rank_avg": 0.26611781120300293, "step": 675, "valid_targets_mean": 3592.6, "valid_targets_min": 330 }, { "epoch": 1.0526315789473684, "grad_norm": 0.2609799822219866, "learning_rate": 3.28404131923864e-05, "loss": 0.274, "loss_nan_ranks": 0, "loss_rank_avg": 0.2731304168701172, "step": 680, "valid_targets_mean": 3646.3, "valid_targets_min": 428 }, { "epoch": 1.0603715170278638, "grad_norm": 0.23086080112783416, "learning_rate": 3.270178532832568e-05, "loss": 0.2707, "loss_nan_ranks": 0, "loss_rank_avg": 0.2716541588306427, "step": 685, "valid_targets_mean": 3811.8, "valid_targets_min": 430 }, { "epoch": 1.068111455108359, "grad_norm": 0.22901327274684632, "learning_rate": 3.2562127057680116e-05, "loss": 0.2667, "loss_nan_ranks": 0, "loss_rank_avg": 0.27574634552001953, "step": 690, "valid_targets_mean": 3824.1, "valid_targets_min": 349 }, { "epoch": 1.0758513931888545, "grad_norm": 0.2274846230773506, "learning_rate": 3.242144970994377e-05, "loss": 0.2658, "loss_nan_ranks": 0, "loss_rank_avg": 0.25684624910354614, "step": 695, "valid_targets_mean": 3658.0, "valid_targets_min": 444 }, { "epoch": 1.08359133126935, "grad_norm": 0.23504790034851347, "learning_rate": 3.22797646972813e-05, "loss": 0.2673, "loss_nan_ranks": 0, "loss_rank_avg": 0.2675577402114868, "step": 700, "valid_targets_mean": 3583.1, "valid_targets_min": 457 }, { "epoch": 1.0913312693498451, "grad_norm": 0.37928507653308724, "learning_rate": 3.2137083513602115e-05, "loss": 0.3088, "loss_nan_ranks": 0, "loss_rank_avg": 0.32227909564971924, "step": 705, "valid_targets_mean": 4880.6, "valid_targets_min": 3320 }, { "epoch": 1.0990712074303406, "grad_norm": 0.7946890508526923, "learning_rate": 3.1993417733627986e-05, "loss": 0.3525, "loss_nan_ranks": 0, "loss_rank_avg": 0.5340238213539124, "step": 710, "valid_targets_mean": 978.9, "valid_targets_min": 200 }, { "epoch": 1.1068111455108358, "grad_norm": 0.38660722110507, "learning_rate": 3.1848779011954076e-05, "loss": 0.4102, "loss_nan_ranks": 0, "loss_rank_avg": 0.3029667139053345, "step": 715, "valid_targets_mean": 4807.5, "valid_targets_min": 2887 }, { "epoch": 1.1145510835913313, "grad_norm": 1.276053464706334, "learning_rate": 3.1703179082103447e-05, "loss": 0.365, "loss_nan_ranks": 0, "loss_rank_avg": 0.5683897733688354, "step": 720, "valid_targets_mean": 870.8, "valid_targets_min": 239 }, { "epoch": 1.1222910216718267, "grad_norm": 0.35511704990994863, "learning_rate": 3.155662975557525e-05, "loss": 0.3521, "loss_nan_ranks": 0, "loss_rank_avg": 0.2964516282081604, "step": 725, "valid_targets_mean": 4695.3, "valid_targets_min": 3169 }, { "epoch": 1.130030959752322, "grad_norm": 1.1027701592450079, "learning_rate": 3.140914292088649e-05, "loss": 0.3494, "loss_nan_ranks": 0, "loss_rank_avg": 0.5393781661987305, "step": 730, "valid_targets_mean": 994.2, "valid_targets_min": 414 }, { "epoch": 1.1377708978328174, "grad_norm": 0.300490486853147, "learning_rate": 3.126073054260765e-05, "loss": 0.4132, "loss_nan_ranks": 0, "loss_rank_avg": 0.2866157293319702, "step": 735, "valid_targets_mean": 4788.1, "valid_targets_min": 2297 }, { "epoch": 1.1455108359133126, "grad_norm": 0.30468939031761166, "learning_rate": 3.111140466039205e-05, "loss": 0.2988, "loss_nan_ranks": 0, "loss_rank_avg": 0.3231448233127594, "step": 740, "valid_targets_mean": 3206.7, "valid_targets_min": 447 }, { "epoch": 1.153250773993808, "grad_norm": 0.4185461221093088, "learning_rate": 3.0961177387999143e-05, "loss": 0.4374, "loss_nan_ranks": 0, "loss_rank_avg": 0.30215156078338623, "step": 745, "valid_targets_mean": 4860.4, "valid_targets_min": 3021 }, { "epoch": 1.1609907120743035, "grad_norm": 0.3246362159115541, "learning_rate": 3.081006091231187e-05, "loss": 0.2904, "loss_nan_ranks": 0, "loss_rank_avg": 0.2925494909286499, "step": 750, "valid_targets_mean": 4770.2, "valid_targets_min": 2896 }, { "epoch": 1.1687306501547987, "grad_norm": 0.5388796431924842, "learning_rate": 3.065806749234795e-05, "loss": 0.4219, "loss_nan_ranks": 0, "loss_rank_avg": 0.297951340675354, "step": 755, "valid_targets_mean": 4724.9, "valid_targets_min": 3054 }, { "epoch": 1.1764705882352942, "grad_norm": 0.41506798440532944, "learning_rate": 3.0505209458265463e-05, "loss": 0.2873, "loss_nan_ranks": 0, "loss_rank_avg": 0.2955588698387146, "step": 760, "valid_targets_mean": 4654.0, "valid_targets_min": 1994 }, { "epoch": 1.1842105263157894, "grad_norm": 0.47126780281343006, "learning_rate": 3.0351499210362526e-05, "loss": 0.4456, "loss_nan_ranks": 0, "loss_rank_avg": 0.2873014211654663, "step": 765, "valid_targets_mean": 4692.8, "valid_targets_min": 742 }, { "epoch": 1.1919504643962848, "grad_norm": 0.34901444949701144, "learning_rate": 3.01969492180714e-05, "loss": 0.2844, "loss_nan_ranks": 0, "loss_rank_avg": 0.29014477133750916, "step": 770, "valid_targets_mean": 5052.9, "valid_targets_min": 2772 }, { "epoch": 1.1996904024767803, "grad_norm": 0.5180698881182623, "learning_rate": 3.004157201894689e-05, "loss": 0.4448, "loss_nan_ranks": 0, "loss_rank_avg": 0.2949589788913727, "step": 775, "valid_targets_mean": 5030.0, "valid_targets_min": 3232 }, { "epoch": 1.2074303405572755, "grad_norm": 0.3407530260823277, "learning_rate": 2.9885380217649285e-05, "loss": 0.2888, "loss_nan_ranks": 0, "loss_rank_avg": 0.2843036651611328, "step": 780, "valid_targets_mean": 5112.5, "valid_targets_min": 3319 }, { "epoch": 1.215170278637771, "grad_norm": 0.33606213636554133, "learning_rate": 2.972838648492182e-05, "loss": 0.4564, "loss_nan_ranks": 0, "loss_rank_avg": 0.32512155175209045, "step": 785, "valid_targets_mean": 5787.9, "valid_targets_min": 628 }, { "epoch": 1.2229102167182662, "grad_norm": 0.26150380566158515, "learning_rate": 2.95706035565628e-05, "loss": 0.3279, "loss_nan_ranks": 0, "loss_rank_avg": 0.3358706533908844, "step": 790, "valid_targets_mean": 6091.0, "valid_targets_min": 590 }, { "epoch": 1.2306501547987616, "grad_norm": 0.26424156107594843, "learning_rate": 2.9412044232392416e-05, "loss": 0.3331, "loss_nan_ranks": 0, "loss_rank_avg": 0.3356711268424988, "step": 795, "valid_targets_mean": 5457.9, "valid_targets_min": 537 }, { "epoch": 1.238390092879257, "grad_norm": 0.24610103045052448, "learning_rate": 2.92527213752144e-05, "loss": 0.3233, "loss_nan_ranks": 0, "loss_rank_avg": 0.32360732555389404, "step": 800, "valid_targets_mean": 5283.4, "valid_targets_min": 556 }, { "epoch": 1.2461300309597523, "grad_norm": 0.20473845910198676, "learning_rate": 2.9092647909772547e-05, "loss": 0.3322, "loss_nan_ranks": 0, "loss_rank_avg": 0.3107759952545166, "step": 805, "valid_targets_mean": 6079.6, "valid_targets_min": 531 }, { "epoch": 1.2538699690402477, "grad_norm": 0.1982417032561459, "learning_rate": 2.893183682170224e-05, "loss": 0.323, "loss_nan_ranks": 0, "loss_rank_avg": 0.34520620107650757, "step": 810, "valid_targets_mean": 5938.4, "valid_targets_min": 588 }, { "epoch": 1.261609907120743, "grad_norm": 0.19813643799333697, "learning_rate": 2.8770301156476985e-05, "loss": 0.3215, "loss_nan_ranks": 0, "loss_rank_avg": 0.31860482692718506, "step": 815, "valid_targets_mean": 5991.7, "valid_targets_min": 605 }, { "epoch": 1.2693498452012384, "grad_norm": 0.21223208558324866, "learning_rate": 2.8608054018350144e-05, "loss": 0.3252, "loss_nan_ranks": 0, "loss_rank_avg": 0.3146929144859314, "step": 820, "valid_targets_mean": 5768.7, "valid_targets_min": 589 }, { "epoch": 1.2770897832817338, "grad_norm": 0.21668883079476361, "learning_rate": 2.8445108569291882e-05, "loss": 0.3174, "loss_nan_ranks": 0, "loss_rank_avg": 0.31380555033683777, "step": 825, "valid_targets_mean": 5973.1, "valid_targets_min": 598 }, { "epoch": 1.284829721362229, "grad_norm": 0.2138530658257456, "learning_rate": 2.8281478027921428e-05, "loss": 0.3203, "loss_nan_ranks": 0, "loss_rank_avg": 0.3224177658557892, "step": 830, "valid_targets_mean": 5782.9, "valid_targets_min": 427 }, { "epoch": 1.2925696594427245, "grad_norm": 0.22678360029970596, "learning_rate": 2.8117175668434713e-05, "loss": 0.3125, "loss_nan_ranks": 0, "loss_rank_avg": 0.3132099509239197, "step": 835, "valid_targets_mean": 5733.2, "valid_targets_min": 768 }, { "epoch": 1.3003095975232197, "grad_norm": 0.21256605493714836, "learning_rate": 2.795221481952758e-05, "loss": 0.3183, "loss_nan_ranks": 0, "loss_rank_avg": 0.32242241501808167, "step": 840, "valid_targets_mean": 5674.5, "valid_targets_min": 597 }, { "epoch": 1.3080495356037152, "grad_norm": 0.23958348168020002, "learning_rate": 2.778660886331447e-05, "loss": 0.3105, "loss_nan_ranks": 0, "loss_rank_avg": 0.324482798576355, "step": 845, "valid_targets_mean": 5429.0, "valid_targets_min": 755 }, { "epoch": 1.3157894736842106, "grad_norm": 0.20703439180417177, "learning_rate": 2.7620371234242857e-05, "loss": 0.3138, "loss_nan_ranks": 0, "loss_rank_avg": 0.3022202253341675, "step": 850, "valid_targets_mean": 5852.8, "valid_targets_min": 1304 }, { "epoch": 1.3235294117647058, "grad_norm": 0.24125893912340335, "learning_rate": 2.7453515418003385e-05, "loss": 0.313, "loss_nan_ranks": 0, "loss_rank_avg": 0.3281027674674988, "step": 855, "valid_targets_mean": 5466.6, "valid_targets_min": 1012 }, { "epoch": 1.3312693498452013, "grad_norm": 0.2157557221470851, "learning_rate": 2.728605495043589e-05, "loss": 0.3115, "loss_nan_ranks": 0, "loss_rank_avg": 0.3008950352668762, "step": 860, "valid_targets_mean": 5432.9, "valid_targets_min": 495 }, { "epoch": 1.3390092879256965, "grad_norm": 0.21720447478539381, "learning_rate": 2.7118003416431312e-05, "loss": 0.3153, "loss_nan_ranks": 0, "loss_rank_avg": 0.32623517513275146, "step": 865, "valid_targets_mean": 6355.9, "valid_targets_min": 629 }, { "epoch": 1.346749226006192, "grad_norm": 1.4436049900618573, "learning_rate": 2.6949374448829666e-05, "loss": 0.4247, "loss_nan_ranks": 0, "loss_rank_avg": 0.6241855621337891, "step": 870, "valid_targets_mean": 805.4, "valid_targets_min": 438 }, { "epoch": 1.3544891640866874, "grad_norm": 0.24132631335087232, "learning_rate": 2.6780181727314096e-05, "loss": 0.3162, "loss_nan_ranks": 0, "loss_rank_avg": 0.3145143389701843, "step": 875, "valid_targets_mean": 6133.8, "valid_targets_min": 229 }, { "epoch": 1.3622291021671826, "grad_norm": 0.9705810504068956, "learning_rate": 2.6610438977301128e-05, "loss": 0.3938, "loss_nan_ranks": 0, "loss_rank_avg": 0.6266766786575317, "step": 880, "valid_targets_mean": 817.7, "valid_targets_min": 373 }, { "epoch": 1.369969040247678, "grad_norm": 0.21906903305378964, "learning_rate": 2.6440159968827255e-05, "loss": 0.3142, "loss_nan_ranks": 0, "loss_rank_avg": 0.3028450608253479, "step": 885, "valid_targets_mean": 6547.3, "valid_targets_min": 1808 }, { "epoch": 1.3777089783281733, "grad_norm": 0.27954069223468464, "learning_rate": 2.6269358515431825e-05, "loss": 0.3977, "loss_nan_ranks": 0, "loss_rank_avg": 0.31099915504455566, "step": 890, "valid_targets_mean": 6934.5, "valid_targets_min": 376 }, { "epoch": 1.3854489164086687, "grad_norm": 0.2282751162688024, "learning_rate": 2.6098048473036487e-05, "loss": 0.3092, "loss_nan_ranks": 0, "loss_rank_avg": 0.3102579116821289, "step": 895, "valid_targets_mean": 6416.0, "valid_targets_min": 451 }, { "epoch": 1.3931888544891642, "grad_norm": 0.29347218348748927, "learning_rate": 2.5926243738821148e-05, "loss": 0.4439, "loss_nan_ranks": 0, "loss_rank_avg": 0.3172456920146942, "step": 900, "valid_targets_mean": 6050.4, "valid_targets_min": 298 }, { "epoch": 1.4009287925696594, "grad_norm": 0.23302656493339524, "learning_rate": 2.575395825009657e-05, "loss": 0.3069, "loss_nan_ranks": 0, "loss_rank_avg": 0.2993898391723633, "step": 905, "valid_targets_mean": 5863.9, "valid_targets_min": 212 }, { "epoch": 1.4086687306501549, "grad_norm": 0.380205064262657, "learning_rate": 2.5581205983173763e-05, "loss": 0.4018, "loss_nan_ranks": 0, "loss_rank_avg": 0.31587034463882446, "step": 910, "valid_targets_mean": 6129.1, "valid_targets_min": 324 }, { "epoch": 1.41640866873065, "grad_norm": 0.2714599791425628, "learning_rate": 2.5408000952230158e-05, "loss": 0.3035, "loss_nan_ranks": 0, "loss_rank_avg": 0.3035046458244324, "step": 915, "valid_targets_mean": 6139.4, "valid_targets_min": 478 }, { "epoch": 1.4241486068111455, "grad_norm": 0.2967636848805332, "learning_rate": 2.523435720817277e-05, "loss": 0.4405, "loss_nan_ranks": 0, "loss_rank_avg": 0.3169952630996704, "step": 920, "valid_targets_mean": 6069.3, "valid_targets_min": 230 }, { "epoch": 1.431888544891641, "grad_norm": 0.21154727850987246, "learning_rate": 2.5060288837498296e-05, "loss": 0.3011, "loss_nan_ranks": 0, "loss_rank_avg": 0.30303075909614563, "step": 925, "valid_targets_mean": 6344.3, "valid_targets_min": 600 }, { "epoch": 1.4396284829721362, "grad_norm": 0.2666594031964154, "learning_rate": 2.4885809961150436e-05, "loss": 0.4366, "loss_nan_ranks": 0, "loss_rank_avg": 0.3026265501976013, "step": 930, "valid_targets_mean": 6771.4, "valid_targets_min": 486 }, { "epoch": 1.4473684210526316, "grad_norm": 0.21624632300338756, "learning_rate": 2.4710934733374313e-05, "loss": 0.3007, "loss_nan_ranks": 0, "loss_rank_avg": 0.3007712960243225, "step": 935, "valid_targets_mean": 6569.9, "valid_targets_min": 353 }, { "epoch": 1.4551083591331269, "grad_norm": 1.4857134613281382, "learning_rate": 2.4535677340568275e-05, "loss": 0.5409, "loss_nan_ranks": 0, "loss_rank_avg": 0.5613664388656616, "step": 940, "valid_targets_mean": 2007.4, "valid_targets_min": 246 }, { "epoch": 1.4628482972136223, "grad_norm": 0.38140244794748396, "learning_rate": 2.4360052000133004e-05, "loss": 0.5363, "loss_nan_ranks": 0, "loss_rank_avg": 0.5474293231964111, "step": 945, "valid_targets_mean": 3351.7, "valid_targets_min": 267 }, { "epoch": 1.4705882352941178, "grad_norm": 0.3603442528537929, "learning_rate": 2.4184072959318208e-05, "loss": 0.5287, "loss_nan_ranks": 0, "loss_rank_avg": 0.5367631912231445, "step": 950, "valid_targets_mean": 3195.7, "valid_targets_min": 296 }, { "epoch": 1.478328173374613, "grad_norm": 0.3343077768972529, "learning_rate": 2.4007754494066822e-05, "loss": 0.5251, "loss_nan_ranks": 0, "loss_rank_avg": 0.5016438961029053, "step": 955, "valid_targets_mean": 3282.1, "valid_targets_min": 296 }, { "epoch": 1.4860681114551084, "grad_norm": 0.3376798618230589, "learning_rate": 2.3831110907856886e-05, "loss": 0.51, "loss_nan_ranks": 0, "loss_rank_avg": 0.4855489432811737, "step": 960, "valid_targets_mean": 3273.5, "valid_targets_min": 426 }, { "epoch": 1.4938080495356036, "grad_norm": 0.2782369122581895, "learning_rate": 2.3654156530541236e-05, "loss": 0.5156, "loss_nan_ranks": 0, "loss_rank_avg": 0.45853185653686523, "step": 965, "valid_targets_mean": 3904.3, "valid_targets_min": 286 }, { "epoch": 1.501547987616099, "grad_norm": 0.3049436859222503, "learning_rate": 2.3476905717184994e-05, "loss": 0.5222, "loss_nan_ranks": 0, "loss_rank_avg": 0.5525015592575073, "step": 970, "valid_targets_mean": 3417.9, "valid_targets_min": 431 }, { "epoch": 1.5092879256965945, "grad_norm": 0.4337294221477672, "learning_rate": 2.329937284690106e-05, "loss": 0.5152, "loss_nan_ranks": 0, "loss_rank_avg": 0.546398401260376, "step": 975, "valid_targets_mean": 2151.9, "valid_targets_min": 306 }, { "epoch": 1.5170278637770898, "grad_norm": 0.2864428701765129, "learning_rate": 2.3121572321683624e-05, "loss": 0.5074, "loss_nan_ranks": 0, "loss_rank_avg": 0.48784539103507996, "step": 980, "valid_targets_mean": 2757.0, "valid_targets_min": 366 }, { "epoch": 1.524767801857585, "grad_norm": 0.3218727885494719, "learning_rate": 2.2943518565239855e-05, "loss": 0.5163, "loss_nan_ranks": 0, "loss_rank_avg": 0.5275843739509583, "step": 985, "valid_targets_mean": 2699.5, "valid_targets_min": 297 }, { "epoch": 1.5325077399380804, "grad_norm": 0.27972239430276774, "learning_rate": 2.2765226021819773e-05, "loss": 0.5063, "loss_nan_ranks": 0, "loss_rank_avg": 0.49300703406333923, "step": 990, "valid_targets_mean": 3419.5, "valid_targets_min": 281 }, { "epoch": 1.5402476780185759, "grad_norm": 0.28727899220649333, "learning_rate": 2.258670915504453e-05, "loss": 0.506, "loss_nan_ranks": 0, "loss_rank_avg": 0.5013266205787659, "step": 995, "valid_targets_mean": 3438.5, "valid_targets_min": 301 }, { "epoch": 1.5479876160990713, "grad_norm": 0.24156375169268698, "learning_rate": 2.2407982446733027e-05, "loss": 0.5075, "loss_nan_ranks": 0, "loss_rank_avg": 0.4864124059677124, "step": 1000, "valid_targets_mean": 4330.6, "valid_targets_min": 247 }, { "epoch": 1.5557275541795665, "grad_norm": 0.4115069627169929, "learning_rate": 2.222906039572715e-05, "loss": 0.5068, "loss_nan_ranks": 0, "loss_rank_avg": 0.5431771278381348, "step": 1005, "valid_targets_mean": 2712.3, "valid_targets_min": 262 }, { "epoch": 1.5634674922600618, "grad_norm": 0.6349890726818864, "learning_rate": 2.2049957516715573e-05, "loss": 0.5322, "loss_nan_ranks": 0, "loss_rank_avg": 0.6668592095375061, "step": 1010, "valid_targets_mean": 3619.0, "valid_targets_min": 1099 }, { "epoch": 1.5712074303405572, "grad_norm": 0.3761640092661745, "learning_rate": 2.1870688339056265e-05, "loss": 0.6381, "loss_nan_ranks": 0, "loss_rank_avg": 0.6248250603675842, "step": 1015, "valid_targets_mean": 3834.9, "valid_targets_min": 1026 }, { "epoch": 1.5789473684210527, "grad_norm": 0.35008924670124053, "learning_rate": 2.1691267405597834e-05, "loss": 0.6383, "loss_nan_ranks": 0, "loss_rank_avg": 0.6214076280593872, "step": 1020, "valid_targets_mean": 3756.7, "valid_targets_min": 1083 }, { "epoch": 1.586687306501548, "grad_norm": 0.2935122332602019, "learning_rate": 2.151170927149977e-05, "loss": 0.6413, "loss_nan_ranks": 0, "loss_rank_avg": 0.6318605542182922, "step": 1025, "valid_targets_mean": 3851.6, "valid_targets_min": 1063 }, { "epoch": 1.5944272445820433, "grad_norm": 0.27413481584559085, "learning_rate": 2.1332028503051693e-05, "loss": 0.6252, "loss_nan_ranks": 0, "loss_rank_avg": 0.6500615477561951, "step": 1030, "valid_targets_mean": 3610.4, "valid_targets_min": 1092 }, { "epoch": 1.6021671826625385, "grad_norm": 0.4147553577122656, "learning_rate": 2.1152239676491687e-05, "loss": 0.6219, "loss_nan_ranks": 0, "loss_rank_avg": 0.5811175107955933, "step": 1035, "valid_targets_mean": 4324.1, "valid_targets_min": 791 }, { "epoch": 1.609907120743034, "grad_norm": 0.29937825889380415, "learning_rate": 2.097235737682382e-05, "loss": 0.6371, "loss_nan_ranks": 0, "loss_rank_avg": 0.6158422231674194, "step": 1040, "valid_targets_mean": 3782.9, "valid_targets_min": 1167 }, { "epoch": 1.6176470588235294, "grad_norm": 0.2695054440616614, "learning_rate": 2.079239619663499e-05, "loss": 0.6183, "loss_nan_ranks": 0, "loss_rank_avg": 0.595893144607544, "step": 1045, "valid_targets_mean": 4237.8, "valid_targets_min": 636 }, { "epoch": 1.6253869969040249, "grad_norm": 0.24701393198924002, "learning_rate": 2.0612370734911095e-05, "loss": 0.6213, "loss_nan_ranks": 0, "loss_rank_avg": 0.6063376069068909, "step": 1050, "valid_targets_mean": 4379.6, "valid_targets_min": 417 }, { "epoch": 1.63312693498452, "grad_norm": 0.23072601464589526, "learning_rate": 2.0432295595852774e-05, "loss": 0.6054, "loss_nan_ranks": 0, "loss_rank_avg": 0.5472590923309326, "step": 1055, "valid_targets_mean": 4701.0, "valid_targets_min": 826 }, { "epoch": 1.6408668730650153, "grad_norm": 0.23494698490345117, "learning_rate": 2.0252185387690627e-05, "loss": 0.6292, "loss_nan_ranks": 0, "loss_rank_avg": 0.591805636882782, "step": 1060, "valid_targets_mean": 4621.8, "valid_targets_min": 983 }, { "epoch": 1.6486068111455108, "grad_norm": 0.286574606641743, "learning_rate": 2.007205472150014e-05, "loss": 0.6349, "loss_nan_ranks": 0, "loss_rank_avg": 0.6813613176345825, "step": 1065, "valid_targets_mean": 3181.1, "valid_targets_min": 492 }, { "epoch": 1.6563467492260062, "grad_norm": 0.2678271671127905, "learning_rate": 1.9891918210016453e-05, "loss": 0.6333, "loss_nan_ranks": 0, "loss_rank_avg": 0.6792501211166382, "step": 1070, "valid_targets_mean": 4021.6, "valid_targets_min": 665 }, { "epoch": 1.6640866873065017, "grad_norm": 0.287461265170696, "learning_rate": 1.9711790466448863e-05, "loss": 0.6183, "loss_nan_ranks": 0, "loss_rank_avg": 0.6828348636627197, "step": 1075, "valid_targets_mean": 3290.7, "valid_targets_min": 280 }, { "epoch": 1.671826625386997, "grad_norm": 0.23540874788494098, "learning_rate": 1.95316861032954e-05, "loss": 0.6277, "loss_nan_ranks": 0, "loss_rank_avg": 0.6578258275985718, "step": 1080, "valid_targets_mean": 4297.4, "valid_targets_min": 1038 }, { "epoch": 1.6795665634674921, "grad_norm": 0.5245084769694451, "learning_rate": 1.9351619731157415e-05, "loss": 0.6037, "loss_nan_ranks": 0, "loss_rank_avg": 0.5338871479034424, "step": 1085, "valid_targets_mean": 3676.8, "valid_targets_min": 1023 }, { "epoch": 1.6873065015479876, "grad_norm": 0.35757608120176726, "learning_rate": 1.91716059575543e-05, "loss": 0.5365, "loss_nan_ranks": 0, "loss_rank_avg": 0.5506007671356201, "step": 1090, "valid_targets_mean": 3877.6, "valid_targets_min": 891 }, { "epoch": 1.695046439628483, "grad_norm": 0.29617547242231923, "learning_rate": 1.899165938573851e-05, "loss": 0.5212, "loss_nan_ranks": 0, "loss_rank_avg": 0.5574710369110107, "step": 1095, "valid_targets_mean": 3730.8, "valid_targets_min": 921 }, { "epoch": 1.7027863777089784, "grad_norm": 0.2663718259319401, "learning_rate": 1.881179461351087e-05, "loss": 0.5219, "loss_nan_ranks": 0, "loss_rank_avg": 0.5038564205169678, "step": 1100, "valid_targets_mean": 3764.0, "valid_targets_min": 771 }, { "epoch": 1.7105263157894737, "grad_norm": 0.2728449458525243, "learning_rate": 1.8632026232036397e-05, "loss": 0.5219, "loss_nan_ranks": 0, "loss_rank_avg": 0.5010745525360107, "step": 1105, "valid_targets_mean": 3619.3, "valid_targets_min": 981 }, { "epoch": 1.718266253869969, "grad_norm": 0.2660611928357808, "learning_rate": 1.8452368824660604e-05, "loss": 0.5147, "loss_nan_ranks": 0, "loss_rank_avg": 0.5162074565887451, "step": 1110, "valid_targets_mean": 3383.6, "valid_targets_min": 747 }, { "epoch": 1.7260061919504643, "grad_norm": 0.26724477754654813, "learning_rate": 1.827283696572646e-05, "loss": 0.5167, "loss_nan_ranks": 0, "loss_rank_avg": 0.5333441495895386, "step": 1115, "valid_targets_mean": 3617.2, "valid_targets_min": 877 }, { "epoch": 1.7337461300309598, "grad_norm": 0.24922101516229775, "learning_rate": 1.8093445219392038e-05, "loss": 0.5135, "loss_nan_ranks": 0, "loss_rank_avg": 0.5010273456573486, "step": 1120, "valid_targets_mean": 3628.0, "valid_targets_min": 1027 }, { "epoch": 1.7414860681114552, "grad_norm": 0.2544991655030091, "learning_rate": 1.7914208138449108e-05, "loss": 0.5147, "loss_nan_ranks": 0, "loss_rank_avg": 0.5082110166549683, "step": 1125, "valid_targets_mean": 3666.4, "valid_targets_min": 1044 }, { "epoch": 1.7492260061919505, "grad_norm": 0.2586854297067809, "learning_rate": 1.7735140263142483e-05, "loss": 0.5024, "loss_nan_ranks": 0, "loss_rank_avg": 0.498982697725296, "step": 1130, "valid_targets_mean": 3281.1, "valid_targets_min": 851 }, { "epoch": 1.7569659442724457, "grad_norm": 0.265351851103295, "learning_rate": 1.7556256119990538e-05, "loss": 0.513, "loss_nan_ranks": 0, "loss_rank_avg": 0.5036240220069885, "step": 1135, "valid_targets_mean": 3577.5, "valid_targets_min": 1157 }, { "epoch": 1.7647058823529411, "grad_norm": 0.25556536651758294, "learning_rate": 1.7377570220606745e-05, "loss": 0.5155, "loss_nan_ranks": 0, "loss_rank_avg": 0.47923219203948975, "step": 1140, "valid_targets_mean": 3314.5, "valid_targets_min": 706 }, { "epoch": 1.7724458204334366, "grad_norm": 0.2659278252988841, "learning_rate": 1.7199097060522437e-05, "loss": 0.5173, "loss_nan_ranks": 0, "loss_rank_avg": 0.5065821409225464, "step": 1145, "valid_targets_mean": 3562.3, "valid_targets_min": 1014 }, { "epoch": 1.780185758513932, "grad_norm": 0.2570609206552128, "learning_rate": 1.7020851118010918e-05, "loss": 0.5125, "loss_nan_ranks": 0, "loss_rank_avg": 0.5099133253097534, "step": 1150, "valid_targets_mean": 3536.9, "valid_targets_min": 792 }, { "epoch": 1.7879256965944272, "grad_norm": 0.2508651381096074, "learning_rate": 1.684284685291292e-05, "loss": 0.5056, "loss_nan_ranks": 0, "loss_rank_avg": 0.514965832233429, "step": 1155, "valid_targets_mean": 3570.4, "valid_targets_min": 1059 }, { "epoch": 1.7956656346749225, "grad_norm": 0.24698558245827082, "learning_rate": 1.666509870546359e-05, "loss": 0.5225, "loss_nan_ranks": 0, "loss_rank_avg": 0.5093916654586792, "step": 1160, "valid_targets_mean": 3647.4, "valid_targets_min": 784 }, { "epoch": 1.803405572755418, "grad_norm": 0.26466317261621247, "learning_rate": 1.648762109512105e-05, "loss": 0.508, "loss_nan_ranks": 0, "loss_rank_avg": 0.5089800357818604, "step": 1165, "valid_targets_mean": 3439.6, "valid_targets_min": 1012 }, { "epoch": 1.8111455108359134, "grad_norm": 0.24987520286912196, "learning_rate": 1.6310428419396636e-05, "loss": 0.505, "loss_nan_ranks": 0, "loss_rank_avg": 0.4624229669570923, "step": 1170, "valid_targets_mean": 3415.4, "valid_targets_min": 555 }, { "epoch": 1.8188854489164088, "grad_norm": 0.2727113667131823, "learning_rate": 1.6133535052686953e-05, "loss": 0.5947, "loss_nan_ranks": 0, "loss_rank_avg": 0.49991852045059204, "step": 1175, "valid_targets_mean": 3413.9, "valid_targets_min": 973 }, { "epoch": 1.826625386996904, "grad_norm": 0.2619566476966724, "learning_rate": 1.595695534510777e-05, "loss": 0.5103, "loss_nan_ranks": 0, "loss_rank_avg": 0.4921343922615051, "step": 1180, "valid_targets_mean": 3740.7, "valid_targets_min": 849 }, { "epoch": 1.8343653250773992, "grad_norm": 0.3089259820699402, "learning_rate": 1.5780703621329893e-05, "loss": 0.6108, "loss_nan_ranks": 0, "loss_rank_avg": 0.5340328216552734, "step": 1185, "valid_targets_mean": 3384.0, "valid_targets_min": 870 }, { "epoch": 1.8421052631578947, "grad_norm": 0.2690108692070922, "learning_rate": 1.5604794179417083e-05, "loss": 0.5024, "loss_nan_ranks": 0, "loss_rank_avg": 0.4995267689228058, "step": 1190, "valid_targets_mean": 3341.3, "valid_targets_min": 1113 }, { "epoch": 1.8498452012383901, "grad_norm": 0.2652646205794999, "learning_rate": 1.542924128966622e-05, "loss": 0.5553, "loss_nan_ranks": 0, "loss_rank_avg": 0.5052015781402588, "step": 1195, "valid_targets_mean": 3684.2, "valid_targets_min": 1026 }, { "epoch": 1.8575851393188856, "grad_norm": 0.2762577600723093, "learning_rate": 1.5254059193449585e-05, "loss": 0.5025, "loss_nan_ranks": 0, "loss_rank_avg": 0.49244827032089233, "step": 1200, "valid_targets_mean": 3157.4, "valid_targets_min": 525 }, { "epoch": 1.8653250773993808, "grad_norm": 0.27400155947690535, "learning_rate": 1.5079262102059602e-05, "loss": 0.5555, "loss_nan_ranks": 0, "loss_rank_avg": 0.5354343056678772, "step": 1205, "valid_targets_mean": 3240.1, "valid_targets_min": 984 }, { "epoch": 1.873065015479876, "grad_norm": 0.2508418984472677, "learning_rate": 1.4904864195555942e-05, "loss": 0.5006, "loss_nan_ranks": 0, "loss_rank_avg": 0.5009927153587341, "step": 1210, "valid_targets_mean": 3223.1, "valid_targets_min": 328 }, { "epoch": 1.8808049535603715, "grad_norm": 0.25556621332680995, "learning_rate": 1.4730879621615217e-05, "loss": 0.6041, "loss_nan_ranks": 0, "loss_rank_avg": 0.5159991383552551, "step": 1215, "valid_targets_mean": 3842.8, "valid_targets_min": 811 }, { "epoch": 1.888544891640867, "grad_norm": 0.27756109590528844, "learning_rate": 1.4557322494383274e-05, "loss": 0.5018, "loss_nan_ranks": 0, "loss_rank_avg": 0.5096072554588318, "step": 1220, "valid_targets_mean": 3401.7, "valid_targets_min": 358 }, { "epoch": 1.8962848297213624, "grad_norm": 0.29072340907542393, "learning_rate": 1.4384206893330216e-05, "loss": 0.5866, "loss_nan_ranks": 0, "loss_rank_avg": 0.5704678297042847, "step": 1225, "valid_targets_mean": 3138.4, "valid_targets_min": 426 }, { "epoch": 1.9040247678018576, "grad_norm": 0.25114676744419256, "learning_rate": 1.4211546862108242e-05, "loss": 0.5104, "loss_nan_ranks": 0, "loss_rank_avg": 0.5284104347229004, "step": 1230, "valid_targets_mean": 3528.3, "valid_targets_min": 285 }, { "epoch": 1.9117647058823528, "grad_norm": 0.3049014526259051, "learning_rate": 1.4039356407412329e-05, "loss": 0.5403, "loss_nan_ranks": 0, "loss_rank_avg": 0.6094015836715698, "step": 1235, "valid_targets_mean": 2719.7, "valid_targets_min": 533 }, { "epoch": 1.9195046439628483, "grad_norm": 0.2521098064388986, "learning_rate": 1.3867649497844058e-05, "loss": 0.4999, "loss_nan_ranks": 0, "loss_rank_avg": 0.5124248266220093, "step": 1240, "valid_targets_mean": 3570.4, "valid_targets_min": 564 }, { "epoch": 1.9272445820433437, "grad_norm": 2.8985727416760403, "learning_rate": 1.3696440062778363e-05, "loss": 0.5295, "loss_nan_ranks": 0, "loss_rank_avg": 0.5617307424545288, "step": 1245, "valid_targets_mean": 2341.9, "valid_targets_min": 417 }, { "epoch": 1.9349845201238391, "grad_norm": 0.9839086679727692, "learning_rate": 1.3525741991233576e-05, "loss": 0.3357, "loss_nan_ranks": 0, "loss_rank_avg": 0.31935644149780273, "step": 1250, "valid_targets_mean": 4556.8, "valid_targets_min": 1546 }, { "epoch": 1.9427244582043344, "grad_norm": 0.5367112686535409, "learning_rate": 1.3355569130744717e-05, "loss": 0.2842, "loss_nan_ranks": 0, "loss_rank_avg": 0.2978261113166809, "step": 1255, "valid_targets_mean": 4626.2, "valid_targets_min": 925 }, { "epoch": 1.9504643962848296, "grad_norm": 0.4051027506035485, "learning_rate": 1.3185935286240081e-05, "loss": 0.2583, "loss_nan_ranks": 0, "loss_rank_avg": 0.25202229619026184, "step": 1260, "valid_targets_mean": 4938.1, "valid_targets_min": 1451 }, { "epoch": 1.958204334365325, "grad_norm": 0.4350351782057793, "learning_rate": 1.3016854218921432e-05, "loss": 0.255, "loss_nan_ranks": 0, "loss_rank_avg": 0.26276537775993347, "step": 1265, "valid_targets_mean": 4161.5, "valid_targets_min": 1249 }, { "epoch": 1.9659442724458205, "grad_norm": 0.49748555632911934, "learning_rate": 1.2848339645147574e-05, "loss": 0.2664, "loss_nan_ranks": 0, "loss_rank_avg": 0.2641948461532593, "step": 1270, "valid_targets_mean": 4068.4, "valid_targets_min": 1239 }, { "epoch": 1.973684210526316, "grad_norm": 0.4256957837723888, "learning_rate": 1.2680405235321684e-05, "loss": 0.2618, "loss_nan_ranks": 0, "loss_rank_avg": 0.25480371713638306, "step": 1275, "valid_targets_mean": 4988.2, "valid_targets_min": 1593 }, { "epoch": 1.9814241486068112, "grad_norm": 0.750944747452251, "learning_rate": 1.2513064612782308e-05, "loss": 0.2587, "loss_nan_ranks": 0, "loss_rank_avg": 0.24965959787368774, "step": 1280, "valid_targets_mean": 2636.5, "valid_targets_min": 274 }, { "epoch": 1.9891640866873064, "grad_norm": 0.430852624660782, "learning_rate": 1.2346331352698206e-05, "loss": 0.2567, "loss_nan_ranks": 0, "loss_rank_avg": 0.2593318819999695, "step": 1285, "valid_targets_mean": 2712.8, "valid_targets_min": 982 }, { "epoch": 1.9969040247678018, "grad_norm": 0.3450106479763493, "learning_rate": 1.2180218980967091e-05, "loss": 0.2409, "loss_nan_ranks": 0, "loss_rank_avg": 0.24146604537963867, "step": 1290, "valid_targets_mean": 3597.4, "valid_targets_min": 1869 }, { "epoch": 2.0046439628482973, "grad_norm": 0.7137021285572581, "learning_rate": 1.201474097311837e-05, "loss": 0.2746, "loss_nan_ranks": 0, "loss_rank_avg": 0.2817814350128174, "step": 1295, "valid_targets_mean": 3714.5, "valid_targets_min": 313 }, { "epoch": 2.0123839009287927, "grad_norm": 0.4074614051315701, "learning_rate": 1.1849910753219956e-05, "loss": 0.276, "loss_nan_ranks": 0, "loss_rank_avg": 0.2710038125514984, "step": 1300, "valid_targets_mean": 3769.2, "valid_targets_min": 428 }, { "epoch": 2.0201238390092877, "grad_norm": 0.3552531242519296, "learning_rate": 1.1685741692789284e-05, "loss": 0.2673, "loss_nan_ranks": 0, "loss_rank_avg": 0.2690993547439575, "step": 1305, "valid_targets_mean": 3845.8, "valid_targets_min": 444 }, { "epoch": 2.027863777089783, "grad_norm": 0.3087607772738728, "learning_rate": 1.1522247109708564e-05, "loss": 0.2664, "loss_nan_ranks": 0, "loss_rank_avg": 0.2663906216621399, "step": 1310, "valid_targets_mean": 3735.0, "valid_targets_min": 259 }, { "epoch": 2.0356037151702786, "grad_norm": 0.2667932220570936, "learning_rate": 1.1359440267144413e-05, "loss": 0.2595, "loss_nan_ranks": 0, "loss_rank_avg": 0.25116756558418274, "step": 1315, "valid_targets_mean": 3725.4, "valid_targets_min": 386 }, { "epoch": 2.043343653250774, "grad_norm": 0.23773274178550025, "learning_rate": 1.119733437247187e-05, "loss": 0.2588, "loss_nan_ranks": 0, "loss_rank_avg": 0.25612616539001465, "step": 1320, "valid_targets_mean": 3718.9, "valid_targets_min": 463 }, { "epoch": 2.0510835913312695, "grad_norm": 0.22262330851695253, "learning_rate": 1.103594257620301e-05, "loss": 0.2554, "loss_nan_ranks": 0, "loss_rank_avg": 0.256261944770813, "step": 1325, "valid_targets_mean": 3712.7, "valid_targets_min": 434 }, { "epoch": 2.0588235294117645, "grad_norm": 0.21229515407480232, "learning_rate": 1.0875277970920118e-05, "loss": 0.254, "loss_nan_ranks": 0, "loss_rank_avg": 0.25342199206352234, "step": 1330, "valid_targets_mean": 3745.0, "valid_targets_min": 433 }, { "epoch": 2.06656346749226, "grad_norm": 0.22333151724909178, "learning_rate": 1.0715353590213597e-05, "loss": 0.2501, "loss_nan_ranks": 0, "loss_rank_avg": 0.24361343681812286, "step": 1335, "valid_targets_mean": 3624.5, "valid_targets_min": 397 }, { "epoch": 2.0743034055727554, "grad_norm": 0.2172013859447262, "learning_rate": 1.0556182407624616e-05, "loss": 0.2546, "loss_nan_ranks": 0, "loss_rank_avg": 0.2626274526119232, "step": 1340, "valid_targets_mean": 3754.9, "valid_targets_min": 269 }, { "epoch": 2.082043343653251, "grad_norm": 0.20904050399109872, "learning_rate": 1.0397777335592693e-05, "loss": 0.2516, "loss_nan_ranks": 0, "loss_rank_avg": 0.25204020738601685, "step": 1345, "valid_targets_mean": 3859.0, "valid_targets_min": 463 }, { "epoch": 2.0897832817337463, "grad_norm": 0.46665012190374583, "learning_rate": 1.024015122440815e-05, "loss": 0.2769, "loss_nan_ranks": 0, "loss_rank_avg": 0.2907443046569824, "step": 1350, "valid_targets_mean": 4864.0, "valid_targets_min": 2389 }, { "epoch": 2.0975232198142413, "grad_norm": 0.30910992282947736, "learning_rate": 1.0083316861169704e-05, "loss": 0.2873, "loss_nan_ranks": 0, "loss_rank_avg": 0.2828359007835388, "step": 1355, "valid_targets_mean": 4532.4, "valid_targets_min": 468 }, { "epoch": 2.1052631578947367, "grad_norm": 0.3522274973128065, "learning_rate": 9.927286968747147e-06, "loss": 0.4222, "loss_nan_ranks": 0, "loss_rank_avg": 0.2904053330421448, "step": 1360, "valid_targets_mean": 5055.2, "valid_targets_min": 2995 }, { "epoch": 2.113003095975232, "grad_norm": 0.3236448171441497, "learning_rate": 9.772074204749178e-06, "loss": 0.2911, "loss_nan_ranks": 0, "loss_rank_avg": 0.31897926330566406, "step": 1365, "valid_targets_mean": 2780.0, "valid_targets_min": 448 }, { "epoch": 2.1207430340557276, "grad_norm": 0.2580480567815723, "learning_rate": 9.617691160496616e-06, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.2766563296318054, "step": 1370, "valid_targets_mean": 4765.8, "valid_targets_min": 2211 }, { "epoch": 2.128482972136223, "grad_norm": 0.2519211562935606, "learning_rate": 9.464150360000946e-06, "loss": 0.2836, "loss_nan_ranks": 0, "loss_rank_avg": 0.3104143738746643, "step": 1375, "valid_targets_mean": 3243.5, "valid_targets_min": 412 }, { "epoch": 2.136222910216718, "grad_norm": 0.315089516820891, "learning_rate": 9.311464258948354e-06, "loss": 0.4343, "loss_nan_ranks": 0, "loss_rank_avg": 0.2775019109249115, "step": 1380, "valid_targets_mean": 4668.5, "valid_targets_min": 2456 }, { "epoch": 2.1439628482972135, "grad_norm": 0.23754548575984485, "learning_rate": 9.159645243689245e-06, "loss": 0.2765, "loss_nan_ranks": 0, "loss_rank_avg": 0.2711362838745117, "step": 1385, "valid_targets_mean": 4707.3, "valid_targets_min": 2483 }, { "epoch": 2.151702786377709, "grad_norm": 0.31059351838823335, "learning_rate": 9.008705630233454e-06, "loss": 0.4127, "loss_nan_ranks": 0, "loss_rank_avg": 0.2804102897644043, "step": 1390, "valid_targets_mean": 4803.9, "valid_targets_min": 3123 }, { "epoch": 2.1594427244582044, "grad_norm": 0.24069835959534366, "learning_rate": 8.85865766325113e-06, "loss": 0.2782, "loss_nan_ranks": 0, "loss_rank_avg": 0.2760240435600281, "step": 1395, "valid_targets_mean": 4720.2, "valid_targets_min": 2652 }, { "epoch": 2.1671826625387, "grad_norm": 0.28907425488238114, "learning_rate": 8.709513515079398e-06, "loss": 0.3943, "loss_nan_ranks": 0, "loss_rank_avg": 0.29097089171409607, "step": 1400, "valid_targets_mean": 3505.7, "valid_targets_min": 429 }, { "epoch": 2.174922600619195, "grad_norm": 0.22017513910048814, "learning_rate": 8.561285284734938e-06, "loss": 0.2745, "loss_nan_ranks": 0, "loss_rank_avg": 0.2772256135940552, "step": 1405, "valid_targets_mean": 4646.0, "valid_targets_min": 2402 }, { "epoch": 2.1826625386996903, "grad_norm": 0.5785356850860002, "learning_rate": 8.413984996932431e-06, "loss": 0.4228, "loss_nan_ranks": 0, "loss_rank_avg": 0.5103334188461304, "step": 1410, "valid_targets_mean": 973.6, "valid_targets_min": 281 }, { "epoch": 2.1904024767801857, "grad_norm": 0.2184008661733165, "learning_rate": 8.26762460110911e-06, "loss": 0.2718, "loss_nan_ranks": 0, "loss_rank_avg": 0.26770395040512085, "step": 1415, "valid_targets_mean": 4744.8, "valid_targets_min": 2671 }, { "epoch": 2.198142414860681, "grad_norm": 0.253779582176502, "learning_rate": 8.122215970455371e-06, "loss": 0.4197, "loss_nan_ranks": 0, "loss_rank_avg": 0.3024511933326721, "step": 1420, "valid_targets_mean": 3931.4, "valid_targets_min": 474 }, { "epoch": 2.2058823529411766, "grad_norm": 0.20325660357569308, "learning_rate": 7.977770900951592e-06, "loss": 0.2788, "loss_nan_ranks": 0, "loss_rank_avg": 0.27750131487846375, "step": 1425, "valid_targets_mean": 5010.2, "valid_targets_min": 2642 }, { "epoch": 2.2136222910216716, "grad_norm": 0.6308558213826081, "learning_rate": 7.834301110411216e-06, "loss": 0.4236, "loss_nan_ranks": 0, "loss_rank_avg": 0.5459995269775391, "step": 1430, "valid_targets_mean": 812.3, "valid_targets_min": 129 }, { "epoch": 2.221362229102167, "grad_norm": 0.22091190358730112, "learning_rate": 7.691818237530147e-06, "loss": 0.3065, "loss_nan_ranks": 0, "loss_rank_avg": 0.3073071837425232, "step": 1435, "valid_targets_mean": 5991.4, "valid_targets_min": 913 }, { "epoch": 2.2291021671826625, "grad_norm": 0.2400662413354753, "learning_rate": 7.550333840942594e-06, "loss": 0.3157, "loss_nan_ranks": 0, "loss_rank_avg": 0.3117232918739319, "step": 1440, "valid_targets_mean": 5973.2, "valid_targets_min": 542 }, { "epoch": 2.236842105263158, "grad_norm": 0.24596319963688767, "learning_rate": 7.409859398283406e-06, "loss": 0.3097, "loss_nan_ranks": 0, "loss_rank_avg": 0.30473989248275757, "step": 1445, "valid_targets_mean": 5461.6, "valid_targets_min": 360 }, { "epoch": 2.2445820433436534, "grad_norm": 0.21303500837963765, "learning_rate": 7.270406305256954e-06, "loss": 0.3191, "loss_nan_ranks": 0, "loss_rank_avg": 0.3208760619163513, "step": 1450, "valid_targets_mean": 5406.7, "valid_targets_min": 310 }, { "epoch": 2.2523219814241484, "grad_norm": 0.22282067837975372, "learning_rate": 7.131985874712717e-06, "loss": 0.3019, "loss_nan_ranks": 0, "loss_rank_avg": 0.29536110162734985, "step": 1455, "valid_targets_mean": 5525.3, "valid_targets_min": 987 }, { "epoch": 2.260061919504644, "grad_norm": 0.2088956316251687, "learning_rate": 6.994609335727503e-06, "loss": 0.3132, "loss_nan_ranks": 0, "loss_rank_avg": 0.3129228353500366, "step": 1460, "valid_targets_mean": 5494.2, "valid_targets_min": 803 }, { "epoch": 2.2678018575851393, "grad_norm": 0.19600857595943563, "learning_rate": 6.858287832694535e-06, "loss": 0.3128, "loss_nan_ranks": 0, "loss_rank_avg": 0.3101985454559326, "step": 1465, "valid_targets_mean": 5719.7, "valid_targets_min": 988 }, { "epoch": 2.2755417956656347, "grad_norm": 0.22277262871201828, "learning_rate": 6.723032424419387e-06, "loss": 0.3052, "loss_nan_ranks": 0, "loss_rank_avg": 0.324319064617157, "step": 1470, "valid_targets_mean": 5339.7, "valid_targets_min": 596 }, { "epoch": 2.28328173374613, "grad_norm": 0.21247652547555076, "learning_rate": 6.588854083222851e-06, "loss": 0.3068, "loss_nan_ranks": 0, "loss_rank_avg": 0.32305365800857544, "step": 1475, "valid_targets_mean": 5534.7, "valid_targets_min": 575 }, { "epoch": 2.291021671826625, "grad_norm": 0.19804812009122108, "learning_rate": 6.4557636940508625e-06, "loss": 0.3031, "loss_nan_ranks": 0, "loss_rank_avg": 0.3071126937866211, "step": 1480, "valid_targets_mean": 5558.2, "valid_targets_min": 256 }, { "epoch": 2.2987616099071206, "grad_norm": 0.21231476361570778, "learning_rate": 6.323772053591404e-06, "loss": 0.3057, "loss_nan_ranks": 0, "loss_rank_avg": 0.30539828538894653, "step": 1485, "valid_targets_mean": 5328.4, "valid_targets_min": 524 }, { "epoch": 2.306501547987616, "grad_norm": 0.21260199451840608, "learning_rate": 6.192889869398748e-06, "loss": 0.2997, "loss_nan_ranks": 0, "loss_rank_avg": 0.3049812912940979, "step": 1490, "valid_targets_mean": 5696.6, "valid_targets_min": 614 }, { "epoch": 2.3142414860681115, "grad_norm": 0.23564604671250605, "learning_rate": 6.063127759024745e-06, "loss": 0.3085, "loss_nan_ranks": 0, "loss_rank_avg": 0.30037930607795715, "step": 1495, "valid_targets_mean": 5787.3, "valid_targets_min": 496 }, { "epoch": 2.321981424148607, "grad_norm": 0.19791326643477766, "learning_rate": 5.934496249157533e-06, "loss": 0.2982, "loss_nan_ranks": 0, "loss_rank_avg": 0.3089606761932373, "step": 1500, "valid_targets_mean": 5574.5, "valid_targets_min": 736 }, { "epoch": 2.329721362229102, "grad_norm": 0.20289524095960015, "learning_rate": 5.807005774767598e-06, "loss": 0.3077, "loss_nan_ranks": 0, "loss_rank_avg": 0.30494993925094604, "step": 1505, "valid_targets_mean": 5946.5, "valid_targets_min": 670 }, { "epoch": 2.3374613003095974, "grad_norm": 0.18304855157538058, "learning_rate": 5.68066667826119e-06, "loss": 0.2999, "loss_nan_ranks": 0, "loss_rank_avg": 0.302049845457077, "step": 1510, "valid_targets_mean": 6433.6, "valid_targets_min": 353 }, { "epoch": 2.345201238390093, "grad_norm": 0.5493322314615379, "learning_rate": 5.555489208641412e-06, "loss": 0.349, "loss_nan_ranks": 0, "loss_rank_avg": 0.509677529335022, "step": 1515, "valid_targets_mean": 1094.5, "valid_targets_min": 286 }, { "epoch": 2.3529411764705883, "grad_norm": 0.1805140863690906, "learning_rate": 5.431483520676704e-06, "loss": 0.3602, "loss_nan_ranks": 0, "loss_rank_avg": 0.2936132848262787, "step": 1520, "valid_targets_mean": 6291.4, "valid_targets_min": 338 }, { "epoch": 2.3606811145510838, "grad_norm": 0.42641728494351316, "learning_rate": 5.308659674077128e-06, "loss": 0.3197, "loss_nan_ranks": 0, "loss_rank_avg": 0.42394477128982544, "step": 1525, "valid_targets_mean": 1675.7, "valid_targets_min": 527 }, { "epoch": 2.3684210526315788, "grad_norm": 0.19803200180071323, "learning_rate": 5.187027632678228e-06, "loss": 0.3644, "loss_nan_ranks": 0, "loss_rank_avg": 0.29865461587905884, "step": 1530, "valid_targets_mean": 6662.4, "valid_targets_min": 1694 }, { "epoch": 2.376160990712074, "grad_norm": 0.3570559506169205, "learning_rate": 5.0665972636327775e-06, "loss": 0.3813, "loss_nan_ranks": 0, "loss_rank_avg": 0.40023019909858704, "step": 1535, "valid_targets_mean": 2726.5, "valid_targets_min": 474 }, { "epoch": 2.3839009287925697, "grad_norm": 0.17326643727386593, "learning_rate": 4.947378336610336e-06, "loss": 0.3006, "loss_nan_ranks": 0, "loss_rank_avg": 0.2955681085586548, "step": 1540, "valid_targets_mean": 6219.8, "valid_targets_min": 602 }, { "epoch": 2.391640866873065, "grad_norm": 0.5644311898078787, "learning_rate": 4.829380523004657e-06, "loss": 0.4264, "loss_nan_ranks": 0, "loss_rank_avg": 0.5778234601020813, "step": 1545, "valid_targets_mean": 844.2, "valid_targets_min": 290 }, { "epoch": 2.3993808049535605, "grad_norm": 0.18134571299636928, "learning_rate": 4.712613395149173e-06, "loss": 0.302, "loss_nan_ranks": 0, "loss_rank_avg": 0.31143641471862793, "step": 1550, "valid_targets_mean": 5912.4, "valid_targets_min": 277 }, { "epoch": 2.4071207430340555, "grad_norm": 0.40570041188391864, "learning_rate": 4.597086425540389e-06, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.4236239492893219, "step": 1555, "valid_targets_mean": 1662.7, "valid_targets_min": 313 }, { "epoch": 2.414860681114551, "grad_norm": 0.1720233461064192, "learning_rate": 4.482808986069531e-06, "loss": 0.2983, "loss_nan_ranks": 0, "loss_rank_avg": 0.2866983711719513, "step": 1560, "valid_targets_mean": 6104.8, "valid_targets_min": 435 }, { "epoch": 2.4226006191950464, "grad_norm": 0.25170741905375205, "learning_rate": 4.369790347262197e-06, "loss": 0.4231, "loss_nan_ranks": 0, "loss_rank_avg": 0.32215455174446106, "step": 1565, "valid_targets_mean": 2831.9, "valid_targets_min": 265 }, { "epoch": 2.430340557275542, "grad_norm": 0.17795832859887645, "learning_rate": 4.258039677526344e-06, "loss": 0.2967, "loss_nan_ranks": 0, "loss_rank_avg": 0.29880303144454956, "step": 1570, "valid_targets_mean": 6694.6, "valid_targets_min": 250 }, { "epoch": 2.4380804953560373, "grad_norm": 0.2494150992717737, "learning_rate": 4.147566042408502e-06, "loss": 0.424, "loss_nan_ranks": 0, "loss_rank_avg": 0.3349652886390686, "step": 1575, "valid_targets_mean": 3286.7, "valid_targets_min": 441 }, { "epoch": 2.4458204334365323, "grad_norm": 0.16923708951207625, "learning_rate": 4.0383784038583585e-06, "loss": 0.2942, "loss_nan_ranks": 0, "loss_rank_avg": 0.3039069175720215, "step": 1580, "valid_targets_mean": 6532.3, "valid_targets_min": 694 }, { "epoch": 2.4535603715170278, "grad_norm": 0.8694258992837159, "learning_rate": 3.930485619501747e-06, "loss": 0.4719, "loss_nan_ranks": 0, "loss_rank_avg": 0.530645489692688, "step": 1585, "valid_targets_mean": 1694.4, "valid_targets_min": 257 }, { "epoch": 2.461300309597523, "grad_norm": 0.6861559520309628, "learning_rate": 3.823896441922066e-06, "loss": 0.5183, "loss_nan_ranks": 0, "loss_rank_avg": 0.5340226888656616, "step": 1590, "valid_targets_mean": 2654.1, "valid_targets_min": 261 }, { "epoch": 2.4690402476780187, "grad_norm": 0.41883498638420535, "learning_rate": 3.7186195179502636e-06, "loss": 0.512, "loss_nan_ranks": 0, "loss_rank_avg": 0.496014267206192, "step": 1595, "valid_targets_mean": 2251.2, "valid_targets_min": 337 }, { "epoch": 2.476780185758514, "grad_norm": 0.28975030266636936, "learning_rate": 3.614663387963371e-06, "loss": 0.5135, "loss_nan_ranks": 0, "loss_rank_avg": 0.512344241142273, "step": 1600, "valid_targets_mean": 3427.8, "valid_targets_min": 298 }, { "epoch": 2.484520123839009, "grad_norm": 0.26418682453433606, "learning_rate": 3.5120364851916832e-06, "loss": 0.4949, "loss_nan_ranks": 0, "loss_rank_avg": 0.49718883633613586, "step": 1605, "valid_targets_mean": 3676.3, "valid_targets_min": 415 }, { "epoch": 2.4922600619195046, "grad_norm": 0.281710151730984, "learning_rate": 3.410747135034642e-06, "loss": 0.5037, "loss_nan_ranks": 0, "loss_rank_avg": 0.48265427350997925, "step": 1610, "valid_targets_mean": 3749.4, "valid_targets_min": 319 }, { "epoch": 2.5, "grad_norm": 0.2577956784865143, "learning_rate": 3.310803554385438e-06, "loss": 0.4872, "loss_nan_ranks": 0, "loss_rank_avg": 0.4795178174972534, "step": 1615, "valid_targets_mean": 3321.1, "valid_targets_min": 266 }, { "epoch": 2.5077399380804954, "grad_norm": 0.35159723334083737, "learning_rate": 3.2122138509644364e-06, "loss": 0.4999, "loss_nan_ranks": 0, "loss_rank_avg": 0.47505080699920654, "step": 1620, "valid_targets_mean": 3494.8, "valid_targets_min": 260 }, { "epoch": 2.515479876160991, "grad_norm": 0.2512223834877677, "learning_rate": 3.1149860226614613e-06, "loss": 0.5031, "loss_nan_ranks": 0, "loss_rank_avg": 0.5007380843162537, "step": 1625, "valid_targets_mean": 3470.5, "valid_targets_min": 217 }, { "epoch": 2.523219814241486, "grad_norm": 0.3141754515576957, "learning_rate": 3.019127956886969e-06, "loss": 0.4917, "loss_nan_ranks": 0, "loss_rank_avg": 0.5128790736198425, "step": 1630, "valid_targets_mean": 2027.0, "valid_targets_min": 249 }, { "epoch": 2.5309597523219813, "grad_norm": 0.2635355892638946, "learning_rate": 2.924647429932228e-06, "loss": 0.4976, "loss_nan_ranks": 0, "loss_rank_avg": 0.48266178369522095, "step": 1635, "valid_targets_mean": 2672.5, "valid_targets_min": 243 }, { "epoch": 2.538699690402477, "grad_norm": 0.2539204593977298, "learning_rate": 2.8315521063384467e-06, "loss": 0.4887, "loss_nan_ranks": 0, "loss_rank_avg": 0.49488547444343567, "step": 1640, "valid_targets_mean": 3140.0, "valid_targets_min": 302 }, { "epoch": 2.5464396284829722, "grad_norm": 0.217726211895958, "learning_rate": 2.739849538275019e-06, "loss": 0.4956, "loss_nan_ranks": 0, "loss_rank_avg": 0.4866185188293457, "step": 1645, "valid_targets_mean": 3984.5, "valid_targets_min": 258 }, { "epoch": 2.5541795665634677, "grad_norm": 0.2320001335465348, "learning_rate": 2.6495471649268757e-06, "loss": 0.4801, "loss_nan_ranks": 0, "loss_rank_avg": 0.463797390460968, "step": 1650, "valid_targets_mean": 3525.9, "valid_targets_min": 242 }, { "epoch": 2.5619195046439627, "grad_norm": 0.2266217849063618, "learning_rate": 2.560652311890981e-06, "loss": 0.4933, "loss_nan_ranks": 0, "loss_rank_avg": 0.47365492582321167, "step": 1655, "valid_targets_mean": 4537.2, "valid_targets_min": 394 }, { "epoch": 2.569659442724458, "grad_norm": 0.5535731219467451, "learning_rate": 2.4731721905820916e-06, "loss": 0.6353, "loss_nan_ranks": 0, "loss_rank_avg": 0.6892400979995728, "step": 1660, "valid_targets_mean": 3944.9, "valid_targets_min": 626 }, { "epoch": 2.5773993808049536, "grad_norm": 0.41074838934685265, "learning_rate": 2.3871138976476815e-06, "loss": 0.6281, "loss_nan_ranks": 0, "loss_rank_avg": 0.7635179162025452, "step": 1665, "valid_targets_mean": 3327.8, "valid_targets_min": 423 }, { "epoch": 2.585139318885449, "grad_norm": 0.2695103381259381, "learning_rate": 2.3024844143923164e-06, "loss": 0.6276, "loss_nan_ranks": 0, "loss_rank_avg": 0.6791110038757324, "step": 1670, "valid_targets_mean": 4198.4, "valid_targets_min": 570 }, { "epoch": 2.5928792569659445, "grad_norm": 0.29028639276044116, "learning_rate": 2.219290606211253e-06, "loss": 0.61, "loss_nan_ranks": 0, "loss_rank_avg": 0.6073037981987, "step": 1675, "valid_targets_mean": 3893.0, "valid_targets_min": 885 }, { "epoch": 2.6006191950464395, "grad_norm": 0.26808292030451497, "learning_rate": 2.137539222033527e-06, "loss": 0.624, "loss_nan_ranks": 0, "loss_rank_avg": 0.6242520809173584, "step": 1680, "valid_targets_mean": 3704.1, "valid_targets_min": 891 }, { "epoch": 2.608359133126935, "grad_norm": 0.25579873742778253, "learning_rate": 2.05723689377445e-06, "loss": 0.6184, "loss_nan_ranks": 0, "loss_rank_avg": 0.5944548845291138, "step": 1685, "valid_targets_mean": 3793.6, "valid_targets_min": 1124 }, { "epoch": 2.6160990712074303, "grad_norm": 0.2680775147249504, "learning_rate": 1.9783901357975987e-06, "loss": 0.6108, "loss_nan_ranks": 0, "loss_rank_avg": 0.5884455442428589, "step": 1690, "valid_targets_mean": 3647.5, "valid_targets_min": 775 }, { "epoch": 2.623839009287926, "grad_norm": 0.2648780300863295, "learning_rate": 1.9010053443863796e-06, "loss": 0.6077, "loss_nan_ranks": 0, "loss_rank_avg": 0.6278525590896606, "step": 1695, "valid_targets_mean": 3801.7, "valid_targets_min": 835 }, { "epoch": 2.6315789473684212, "grad_norm": 0.23985387358779475, "learning_rate": 1.8250887972251096e-06, "loss": 0.6065, "loss_nan_ranks": 0, "loss_rank_avg": 0.5884929895401001, "step": 1700, "valid_targets_mean": 4259.5, "valid_targets_min": 886 }, { "epoch": 2.6393188854489162, "grad_norm": 0.231419199707839, "learning_rate": 1.7506466528897802e-06, "loss": 0.6093, "loss_nan_ranks": 0, "loss_rank_avg": 0.6190042495727539, "step": 1705, "valid_targets_mean": 4118.3, "valid_targets_min": 1310 }, { "epoch": 2.6470588235294117, "grad_norm": 0.22779000268320077, "learning_rate": 1.677684950348435e-06, "loss": 0.6067, "loss_nan_ranks": 0, "loss_rank_avg": 0.5958472490310669, "step": 1710, "valid_targets_mean": 4160.2, "valid_targets_min": 795 }, { "epoch": 2.654798761609907, "grad_norm": 0.22402609358806513, "learning_rate": 1.6062096084712786e-06, "loss": 0.623, "loss_nan_ranks": 0, "loss_rank_avg": 0.5853190422058105, "step": 1715, "valid_targets_mean": 4790.0, "valid_targets_min": 1104 }, { "epoch": 2.6625386996904026, "grad_norm": 0.21928463378918323, "learning_rate": 1.53622642555052e-06, "loss": 0.608, "loss_nan_ranks": 0, "loss_rank_avg": 0.573377251625061, "step": 1720, "valid_targets_mean": 4677.6, "valid_targets_min": 628 }, { "epoch": 2.670278637770898, "grad_norm": 0.22990181538010174, "learning_rate": 1.4677410788299984e-06, "loss": 0.6225, "loss_nan_ranks": 0, "loss_rank_avg": 0.5744858980178833, "step": 1725, "valid_targets_mean": 3947.1, "valid_targets_min": 1088 }, { "epoch": 2.678018575851393, "grad_norm": 0.4076358392577911, "learning_rate": 1.400759124044637e-06, "loss": 0.6184, "loss_nan_ranks": 0, "loss_rank_avg": 0.6449670791625977, "step": 1730, "valid_targets_mean": 2494.4, "valid_targets_min": 400 }, { "epoch": 2.6857585139318885, "grad_norm": 0.5280765848335666, "learning_rate": 1.3352859949697127e-06, "loss": 0.5258, "loss_nan_ranks": 0, "loss_rank_avg": 0.5440828800201416, "step": 1735, "valid_targets_mean": 3803.9, "valid_targets_min": 341 }, { "epoch": 2.693498452012384, "grad_norm": 0.4788888153411866, "learning_rate": 1.2713270029801028e-06, "loss": 0.513, "loss_nan_ranks": 0, "loss_rank_avg": 0.5939984321594238, "step": 1740, "valid_targets_mean": 2460.7, "valid_targets_min": 507 }, { "epoch": 2.7012383900928794, "grad_norm": 0.3173889002826398, "learning_rate": 1.2088873366193687e-06, "loss": 0.5251, "loss_nan_ranks": 0, "loss_rank_avg": 0.5380067229270935, "step": 1745, "valid_targets_mean": 3748.4, "valid_targets_min": 754 }, { "epoch": 2.708978328173375, "grad_norm": 0.3705774365156222, "learning_rate": 1.1479720611788525e-06, "loss": 0.5139, "loss_nan_ranks": 0, "loss_rank_avg": 0.6330641508102417, "step": 1750, "valid_targets_mean": 2391.1, "valid_targets_min": 521 }, { "epoch": 2.71671826625387, "grad_norm": 0.25829421667909225, "learning_rate": 1.0885861182867985e-06, "loss": 0.5036, "loss_nan_ranks": 0, "loss_rank_avg": 0.514169454574585, "step": 1755, "valid_targets_mean": 3659.7, "valid_targets_min": 806 }, { "epoch": 2.7244582043343653, "grad_norm": 0.31912077406959755, "learning_rate": 1.0307343255074187e-06, "loss": 0.505, "loss_nan_ranks": 0, "loss_rank_avg": 0.6068185567855835, "step": 1760, "valid_targets_mean": 3227.0, "valid_targets_min": 450 }, { "epoch": 2.7321981424148607, "grad_norm": 0.25975497335621966, "learning_rate": 9.744213759501275e-07, "loss": 0.5117, "loss_nan_ranks": 0, "loss_rank_avg": 0.5231520533561707, "step": 1765, "valid_targets_mean": 3305.4, "valid_targets_min": 621 }, { "epoch": 2.739938080495356, "grad_norm": 0.26497384483537273, "learning_rate": 9.196518378887975e-07, "loss": 0.505, "loss_nan_ranks": 0, "loss_rank_avg": 0.5685962438583374, "step": 1770, "valid_targets_mean": 3149.7, "valid_targets_min": 462 }, { "epoch": 2.7476780185758516, "grad_norm": 0.2467240160425324, "learning_rate": 8.66430154391169e-07, "loss": 0.4962, "loss_nan_ranks": 0, "loss_rank_avg": 0.5243191123008728, "step": 1775, "valid_targets_mean": 3709.8, "valid_targets_min": 263 }, { "epoch": 2.7554179566563466, "grad_norm": 0.3035163036989275, "learning_rate": 8.147606429584232e-07, "loss": 0.504, "loss_nan_ranks": 0, "loss_rank_avg": 0.5350388288497925, "step": 1780, "valid_targets_mean": 3436.5, "valid_targets_min": 523 }, { "epoch": 2.763157894736842, "grad_norm": 0.21982119897667, "learning_rate": 7.646474951749217e-07, "loss": 0.5127, "loss_nan_ranks": 0, "loss_rank_avg": 0.544601559638977, "step": 1785, "valid_targets_mean": 4309.8, "valid_targets_min": 269 }, { "epoch": 2.7708978328173375, "grad_norm": 0.23378731204836112, "learning_rate": 7.160947763681924e-07, "loss": 0.5041, "loss_nan_ranks": 0, "loss_rank_avg": 0.5182209014892578, "step": 1790, "valid_targets_mean": 3643.2, "valid_targets_min": 373 }, { "epoch": 2.778637770897833, "grad_norm": 0.23122266832381014, "learning_rate": 6.691064252791158e-07, "loss": 0.5044, "loss_nan_ranks": 0, "loss_rank_avg": 0.5215298533439636, "step": 1795, "valid_targets_mean": 3911.8, "valid_targets_min": 1193 }, { "epoch": 2.7863777089783284, "grad_norm": 0.2351100926605, "learning_rate": 6.236862537424194e-07, "loss": 0.497, "loss_nan_ranks": 0, "loss_rank_avg": 0.5257998704910278, "step": 1800, "valid_targets_mean": 3986.0, "valid_targets_min": 1074 }, { "epoch": 2.7941176470588234, "grad_norm": 0.2283488540535019, "learning_rate": 5.798379463774373e-07, "loss": 0.5165, "loss_nan_ranks": 0, "loss_rank_avg": 0.5136227607727051, "step": 1805, "valid_targets_mean": 4058.6, "valid_targets_min": 964 }, { "epoch": 2.801857585139319, "grad_norm": 0.2288831862161014, "learning_rate": 5.375650602892091e-07, "loss": 0.5009, "loss_nan_ranks": 0, "loss_rank_avg": 0.4868626594543457, "step": 1810, "valid_targets_mean": 3589.4, "valid_targets_min": 1066 }, { "epoch": 2.8095975232198143, "grad_norm": 0.23150890542263308, "learning_rate": 4.96871024779928e-07, "loss": 0.5075, "loss_nan_ranks": 0, "loss_rank_avg": 0.5141134262084961, "step": 1815, "valid_targets_mean": 3394.9, "valid_targets_min": 311 }, { "epoch": 2.8173374613003097, "grad_norm": 0.23789582679013196, "learning_rate": 4.5775914107072164e-07, "loss": 0.5791, "loss_nan_ranks": 0, "loss_rank_avg": 0.5422489047050476, "step": 1820, "valid_targets_mean": 3667.1, "valid_targets_min": 855 }, { "epoch": 2.825077399380805, "grad_norm": 0.2424143922969789, "learning_rate": 4.202325820338682e-07, "loss": 0.5051, "loss_nan_ranks": 0, "loss_rank_avg": 0.5248996615409851, "step": 1825, "valid_targets_mean": 3378.0, "valid_targets_min": 549 }, { "epoch": 2.8328173374613, "grad_norm": 0.2641392304157764, "learning_rate": 3.842943919353914e-07, "loss": 0.5944, "loss_nan_ranks": 0, "loss_rank_avg": 0.5383692979812622, "step": 1830, "valid_targets_mean": 2999.8, "valid_targets_min": 301 }, { "epoch": 2.8405572755417956, "grad_norm": 0.23456221052117204, "learning_rate": 3.499474861881069e-07, "loss": 0.5028, "loss_nan_ranks": 0, "loss_rank_avg": 0.5162132978439331, "step": 1835, "valid_targets_mean": 3465.6, "valid_targets_min": 544 }, { "epoch": 2.848297213622291, "grad_norm": 0.3055562941134727, "learning_rate": 3.1719465111511583e-07, "loss": 0.5469, "loss_nan_ranks": 0, "loss_rank_avg": 0.6021124124526978, "step": 1840, "valid_targets_mean": 2587.6, "valid_targets_min": 369 }, { "epoch": 2.8560371517027865, "grad_norm": 0.2502192285084054, "learning_rate": 2.8603854372376117e-07, "loss": 0.4991, "loss_nan_ranks": 0, "loss_rank_avg": 0.5194599032402039, "step": 1845, "valid_targets_mean": 3274.3, "valid_targets_min": 346 }, { "epoch": 2.863777089783282, "grad_norm": 0.3862187176497177, "learning_rate": 2.5648169149009583e-07, "loss": 0.5405, "loss_nan_ranks": 0, "loss_rank_avg": 0.6468001008033752, "step": 1850, "valid_targets_mean": 2213.1, "valid_targets_min": 520 }, { "epoch": 2.871517027863777, "grad_norm": 0.24642548117518157, "learning_rate": 2.2852649215383548e-07, "loss": 0.5021, "loss_nan_ranks": 0, "loss_rank_avg": 0.5029493570327759, "step": 1855, "valid_targets_mean": 3187.1, "valid_targets_min": 454 }, { "epoch": 2.8792569659442724, "grad_norm": 0.6393738585673495, "learning_rate": 2.021752135238564e-07, "loss": 0.5939, "loss_nan_ranks": 0, "loss_rank_avg": 0.9357088804244995, "step": 1860, "valid_targets_mean": 1165.0, "valid_targets_min": 257 }, { "epoch": 2.886996904024768, "grad_norm": 0.24473998134972677, "learning_rate": 1.774299932942136e-07, "loss": 0.4976, "loss_nan_ranks": 0, "loss_rank_avg": 0.4848713278770447, "step": 1865, "valid_targets_mean": 3309.6, "valid_targets_min": 624 }, { "epoch": 2.8947368421052633, "grad_norm": 0.811216041248401, "learning_rate": 1.5429283887073543e-07, "loss": 0.5679, "loss_nan_ranks": 0, "loss_rank_avg": 0.9271347522735596, "step": 1870, "valid_targets_mean": 1005.5, "valid_targets_min": 350 }, { "epoch": 2.9024767801857587, "grad_norm": 0.2348557107715619, "learning_rate": 1.3276562720816677e-07, "loss": 0.5134, "loss_nan_ranks": 0, "loss_rank_avg": 0.4838997721672058, "step": 1875, "valid_targets_mean": 3382.6, "valid_targets_min": 693 }, { "epoch": 2.9102167182662537, "grad_norm": 0.39825490103557576, "learning_rate": 1.1285010465791335e-07, "loss": 0.5182, "loss_nan_ranks": 0, "loss_rank_avg": 0.6444357633590698, "step": 1880, "valid_targets_mean": 1660.4, "valid_targets_min": 441 }, { "epoch": 2.917956656346749, "grad_norm": 0.23174085543587303, "learning_rate": 9.454788682637051e-08, "loss": 0.514, "loss_nan_ranks": 0, "loss_rank_avg": 0.47774291038513184, "step": 1885, "valid_targets_mean": 3356.6, "valid_targets_min": 296 }, { "epoch": 2.9256965944272446, "grad_norm": 0.36941506963780263, "learning_rate": 7.786045844385248e-08, "loss": 0.5142, "loss_nan_ranks": 0, "loss_rank_avg": 0.6007277965545654, "step": 1890, "valid_targets_mean": 2242.9, "valid_targets_min": 530 }, { "epoch": 2.93343653250774, "grad_norm": 1.179204248521706, "learning_rate": 6.27891732441599e-08, "loss": 0.356, "loss_nan_ranks": 0, "loss_rank_avg": 0.3209402561187744, "step": 1895, "valid_targets_mean": 4536.9, "valid_targets_min": 1128 }, { "epoch": 2.9411764705882355, "grad_norm": 1.2129510713833693, "learning_rate": 4.933525385474758e-08, "loss": 0.2926, "loss_nan_ranks": 0, "loss_rank_avg": 0.2939125597476959, "step": 1900, "valid_targets_mean": 4706.6, "valid_targets_min": 957 }, { "epoch": 2.9489164086687305, "grad_norm": 1.1863291012650041, "learning_rate": 3.749979169755502e-08, "loss": 0.2913, "loss_nan_ranks": 0, "loss_rank_avg": 0.26739662885665894, "step": 1905, "valid_targets_mean": 5179.4, "valid_targets_min": 1060 }, { "epoch": 2.956656346749226, "grad_norm": 1.197256521892218, "learning_rate": 2.7283746900454987e-08, "loss": 0.2818, "loss_nan_ranks": 0, "loss_rank_avg": 0.29562872648239136, "step": 1910, "valid_targets_mean": 4519.4, "valid_targets_min": 1164 }, { "epoch": 2.9643962848297214, "grad_norm": 1.3498867410906616, "learning_rate": 1.8687948219371367e-08, "loss": 0.3047, "loss_nan_ranks": 0, "loss_rank_avg": 0.3049085736274719, "step": 1915, "valid_targets_mean": 4008.1, "valid_targets_min": 1280 }, { "epoch": 2.972136222910217, "grad_norm": 1.40473659835793, "learning_rate": 1.171309297104406e-08, "loss": 0.3081, "loss_nan_ranks": 0, "loss_rank_avg": 0.3065996468067169, "step": 1920, "valid_targets_mean": 4820.1, "valid_targets_min": 1337 }, { "epoch": 2.9798761609907123, "grad_norm": 1.3394815925622965, "learning_rate": 6.3597469764675735e-09, "loss": 0.287, "loss_nan_ranks": 0, "loss_rank_avg": 0.27871161699295044, "step": 1925, "valid_targets_mean": 2952.3, "valid_targets_min": 187 }, { "epoch": 2.9876160990712073, "grad_norm": 1.3180578947124486, "learning_rate": 2.6283445149810625e-09, "loss": 0.2763, "loss_nan_ranks": 0, "loss_rank_avg": 0.28097784519195557, "step": 1930, "valid_targets_mean": 2867.5, "valid_targets_min": 1216 }, { "epoch": 2.9953560371517027, "grad_norm": 1.2405589554919676, "learning_rate": 5.191882890454025e-10, "loss": 0.2741, "loss_nan_ranks": 0, "loss_rank_avg": 0.25938713550567627, "step": 1935, "valid_targets_mean": 3687.5, "valid_targets_min": 1785 }, { "epoch": 3.0, "step": 1938, "total_flos": 5686933939290112.0, "train_loss": 0.0, "train_runtime": 5.6192, "train_samples_per_second": 44118.376, "train_steps_per_second": 344.888 } ], "logging_steps": 5, "max_steps": 1938, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5686933939290112.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }