{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1379, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.025466893039049237, "grad_norm": 12.905935111767764, "learning_rate": 1.1594202898550726e-06, "loss": 0.5549, "loss_nan_ranks": 0, "loss_rank_avg": 0.15574893355369568, "step": 5, "valid_targets_mean": 4775.0, "valid_targets_min": 444 }, { "epoch": 0.050933786078098474, "grad_norm": 10.078738477645182, "learning_rate": 2.6086956521739132e-06, "loss": 0.5226, "loss_nan_ranks": 0, "loss_rank_avg": 0.19069485366344452, "step": 10, "valid_targets_mean": 6629.3, "valid_targets_min": 499 }, { "epoch": 0.07640067911714771, "grad_norm": 5.205060737600816, "learning_rate": 4.057971014492754e-06, "loss": 0.4676, "loss_nan_ranks": 0, "loss_rank_avg": 0.13914033770561218, "step": 15, "valid_targets_mean": 3617.4, "valid_targets_min": 508 }, { "epoch": 0.10186757215619695, "grad_norm": 1.7561630559315866, "learning_rate": 5.507246376811595e-06, "loss": 0.4211, "loss_nan_ranks": 0, "loss_rank_avg": 0.13013756275177002, "step": 20, "valid_targets_mean": 4257.7, "valid_targets_min": 701 }, { "epoch": 0.1273344651952462, "grad_norm": 1.0501819268457813, "learning_rate": 6.956521739130435e-06, "loss": 0.369, "loss_nan_ranks": 0, "loss_rank_avg": 0.11685547232627869, "step": 25, "valid_targets_mean": 4292.6, "valid_targets_min": 518 }, { "epoch": 0.15280135823429541, "grad_norm": 0.8646360383884321, "learning_rate": 8.405797101449275e-06, "loss": 0.3535, "loss_nan_ranks": 0, "loss_rank_avg": 0.136878103017807, "step": 30, "valid_targets_mean": 4800.1, "valid_targets_min": 456 }, { "epoch": 0.17826825127334464, "grad_norm": 0.5897389861417078, "learning_rate": 9.855072463768118e-06, "loss": 0.3545, "loss_nan_ranks": 0, "loss_rank_avg": 0.11823979020118713, "step": 35, "valid_targets_mean": 4771.1, "valid_targets_min": 528 }, { "epoch": 0.2037351443123939, "grad_norm": 0.4060461860301084, "learning_rate": 1.1304347826086957e-05, "loss": 0.3454, "loss_nan_ranks": 0, "loss_rank_avg": 0.11010861396789551, "step": 40, "valid_targets_mean": 5615.3, "valid_targets_min": 513 }, { "epoch": 0.22920203735144312, "grad_norm": 0.34315152890741674, "learning_rate": 1.2753623188405797e-05, "loss": 0.3384, "loss_nan_ranks": 0, "loss_rank_avg": 0.09936510026454926, "step": 45, "valid_targets_mean": 4959.6, "valid_targets_min": 509 }, { "epoch": 0.2546689303904924, "grad_norm": 0.35246800035056547, "learning_rate": 1.420289855072464e-05, "loss": 0.2966, "loss_nan_ranks": 0, "loss_rank_avg": 0.11312542855739594, "step": 50, "valid_targets_mean": 6158.5, "valid_targets_min": 642 }, { "epoch": 0.2801358234295416, "grad_norm": 0.23652896675744703, "learning_rate": 1.565217391304348e-05, "loss": 0.2833, "loss_nan_ranks": 0, "loss_rank_avg": 0.07454854249954224, "step": 55, "valid_targets_mean": 5430.5, "valid_targets_min": 370 }, { "epoch": 0.30560271646859083, "grad_norm": 0.2647481031451876, "learning_rate": 1.710144927536232e-05, "loss": 0.2756, "loss_nan_ranks": 0, "loss_rank_avg": 0.09449119865894318, "step": 60, "valid_targets_mean": 5365.9, "valid_targets_min": 501 }, { "epoch": 0.3310696095076401, "grad_norm": 0.22866704217660094, "learning_rate": 1.8550724637681162e-05, "loss": 0.273, "loss_nan_ranks": 0, "loss_rank_avg": 0.07665856182575226, "step": 65, "valid_targets_mean": 3499.1, "valid_targets_min": 394 }, { "epoch": 0.3565365025466893, "grad_norm": 0.23838754270863646, "learning_rate": 2e-05, "loss": 0.2771, "loss_nan_ranks": 0, "loss_rank_avg": 0.09475560486316681, "step": 70, "valid_targets_mean": 5222.6, "valid_targets_min": 452 }, { "epoch": 0.38200339558573854, "grad_norm": 0.22204236958742846, "learning_rate": 2.1449275362318844e-05, "loss": 0.2772, "loss_nan_ranks": 0, "loss_rank_avg": 0.06597326695919037, "step": 75, "valid_targets_mean": 3930.4, "valid_targets_min": 624 }, { "epoch": 0.4074702886247878, "grad_norm": 0.21916062266130582, "learning_rate": 2.2898550724637684e-05, "loss": 0.2525, "loss_nan_ranks": 0, "loss_rank_avg": 0.08288627862930298, "step": 80, "valid_targets_mean": 5469.0, "valid_targets_min": 524 }, { "epoch": 0.432937181663837, "grad_norm": 0.21084610995408531, "learning_rate": 2.4347826086956526e-05, "loss": 0.2461, "loss_nan_ranks": 0, "loss_rank_avg": 0.07836160808801651, "step": 85, "valid_targets_mean": 4980.2, "valid_targets_min": 371 }, { "epoch": 0.45840407470288624, "grad_norm": 0.24877805391611285, "learning_rate": 2.5797101449275362e-05, "loss": 0.244, "loss_nan_ranks": 0, "loss_rank_avg": 0.10522396862506866, "step": 90, "valid_targets_mean": 5283.2, "valid_targets_min": 448 }, { "epoch": 0.4838709677419355, "grad_norm": 0.20293051375960244, "learning_rate": 2.7246376811594205e-05, "loss": 0.2435, "loss_nan_ranks": 0, "loss_rank_avg": 0.0722227692604065, "step": 95, "valid_targets_mean": 5206.1, "valid_targets_min": 688 }, { "epoch": 0.5093378607809848, "grad_norm": 0.20540839975282746, "learning_rate": 2.8695652173913044e-05, "loss": 0.2406, "loss_nan_ranks": 0, "loss_rank_avg": 0.0783793106675148, "step": 100, "valid_targets_mean": 5392.8, "valid_targets_min": 597 }, { "epoch": 0.534804753820034, "grad_norm": 0.22119488094876025, "learning_rate": 3.0144927536231887e-05, "loss": 0.2289, "loss_nan_ranks": 0, "loss_rank_avg": 0.07414476573467255, "step": 105, "valid_targets_mean": 6400.4, "valid_targets_min": 420 }, { "epoch": 0.5602716468590832, "grad_norm": 0.33622198716688095, "learning_rate": 3.1594202898550726e-05, "loss": 0.2146, "loss_nan_ranks": 0, "loss_rank_avg": 0.07343263179063797, "step": 110, "valid_targets_mean": 5583.2, "valid_targets_min": 502 }, { "epoch": 0.5857385398981324, "grad_norm": 0.18463308515750862, "learning_rate": 3.304347826086957e-05, "loss": 0.2199, "loss_nan_ranks": 0, "loss_rank_avg": 0.06492072343826294, "step": 115, "valid_targets_mean": 5075.7, "valid_targets_min": 365 }, { "epoch": 0.6112054329371817, "grad_norm": 0.20390681903894134, "learning_rate": 3.449275362318841e-05, "loss": 0.2231, "loss_nan_ranks": 0, "loss_rank_avg": 0.05670241639018059, "step": 120, "valid_targets_mean": 4689.9, "valid_targets_min": 93 }, { "epoch": 0.6366723259762309, "grad_norm": 0.24639858051715774, "learning_rate": 3.594202898550725e-05, "loss": 0.2184, "loss_nan_ranks": 0, "loss_rank_avg": 0.06681576371192932, "step": 125, "valid_targets_mean": 4932.8, "valid_targets_min": 343 }, { "epoch": 0.6621392190152802, "grad_norm": 0.23628475300889795, "learning_rate": 3.739130434782609e-05, "loss": 0.214, "loss_nan_ranks": 0, "loss_rank_avg": 0.08704615384340286, "step": 130, "valid_targets_mean": 6058.8, "valid_targets_min": 256 }, { "epoch": 0.6876061120543294, "grad_norm": 0.22646525736855452, "learning_rate": 3.884057971014493e-05, "loss": 0.2103, "loss_nan_ranks": 0, "loss_rank_avg": 0.06430119276046753, "step": 135, "valid_targets_mean": 5760.0, "valid_targets_min": 615 }, { "epoch": 0.7130730050933786, "grad_norm": 0.23100345627656083, "learning_rate": 3.999993591506466e-05, "loss": 0.2006, "loss_nan_ranks": 0, "loss_rank_avg": 0.06479993462562561, "step": 140, "valid_targets_mean": 5392.5, "valid_targets_min": 442 }, { "epoch": 0.7385398981324278, "grad_norm": 0.19049974356747768, "learning_rate": 3.99976929854497e-05, "loss": 0.2006, "loss_nan_ranks": 0, "loss_rank_avg": 0.056917525827884674, "step": 145, "valid_targets_mean": 6286.8, "valid_targets_min": 556 }, { "epoch": 0.7640067911714771, "grad_norm": 0.191970251151516, "learning_rate": 3.999224621974382e-05, "loss": 0.2025, "loss_nan_ranks": 0, "loss_rank_avg": 0.0581885427236557, "step": 150, "valid_targets_mean": 4557.8, "valid_targets_min": 525 }, { "epoch": 0.7894736842105263, "grad_norm": 0.18997461349835917, "learning_rate": 3.9983596490574876e-05, "loss": 0.2052, "loss_nan_ranks": 0, "loss_rank_avg": 0.05231058597564697, "step": 155, "valid_targets_mean": 5922.7, "valid_targets_min": 607 }, { "epoch": 0.8149405772495756, "grad_norm": 0.24055282524770882, "learning_rate": 3.9971745183718484e-05, "loss": 0.2226, "loss_nan_ranks": 0, "loss_rank_avg": 0.06692491471767426, "step": 160, "valid_targets_mean": 5274.8, "valid_targets_min": 585 }, { "epoch": 0.8404074702886248, "grad_norm": 0.1942999906232565, "learning_rate": 3.995669419787586e-05, "loss": 0.2102, "loss_nan_ranks": 0, "loss_rank_avg": 0.05607856065034866, "step": 165, "valid_targets_mean": 5621.2, "valid_targets_min": 406 }, { "epoch": 0.865874363327674, "grad_norm": 0.22362655428605688, "learning_rate": 3.9938445944369745e-05, "loss": 0.2116, "loss_nan_ranks": 0, "loss_rank_avg": 0.08082179725170135, "step": 170, "valid_targets_mean": 5157.8, "valid_targets_min": 500 }, { "epoch": 0.8913412563667232, "grad_norm": 0.21350382917499386, "learning_rate": 3.9917003346758035e-05, "loss": 0.1987, "loss_nan_ranks": 0, "loss_rank_avg": 0.060487180948257446, "step": 175, "valid_targets_mean": 5434.3, "valid_targets_min": 424 }, { "epoch": 0.9168081494057725, "grad_norm": 0.20059728021230971, "learning_rate": 3.989236984036541e-05, "loss": 0.1858, "loss_nan_ranks": 0, "loss_rank_avg": 0.056000277400016785, "step": 180, "valid_targets_mean": 4637.7, "valid_targets_min": 328 }, { "epoch": 0.9422750424448217, "grad_norm": 0.21478048535292446, "learning_rate": 3.986454937173292e-05, "loss": 0.2145, "loss_nan_ranks": 0, "loss_rank_avg": 0.05570094287395477, "step": 185, "valid_targets_mean": 4192.4, "valid_targets_min": 418 }, { "epoch": 0.967741935483871, "grad_norm": 0.1847053866044272, "learning_rate": 3.98335463979858e-05, "loss": 0.1915, "loss_nan_ranks": 0, "loss_rank_avg": 0.054768189787864685, "step": 190, "valid_targets_mean": 7047.9, "valid_targets_min": 560 }, { "epoch": 0.9932088285229203, "grad_norm": 0.23251181066885263, "learning_rate": 3.9799365886119304e-05, "loss": 0.1988, "loss_nan_ranks": 0, "loss_rank_avg": 0.06877763569355011, "step": 195, "valid_targets_mean": 5928.5, "valid_targets_min": 339 }, { "epoch": 1.0152801358234296, "grad_norm": 0.2716010848630139, "learning_rate": 3.976201331220296e-05, "loss": 0.1986, "loss_nan_ranks": 0, "loss_rank_avg": 0.07890613377094269, "step": 200, "valid_targets_mean": 4808.2, "valid_targets_min": 442 }, { "epoch": 1.0407470288624787, "grad_norm": 0.25684628959193206, "learning_rate": 3.9721494660503295e-05, "loss": 0.1903, "loss_nan_ranks": 0, "loss_rank_avg": 0.06097788363695145, "step": 205, "valid_targets_mean": 5266.6, "valid_targets_min": 312 }, { "epoch": 1.066213921901528, "grad_norm": 0.24425370965885543, "learning_rate": 3.9677816422525024e-05, "loss": 0.19, "loss_nan_ranks": 0, "loss_rank_avg": 0.07912104576826096, "step": 210, "valid_targets_mean": 4408.8, "valid_targets_min": 314 }, { "epoch": 1.0916808149405772, "grad_norm": 0.2002845335399574, "learning_rate": 3.963098559597112e-05, "loss": 0.1898, "loss_nan_ranks": 0, "loss_rank_avg": 0.05172646418213844, "step": 215, "valid_targets_mean": 5689.8, "valid_targets_min": 623 }, { "epoch": 1.1171477079796266, "grad_norm": 0.20284356911889379, "learning_rate": 3.9581009683621634e-05, "loss": 0.1873, "loss_nan_ranks": 0, "loss_rank_avg": 0.05965813249349594, "step": 220, "valid_targets_mean": 5551.1, "valid_targets_min": 488 }, { "epoch": 1.1426146010186757, "grad_norm": 0.2138271050816797, "learning_rate": 3.952789669213173e-05, "loss": 0.2014, "loss_nan_ranks": 0, "loss_rank_avg": 0.07276459038257599, "step": 225, "valid_targets_mean": 5722.6, "valid_targets_min": 469 }, { "epoch": 1.1680814940577249, "grad_norm": 0.2039562273306318, "learning_rate": 3.9471655130748894e-05, "loss": 0.1894, "loss_nan_ranks": 0, "loss_rank_avg": 0.04895240440964699, "step": 230, "valid_targets_mean": 5333.4, "valid_targets_min": 755 }, { "epoch": 1.1935483870967742, "grad_norm": 0.22459112995344713, "learning_rate": 3.9412294009949716e-05, "loss": 0.1838, "loss_nan_ranks": 0, "loss_rank_avg": 0.057704657316207886, "step": 235, "valid_targets_mean": 4390.5, "valid_targets_min": 508 }, { "epoch": 1.2190152801358234, "grad_norm": 0.22320904545680997, "learning_rate": 3.9349822839996266e-05, "loss": 0.1908, "loss_nan_ranks": 0, "loss_rank_avg": 0.05154874920845032, "step": 240, "valid_targets_mean": 5963.7, "valid_targets_min": 443 }, { "epoch": 1.2444821731748728, "grad_norm": 0.21734471823901905, "learning_rate": 3.928425162941248e-05, "loss": 0.1936, "loss_nan_ranks": 0, "loss_rank_avg": 0.056830763816833496, "step": 245, "valid_targets_mean": 4469.3, "valid_targets_min": 573 }, { "epoch": 1.269949066213922, "grad_norm": 0.22044616993174154, "learning_rate": 3.9215590883380687e-05, "loss": 0.187, "loss_nan_ranks": 0, "loss_rank_avg": 0.04839012026786804, "step": 250, "valid_targets_mean": 3393.2, "valid_targets_min": 463 }, { "epoch": 1.295415959252971, "grad_norm": 0.19907685552815926, "learning_rate": 3.914385160205858e-05, "loss": 0.189, "loss_nan_ranks": 0, "loss_rank_avg": 0.06223241984844208, "step": 255, "valid_targets_mean": 7150.2, "valid_targets_min": 698 }, { "epoch": 1.3208828522920204, "grad_norm": 0.20354883472054425, "learning_rate": 3.9069045278816844e-05, "loss": 0.1797, "loss_nan_ranks": 0, "loss_rank_avg": 0.054079022258520126, "step": 260, "valid_targets_mean": 4868.2, "valid_targets_min": 550 }, { "epoch": 1.3463497453310695, "grad_norm": 0.20367423810310317, "learning_rate": 3.899118389839785e-05, "loss": 0.1756, "loss_nan_ranks": 0, "loss_rank_avg": 0.05707092210650444, "step": 265, "valid_targets_mean": 6063.3, "valid_targets_min": 356 }, { "epoch": 1.371816638370119, "grad_norm": 0.2388271841451862, "learning_rate": 3.8910279934995545e-05, "loss": 0.1889, "loss_nan_ranks": 0, "loss_rank_avg": 0.06782951951026917, "step": 270, "valid_targets_mean": 4558.4, "valid_targets_min": 338 }, { "epoch": 1.397283531409168, "grad_norm": 0.18588514404156858, "learning_rate": 3.8826346350256943e-05, "loss": 0.1793, "loss_nan_ranks": 0, "loss_rank_avg": 0.047811634838581085, "step": 275, "valid_targets_mean": 5856.0, "valid_targets_min": 595 }, { "epoch": 1.4227504244482172, "grad_norm": 0.22939026820955755, "learning_rate": 3.873939659120558e-05, "loss": 0.1849, "loss_nan_ranks": 0, "loss_rank_avg": 0.05040040612220764, "step": 280, "valid_targets_mean": 3587.8, "valid_targets_min": 628 }, { "epoch": 1.4482173174872666, "grad_norm": 0.248321484069037, "learning_rate": 3.864944458808712e-05, "loss": 0.1904, "loss_nan_ranks": 0, "loss_rank_avg": 0.07024089246988297, "step": 285, "valid_targets_mean": 5318.2, "valid_targets_min": 331 }, { "epoch": 1.4736842105263157, "grad_norm": 0.19531700185276812, "learning_rate": 3.855650475213761e-05, "loss": 0.1947, "loss_nan_ranks": 0, "loss_rank_avg": 0.05222306028008461, "step": 290, "valid_targets_mean": 4420.7, "valid_targets_min": 84 }, { "epoch": 1.499151103565365, "grad_norm": 0.21409213927029386, "learning_rate": 3.846059197327466e-05, "loss": 0.191, "loss_nan_ranks": 0, "loss_rank_avg": 0.07738282531499863, "step": 295, "valid_targets_mean": 5504.2, "valid_targets_min": 832 }, { "epoch": 1.5246179966044142, "grad_norm": 0.2727937403888451, "learning_rate": 3.836172161771189e-05, "loss": 0.2016, "loss_nan_ranks": 0, "loss_rank_avg": 0.07301558554172516, "step": 300, "valid_targets_mean": 4146.5, "valid_targets_min": 510 }, { "epoch": 1.5500848896434634, "grad_norm": 0.27591933946670505, "learning_rate": 3.8259909525497134e-05, "loss": 0.184, "loss_nan_ranks": 0, "loss_rank_avg": 0.07577518373727798, "step": 305, "valid_targets_mean": 5413.3, "valid_targets_min": 524 }, { "epoch": 1.5755517826825127, "grad_norm": 0.19788814829959928, "learning_rate": 3.81551720079747e-05, "loss": 0.1787, "loss_nan_ranks": 0, "loss_rank_avg": 0.046708859503269196, "step": 310, "valid_targets_mean": 5426.9, "valid_targets_min": 358 }, { "epoch": 1.601018675721562, "grad_norm": 0.1871223645202415, "learning_rate": 3.8047525845172104e-05, "loss": 0.186, "loss_nan_ranks": 0, "loss_rank_avg": 0.058847323060035706, "step": 315, "valid_targets_mean": 7743.5, "valid_targets_min": 503 }, { "epoch": 1.6264855687606112, "grad_norm": 0.22652771363762644, "learning_rate": 3.7936988283111764e-05, "loss": 0.1935, "loss_nan_ranks": 0, "loss_rank_avg": 0.0625789687037468, "step": 320, "valid_targets_mean": 5233.7, "valid_targets_min": 482 }, { "epoch": 1.6519524617996604, "grad_norm": 0.1827154465048512, "learning_rate": 3.7823577031048e-05, "loss": 0.1704, "loss_nan_ranks": 0, "loss_rank_avg": 0.0553651861846447, "step": 325, "valid_targets_mean": 6211.6, "valid_targets_min": 517 }, { "epoch": 1.6774193548387095, "grad_norm": 0.22217593165464103, "learning_rate": 3.77073102586298e-05, "loss": 0.1836, "loss_nan_ranks": 0, "loss_rank_avg": 0.0627608448266983, "step": 330, "valid_targets_mean": 4552.9, "valid_targets_min": 277 }, { "epoch": 1.7028862478777589, "grad_norm": 0.22270346306202596, "learning_rate": 3.758820659298991e-05, "loss": 0.1759, "loss_nan_ranks": 0, "loss_rank_avg": 0.05124921351671219, "step": 335, "valid_targets_mean": 4196.0, "valid_targets_min": 326 }, { "epoch": 1.7283531409168083, "grad_norm": 0.21105427568746413, "learning_rate": 3.746628511576054e-05, "loss": 0.1921, "loss_nan_ranks": 0, "loss_rank_avg": 0.054783932864665985, "step": 340, "valid_targets_mean": 4825.3, "valid_targets_min": 377 }, { "epoch": 1.7538200339558574, "grad_norm": 0.20812248067096134, "learning_rate": 3.734156536001629e-05, "loss": 0.1864, "loss_nan_ranks": 0, "loss_rank_avg": 0.06411299854516983, "step": 345, "valid_targets_mean": 5575.1, "valid_targets_min": 650 }, { "epoch": 1.7792869269949065, "grad_norm": 0.21613044021886657, "learning_rate": 3.721406730714476e-05, "loss": 0.1777, "loss_nan_ranks": 0, "loss_rank_avg": 0.0606655478477478, "step": 350, "valid_targets_mean": 4606.2, "valid_targets_min": 528 }, { "epoch": 1.804753820033956, "grad_norm": 0.1915904424922132, "learning_rate": 3.7083811383645334e-05, "loss": 0.1692, "loss_nan_ranks": 0, "loss_rank_avg": 0.05197008326649666, "step": 355, "valid_targets_mean": 4822.4, "valid_targets_min": 410 }, { "epoch": 1.830220713073005, "grad_norm": 0.23345796645494707, "learning_rate": 3.695081845785663e-05, "loss": 0.1681, "loss_nan_ranks": 0, "loss_rank_avg": 0.04420693591237068, "step": 360, "valid_targets_mean": 5001.8, "valid_targets_min": 557 }, { "epoch": 1.8556876061120544, "grad_norm": 0.23815063812250117, "learning_rate": 3.6815109836613165e-05, "loss": 0.1717, "loss_nan_ranks": 0, "loss_rank_avg": 0.054919321089982986, "step": 365, "valid_targets_mean": 4973.3, "valid_targets_min": 365 }, { "epoch": 1.8811544991511036, "grad_norm": 0.2468495850612204, "learning_rate": 3.6676707261831836e-05, "loss": 0.1862, "loss_nan_ranks": 0, "loss_rank_avg": 0.06972532719373703, "step": 370, "valid_targets_mean": 4836.3, "valid_targets_min": 355 }, { "epoch": 1.9066213921901527, "grad_norm": 0.1957634434807587, "learning_rate": 3.6535632907028566e-05, "loss": 0.1772, "loss_nan_ranks": 0, "loss_rank_avg": 0.05477672815322876, "step": 375, "valid_targets_mean": 5473.1, "valid_targets_min": 490 }, { "epoch": 1.932088285229202, "grad_norm": 0.22151494928458712, "learning_rate": 3.6391909373765944e-05, "loss": 0.1827, "loss_nan_ranks": 0, "loss_rank_avg": 0.06339579820632935, "step": 380, "valid_targets_mean": 5057.0, "valid_targets_min": 398 }, { "epoch": 1.9575551782682514, "grad_norm": 0.23166366744975464, "learning_rate": 3.6245559688032176e-05, "loss": 0.1798, "loss_nan_ranks": 0, "loss_rank_avg": 0.06725640594959259, "step": 385, "valid_targets_mean": 5779.2, "valid_targets_min": 530 }, { "epoch": 1.9830220713073006, "grad_norm": 0.20716401844004403, "learning_rate": 3.609660729655212e-05, "loss": 0.1786, "loss_nan_ranks": 0, "loss_rank_avg": 0.05772645026445389, "step": 390, "valid_targets_mean": 5280.8, "valid_targets_min": 337 }, { "epoch": 2.00509337860781, "grad_norm": 0.20967807986867762, "learning_rate": 3.5945076063030835e-05, "loss": 0.1776, "loss_nan_ranks": 0, "loss_rank_avg": 0.04859248921275139, "step": 395, "valid_targets_mean": 4550.8, "valid_targets_min": 649 }, { "epoch": 2.030560271646859, "grad_norm": 0.20081072423556587, "learning_rate": 3.579099026433044e-05, "loss": 0.1573, "loss_nan_ranks": 0, "loss_rank_avg": 0.047822657972574234, "step": 400, "valid_targets_mean": 4847.1, "valid_targets_min": 649 }, { "epoch": 2.0560271646859083, "grad_norm": 0.179351052611451, "learning_rate": 3.563437458658064e-05, "loss": 0.179, "loss_nan_ranks": 0, "loss_rank_avg": 0.046782758086919785, "step": 405, "valid_targets_mean": 6659.4, "valid_targets_min": 438 }, { "epoch": 2.0814940577249574, "grad_norm": 0.19813059770710748, "learning_rate": 3.547525412122378e-05, "loss": 0.1613, "loss_nan_ranks": 0, "loss_rank_avg": 0.062020085752010345, "step": 410, "valid_targets_mean": 5190.2, "valid_targets_min": 557 }, { "epoch": 2.1069609507640066, "grad_norm": 0.19987083948716222, "learning_rate": 3.531365436099497e-05, "loss": 0.1629, "loss_nan_ranks": 0, "loss_rank_avg": 0.05440008267760277, "step": 415, "valid_targets_mean": 4833.9, "valid_targets_min": 287 }, { "epoch": 2.132427843803056, "grad_norm": 0.22185874964663296, "learning_rate": 3.5149601195837815e-05, "loss": 0.1721, "loss_nan_ranks": 0, "loss_rank_avg": 0.05104801058769226, "step": 420, "valid_targets_mean": 5238.0, "valid_targets_min": 175 }, { "epoch": 2.1578947368421053, "grad_norm": 0.20211384951819653, "learning_rate": 3.498312090875667e-05, "loss": 0.1619, "loss_nan_ranks": 0, "loss_rank_avg": 0.04737398028373718, "step": 425, "valid_targets_mean": 5755.9, "valid_targets_min": 561 }, { "epoch": 2.1833616298811545, "grad_norm": 0.18791093618983779, "learning_rate": 3.481424017160574e-05, "loss": 0.1626, "loss_nan_ranks": 0, "loss_rank_avg": 0.05240146815776825, "step": 430, "valid_targets_mean": 5847.5, "valid_targets_min": 360 }, { "epoch": 2.2088285229202036, "grad_norm": 0.18568470911214985, "learning_rate": 3.464298604081607e-05, "loss": 0.1725, "loss_nan_ranks": 0, "loss_rank_avg": 0.04802415519952774, "step": 435, "valid_targets_mean": 5056.2, "valid_targets_min": 298 }, { "epoch": 2.234295415959253, "grad_norm": 0.22781637961543907, "learning_rate": 3.4469385953060715e-05, "loss": 0.1793, "loss_nan_ranks": 0, "loss_rank_avg": 0.04768109694123268, "step": 440, "valid_targets_mean": 4325.7, "valid_targets_min": 120 }, { "epoch": 2.2597623089983023, "grad_norm": 0.2397152235569167, "learning_rate": 3.429346772085923e-05, "loss": 0.1667, "loss_nan_ranks": 0, "loss_rank_avg": 0.05326157063245773, "step": 445, "valid_targets_mean": 3865.7, "valid_targets_min": 220 }, { "epoch": 2.2852292020373515, "grad_norm": 0.20489042679339467, "learning_rate": 3.4115259528121685e-05, "loss": 0.1708, "loss_nan_ranks": 0, "loss_rank_avg": 0.057636670768260956, "step": 450, "valid_targets_mean": 6207.6, "valid_targets_min": 530 }, { "epoch": 2.3106960950764006, "grad_norm": 0.21677251280545407, "learning_rate": 3.3934789925633426e-05, "loss": 0.1723, "loss_nan_ranks": 0, "loss_rank_avg": 0.055631302297115326, "step": 455, "valid_targets_mean": 4781.9, "valid_targets_min": 544 }, { "epoch": 2.3361629881154498, "grad_norm": 0.197104867431473, "learning_rate": 3.37520878264809e-05, "loss": 0.1703, "loss_nan_ranks": 0, "loss_rank_avg": 0.05575048178434372, "step": 460, "valid_targets_mean": 3780.2, "valid_targets_min": 440 }, { "epoch": 2.3616298811544993, "grad_norm": 0.19119683771865437, "learning_rate": 3.356718250141945e-05, "loss": 0.1763, "loss_nan_ranks": 0, "loss_rank_avg": 0.04281400889158249, "step": 465, "valid_targets_mean": 4780.9, "valid_targets_min": 648 }, { "epoch": 2.3870967741935485, "grad_norm": 0.24228470287917536, "learning_rate": 3.33801035741839e-05, "loss": 0.1786, "loss_nan_ranks": 0, "loss_rank_avg": 0.06536433100700378, "step": 470, "valid_targets_mean": 6222.0, "valid_targets_min": 611 }, { "epoch": 2.4125636672325976, "grad_norm": 0.22050849754803323, "learning_rate": 3.3190881016742476e-05, "loss": 0.1771, "loss_nan_ranks": 0, "loss_rank_avg": 0.06315451860427856, "step": 475, "valid_targets_mean": 5316.1, "valid_targets_min": 490 }, { "epoch": 2.4380305602716468, "grad_norm": 0.2382957140570959, "learning_rate": 3.2999545144495037e-05, "loss": 0.179, "loss_nan_ranks": 0, "loss_rank_avg": 0.07954886555671692, "step": 480, "valid_targets_mean": 4862.7, "valid_targets_min": 679 }, { "epoch": 2.463497453310696, "grad_norm": 0.23609092463828943, "learning_rate": 3.280612661141615e-05, "loss": 0.1848, "loss_nan_ranks": 0, "loss_rank_avg": 0.05267144739627838, "step": 485, "valid_targets_mean": 5179.8, "valid_targets_min": 666 }, { "epoch": 2.4889643463497455, "grad_norm": 0.18759948079571875, "learning_rate": 3.2610656405144155e-05, "loss": 0.163, "loss_nan_ranks": 0, "loss_rank_avg": 0.06271520256996155, "step": 490, "valid_targets_mean": 6420.2, "valid_targets_min": 545 }, { "epoch": 2.5144312393887946, "grad_norm": 0.20855216144633196, "learning_rate": 3.241316584201647e-05, "loss": 0.1679, "loss_nan_ranks": 0, "loss_rank_avg": 0.06124936416745186, "step": 495, "valid_targets_mean": 5141.9, "valid_targets_min": 502 }, { "epoch": 2.539898132427844, "grad_norm": 0.21246755645053164, "learning_rate": 3.2213686562052474e-05, "loss": 0.1772, "loss_nan_ranks": 0, "loss_rank_avg": 0.0753287523984909, "step": 500, "valid_targets_mean": 5840.4, "valid_targets_min": 338 }, { "epoch": 2.565365025466893, "grad_norm": 0.2115360085430234, "learning_rate": 3.201225052388446e-05, "loss": 0.1655, "loss_nan_ranks": 0, "loss_rank_avg": 0.0660819411277771, "step": 505, "valid_targets_mean": 5808.2, "valid_targets_min": 332 }, { "epoch": 2.590831918505942, "grad_norm": 0.18198237656841157, "learning_rate": 3.1808889999637496e-05, "loss": 0.1792, "loss_nan_ranks": 0, "loss_rank_avg": 0.042239751666784286, "step": 510, "valid_targets_mean": 5342.5, "valid_targets_min": 756 }, { "epoch": 2.6162988115449917, "grad_norm": 0.21647629339184596, "learning_rate": 3.16036375697591e-05, "loss": 0.1689, "loss_nan_ranks": 0, "loss_rank_avg": 0.06771990656852722, "step": 515, "valid_targets_mean": 5934.7, "valid_targets_min": 666 }, { "epoch": 2.641765704584041, "grad_norm": 0.20787346398933795, "learning_rate": 3.1396526117799557e-05, "loss": 0.1708, "loss_nan_ranks": 0, "loss_rank_avg": 0.05229032784700394, "step": 520, "valid_targets_mean": 4764.7, "valid_targets_min": 182 }, { "epoch": 2.66723259762309, "grad_norm": 0.22738252585567634, "learning_rate": 3.1187588825143596e-05, "loss": 0.1764, "loss_nan_ranks": 0, "loss_rank_avg": 0.06814859062433243, "step": 525, "valid_targets_mean": 4228.3, "valid_targets_min": 354 }, { "epoch": 2.692699490662139, "grad_norm": 0.18956065067088942, "learning_rate": 3.097685916569439e-05, "loss": 0.1708, "loss_nan_ranks": 0, "loss_rank_avg": 0.05186791345477104, "step": 530, "valid_targets_mean": 5616.5, "valid_targets_min": 502 }, { "epoch": 2.7181663837011882, "grad_norm": 0.1673599198641445, "learning_rate": 3.076437090051073e-05, "loss": 0.1626, "loss_nan_ranks": 0, "loss_rank_avg": 0.03805826976895332, "step": 535, "valid_targets_mean": 5770.4, "valid_targets_min": 472 }, { "epoch": 2.743633276740238, "grad_norm": 0.2047431050311338, "learning_rate": 3.0550158072398125e-05, "loss": 0.1665, "loss_nan_ranks": 0, "loss_rank_avg": 0.056432489305734634, "step": 540, "valid_targets_mean": 5859.8, "valid_targets_min": 681 }, { "epoch": 2.769100169779287, "grad_norm": 0.19152278114020901, "learning_rate": 3.0334255000454795e-05, "loss": 0.166, "loss_nan_ranks": 0, "loss_rank_avg": 0.04708458483219147, "step": 545, "valid_targets_mean": 5319.8, "valid_targets_min": 422 }, { "epoch": 2.794567062818336, "grad_norm": 0.18664751337731664, "learning_rate": 3.011669627457341e-05, "loss": 0.1636, "loss_nan_ranks": 0, "loss_rank_avg": 0.05980811268091202, "step": 550, "valid_targets_mean": 7653.7, "valid_targets_min": 746 }, { "epoch": 2.8200339558573853, "grad_norm": 0.1998549268481176, "learning_rate": 2.989751674989943e-05, "loss": 0.1667, "loss_nan_ranks": 0, "loss_rank_avg": 0.05653294920921326, "step": 555, "valid_targets_mean": 4836.5, "valid_targets_min": 388 }, { "epoch": 2.8455008488964344, "grad_norm": 0.21026299154262526, "learning_rate": 2.967675154124696e-05, "loss": 0.1599, "loss_nan_ranks": 0, "loss_rank_avg": 0.04772374406456947, "step": 560, "valid_targets_mean": 4973.8, "valid_targets_min": 120 }, { "epoch": 2.870967741935484, "grad_norm": 0.17304228310969938, "learning_rate": 2.945443601747297e-05, "loss": 0.1702, "loss_nan_ranks": 0, "loss_rank_avg": 0.05010491609573364, "step": 565, "valid_targets_mean": 6293.7, "valid_targets_min": 410 }, { "epoch": 2.896434634974533, "grad_norm": 0.20564622695902693, "learning_rate": 2.923060579581087e-05, "loss": 0.1734, "loss_nan_ranks": 0, "loss_rank_avg": 0.06477466225624084, "step": 570, "valid_targets_mean": 5248.3, "valid_targets_min": 507 }, { "epoch": 2.9219015280135823, "grad_norm": 0.21682003672674044, "learning_rate": 2.9005296736164246e-05, "loss": 0.1653, "loss_nan_ranks": 0, "loss_rank_avg": 0.04603603109717369, "step": 575, "valid_targets_mean": 3767.2, "valid_targets_min": 278 }, { "epoch": 2.9473684210526314, "grad_norm": 0.2036828766728075, "learning_rate": 2.8778544935361742e-05, "loss": 0.1627, "loss_nan_ranks": 0, "loss_rank_avg": 0.05513259023427963, "step": 580, "valid_targets_mean": 5203.5, "valid_targets_min": 404 }, { "epoch": 2.9728353140916806, "grad_norm": 0.1849428246148903, "learning_rate": 2.855038672137396e-05, "loss": 0.1641, "loss_nan_ranks": 0, "loss_rank_avg": 0.05258062481880188, "step": 585, "valid_targets_mean": 4857.2, "valid_targets_min": 596 }, { "epoch": 2.99830220713073, "grad_norm": 0.21395815338632157, "learning_rate": 2.8320858647493374e-05, "loss": 0.158, "loss_nan_ranks": 0, "loss_rank_avg": 0.06454746425151825, "step": 590, "valid_targets_mean": 4870.2, "valid_targets_min": 378 }, { "epoch": 3.0203735144312396, "grad_norm": 0.21179059991069732, "learning_rate": 2.8089997486478102e-05, "loss": 0.1619, "loss_nan_ranks": 0, "loss_rank_avg": 0.057160839438438416, "step": 595, "valid_targets_mean": 5278.4, "valid_targets_min": 633 }, { "epoch": 3.0458404074702887, "grad_norm": 0.2098482991745256, "learning_rate": 2.785784022466053e-05, "loss": 0.1598, "loss_nan_ranks": 0, "loss_rank_avg": 0.0546451210975647, "step": 600, "valid_targets_mean": 4272.8, "valid_targets_min": 584 }, { "epoch": 3.071307300509338, "grad_norm": 0.19354015406908995, "learning_rate": 2.7624424056021707e-05, "loss": 0.158, "loss_nan_ranks": 0, "loss_rank_avg": 0.048149678856134415, "step": 605, "valid_targets_mean": 4952.3, "valid_targets_min": 255 }, { "epoch": 3.096774193548387, "grad_norm": 0.21754581543034396, "learning_rate": 2.738978637623252e-05, "loss": 0.1726, "loss_nan_ranks": 0, "loss_rank_avg": 0.06529363989830017, "step": 610, "valid_targets_mean": 4998.4, "valid_targets_min": 689 }, { "epoch": 3.122241086587436, "grad_norm": 0.20888242176073119, "learning_rate": 2.7153964776662517e-05, "loss": 0.1671, "loss_nan_ranks": 0, "loss_rank_avg": 0.05744210258126259, "step": 615, "valid_targets_mean": 4900.2, "valid_targets_min": 475 }, { "epoch": 3.1477079796264857, "grad_norm": 0.19295107145038598, "learning_rate": 2.691699703835733e-05, "loss": 0.1692, "loss_nan_ranks": 0, "loss_rank_avg": 0.05718826502561569, "step": 620, "valid_targets_mean": 5434.8, "valid_targets_min": 588 }, { "epoch": 3.173174872665535, "grad_norm": 0.2137305552260263, "learning_rate": 2.6678921125985845e-05, "loss": 0.159, "loss_nan_ranks": 0, "loss_rank_avg": 0.072231724858284, "step": 625, "valid_targets_mean": 6674.1, "valid_targets_min": 550 }, { "epoch": 3.198641765704584, "grad_norm": 0.2514949379597044, "learning_rate": 2.6439775181757806e-05, "loss": 0.1694, "loss_nan_ranks": 0, "loss_rank_avg": 0.06704398989677429, "step": 630, "valid_targets_mean": 4067.2, "valid_targets_min": 499 }, { "epoch": 3.224108658743633, "grad_norm": 0.21911387821663028, "learning_rate": 2.6199597519313092e-05, "loss": 0.1651, "loss_nan_ranks": 0, "loss_rank_avg": 0.04223302751779556, "step": 635, "valid_targets_mean": 3504.5, "valid_targets_min": 503 }, { "epoch": 3.2495755517826823, "grad_norm": 0.21812279060693707, "learning_rate": 2.5958426617583417e-05, "loss": 0.1601, "loss_nan_ranks": 0, "loss_rank_avg": 0.06164269521832466, "step": 640, "valid_targets_mean": 4890.7, "valid_targets_min": 411 }, { "epoch": 3.275042444821732, "grad_norm": 0.21420332843184514, "learning_rate": 2.5716301114627663e-05, "loss": 0.1622, "loss_nan_ranks": 0, "loss_rank_avg": 0.0604836605489254, "step": 645, "valid_targets_mean": 4715.1, "valid_targets_min": 245 }, { "epoch": 3.300509337860781, "grad_norm": 0.1945183899864475, "learning_rate": 2.5473259801441663e-05, "loss": 0.161, "loss_nan_ranks": 0, "loss_rank_avg": 0.03996345400810242, "step": 650, "valid_targets_mean": 3867.2, "valid_targets_min": 422 }, { "epoch": 3.32597623089983, "grad_norm": 0.1838149996271793, "learning_rate": 2.5229341615743423e-05, "loss": 0.1538, "loss_nan_ranks": 0, "loss_rank_avg": 0.046856336295604706, "step": 655, "valid_targets_mean": 4675.5, "valid_targets_min": 533 }, { "epoch": 3.3514431239388793, "grad_norm": 0.19620472692592283, "learning_rate": 2.4984585635734995e-05, "loss": 0.1499, "loss_nan_ranks": 0, "loss_rank_avg": 0.04475293681025505, "step": 660, "valid_targets_mean": 3989.5, "valid_targets_min": 601 }, { "epoch": 3.376910016977929, "grad_norm": 0.19236934985032755, "learning_rate": 2.4739031073841652e-05, "loss": 0.1584, "loss_nan_ranks": 0, "loss_rank_avg": 0.0530376099050045, "step": 665, "valid_targets_mean": 3726.4, "valid_targets_min": 209 }, { "epoch": 3.402376910016978, "grad_norm": 0.2621809960660758, "learning_rate": 2.4492717270429736e-05, "loss": 0.1616, "loss_nan_ranks": 0, "loss_rank_avg": 0.06869988143444061, "step": 670, "valid_targets_mean": 6037.5, "valid_targets_min": 714 }, { "epoch": 3.427843803056027, "grad_norm": 0.1777064909782656, "learning_rate": 2.424568368750385e-05, "loss": 0.1597, "loss_nan_ranks": 0, "loss_rank_avg": 0.05851980298757553, "step": 675, "valid_targets_mean": 5295.7, "valid_targets_min": 518 }, { "epoch": 3.4533106960950763, "grad_norm": 0.1949390809455965, "learning_rate": 2.3997969902384722e-05, "loss": 0.1618, "loss_nan_ranks": 0, "loss_rank_avg": 0.0510847307741642, "step": 680, "valid_targets_mean": 4908.3, "valid_targets_min": 573 }, { "epoch": 3.4787775891341255, "grad_norm": 0.17456309455989005, "learning_rate": 2.3749615601368434e-05, "loss": 0.1599, "loss_nan_ranks": 0, "loss_rank_avg": 0.0578172393143177, "step": 685, "valid_targets_mean": 6196.7, "valid_targets_min": 459 }, { "epoch": 3.504244482173175, "grad_norm": 0.18989495891248262, "learning_rate": 2.3500660573368305e-05, "loss": 0.1551, "loss_nan_ranks": 0, "loss_rank_avg": 0.05929127335548401, "step": 690, "valid_targets_mean": 5045.1, "valid_targets_min": 483 }, { "epoch": 3.5297113752122242, "grad_norm": 0.22953903468452172, "learning_rate": 2.3251144703540313e-05, "loss": 0.1689, "loss_nan_ranks": 0, "loss_rank_avg": 0.06596667319536209, "step": 695, "valid_targets_mean": 4899.3, "valid_targets_min": 636 }, { "epoch": 3.5551782682512734, "grad_norm": 0.2140483198993044, "learning_rate": 2.3001107966893054e-05, "loss": 0.1597, "loss_nan_ranks": 0, "loss_rank_avg": 0.05482468381524086, "step": 700, "valid_targets_mean": 4562.0, "valid_targets_min": 589 }, { "epoch": 3.5806451612903225, "grad_norm": 0.17307959974608458, "learning_rate": 2.2750590421883348e-05, "loss": 0.157, "loss_nan_ranks": 0, "loss_rank_avg": 0.047615014016628265, "step": 705, "valid_targets_mean": 5728.4, "valid_targets_min": 435 }, { "epoch": 3.6061120543293717, "grad_norm": 0.1958982806804525, "learning_rate": 2.2499632203998454e-05, "loss": 0.1527, "loss_nan_ranks": 0, "loss_rank_avg": 0.04757123440504074, "step": 710, "valid_targets_mean": 4777.9, "valid_targets_min": 406 }, { "epoch": 3.6315789473684212, "grad_norm": 0.19567641358372231, "learning_rate": 2.224827351932596e-05, "loss": 0.1572, "loss_nan_ranks": 0, "loss_rank_avg": 0.04391469061374664, "step": 715, "valid_targets_mean": 4801.1, "valid_targets_min": 291 }, { "epoch": 3.6570458404074704, "grad_norm": 0.17339935956933378, "learning_rate": 2.1996554638112362e-05, "loss": 0.158, "loss_nan_ranks": 0, "loss_rank_avg": 0.05453699454665184, "step": 720, "valid_targets_mean": 5255.2, "valid_targets_min": 1151 }, { "epoch": 3.6825127334465195, "grad_norm": 0.20274261173391694, "learning_rate": 2.174451588831134e-05, "loss": 0.1614, "loss_nan_ranks": 0, "loss_rank_avg": 0.053828105330467224, "step": 725, "valid_targets_mean": 5086.0, "valid_targets_min": 711 }, { "epoch": 3.7079796264855687, "grad_norm": 0.19162539636147466, "learning_rate": 2.1492197649122794e-05, "loss": 0.158, "loss_nan_ranks": 0, "loss_rank_avg": 0.054310865700244904, "step": 730, "valid_targets_mean": 5213.3, "valid_targets_min": 567 }, { "epoch": 3.733446519524618, "grad_norm": 0.2072759818666121, "learning_rate": 2.1239640344523735e-05, "loss": 0.1496, "loss_nan_ranks": 0, "loss_rank_avg": 0.05167793482542038, "step": 735, "valid_targets_mean": 5258.2, "valid_targets_min": 394 }, { "epoch": 3.7589134125636674, "grad_norm": 0.18233070553104944, "learning_rate": 2.0986884436791875e-05, "loss": 0.1552, "loss_nan_ranks": 0, "loss_rank_avg": 0.04380776733160019, "step": 740, "valid_targets_mean": 6251.1, "valid_targets_min": 534 }, { "epoch": 3.7843803056027165, "grad_norm": 0.2008779170543001, "learning_rate": 2.073397042002322e-05, "loss": 0.1599, "loss_nan_ranks": 0, "loss_rank_avg": 0.046290189027786255, "step": 745, "valid_targets_mean": 4248.8, "valid_targets_min": 299 }, { "epoch": 3.8098471986417657, "grad_norm": 0.206845021389941, "learning_rate": 2.0480938813644443e-05, "loss": 0.1637, "loss_nan_ranks": 0, "loss_rank_avg": 0.06837663054466248, "step": 750, "valid_targets_mean": 5167.8, "valid_targets_min": 458 }, { "epoch": 3.835314091680815, "grad_norm": 0.1887022297446418, "learning_rate": 2.022783015592132e-05, "loss": 0.1581, "loss_nan_ranks": 0, "loss_rank_avg": 0.05138999596238136, "step": 755, "valid_targets_mean": 3476.4, "valid_targets_min": 550 }, { "epoch": 3.860780984719864, "grad_norm": 0.2053982078217207, "learning_rate": 1.9974684997463986e-05, "loss": 0.1661, "loss_nan_ranks": 0, "loss_rank_avg": 0.06539995223283768, "step": 760, "valid_targets_mean": 4786.1, "valid_targets_min": 363 }, { "epoch": 3.8862478777589136, "grad_norm": 0.23624597205106068, "learning_rate": 1.9721543894730428e-05, "loss": 0.1633, "loss_nan_ranks": 0, "loss_rank_avg": 0.05308888107538223, "step": 765, "valid_targets_mean": 3819.8, "valid_targets_min": 555 }, { "epoch": 3.9117147707979627, "grad_norm": 0.17988017064853987, "learning_rate": 1.946844740352883e-05, "loss": 0.1536, "loss_nan_ranks": 0, "loss_rank_avg": 0.05310368537902832, "step": 770, "valid_targets_mean": 5330.0, "valid_targets_min": 355 }, { "epoch": 3.937181663837012, "grad_norm": 0.17873166166831936, "learning_rate": 1.9215436072520167e-05, "loss": 0.1487, "loss_nan_ranks": 0, "loss_rank_avg": 0.05123206973075867, "step": 775, "valid_targets_mean": 7128.5, "valid_targets_min": 813 }, { "epoch": 3.962648556876061, "grad_norm": 0.17809523228411328, "learning_rate": 1.8962550436721867e-05, "loss": 0.1537, "loss_nan_ranks": 0, "loss_rank_avg": 0.04764179885387421, "step": 780, "valid_targets_mean": 5059.8, "valid_targets_min": 510 }, { "epoch": 3.98811544991511, "grad_norm": 0.18059771071561703, "learning_rate": 1.8709831011013678e-05, "loss": 0.1605, "loss_nan_ranks": 0, "loss_rank_avg": 0.04299032688140869, "step": 785, "valid_targets_mean": 4471.0, "valid_targets_min": 573 }, { "epoch": 4.01018675721562, "grad_norm": 0.19087550011502016, "learning_rate": 1.8457318283646814e-05, "loss": 0.1609, "loss_nan_ranks": 0, "loss_rank_avg": 0.04256012290716171, "step": 790, "valid_targets_mean": 4504.0, "valid_targets_min": 425 }, { "epoch": 4.035653650254669, "grad_norm": 0.2012261960566325, "learning_rate": 1.8205052709757263e-05, "loss": 0.1638, "loss_nan_ranks": 0, "loss_rank_avg": 0.05523894727230072, "step": 795, "valid_targets_mean": 5278.5, "valid_targets_min": 392 }, { "epoch": 4.061120543293718, "grad_norm": 0.19172876392679508, "learning_rate": 1.79530747048845e-05, "loss": 0.1511, "loss_nan_ranks": 0, "loss_rank_avg": 0.04358460009098053, "step": 800, "valid_targets_mean": 3897.0, "valid_targets_min": 568 }, { "epoch": 4.086587436332767, "grad_norm": 0.2014458476082743, "learning_rate": 1.7701424638496473e-05, "loss": 0.1537, "loss_nan_ranks": 0, "loss_rank_avg": 0.04093575105071068, "step": 805, "valid_targets_mean": 4177.9, "valid_targets_min": 332 }, { "epoch": 4.112054329371817, "grad_norm": 0.20826490372365644, "learning_rate": 1.7450142827522027e-05, "loss": 0.1575, "loss_nan_ranks": 0, "loss_rank_avg": 0.03707022964954376, "step": 810, "valid_targets_mean": 4739.0, "valid_targets_min": 467 }, { "epoch": 4.137521222410866, "grad_norm": 0.20599780475309687, "learning_rate": 1.719926952989169e-05, "loss": 0.1525, "loss_nan_ranks": 0, "loss_rank_avg": 0.06674011051654816, "step": 815, "valid_targets_mean": 5112.4, "valid_targets_min": 645 }, { "epoch": 4.162988115449915, "grad_norm": 0.2135972888739578, "learning_rate": 1.694884493808795e-05, "loss": 0.1493, "loss_nan_ranks": 0, "loss_rank_avg": 0.05046378821134567, "step": 820, "valid_targets_mean": 4885.5, "valid_targets_min": 343 }, { "epoch": 4.1884550084889645, "grad_norm": 0.19507138430628793, "learning_rate": 1.6698909172706e-05, "loss": 0.1443, "loss_nan_ranks": 0, "loss_rank_avg": 0.0384068563580513, "step": 825, "valid_targets_mean": 4854.2, "valid_targets_min": 407 }, { "epoch": 4.213921901528013, "grad_norm": 0.19466510533758044, "learning_rate": 1.644950227602605e-05, "loss": 0.1506, "loss_nan_ranks": 0, "loss_rank_avg": 0.0502655915915966, "step": 830, "valid_targets_mean": 4918.0, "valid_targets_min": 362 }, { "epoch": 4.239388794567063, "grad_norm": 0.20892683791486863, "learning_rate": 1.620066420559805e-05, "loss": 0.1623, "loss_nan_ranks": 0, "loss_rank_avg": 0.05903159826993942, "step": 835, "valid_targets_mean": 4497.6, "valid_targets_min": 337 }, { "epoch": 4.264855687606112, "grad_norm": 0.2106424130972343, "learning_rate": 1.5952434827840187e-05, "loss": 0.1531, "loss_nan_ranks": 0, "loss_rank_avg": 0.06574611365795135, "step": 840, "valid_targets_mean": 5347.1, "valid_targets_min": 560 }, { "epoch": 4.290322580645161, "grad_norm": 0.239733963665783, "learning_rate": 1.5704853911651777e-05, "loss": 0.1737, "loss_nan_ranks": 0, "loss_rank_avg": 0.0693153589963913, "step": 845, "valid_targets_mean": 4404.1, "valid_targets_min": 508 }, { "epoch": 4.315789473684211, "grad_norm": 0.17730261138280262, "learning_rate": 1.545796112204196e-05, "loss": 0.1586, "loss_nan_ranks": 0, "loss_rank_avg": 0.046191245317459106, "step": 850, "valid_targets_mean": 6100.7, "valid_targets_min": 704 }, { "epoch": 4.341256366723259, "grad_norm": 0.21009022637541613, "learning_rate": 1.5211796013774893e-05, "loss": 0.1481, "loss_nan_ranks": 0, "loss_rank_avg": 0.06376850605010986, "step": 855, "valid_targets_mean": 5445.8, "valid_targets_min": 494 }, { "epoch": 4.366723259762309, "grad_norm": 0.19147138861623728, "learning_rate": 1.4966398025032706e-05, "loss": 0.1588, "loss_nan_ranks": 0, "loss_rank_avg": 0.040558382868766785, "step": 860, "valid_targets_mean": 5572.8, "valid_targets_min": 643 }, { "epoch": 4.3921901528013585, "grad_norm": 0.25489222062294176, "learning_rate": 1.4721806471097104e-05, "loss": 0.1535, "loss_nan_ranks": 0, "loss_rank_avg": 0.05043104663491249, "step": 865, "valid_targets_mean": 5482.2, "valid_targets_min": 419 }, { "epoch": 4.417657045840407, "grad_norm": 0.21423497052399765, "learning_rate": 1.4478060538050622e-05, "loss": 0.1661, "loss_nan_ranks": 0, "loss_rank_avg": 0.060023024678230286, "step": 870, "valid_targets_mean": 4986.4, "valid_targets_min": 464 }, { "epoch": 4.443123938879457, "grad_norm": 0.18747238981681072, "learning_rate": 1.4235199276498652e-05, "loss": 0.1531, "loss_nan_ranks": 0, "loss_rank_avg": 0.05143500119447708, "step": 875, "valid_targets_mean": 7035.5, "valid_targets_min": 328 }, { "epoch": 4.468590831918506, "grad_norm": 0.1894590468808143, "learning_rate": 1.3993261595313094e-05, "loss": 0.1536, "loss_nan_ranks": 0, "loss_rank_avg": 0.05919076129794121, "step": 880, "valid_targets_mean": 5586.1, "valid_targets_min": 660 }, { "epoch": 4.494057724957555, "grad_norm": 0.2076964513961011, "learning_rate": 1.3752286255398794e-05, "loss": 0.1539, "loss_nan_ranks": 0, "loss_rank_avg": 0.05181092768907547, "step": 885, "valid_targets_mean": 5430.3, "valid_targets_min": 667 }, { "epoch": 4.519524617996605, "grad_norm": 0.19715103216822133, "learning_rate": 1.3512311863483606e-05, "loss": 0.153, "loss_nan_ranks": 0, "loss_rank_avg": 0.0564362034201622, "step": 890, "valid_targets_mean": 5937.5, "valid_targets_min": 517 }, { "epoch": 4.544991511035653, "grad_norm": 0.21836120644331902, "learning_rate": 1.3273376865933236e-05, "loss": 0.1542, "loss_nan_ranks": 0, "loss_rank_avg": 0.06101875752210617, "step": 895, "valid_targets_mean": 4459.8, "valid_targets_min": 528 }, { "epoch": 4.570458404074703, "grad_norm": 0.19979008110611626, "learning_rate": 1.303551954259172e-05, "loss": 0.144, "loss_nan_ranks": 0, "loss_rank_avg": 0.03848603367805481, "step": 900, "valid_targets_mean": 4476.2, "valid_targets_min": 346 }, { "epoch": 4.595925297113752, "grad_norm": 0.20657333412135, "learning_rate": 1.2798778000648602e-05, "loss": 0.156, "loss_nan_ranks": 0, "loss_rank_avg": 0.049523286521434784, "step": 905, "valid_targets_mean": 4977.6, "valid_targets_min": 456 }, { "epoch": 4.621392190152801, "grad_norm": 0.22194409821177138, "learning_rate": 1.2563190168533766e-05, "loss": 0.1562, "loss_nan_ranks": 0, "loss_rank_avg": 0.0513107106089592, "step": 910, "valid_targets_mean": 4753.3, "valid_targets_min": 411 }, { "epoch": 4.646859083191851, "grad_norm": 0.20073415825748206, "learning_rate": 1.2328793789840926e-05, "loss": 0.1497, "loss_nan_ranks": 0, "loss_rank_avg": 0.05154266953468323, "step": 915, "valid_targets_mean": 4509.1, "valid_targets_min": 667 }, { "epoch": 4.6723259762308995, "grad_norm": 0.20363480305430207, "learning_rate": 1.2095626417280686e-05, "loss": 0.1568, "loss_nan_ranks": 0, "loss_rank_avg": 0.05040046572685242, "step": 920, "valid_targets_mean": 4495.6, "valid_targets_min": 439 }, { "epoch": 4.697792869269949, "grad_norm": 0.15788485491127127, "learning_rate": 1.1863725406664241e-05, "loss": 0.1536, "loss_nan_ranks": 0, "loss_rank_avg": 0.04818938672542572, "step": 925, "valid_targets_mean": 6288.2, "valid_targets_min": 475 }, { "epoch": 4.723259762308999, "grad_norm": 0.18869305593095, "learning_rate": 1.163312791091858e-05, "loss": 0.1497, "loss_nan_ranks": 0, "loss_rank_avg": 0.04854920506477356, "step": 930, "valid_targets_mean": 5647.6, "valid_targets_min": 354 }, { "epoch": 4.748726655348047, "grad_norm": 0.16443579067469954, "learning_rate": 1.1403870874134192e-05, "loss": 0.146, "loss_nan_ranks": 0, "loss_rank_avg": 0.046788256615400314, "step": 935, "valid_targets_mean": 5484.6, "valid_targets_min": 332 }, { "epoch": 4.774193548387097, "grad_norm": 0.20763711069277233, "learning_rate": 1.1175991025646267e-05, "loss": 0.1487, "loss_nan_ranks": 0, "loss_rank_avg": 0.05495909973978996, "step": 940, "valid_targets_mean": 4970.3, "valid_targets_min": 603 }, { "epoch": 4.799660441426146, "grad_norm": 0.17015784881873067, "learning_rate": 1.0949524874150246e-05, "loss": 0.156, "loss_nan_ranks": 0, "loss_rank_avg": 0.04452647641301155, "step": 945, "valid_targets_mean": 4811.3, "valid_targets_min": 616 }, { "epoch": 4.825127334465195, "grad_norm": 0.18869199446083515, "learning_rate": 1.0724508701852807e-05, "loss": 0.1488, "loss_nan_ranks": 0, "loss_rank_avg": 0.047742199152708054, "step": 950, "valid_targets_mean": 4139.6, "valid_targets_min": 496 }, { "epoch": 4.850594227504245, "grad_norm": 0.19979140112460086, "learning_rate": 1.0500978558659001e-05, "loss": 0.1615, "loss_nan_ranks": 0, "loss_rank_avg": 0.06486503779888153, "step": 955, "valid_targets_mean": 5047.7, "valid_targets_min": 415 }, { "epoch": 4.8760611205432935, "grad_norm": 0.1798361541913208, "learning_rate": 1.0278970256396764e-05, "loss": 0.1449, "loss_nan_ranks": 0, "loss_rank_avg": 0.04292941093444824, "step": 960, "valid_targets_mean": 3786.9, "valid_targets_min": 84 }, { "epoch": 4.901528013582343, "grad_norm": 0.18933423790659826, "learning_rate": 1.0058519363079464e-05, "loss": 0.1476, "loss_nan_ranks": 0, "loss_rank_avg": 0.05371493473649025, "step": 965, "valid_targets_mean": 5490.7, "valid_targets_min": 415 }, { "epoch": 4.926994906621392, "grad_norm": 0.16566669416409652, "learning_rate": 9.839661197207527e-06, "loss": 0.1509, "loss_nan_ranks": 0, "loss_rank_avg": 0.03648955747485161, "step": 970, "valid_targets_mean": 4383.9, "valid_targets_min": 394 }, { "epoch": 4.952461799660441, "grad_norm": 0.22033674195119468, "learning_rate": 9.622430822110063e-06, "loss": 0.1455, "loss_nan_ranks": 0, "loss_rank_avg": 0.060049720108509064, "step": 975, "valid_targets_mean": 4436.6, "valid_targets_min": 490 }, { "epoch": 4.977928692699491, "grad_norm": 0.20840871296607372, "learning_rate": 9.40686304032735e-06, "loss": 0.1578, "loss_nan_ranks": 0, "loss_rank_avg": 0.06423541158437729, "step": 980, "valid_targets_mean": 5278.7, "valid_targets_min": 673 }, { "epoch": 5.0, "grad_norm": 0.3112288683683397, "learning_rate": 9.19299238803515e-06, "loss": 0.1426, "loss_nan_ranks": 0, "loss_rank_avg": 0.12402482330799103, "step": 985, "valid_targets_mean": 4365.9, "valid_targets_min": 385 }, { "epoch": 5.02546689303905, "grad_norm": 0.21198712020884392, "learning_rate": 8.980853129511584e-06, "loss": 0.152, "loss_nan_ranks": 0, "loss_rank_avg": 0.045234695076942444, "step": 990, "valid_targets_mean": 5211.6, "valid_targets_min": 564 }, { "epoch": 5.050933786078098, "grad_norm": 0.1832814377777405, "learning_rate": 8.770479251647708e-06, "loss": 0.1486, "loss_nan_ranks": 0, "loss_rank_avg": 0.04300159588456154, "step": 995, "valid_targets_mean": 4824.9, "valid_targets_min": 649 }, { "epoch": 5.076400679117148, "grad_norm": 0.18323969987536556, "learning_rate": 8.561904458502424e-06, "loss": 0.1515, "loss_nan_ranks": 0, "loss_rank_avg": 0.05170092731714249, "step": 1000, "valid_targets_mean": 5709.0, "valid_targets_min": 657 }, { "epoch": 5.101867572156197, "grad_norm": 0.17438645023828483, "learning_rate": 8.355162165902785e-06, "loss": 0.143, "loss_nan_ranks": 0, "loss_rank_avg": 0.04301121085882187, "step": 1005, "valid_targets_mean": 5400.9, "valid_targets_min": 821 }, { "epoch": 5.127334465195246, "grad_norm": 0.21968555614480795, "learning_rate": 8.150285496090388e-06, "loss": 0.1496, "loss_nan_ranks": 0, "loss_rank_avg": 0.048399992287158966, "step": 1010, "valid_targets_mean": 4492.3, "valid_targets_min": 568 }, { "epoch": 5.152801358234296, "grad_norm": 0.17762702372598874, "learning_rate": 7.947307272414874e-06, "loss": 0.1464, "loss_nan_ranks": 0, "loss_rank_avg": 0.0411510169506073, "step": 1015, "valid_targets_mean": 5294.9, "valid_targets_min": 680 }, { "epoch": 5.1782682512733444, "grad_norm": 0.19722109935186782, "learning_rate": 7.746260014075293e-06, "loss": 0.1455, "loss_nan_ranks": 0, "loss_rank_avg": 0.04638431593775749, "step": 1020, "valid_targets_mean": 5451.1, "valid_targets_min": 626 }, { "epoch": 5.203735144312394, "grad_norm": 0.21092644781830097, "learning_rate": 7.547175930910187e-06, "loss": 0.1507, "loss_nan_ranks": 0, "loss_rank_avg": 0.058665595948696136, "step": 1025, "valid_targets_mean": 5437.9, "valid_targets_min": 568 }, { "epoch": 5.229202037351443, "grad_norm": 0.18538550618712651, "learning_rate": 7.350086918237238e-06, "loss": 0.1423, "loss_nan_ranks": 0, "loss_rank_avg": 0.04550827667117119, "step": 1030, "valid_targets_mean": 5939.2, "valid_targets_min": 742 }, { "epoch": 5.254668930390492, "grad_norm": 0.19790413231909332, "learning_rate": 7.155024551743317e-06, "loss": 0.1461, "loss_nan_ranks": 0, "loss_rank_avg": 0.04877372458577156, "step": 1035, "valid_targets_mean": 4680.1, "valid_targets_min": 294 }, { "epoch": 5.280135823429542, "grad_norm": 0.17992804017128824, "learning_rate": 6.962020082425749e-06, "loss": 0.14, "loss_nan_ranks": 0, "loss_rank_avg": 0.0452592596411705, "step": 1040, "valid_targets_mean": 4596.0, "valid_targets_min": 628 }, { "epoch": 5.305602716468591, "grad_norm": 0.21512679110303626, "learning_rate": 6.771104431585551e-06, "loss": 0.1543, "loss_nan_ranks": 0, "loss_rank_avg": 0.05498050898313522, "step": 1045, "valid_targets_mean": 5272.1, "valid_targets_min": 809 }, { "epoch": 5.33106960950764, "grad_norm": 0.1874903612063792, "learning_rate": 6.582308185873536e-06, "loss": 0.1523, "loss_nan_ranks": 0, "loss_rank_avg": 0.04390951991081238, "step": 1050, "valid_targets_mean": 4639.8, "valid_targets_min": 620 }, { "epoch": 5.356536502546689, "grad_norm": 0.2013909570825948, "learning_rate": 6.3956615923900214e-06, "loss": 0.1495, "loss_nan_ranks": 0, "loss_rank_avg": 0.05374452471733093, "step": 1055, "valid_targets_mean": 5208.7, "valid_targets_min": 607 }, { "epoch": 5.3820033955857385, "grad_norm": 0.20150711758852105, "learning_rate": 6.211194553838931e-06, "loss": 0.1578, "loss_nan_ranks": 0, "loss_rank_avg": 0.045037642121315, "step": 1060, "valid_targets_mean": 4180.2, "valid_targets_min": 430 }, { "epoch": 5.407470288624788, "grad_norm": 0.20342486091636308, "learning_rate": 6.028936623737067e-06, "loss": 0.1525, "loss_nan_ranks": 0, "loss_rank_avg": 0.05274080112576485, "step": 1065, "valid_targets_mean": 4714.8, "valid_targets_min": 595 }, { "epoch": 5.432937181663837, "grad_norm": 0.20387106581873626, "learning_rate": 5.848917001679339e-06, "loss": 0.1465, "loss_nan_ranks": 0, "loss_rank_avg": 0.04757241904735565, "step": 1070, "valid_targets_mean": 4019.6, "valid_targets_min": 414 }, { "epoch": 5.458404074702886, "grad_norm": 0.19042029685743941, "learning_rate": 5.671164528660687e-06, "loss": 0.1529, "loss_nan_ranks": 0, "loss_rank_avg": 0.041462354362010956, "step": 1075, "valid_targets_mean": 5469.5, "valid_targets_min": 519 }, { "epoch": 5.483870967741936, "grad_norm": 0.19866514160174117, "learning_rate": 5.495707682455464e-06, "loss": 0.1494, "loss_nan_ranks": 0, "loss_rank_avg": 0.04318884387612343, "step": 1080, "valid_targets_mean": 4570.6, "valid_targets_min": 428 }, { "epoch": 5.509337860780985, "grad_norm": 0.19880221047276755, "learning_rate": 5.322574573054991e-06, "loss": 0.1568, "loss_nan_ranks": 0, "loss_rank_avg": 0.03715455159544945, "step": 1085, "valid_targets_mean": 4258.1, "valid_targets_min": 445 }, { "epoch": 5.534804753820034, "grad_norm": 0.20412947916891577, "learning_rate": 5.151792938164051e-06, "loss": 0.1515, "loss_nan_ranks": 0, "loss_rank_avg": 0.05464012548327446, "step": 1090, "valid_targets_mean": 5134.4, "valid_targets_min": 387 }, { "epoch": 5.560271646859083, "grad_norm": 0.189743897553305, "learning_rate": 4.983390138757027e-06, "loss": 0.149, "loss_nan_ranks": 0, "loss_rank_avg": 0.043821871280670166, "step": 1095, "valid_targets_mean": 5105.5, "valid_targets_min": 604 }, { "epoch": 5.5857385398981325, "grad_norm": 0.2218260695480036, "learning_rate": 4.817393154694399e-06, "loss": 0.1601, "loss_nan_ranks": 0, "loss_rank_avg": 0.07308252155780792, "step": 1100, "valid_targets_mean": 5176.9, "valid_targets_min": 349 }, { "epoch": 5.611205432937181, "grad_norm": 0.1948526443237716, "learning_rate": 4.653828580400275e-06, "loss": 0.146, "loss_nan_ranks": 0, "loss_rank_avg": 0.0502275675535202, "step": 1105, "valid_targets_mean": 4895.3, "valid_targets_min": 642 }, { "epoch": 5.636672325976231, "grad_norm": 0.19490180065468093, "learning_rate": 4.4927226206017e-06, "loss": 0.1576, "loss_nan_ranks": 0, "loss_rank_avg": 0.05357596278190613, "step": 1110, "valid_targets_mean": 5545.6, "valid_targets_min": 233 }, { "epoch": 5.66213921901528, "grad_norm": 0.1948458421330662, "learning_rate": 4.334101086130409e-06, "loss": 0.1541, "loss_nan_ranks": 0, "loss_rank_avg": 0.04059261083602905, "step": 1115, "valid_targets_mean": 5798.2, "valid_targets_min": 513 }, { "epoch": 5.687606112054329, "grad_norm": 0.17712620714761174, "learning_rate": 4.177989389787625e-06, "loss": 0.1488, "loss_nan_ranks": 0, "loss_rank_avg": 0.04724474996328354, "step": 1120, "valid_targets_mean": 6293.3, "valid_targets_min": 661 }, { "epoch": 5.713073005093379, "grad_norm": 0.19834535803430425, "learning_rate": 4.024412542272706e-06, "loss": 0.1484, "loss_nan_ranks": 0, "loss_rank_avg": 0.052177608013153076, "step": 1125, "valid_targets_mean": 5706.2, "valid_targets_min": 299 }, { "epoch": 5.738539898132428, "grad_norm": 0.19806574977490346, "learning_rate": 3.873395148176135e-06, "loss": 0.1424, "loss_nan_ranks": 0, "loss_rank_avg": 0.047021541744470596, "step": 1130, "valid_targets_mean": 4157.7, "valid_targets_min": 522 }, { "epoch": 5.764006791171477, "grad_norm": 0.17186350973731854, "learning_rate": 3.724961402037661e-06, "loss": 0.1421, "loss_nan_ranks": 0, "loss_rank_avg": 0.05093415826559067, "step": 1135, "valid_targets_mean": 5404.0, "valid_targets_min": 429 }, { "epoch": 5.7894736842105265, "grad_norm": 0.20509235758060426, "learning_rate": 3.57913508447004e-06, "loss": 0.1471, "loss_nan_ranks": 0, "loss_rank_avg": 0.054411761462688446, "step": 1140, "valid_targets_mean": 4937.4, "valid_targets_min": 455 }, { "epoch": 5.814940577249575, "grad_norm": 0.18141549621559375, "learning_rate": 3.4359395583491594e-06, "loss": 0.148, "loss_nan_ranks": 0, "loss_rank_avg": 0.054643046110868454, "step": 1145, "valid_targets_mean": 5395.0, "valid_targets_min": 630 }, { "epoch": 5.840407470288625, "grad_norm": 0.20957711061633771, "learning_rate": 3.2953977650710513e-06, "loss": 0.1478, "loss_nan_ranks": 0, "loss_rank_avg": 0.06269936263561249, "step": 1150, "valid_targets_mean": 7170.0, "valid_targets_min": 780 }, { "epoch": 5.8658743633276735, "grad_norm": 0.19968832235520612, "learning_rate": 3.1575322208764714e-06, "loss": 0.1425, "loss_nan_ranks": 0, "loss_rank_avg": 0.03520461916923523, "step": 1155, "valid_targets_mean": 5633.9, "valid_targets_min": 486 }, { "epoch": 5.891341256366723, "grad_norm": 0.18042191473733155, "learning_rate": 3.0223650132435335e-06, "loss": 0.1547, "loss_nan_ranks": 0, "loss_rank_avg": 0.04744488745927811, "step": 1160, "valid_targets_mean": 4839.0, "valid_targets_min": 510 }, { "epoch": 5.916808149405773, "grad_norm": 0.19710041340370513, "learning_rate": 2.8899177973490734e-06, "loss": 0.1436, "loss_nan_ranks": 0, "loss_rank_avg": 0.046739302575588226, "step": 1165, "valid_targets_mean": 5891.5, "valid_targets_min": 306 }, { "epoch": 5.942275042444821, "grad_norm": 0.22661796840060533, "learning_rate": 2.7602117925992964e-06, "loss": 0.1508, "loss_nan_ranks": 0, "loss_rank_avg": 0.05284252017736435, "step": 1170, "valid_targets_mean": 4766.3, "valid_targets_min": 496 }, { "epoch": 5.967741935483871, "grad_norm": 0.22336951734405172, "learning_rate": 2.6332677792301773e-06, "loss": 0.163, "loss_nan_ranks": 0, "loss_rank_avg": 0.06108919158577919, "step": 1175, "valid_targets_mean": 4240.5, "valid_targets_min": 374 }, { "epoch": 5.993208828522921, "grad_norm": 0.1915920301061591, "learning_rate": 2.5091060949782664e-06, "loss": 0.1333, "loss_nan_ranks": 0, "loss_rank_avg": 0.03976128250360489, "step": 1180, "valid_targets_mean": 6280.5, "valid_targets_min": 672 }, { "epoch": 6.01528013582343, "grad_norm": 0.19317503708680775, "learning_rate": 2.3877466318223698e-06, "loss": 0.1525, "loss_nan_ranks": 0, "loss_rank_avg": 0.03881210461258888, "step": 1185, "valid_targets_mean": 5353.2, "valid_targets_min": 339 }, { "epoch": 6.040747028862479, "grad_norm": 0.20764541418959367, "learning_rate": 2.2692088327966655e-06, "loss": 0.1467, "loss_nan_ranks": 0, "loss_rank_avg": 0.044896483421325684, "step": 1190, "valid_targets_mean": 5634.2, "valid_targets_min": 595 }, { "epoch": 6.066213921901528, "grad_norm": 0.20593165601992494, "learning_rate": 2.153511688875707e-06, "loss": 0.1536, "loss_nan_ranks": 0, "loss_rank_avg": 0.04371983930468559, "step": 1195, "valid_targets_mean": 3858.5, "valid_targets_min": 738 }, { "epoch": 6.0916808149405774, "grad_norm": 0.21297043486058082, "learning_rate": 2.0406737359318797e-06, "loss": 0.1432, "loss_nan_ranks": 0, "loss_rank_avg": 0.0521714873611927, "step": 1200, "valid_targets_mean": 4474.7, "valid_targets_min": 647 }, { "epoch": 6.117147707979626, "grad_norm": 0.21186564633702598, "learning_rate": 1.930713051765776e-06, "loss": 0.1475, "loss_nan_ranks": 0, "loss_rank_avg": 0.04589856415987015, "step": 1205, "valid_targets_mean": 4510.9, "valid_targets_min": 335 }, { "epoch": 6.142614601018676, "grad_norm": 0.1990233774591213, "learning_rate": 1.8236472532099413e-06, "loss": 0.1578, "loss_nan_ranks": 0, "loss_rank_avg": 0.05373925343155861, "step": 1210, "valid_targets_mean": 4050.8, "valid_targets_min": 394 }, { "epoch": 6.168081494057725, "grad_norm": 0.18833278391984534, "learning_rate": 1.7194934933064654e-06, "loss": 0.1462, "loss_nan_ranks": 0, "loss_rank_avg": 0.044683780521154404, "step": 1215, "valid_targets_mean": 4603.1, "valid_targets_min": 398 }, { "epoch": 6.193548387096774, "grad_norm": 0.2206745205430478, "learning_rate": 1.6182684585588981e-06, "loss": 0.1547, "loss_nan_ranks": 0, "loss_rank_avg": 0.057726383209228516, "step": 1220, "valid_targets_mean": 4889.2, "valid_targets_min": 488 }, { "epoch": 6.219015280135824, "grad_norm": 0.18839205856507873, "learning_rate": 1.5199883662588954e-06, "loss": 0.1448, "loss_nan_ranks": 0, "loss_rank_avg": 0.049824222922325134, "step": 1225, "valid_targets_mean": 5002.8, "valid_targets_min": 481 }, { "epoch": 6.244482173174872, "grad_norm": 0.18511976328013788, "learning_rate": 1.4246689618880472e-06, "loss": 0.143, "loss_nan_ranks": 0, "loss_rank_avg": 0.053400591015815735, "step": 1230, "valid_targets_mean": 5463.7, "valid_targets_min": 370 }, { "epoch": 6.269949066213922, "grad_norm": 0.1850428475008149, "learning_rate": 1.3323255165952875e-06, "loss": 0.1448, "loss_nan_ranks": 0, "loss_rank_avg": 0.04526630416512489, "step": 1235, "valid_targets_mean": 5302.2, "valid_targets_min": 480 }, { "epoch": 6.2954159592529715, "grad_norm": 0.1806475924766929, "learning_rate": 1.2429728247502926e-06, "loss": 0.156, "loss_nan_ranks": 0, "loss_rank_avg": 0.04810335487127304, "step": 1240, "valid_targets_mean": 5395.3, "valid_targets_min": 459 }, { "epoch": 6.32088285229202, "grad_norm": 0.19446809319802125, "learning_rate": 1.156625201573287e-06, "loss": 0.138, "loss_nan_ranks": 0, "loss_rank_avg": 0.04256840795278549, "step": 1245, "valid_targets_mean": 4533.1, "valid_targets_min": 416 }, { "epoch": 6.34634974533107, "grad_norm": 0.17412413386777287, "learning_rate": 1.0732964808415792e-06, "loss": 0.1422, "loss_nan_ranks": 0, "loss_rank_avg": 0.043165817856788635, "step": 1250, "valid_targets_mean": 5473.6, "valid_targets_min": 489 }, { "epoch": 6.3718166383701185, "grad_norm": 0.1906858840368389, "learning_rate": 9.93000012673262e-07, "loss": 0.145, "loss_nan_ranks": 0, "loss_rank_avg": 0.04549487307667732, "step": 1255, "valid_targets_mean": 5095.8, "valid_targets_min": 758 }, { "epoch": 6.397283531409168, "grad_norm": 0.1937848545695707, "learning_rate": 9.157486613883759e-07, "loss": 0.1472, "loss_nan_ranks": 0, "loss_rank_avg": 0.04257480800151825, "step": 1260, "valid_targets_mean": 4423.5, "valid_targets_min": 481 }, { "epoch": 6.422750424448218, "grad_norm": 0.17775123277009072, "learning_rate": 8.415548034479215e-07, "loss": 0.1489, "loss_nan_ranks": 0, "loss_rank_avg": 0.05277522653341293, "step": 1265, "valid_targets_mean": 5918.0, "valid_targets_min": 681 }, { "epoch": 6.448217317487266, "grad_norm": 0.19966295683357554, "learning_rate": 7.704303254710188e-07, "loss": 0.1594, "loss_nan_ranks": 0, "loss_rank_avg": 0.047615669667720795, "step": 1270, "valid_targets_mean": 4504.9, "valid_targets_min": 490 }, { "epoch": 6.473684210526316, "grad_norm": 0.20669844700159937, "learning_rate": 7.023866223305487e-07, "loss": 0.1434, "loss_nan_ranks": 0, "loss_rank_avg": 0.053897857666015625, "step": 1275, "valid_targets_mean": 5455.9, "valid_targets_min": 518 }, { "epoch": 6.499151103565365, "grad_norm": 0.18115259950945548, "learning_rate": 6.374345953275773e-07, "loss": 0.1449, "loss_nan_ranks": 0, "loss_rank_avg": 0.048284128308296204, "step": 1280, "valid_targets_mean": 4987.2, "valid_targets_min": 567 }, { "epoch": 6.524617996604414, "grad_norm": 0.17791064676741664, "learning_rate": 5.755846504448604e-07, "loss": 0.1528, "loss_nan_ranks": 0, "loss_rank_avg": 0.053075529634952545, "step": 1285, "valid_targets_mean": 5521.2, "valid_targets_min": 491 }, { "epoch": 6.550084889643464, "grad_norm": 0.17384477446959934, "learning_rate": 5.16846696679687e-07, "loss": 0.1468, "loss_nan_ranks": 0, "loss_rank_avg": 0.04560650885105133, "step": 1290, "valid_targets_mean": 5726.9, "valid_targets_min": 473 }, { "epoch": 6.5755517826825125, "grad_norm": 0.17427350773632405, "learning_rate": 4.6123014445636605e-07, "loss": 0.1443, "loss_nan_ranks": 0, "loss_rank_avg": 0.05331622064113617, "step": 1295, "valid_targets_mean": 5595.4, "valid_targets_min": 745 }, { "epoch": 6.601018675721562, "grad_norm": 0.1847039591416848, "learning_rate": 4.087439041185781e-07, "loss": 0.1427, "loss_nan_ranks": 0, "loss_rank_avg": 0.04816567897796631, "step": 1300, "valid_targets_mean": 5271.9, "valid_targets_min": 628 }, { "epoch": 6.626485568760611, "grad_norm": 0.2075639627371849, "learning_rate": 3.5939638450183776e-07, "loss": 0.1513, "loss_nan_ranks": 0, "loss_rank_avg": 0.048930585384368896, "step": 1305, "valid_targets_mean": 3848.1, "valid_targets_min": 567 }, { "epoch": 6.65195246179966, "grad_norm": 0.22392832964076018, "learning_rate": 3.1319549158632444e-07, "loss": 0.1553, "loss_nan_ranks": 0, "loss_rank_avg": 0.05328825116157532, "step": 1310, "valid_targets_mean": 4645.8, "valid_targets_min": 726 }, { "epoch": 6.67741935483871, "grad_norm": 0.3070786462946522, "learning_rate": 2.701486272302534e-07, "loss": 0.1424, "loss_nan_ranks": 0, "loss_rank_avg": 0.06487414985895157, "step": 1315, "valid_targets_mean": 5223.2, "valid_targets_min": 545 }, { "epoch": 6.702886247877759, "grad_norm": 0.18439549160916988, "learning_rate": 2.302626879840353e-07, "loss": 0.1512, "loss_nan_ranks": 0, "loss_rank_avg": 0.04168329015374184, "step": 1320, "valid_targets_mean": 5075.8, "valid_targets_min": 380 }, { "epoch": 6.728353140916808, "grad_norm": 0.16908764146487906, "learning_rate": 1.9354406398535363e-07, "loss": 0.1408, "loss_nan_ranks": 0, "loss_rank_avg": 0.04210246726870537, "step": 1325, "valid_targets_mean": 5584.5, "valid_targets_min": 658 }, { "epoch": 6.753820033955858, "grad_norm": 0.17630235972167382, "learning_rate": 1.599986379354257e-07, "loss": 0.1396, "loss_nan_ranks": 0, "loss_rank_avg": 0.042337290942668915, "step": 1330, "valid_targets_mean": 5165.1, "valid_targets_min": 641 }, { "epoch": 6.7792869269949065, "grad_norm": 0.20555657737356492, "learning_rate": 1.29631784156512e-07, "loss": 0.147, "loss_nan_ranks": 0, "loss_rank_avg": 0.051874805241823196, "step": 1335, "valid_targets_mean": 5075.5, "valid_targets_min": 680 }, { "epoch": 6.804753820033956, "grad_norm": 0.2322688373131418, "learning_rate": 1.0244836773091182e-07, "loss": 0.1466, "loss_nan_ranks": 0, "loss_rank_avg": 0.0678434669971466, "step": 1340, "valid_targets_mean": 4481.3, "valid_targets_min": 365 }, { "epoch": 6.830220713073005, "grad_norm": 0.2065623953027286, "learning_rate": 7.845274372151767e-08, "loss": 0.149, "loss_nan_ranks": 0, "loss_rank_avg": 0.05911225453019142, "step": 1345, "valid_targets_mean": 6274.2, "valid_targets_min": 838 }, { "epoch": 6.855687606112054, "grad_norm": 0.19220426007499147, "learning_rate": 5.7648756474084636e-08, "loss": 0.1505, "loss_nan_ranks": 0, "loss_rank_avg": 0.04873078316450119, "step": 1350, "valid_targets_mean": 3809.2, "valid_targets_min": 570 }, { "epoch": 6.881154499151103, "grad_norm": 0.21298725476050626, "learning_rate": 4.003973900133851e-08, "loss": 0.1499, "loss_nan_ranks": 0, "loss_rank_avg": 0.05683305114507675, "step": 1355, "valid_targets_mean": 5734.7, "valid_targets_min": 465 }, { "epoch": 6.906621392190153, "grad_norm": 0.187496545143332, "learning_rate": 2.5628512448987453e-08, "loss": 0.1478, "loss_nan_ranks": 0, "loss_rank_avg": 0.0421915203332901, "step": 1360, "valid_targets_mean": 5190.7, "valid_targets_min": 537 }, { "epoch": 6.932088285229202, "grad_norm": 0.16945065292116984, "learning_rate": 1.4417385643741289e-08, "loss": 0.1467, "loss_nan_ranks": 0, "loss_rank_avg": 0.05242417752742767, "step": 1365, "valid_targets_mean": 6694.6, "valid_targets_min": 394 }, { "epoch": 6.957555178268251, "grad_norm": 0.1868609584998114, "learning_rate": 6.408154723420712e-09, "loss": 0.1474, "loss_nan_ranks": 0, "loss_rank_avg": 0.06806820631027222, "step": 1370, "valid_targets_mean": 6651.1, "valid_targets_min": 656 }, { "epoch": 6.983022071307301, "grad_norm": 0.19090476524097702, "learning_rate": 1.6021028491941538e-09, "loss": 0.145, "loss_nan_ranks": 0, "loss_rank_avg": 0.05688754841685295, "step": 1375, "valid_targets_mean": 4995.7, "valid_targets_min": 544 } ], "logging_steps": 5, "max_steps": 1379, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.507594515378078e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }