{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 686, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.051194539249146756, "grad_norm": 25.82480694310689, "learning_rate": 2.3188405797101453e-06, "loss": 0.8837, "loss_nan_ranks": 0, "loss_rank_avg": 0.28898581862449646, "step": 5, "valid_targets_mean": 4911.5, "valid_targets_min": 2215 }, { "epoch": 0.10238907849829351, "grad_norm": 12.744490357644285, "learning_rate": 5.2173913043478265e-06, "loss": 0.7924, "loss_nan_ranks": 0, "loss_rank_avg": 0.2303556501865387, "step": 10, "valid_targets_mean": 5216.9, "valid_targets_min": 319 }, { "epoch": 0.15358361774744028, "grad_norm": 2.6591207927386353, "learning_rate": 8.115942028985508e-06, "loss": 0.629, "loss_nan_ranks": 0, "loss_rank_avg": 0.22648176550865173, "step": 15, "valid_targets_mean": 5766.3, "valid_targets_min": 2726 }, { "epoch": 0.20477815699658702, "grad_norm": 1.534238831991734, "learning_rate": 1.101449275362319e-05, "loss": 0.5623, "loss_nan_ranks": 0, "loss_rank_avg": 0.1773180514574051, "step": 20, "valid_targets_mean": 5145.9, "valid_targets_min": 2241 }, { "epoch": 0.25597269624573377, "grad_norm": 1.0352664016291477, "learning_rate": 1.391304347826087e-05, "loss": 0.5276, "loss_nan_ranks": 0, "loss_rank_avg": 0.1391734778881073, "step": 25, "valid_targets_mean": 4861.1, "valid_targets_min": 134 }, { "epoch": 0.30716723549488056, "grad_norm": 0.6935657264704905, "learning_rate": 1.681159420289855e-05, "loss": 0.4868, "loss_nan_ranks": 0, "loss_rank_avg": 0.15757042169570923, "step": 30, "valid_targets_mean": 5287.8, "valid_targets_min": 1702 }, { "epoch": 0.3583617747440273, "grad_norm": 0.574511779794303, "learning_rate": 1.9710144927536236e-05, "loss": 0.4641, "loss_nan_ranks": 0, "loss_rank_avg": 0.14636749029159546, "step": 35, "valid_targets_mean": 4977.4, "valid_targets_min": 1383 }, { "epoch": 0.40955631399317405, "grad_norm": 0.45954923986300594, "learning_rate": 2.2608695652173914e-05, "loss": 0.4417, "loss_nan_ranks": 0, "loss_rank_avg": 0.14387136697769165, "step": 40, "valid_targets_mean": 5467.8, "valid_targets_min": 413 }, { "epoch": 0.46075085324232085, "grad_norm": 0.36407097207825967, "learning_rate": 2.5507246376811593e-05, "loss": 0.4245, "loss_nan_ranks": 0, "loss_rank_avg": 0.11947175860404968, "step": 45, "valid_targets_mean": 4556.4, "valid_targets_min": 1526 }, { "epoch": 0.5119453924914675, "grad_norm": 0.3200847843431381, "learning_rate": 2.840579710144928e-05, "loss": 0.4008, "loss_nan_ranks": 0, "loss_rank_avg": 0.12619568407535553, "step": 50, "valid_targets_mean": 5079.8, "valid_targets_min": 409 }, { "epoch": 0.5631399317406144, "grad_norm": 0.2749784180114348, "learning_rate": 3.130434782608696e-05, "loss": 0.3861, "loss_nan_ranks": 0, "loss_rank_avg": 0.15403807163238525, "step": 55, "valid_targets_mean": 5949.9, "valid_targets_min": 350 }, { "epoch": 0.6143344709897611, "grad_norm": 0.260946688973715, "learning_rate": 3.420289855072464e-05, "loss": 0.3798, "loss_nan_ranks": 0, "loss_rank_avg": 0.10824601352214813, "step": 60, "valid_targets_mean": 4519.8, "valid_targets_min": 1730 }, { "epoch": 0.6655290102389079, "grad_norm": 0.2695889942073195, "learning_rate": 3.7101449275362325e-05, "loss": 0.3636, "loss_nan_ranks": 0, "loss_rank_avg": 0.14311721920967102, "step": 65, "valid_targets_mean": 5901.2, "valid_targets_min": 2696 }, { "epoch": 0.7167235494880546, "grad_norm": 0.2513368707958076, "learning_rate": 4e-05, "loss": 0.3524, "loss_nan_ranks": 0, "loss_rank_avg": 0.113977812230587, "step": 70, "valid_targets_mean": 5193.1, "valid_targets_min": 937 }, { "epoch": 0.7679180887372014, "grad_norm": 0.25669844186315777, "learning_rate": 3.999351894109228e-05, "loss": 0.3337, "loss_nan_ranks": 0, "loss_rank_avg": 0.10874387621879578, "step": 75, "valid_targets_mean": 5100.9, "valid_targets_min": 1998 }, { "epoch": 0.8191126279863481, "grad_norm": 0.2654328857905388, "learning_rate": 3.997407996478158e-05, "loss": 0.3345, "loss_nan_ranks": 0, "loss_rank_avg": 0.11455205827951431, "step": 80, "valid_targets_mean": 5224.6, "valid_targets_min": 433 }, { "epoch": 0.8703071672354948, "grad_norm": 0.2593782401899841, "learning_rate": 3.9941695669582944e-05, "loss": 0.3304, "loss_nan_ranks": 0, "loss_rank_avg": 0.10599816590547562, "step": 85, "valid_targets_mean": 4968.2, "valid_targets_min": 2196 }, { "epoch": 0.9215017064846417, "grad_norm": 0.23686889348670484, "learning_rate": 3.989638704394887e-05, "loss": 0.3168, "loss_nan_ranks": 0, "loss_rank_avg": 0.11794209480285645, "step": 90, "valid_targets_mean": 5557.9, "valid_targets_min": 1855 }, { "epoch": 0.9726962457337884, "grad_norm": 0.27218880367654336, "learning_rate": 3.983818345266653e-05, "loss": 0.3149, "loss_nan_ranks": 0, "loss_rank_avg": 0.09047803282737732, "step": 95, "valid_targets_mean": 4906.0, "valid_targets_min": 2087 }, { "epoch": 1.0204778156996588, "grad_norm": 0.2548103228534002, "learning_rate": 3.976712261782631e-05, "loss": 0.3204, "loss_nan_ranks": 0, "loss_rank_avg": 0.10694491863250732, "step": 100, "valid_targets_mean": 4863.1, "valid_targets_min": 1985 }, { "epoch": 1.0716723549488054, "grad_norm": 0.26271765818455856, "learning_rate": 3.968325059437385e-05, "loss": 0.3154, "loss_nan_ranks": 0, "loss_rank_avg": 0.11918433755636215, "step": 105, "valid_targets_mean": 5599.3, "valid_targets_min": 421 }, { "epoch": 1.1228668941979523, "grad_norm": 0.27930934721029477, "learning_rate": 3.958662174026164e-05, "loss": 0.3121, "loss_nan_ranks": 0, "loss_rank_avg": 0.09404879063367844, "step": 110, "valid_targets_mean": 5062.9, "valid_targets_min": 2242 }, { "epoch": 1.174061433447099, "grad_norm": 0.26321540953232964, "learning_rate": 3.947729868121924e-05, "loss": 0.3053, "loss_nan_ranks": 0, "loss_rank_avg": 0.10871408879756927, "step": 115, "valid_targets_mean": 5306.3, "valid_targets_min": 314 }, { "epoch": 1.2252559726962458, "grad_norm": 0.26402045132441954, "learning_rate": 3.935535227016521e-05, "loss": 0.3076, "loss_nan_ranks": 0, "loss_rank_avg": 0.11834833025932312, "step": 120, "valid_targets_mean": 5735.2, "valid_targets_min": 365 }, { "epoch": 1.2764505119453924, "grad_norm": 0.26407368854218555, "learning_rate": 3.922086154128693e-05, "loss": 0.2968, "loss_nan_ranks": 0, "loss_rank_avg": 0.10184627771377563, "step": 125, "valid_targets_mean": 5052.8, "valid_targets_min": 2201 }, { "epoch": 1.3276450511945392, "grad_norm": 0.26565677723672504, "learning_rate": 3.907391365881802e-05, "loss": 0.3075, "loss_nan_ranks": 0, "loss_rank_avg": 0.08913706988096237, "step": 130, "valid_targets_mean": 4881.6, "valid_targets_min": 2122 }, { "epoch": 1.378839590443686, "grad_norm": 0.2831307137649524, "learning_rate": 3.891460386054675e-05, "loss": 0.299, "loss_nan_ranks": 0, "loss_rank_avg": 0.09639839828014374, "step": 135, "valid_targets_mean": 4887.5, "valid_targets_min": 2153 }, { "epoch": 1.4300341296928327, "grad_norm": 0.27646586353677727, "learning_rate": 3.8743035396091845e-05, "loss": 0.2998, "loss_nan_ranks": 0, "loss_rank_avg": 0.11154685169458389, "step": 140, "valid_targets_mean": 5716.4, "valid_targets_min": 2925 }, { "epoch": 1.4812286689419796, "grad_norm": 0.27431775252756874, "learning_rate": 3.8559319459985776e-05, "loss": 0.2969, "loss_nan_ranks": 0, "loss_rank_avg": 0.08550999313592911, "step": 145, "valid_targets_mean": 4336.7, "valid_targets_min": 416 }, { "epoch": 1.5324232081911262, "grad_norm": 0.25781067169583155, "learning_rate": 3.836357511960898e-05, "loss": 0.2946, "loss_nan_ranks": 0, "loss_rank_avg": 0.1011088490486145, "step": 150, "valid_targets_mean": 5483.2, "valid_targets_min": 2251 }, { "epoch": 1.583617747440273, "grad_norm": 0.27911127666111935, "learning_rate": 3.815592923802152e-05, "loss": 0.2922, "loss_nan_ranks": 0, "loss_rank_avg": 0.10180200636386871, "step": 155, "valid_targets_mean": 5241.7, "valid_targets_min": 1968 }, { "epoch": 1.63481228668942, "grad_norm": 0.2743316944893025, "learning_rate": 3.793651639174246e-05, "loss": 0.2881, "loss_nan_ranks": 0, "loss_rank_avg": 0.09541475772857666, "step": 160, "valid_targets_mean": 5239.0, "valid_targets_min": 220 }, { "epoch": 1.6860068259385665, "grad_norm": 0.2943897714245753, "learning_rate": 3.7705478783529986e-05, "loss": 0.2971, "loss_nan_ranks": 0, "loss_rank_avg": 0.08847599476575851, "step": 165, "valid_targets_mean": 4835.5, "valid_targets_min": 2225 }, { "epoch": 1.7372013651877132, "grad_norm": 0.261601602924854, "learning_rate": 3.746296615021896e-05, "loss": 0.296, "loss_nan_ranks": 0, "loss_rank_avg": 0.09461049735546112, "step": 170, "valid_targets_mean": 4939.5, "valid_targets_min": 2062 }, { "epoch": 1.78839590443686, "grad_norm": 0.27191268600437934, "learning_rate": 3.720913566567562e-05, "loss": 0.2894, "loss_nan_ranks": 0, "loss_rank_avg": 0.09755190461874008, "step": 175, "valid_targets_mean": 5106.4, "valid_targets_min": 2367 }, { "epoch": 1.8395904436860069, "grad_norm": 0.24464624444284053, "learning_rate": 3.6944151838932274e-05, "loss": 0.2935, "loss_nan_ranks": 0, "loss_rank_avg": 0.110307976603508, "step": 180, "valid_targets_mean": 6091.2, "valid_targets_min": 1757 }, { "epoch": 1.8907849829351537, "grad_norm": 0.2602335537625986, "learning_rate": 3.666818640756797e-05, "loss": 0.2902, "loss_nan_ranks": 0, "loss_rank_avg": 0.09931231290102005, "step": 185, "valid_targets_mean": 5034.7, "valid_targets_min": 451 }, { "epoch": 1.9419795221843004, "grad_norm": 0.25671393831155714, "learning_rate": 3.638141822640444e-05, "loss": 0.2908, "loss_nan_ranks": 0, "loss_rank_avg": 0.0843583270907402, "step": 190, "valid_targets_mean": 4742.2, "valid_targets_min": 2233 }, { "epoch": 1.993174061433447, "grad_norm": 0.23767750953863967, "learning_rate": 3.608403315158917e-05, "loss": 0.2915, "loss_nan_ranks": 0, "loss_rank_avg": 0.08634407818317413, "step": 195, "valid_targets_mean": 4896.8, "valid_targets_min": 329 }, { "epoch": 2.0409556313993176, "grad_norm": 0.2549526896790837, "learning_rate": 3.5776223920140985e-05, "loss": 0.2833, "loss_nan_ranks": 0, "loss_rank_avg": 0.11179839074611664, "step": 200, "valid_targets_mean": 6050.2, "valid_targets_min": 2254 }, { "epoch": 2.092150170648464, "grad_norm": 0.2738878896819206, "learning_rate": 3.545819002503602e-05, "loss": 0.284, "loss_nan_ranks": 0, "loss_rank_avg": 0.09601035714149475, "step": 205, "valid_targets_mean": 5224.0, "valid_targets_min": 1694 }, { "epoch": 2.143344709897611, "grad_norm": 0.2780264276944305, "learning_rate": 3.513013758591515e-05, "loss": 0.2815, "loss_nan_ranks": 0, "loss_rank_avg": 0.0888153463602066, "step": 210, "valid_targets_mean": 4907.5, "valid_targets_min": 365 }, { "epoch": 2.1945392491467577, "grad_norm": 0.26333842271632707, "learning_rate": 3.479227921549666e-05, "loss": 0.281, "loss_nan_ranks": 0, "loss_rank_avg": 0.09323578327894211, "step": 215, "valid_targets_mean": 5229.7, "valid_targets_min": 241 }, { "epoch": 2.2457337883959045, "grad_norm": 0.2934385105416155, "learning_rate": 3.444483388178066e-05, "loss": 0.27, "loss_nan_ranks": 0, "loss_rank_avg": 0.09266763925552368, "step": 220, "valid_targets_mean": 5267.0, "valid_targets_min": 410 }, { "epoch": 2.296928327645051, "grad_norm": 0.25567549374046494, "learning_rate": 3.4088026766134654e-05, "loss": 0.2782, "loss_nan_ranks": 0, "loss_rank_avg": 0.08348363637924194, "step": 225, "valid_targets_mean": 5137.3, "valid_targets_min": 2069 }, { "epoch": 2.348122866894198, "grad_norm": 0.3022766271875021, "learning_rate": 3.372208911735216e-05, "loss": 0.2856, "loss_nan_ranks": 0, "loss_rank_avg": 0.08997620642185211, "step": 230, "valid_targets_mean": 5248.5, "valid_targets_min": 424 }, { "epoch": 2.3993174061433447, "grad_norm": 0.2534445787104033, "learning_rate": 3.3347258101779015e-05, "loss": 0.282, "loss_nan_ranks": 0, "loss_rank_avg": 0.11496127396821976, "step": 235, "valid_targets_mean": 5611.1, "valid_targets_min": 264 }, { "epoch": 2.4505119453924915, "grad_norm": 0.2520452606948065, "learning_rate": 3.296377664960445e-05, "loss": 0.2708, "loss_nan_ranks": 0, "loss_rank_avg": 0.09025128185749054, "step": 240, "valid_targets_mean": 5019.2, "valid_targets_min": 433 }, { "epoch": 2.5017064846416384, "grad_norm": 0.2627583946055227, "learning_rate": 3.257189329741662e-05, "loss": 0.2723, "loss_nan_ranks": 0, "loss_rank_avg": 0.08457937836647034, "step": 245, "valid_targets_mean": 4861.2, "valid_targets_min": 1761 }, { "epoch": 2.5529010238907848, "grad_norm": 0.2561709090419887, "learning_rate": 3.217186202712458e-05, "loss": 0.274, "loss_nan_ranks": 0, "loss_rank_avg": 0.0853310376405716, "step": 250, "valid_targets_mean": 4727.4, "valid_targets_min": 1792 }, { "epoch": 2.6040955631399316, "grad_norm": 0.44538128860438136, "learning_rate": 3.1763942101351095e-05, "loss": 0.2773, "loss_nan_ranks": 0, "loss_rank_avg": 0.09366409480571747, "step": 255, "valid_targets_mean": 5007.0, "valid_targets_min": 365 }, { "epoch": 2.6552901023890785, "grad_norm": 0.2852975254240757, "learning_rate": 3.134839789540302e-05, "loss": 0.2752, "loss_nan_ranks": 0, "loss_rank_avg": 0.0924173966050148, "step": 260, "valid_targets_mean": 5460.8, "valid_targets_min": 433 }, { "epoch": 2.7064846416382253, "grad_norm": 0.26038175748374537, "learning_rate": 3.0925498725928115e-05, "loss": 0.2702, "loss_nan_ranks": 0, "loss_rank_avg": 0.0912502110004425, "step": 265, "valid_targets_mean": 5070.3, "valid_targets_min": 2242 }, { "epoch": 2.757679180887372, "grad_norm": 0.2663014460071229, "learning_rate": 3.0495518676369306e-05, "loss": 0.2767, "loss_nan_ranks": 0, "loss_rank_avg": 0.09056515991687775, "step": 270, "valid_targets_mean": 5028.2, "valid_targets_min": 1937 }, { "epoch": 2.8088737201365186, "grad_norm": 0.3060566751344927, "learning_rate": 3.0058736419329643e-05, "loss": 0.2693, "loss_nan_ranks": 0, "loss_rank_avg": 0.08921462297439575, "step": 275, "valid_targets_mean": 4812.8, "valid_targets_min": 314 }, { "epoch": 2.8600682593856654, "grad_norm": 0.24797757600989206, "learning_rate": 2.9615435035962878e-05, "loss": 0.2774, "loss_nan_ranks": 0, "loss_rank_avg": 0.09497439861297607, "step": 280, "valid_targets_mean": 5516.0, "valid_targets_min": 2022 }, { "epoch": 2.9112627986348123, "grad_norm": 0.3147509660798771, "learning_rate": 2.9165901832506977e-05, "loss": 0.2714, "loss_nan_ranks": 0, "loss_rank_avg": 0.09763729572296143, "step": 285, "valid_targets_mean": 4715.4, "valid_targets_min": 284 }, { "epoch": 2.962457337883959, "grad_norm": 0.26780692138537443, "learning_rate": 2.8710428154079185e-05, "loss": 0.2684, "loss_nan_ranks": 0, "loss_rank_avg": 0.08496683835983276, "step": 290, "valid_targets_mean": 4745.5, "valid_targets_min": 1235 }, { "epoch": 3.0102389078498293, "grad_norm": 0.26968597810630485, "learning_rate": 2.824930919585359e-05, "loss": 0.2695, "loss_nan_ranks": 0, "loss_rank_avg": 0.08243635296821594, "step": 295, "valid_targets_mean": 5450.9, "valid_targets_min": 317 }, { "epoch": 3.061433447098976, "grad_norm": 0.31124671324275366, "learning_rate": 2.778284381174336e-05, "loss": 0.2619, "loss_nan_ranks": 0, "loss_rank_avg": 0.08364085853099823, "step": 300, "valid_targets_mean": 5530.3, "valid_targets_min": 1478 }, { "epoch": 3.112627986348123, "grad_norm": 0.30315704623816053, "learning_rate": 2.7311334320711784e-05, "loss": 0.2627, "loss_nan_ranks": 0, "loss_rank_avg": 0.09039817005395889, "step": 305, "valid_targets_mean": 5173.2, "valid_targets_min": 273 }, { "epoch": 3.1638225255972694, "grad_norm": 0.2822760635459168, "learning_rate": 2.683508631083755e-05, "loss": 0.2664, "loss_nan_ranks": 0, "loss_rank_avg": 0.07820077240467072, "step": 310, "valid_targets_mean": 4802.0, "valid_targets_min": 2196 }, { "epoch": 3.2150170648464163, "grad_norm": 0.29743866886806536, "learning_rate": 2.6354408441261324e-05, "loss": 0.2679, "loss_nan_ranks": 0, "loss_rank_avg": 0.09245477616786957, "step": 315, "valid_targets_mean": 5530.5, "valid_targets_min": 393 }, { "epoch": 3.266211604095563, "grad_norm": 0.27606973763989207, "learning_rate": 2.5869612242141946e-05, "loss": 0.2664, "loss_nan_ranks": 0, "loss_rank_avg": 0.08491060137748718, "step": 320, "valid_targets_mean": 5039.2, "valid_targets_min": 2348 }, { "epoch": 3.31740614334471, "grad_norm": 0.2502805038424136, "learning_rate": 2.538101191275189e-05, "loss": 0.2647, "loss_nan_ranks": 0, "loss_rank_avg": 0.09891136735677719, "step": 325, "valid_targets_mean": 5541.6, "valid_targets_min": 315 }, { "epoch": 3.368600682593857, "grad_norm": 0.2611678416517467, "learning_rate": 2.488892411784286e-05, "loss": 0.2601, "loss_nan_ranks": 0, "loss_rank_avg": 0.08309619128704071, "step": 330, "valid_targets_mean": 4493.0, "valid_targets_min": 2063 }, { "epoch": 3.419795221843003, "grad_norm": 0.2611914869636264, "learning_rate": 2.439366778241352e-05, "loss": 0.2706, "loss_nan_ranks": 0, "loss_rank_avg": 0.09184084832668304, "step": 335, "valid_targets_mean": 4901.5, "valid_targets_min": 430 }, { "epoch": 3.47098976109215, "grad_norm": 0.27246810160180385, "learning_rate": 2.3895563885012303e-05, "loss": 0.2604, "loss_nan_ranks": 0, "loss_rank_avg": 0.08379268646240234, "step": 340, "valid_targets_mean": 4821.3, "valid_targets_min": 2467 }, { "epoch": 3.522184300341297, "grad_norm": 0.2536800115672537, "learning_rate": 2.3394935249709332e-05, "loss": 0.2641, "loss_nan_ranks": 0, "loss_rank_avg": 0.08427208662033081, "step": 345, "valid_targets_mean": 4990.8, "valid_targets_min": 2231 }, { "epoch": 3.573378839590444, "grad_norm": 0.258064897446696, "learning_rate": 2.2892106336872234e-05, "loss": 0.2659, "loss_nan_ranks": 0, "loss_rank_avg": 0.08550337702035904, "step": 350, "valid_targets_mean": 5197.7, "valid_targets_min": 2135 }, { "epoch": 3.6245733788395906, "grad_norm": 0.2495721887544657, "learning_rate": 2.2387403032881467e-05, "loss": 0.2622, "loss_nan_ranks": 0, "loss_rank_avg": 0.08749514818191528, "step": 355, "valid_targets_mean": 4881.7, "valid_targets_min": 2212 }, { "epoch": 3.675767918088737, "grad_norm": 0.2468445540270157, "learning_rate": 2.1881152438921447e-05, "loss": 0.2624, "loss_nan_ranks": 0, "loss_rank_avg": 0.08829236775636673, "step": 360, "valid_targets_mean": 5321.8, "valid_targets_min": 1695 }, { "epoch": 3.726962457337884, "grad_norm": 0.2550167362316407, "learning_rate": 2.1373682658984317e-05, "loss": 0.2639, "loss_nan_ranks": 0, "loss_rank_avg": 0.0848623663187027, "step": 365, "valid_targets_mean": 4613.7, "valid_targets_min": 2456 }, { "epoch": 3.7781569965870307, "grad_norm": 0.2310804725665606, "learning_rate": 2.0865322587223855e-05, "loss": 0.2585, "loss_nan_ranks": 0, "loss_rank_avg": 0.08814995735883713, "step": 370, "valid_targets_mean": 5460.0, "valid_targets_min": 1805 }, { "epoch": 3.8293515358361776, "grad_norm": 0.23090177072633178, "learning_rate": 2.035640169479719e-05, "loss": 0.2617, "loss_nan_ranks": 0, "loss_rank_avg": 0.06490929424762726, "step": 375, "valid_targets_mean": 4068.9, "valid_targets_min": 1709 }, { "epoch": 3.8805460750853245, "grad_norm": 0.23838704123836477, "learning_rate": 1.9847249816332644e-05, "loss": 0.2679, "loss_nan_ranks": 0, "loss_rank_avg": 0.08089349418878555, "step": 380, "valid_targets_mean": 4778.1, "valid_targets_min": 349 }, { "epoch": 3.931740614334471, "grad_norm": 0.260605487192663, "learning_rate": 1.933819693616195e-05, "loss": 0.2529, "loss_nan_ranks": 0, "loss_rank_avg": 0.08289642632007599, "step": 385, "valid_targets_mean": 4828.9, "valid_targets_min": 2450 }, { "epoch": 3.9829351535836177, "grad_norm": 0.26648319115456115, "learning_rate": 1.8829572974455465e-05, "loss": 0.2689, "loss_nan_ranks": 0, "loss_rank_avg": 0.10124649107456207, "step": 390, "valid_targets_mean": 5940.3, "valid_targets_min": 2112 }, { "epoch": 4.030716723549488, "grad_norm": 0.25074836696819963, "learning_rate": 1.832170757339895e-05, "loss": 0.2607, "loss_nan_ranks": 0, "loss_rank_avg": 0.09088584035634995, "step": 395, "valid_targets_mean": 5790.5, "valid_targets_min": 1996 }, { "epoch": 4.081911262798635, "grad_norm": 0.23932382754020326, "learning_rate": 1.781492988355056e-05, "loss": 0.2591, "loss_nan_ranks": 0, "loss_rank_avg": 0.08872021734714508, "step": 400, "valid_targets_mean": 5202.1, "valid_targets_min": 443 }, { "epoch": 4.1331058020477816, "grad_norm": 0.24243405252900668, "learning_rate": 1.7309568350516376e-05, "loss": 0.254, "loss_nan_ranks": 0, "loss_rank_avg": 0.07671861350536346, "step": 405, "valid_targets_mean": 5088.6, "valid_targets_min": 2001 }, { "epoch": 4.184300341296928, "grad_norm": 0.24082013214852604, "learning_rate": 1.680595050208296e-05, "loss": 0.2549, "loss_nan_ranks": 0, "loss_rank_avg": 0.08671650290489197, "step": 410, "valid_targets_mean": 5681.1, "valid_targets_min": 2280 }, { "epoch": 4.235494880546075, "grad_norm": 0.2705939041983527, "learning_rate": 1.630440273594455e-05, "loss": 0.2647, "loss_nan_ranks": 0, "loss_rank_avg": 0.08631773293018341, "step": 415, "valid_targets_mean": 4975.5, "valid_targets_min": 446 }, { "epoch": 4.286689419795222, "grad_norm": 0.2647801079481516, "learning_rate": 1.5805250108162898e-05, "loss": 0.2493, "loss_nan_ranks": 0, "loss_rank_avg": 0.08514288067817688, "step": 420, "valid_targets_mean": 5125.4, "valid_targets_min": 2016 }, { "epoch": 4.337883959044369, "grad_norm": 0.24156132010147113, "learning_rate": 1.530881612249646e-05, "loss": 0.2581, "loss_nan_ranks": 0, "loss_rank_avg": 0.0757707729935646, "step": 425, "valid_targets_mean": 5020.7, "valid_targets_min": 2842 }, { "epoch": 4.389078498293515, "grad_norm": 0.24248555906848412, "learning_rate": 1.4815422520735735e-05, "loss": 0.2536, "loss_nan_ranks": 0, "loss_rank_avg": 0.07050696760416031, "step": 430, "valid_targets_mean": 4395.3, "valid_targets_min": 2210 }, { "epoch": 4.440273037542662, "grad_norm": 0.24171942435675195, "learning_rate": 1.432538907418047e-05, "loss": 0.2548, "loss_nan_ranks": 0, "loss_rank_avg": 0.08460260927677155, "step": 435, "valid_targets_mean": 5636.2, "valid_targets_min": 2623 }, { "epoch": 4.491467576791809, "grad_norm": 0.31663158719733203, "learning_rate": 1.3839033376394082e-05, "loss": 0.257, "loss_nan_ranks": 0, "loss_rank_avg": 0.08407416194677353, "step": 440, "valid_targets_mean": 4772.5, "valid_targets_min": 134 }, { "epoch": 4.5426621160409555, "grad_norm": 0.23417315461999244, "learning_rate": 1.33566706373693e-05, "loss": 0.2599, "loss_nan_ranks": 0, "loss_rank_avg": 0.08053170144557953, "step": 445, "valid_targets_mean": 5175.6, "valid_targets_min": 281 }, { "epoch": 4.593856655290102, "grad_norm": 0.24575926696014141, "learning_rate": 1.2878613479238774e-05, "loss": 0.2541, "loss_nan_ranks": 0, "loss_rank_avg": 0.08464860171079636, "step": 450, "valid_targets_mean": 5122.8, "valid_targets_min": 1882 }, { "epoch": 4.645051194539249, "grad_norm": 0.23884179028036426, "learning_rate": 1.2405171733662822e-05, "loss": 0.2519, "loss_nan_ranks": 0, "loss_rank_avg": 0.07469707727432251, "step": 455, "valid_targets_mean": 4802.0, "valid_targets_min": 1730 }, { "epoch": 4.696245733788396, "grad_norm": 0.2622151398636782, "learning_rate": 1.1936652241025679e-05, "loss": 0.2499, "loss_nan_ranks": 0, "loss_rank_avg": 0.07871793210506439, "step": 460, "valid_targets_mean": 5118.5, "valid_targets_min": 389 }, { "epoch": 4.747440273037543, "grad_norm": 0.24354224502364916, "learning_rate": 1.1473358651570479e-05, "loss": 0.2589, "loss_nan_ranks": 0, "loss_rank_avg": 0.08825745433568954, "step": 465, "valid_targets_mean": 5207.9, "valid_targets_min": 487 }, { "epoch": 4.798634812286689, "grad_norm": 0.26717838407309835, "learning_rate": 1.1015591228601692e-05, "loss": 0.2523, "loss_nan_ranks": 0, "loss_rank_avg": 0.09401947259902954, "step": 470, "valid_targets_mean": 5151.4, "valid_targets_min": 2040 }, { "epoch": 4.849829351535837, "grad_norm": 0.6288828196111563, "learning_rate": 1.0563646653882755e-05, "loss": 0.2543, "loss_nan_ranks": 0, "loss_rank_avg": 0.08458419144153595, "step": 475, "valid_targets_mean": 5222.2, "valid_targets_min": 2375 }, { "epoch": 4.901023890784983, "grad_norm": 0.2388380799823632, "learning_rate": 1.0117817835354851e-05, "loss": 0.2545, "loss_nan_ranks": 0, "loss_rank_avg": 0.10258987545967102, "step": 480, "valid_targets_mean": 5773.7, "valid_targets_min": 273 }, { "epoch": 4.952218430034129, "grad_norm": 0.23524178853837582, "learning_rate": 9.678393717301526e-06, "loss": 0.2609, "loss_nan_ranks": 0, "loss_rank_avg": 0.08160092681646347, "step": 485, "valid_targets_mean": 4906.0, "valid_targets_min": 2117 }, { "epoch": 5.0, "grad_norm": 0.2815721527054231, "learning_rate": 9.245659093082243e-06, "loss": 0.2541, "loss_nan_ranks": 0, "loss_rank_avg": 0.11172492057085037, "step": 490, "valid_targets_mean": 4392.0, "valid_targets_min": 519 }, { "epoch": 5.051194539249146, "grad_norm": 0.23545881110678396, "learning_rate": 8.819894420556112e-06, "loss": 0.246, "loss_nan_ranks": 0, "loss_rank_avg": 0.09946560859680176, "step": 495, "valid_targets_mean": 5775.0, "valid_targets_min": 1809 }, { "epoch": 5.102389078498294, "grad_norm": 0.2332984410262101, "learning_rate": 8.40137564031547e-06, "loss": 0.2511, "loss_nan_ranks": 0, "loss_rank_avg": 0.08911414444446564, "step": 500, "valid_targets_mean": 4858.6, "valid_targets_min": 424 }, { "epoch": 5.15358361774744, "grad_norm": 0.2406516993513017, "learning_rate": 7.990373996847194e-06, "loss": 0.2511, "loss_nan_ranks": 0, "loss_rank_avg": 0.0798487663269043, "step": 505, "valid_targets_mean": 5106.6, "valid_targets_min": 414 }, { "epoch": 5.204778156996587, "grad_norm": 0.25632115440474484, "learning_rate": 7.5871558627375295e-06, "loss": 0.2481, "loss_nan_ranks": 0, "loss_rank_avg": 0.08686481416225433, "step": 510, "valid_targets_mean": 4846.7, "valid_targets_min": 492 }, { "epoch": 5.255972696245734, "grad_norm": 0.23873965803262287, "learning_rate": 7.1919825660344696e-06, "loss": 0.255, "loss_nan_ranks": 0, "loss_rank_avg": 0.08521182835102081, "step": 515, "valid_targets_mean": 5219.3, "valid_targets_min": 1881 }, { "epoch": 5.30716723549488, "grad_norm": 0.23815246828270364, "learning_rate": 6.805110220879459e-06, "loss": 0.2543, "loss_nan_ranks": 0, "loss_rank_avg": 0.07839064300060272, "step": 520, "valid_targets_mean": 4840.9, "valid_targets_min": 2536 }, { "epoch": 5.3583617747440275, "grad_norm": 0.2975224705505978, "learning_rate": 6.4267895615183915e-06, "loss": 0.2547, "loss_nan_ranks": 0, "loss_rank_avg": 0.08646044880151749, "step": 525, "valid_targets_mean": 4866.6, "valid_targets_min": 497 }, { "epoch": 5.409556313993174, "grad_norm": 0.23640902606037387, "learning_rate": 6.057265779799193e-06, "loss": 0.2553, "loss_nan_ranks": 0, "loss_rank_avg": 0.10723152756690979, "step": 530, "valid_targets_mean": 5736.0, "valid_targets_min": 299 }, { "epoch": 5.460750853242321, "grad_norm": 0.2307913484720284, "learning_rate": 5.696778366261575e-06, "loss": 0.2459, "loss_nan_ranks": 0, "loss_rank_avg": 0.08867950737476349, "step": 535, "valid_targets_mean": 5571.2, "valid_targets_min": 1468 }, { "epoch": 5.511945392491468, "grad_norm": 0.23315915412363156, "learning_rate": 5.345560954921802e-06, "loss": 0.2492, "loss_nan_ranks": 0, "loss_rank_avg": 0.08251780271530151, "step": 540, "valid_targets_mean": 5138.9, "valid_targets_min": 428 }, { "epoch": 5.563139931740614, "grad_norm": 0.22322887600839503, "learning_rate": 5.00384117185311e-06, "loss": 0.2506, "loss_nan_ranks": 0, "loss_rank_avg": 0.09144619107246399, "step": 545, "valid_targets_mean": 5855.6, "valid_targets_min": 1850 }, { "epoch": 5.614334470989761, "grad_norm": 0.24296113504336472, "learning_rate": 4.671840487659882e-06, "loss": 0.2509, "loss_nan_ranks": 0, "loss_rank_avg": 0.09224293380975723, "step": 550, "valid_targets_mean": 6035.7, "valid_targets_min": 2794 }, { "epoch": 5.665529010238908, "grad_norm": 0.2501891462512511, "learning_rate": 4.3497740739413015e-06, "loss": 0.2492, "loss_nan_ranks": 0, "loss_rank_avg": 0.07294757664203644, "step": 555, "valid_targets_mean": 4596.5, "valid_targets_min": 453 }, { "epoch": 5.716723549488055, "grad_norm": 0.23660123212684395, "learning_rate": 4.037850663837315e-06, "loss": 0.2534, "loss_nan_ranks": 0, "loss_rank_avg": 0.07551420480012894, "step": 560, "valid_targets_mean": 4695.0, "valid_targets_min": 1834 }, { "epoch": 5.7679180887372015, "grad_norm": 0.2298824544771145, "learning_rate": 3.7362724167474774e-06, "loss": 0.2558, "loss_nan_ranks": 0, "loss_rank_avg": 0.0779377892613411, "step": 565, "valid_targets_mean": 4954.0, "valid_targets_min": 2135 }, { "epoch": 5.819112627986348, "grad_norm": 0.21382794578335707, "learning_rate": 3.4452347873102565e-06, "loss": 0.2451, "loss_nan_ranks": 0, "loss_rank_avg": 0.07789836078882217, "step": 570, "valid_targets_mean": 5438.0, "valid_targets_min": 2658 }, { "epoch": 5.870307167235495, "grad_norm": 0.21625982549299644, "learning_rate": 3.1649263987277303e-06, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.0796944797039032, "step": 575, "valid_targets_mean": 5126.0, "valid_targets_min": 1466 }, { "epoch": 5.921501706484642, "grad_norm": 0.21966123329170228, "learning_rate": 2.8955289205177696e-06, "loss": 0.2462, "loss_nan_ranks": 0, "loss_rank_avg": 0.08940281718969345, "step": 580, "valid_targets_mean": 5365.1, "valid_targets_min": 542 }, { "epoch": 5.972696245733788, "grad_norm": 0.23264769437080063, "learning_rate": 2.6372169507729627e-06, "loss": 0.2588, "loss_nan_ranks": 0, "loss_rank_avg": 0.08277872949838638, "step": 585, "valid_targets_mean": 5623.2, "valid_targets_min": 2509 }, { "epoch": 6.020477815699659, "grad_norm": 0.23969753992511295, "learning_rate": 2.3901579030025566e-06, "loss": 0.2553, "loss_nan_ranks": 0, "loss_rank_avg": 0.0817054808139801, "step": 590, "valid_targets_mean": 5169.0, "valid_targets_min": 2446 }, { "epoch": 6.071672354948806, "grad_norm": 0.235094543609173, "learning_rate": 2.15451189763078e-06, "loss": 0.2474, "loss_nan_ranks": 0, "loss_rank_avg": 0.09182994067668915, "step": 595, "valid_targets_mean": 5807.4, "valid_targets_min": 2348 }, { "epoch": 6.122866894197952, "grad_norm": 0.2427230093710648, "learning_rate": 1.930431658221854e-06, "loss": 0.2486, "loss_nan_ranks": 0, "loss_rank_avg": 0.08008411526679993, "step": 600, "valid_targets_mean": 4879.1, "valid_targets_min": 446 }, { "epoch": 6.174061433447099, "grad_norm": 0.23142147399386198, "learning_rate": 1.7180624124989398e-06, "loss": 0.2543, "loss_nan_ranks": 0, "loss_rank_avg": 0.0793958306312561, "step": 605, "valid_targets_mean": 5092.9, "valid_targets_min": 2197 }, { "epoch": 6.225255972696246, "grad_norm": 0.2280686494846546, "learning_rate": 1.5175417982212138e-06, "loss": 0.2475, "loss_nan_ranks": 0, "loss_rank_avg": 0.07384900748729706, "step": 610, "valid_targets_mean": 4928.0, "valid_targets_min": 1860 }, { "epoch": 6.276450511945392, "grad_norm": 0.24929297314875484, "learning_rate": 1.3289997739800108e-06, "loss": 0.2481, "loss_nan_ranks": 0, "loss_rank_avg": 0.0833367109298706, "step": 615, "valid_targets_mean": 5088.8, "valid_targets_min": 2764 }, { "epoch": 6.327645051194539, "grad_norm": 0.22000964241336962, "learning_rate": 1.1525585349718948e-06, "loss": 0.2465, "loss_nan_ranks": 0, "loss_rank_avg": 0.07332947105169296, "step": 620, "valid_targets_mean": 4677.8, "valid_targets_min": 220 }, { "epoch": 6.378839590443686, "grad_norm": 0.21275459726601303, "learning_rate": 9.883324338032474e-07, "loss": 0.2525, "loss_nan_ranks": 0, "loss_rank_avg": 0.07395243644714355, "step": 625, "valid_targets_mean": 5172.9, "valid_targets_min": 2763 }, { "epoch": 6.4300341296928325, "grad_norm": 0.22175862203200505, "learning_rate": 8.364279063776526e-07, "loss": 0.2458, "loss_nan_ranks": 0, "loss_rank_avg": 0.0932476595044136, "step": 630, "valid_targets_mean": 5806.0, "valid_targets_min": 2569 }, { "epoch": 6.48122866894198, "grad_norm": 0.2230388965834993, "learning_rate": 6.969434029141676e-07, "loss": 0.2456, "loss_nan_ranks": 0, "loss_rank_avg": 0.07441005110740662, "step": 635, "valid_targets_mean": 4756.5, "valid_targets_min": 2402 }, { "epoch": 6.532423208191126, "grad_norm": 0.349252390093201, "learning_rate": 5.699693241411619e-07, "loss": 0.248, "loss_nan_ranks": 0, "loss_rank_avg": 0.09055956453084946, "step": 640, "valid_targets_mean": 5269.7, "valid_targets_min": 222 }, { "epoch": 6.5836177474402735, "grad_norm": 0.22602023572874996, "learning_rate": 4.5558796270706254e-07, "loss": 0.2469, "loss_nan_ranks": 0, "loss_rank_avg": 0.09014368057250977, "step": 645, "valid_targets_mean": 5677.8, "valid_targets_min": 2555 }, { "epoch": 6.63481228668942, "grad_norm": 0.24090264289449, "learning_rate": 3.5387344984600946e-07, "loss": 0.2454, "loss_nan_ranks": 0, "loss_rank_avg": 0.08535467833280563, "step": 650, "valid_targets_mean": 4994.6, "valid_targets_min": 2472 }, { "epoch": 6.686006825938566, "grad_norm": 0.20794321625955012, "learning_rate": 2.64891707332966e-07, "loss": 0.2494, "loss_nan_ranks": 0, "loss_rank_avg": 0.09163576364517212, "step": 655, "valid_targets_mean": 5890.2, "valid_targets_min": 2276 }, { "epoch": 6.737201365187714, "grad_norm": 0.22007475356230136, "learning_rate": 1.887004047594232e-07, "loss": 0.239, "loss_nan_ranks": 0, "loss_rank_avg": 0.08739501237869263, "step": 660, "valid_targets_mean": 5521.7, "valid_targets_min": 1730 }, { "epoch": 6.78839590443686, "grad_norm": 0.25286299855724226, "learning_rate": 1.2534892215740667e-07, "loss": 0.2517, "loss_nan_ranks": 0, "loss_rank_avg": 0.10239039361476898, "step": 665, "valid_targets_mean": 5944.1, "valid_targets_min": 393 }, { "epoch": 6.839590443686006, "grad_norm": 0.24536425918506533, "learning_rate": 7.487831799597889e-08, "loss": 0.2463, "loss_nan_ranks": 0, "loss_rank_avg": 0.08520665764808655, "step": 670, "valid_targets_mean": 5079.4, "valid_targets_min": 1871 }, { "epoch": 6.890784982935154, "grad_norm": 0.22126404698942398, "learning_rate": 3.73213025710073e-08, "loss": 0.2511, "loss_nan_ranks": 0, "loss_rank_avg": 0.08079634606838226, "step": 675, "valid_targets_mean": 5323.9, "valid_targets_min": 1875 }, { "epoch": 6.9419795221843, "grad_norm": 0.23165959369746986, "learning_rate": 1.2702216805431377e-08, "loss": 0.257, "loss_nan_ranks": 0, "loss_rank_avg": 0.08266259729862213, "step": 680, "valid_targets_mean": 5277.4, "valid_targets_min": 439 }, { "epoch": 6.993174061433447, "grad_norm": 0.2427793642183869, "learning_rate": 1.037016473757202e-09, "loss": 0.2553, "loss_nan_ranks": 0, "loss_rank_avg": 0.08131192624568939, "step": 685, "valid_targets_mean": 5002.9, "valid_targets_min": 281 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.11108480393886566, "step": 686, "total_flos": 2.601520842818978e+18, "train_loss": 0.2925965665424183, "train_runtime": 15951.2631, "train_samples_per_second": 4.106, "train_steps_per_second": 0.043, "valid_targets_mean": 4441.2, "valid_targets_min": 1797 } ], "logging_steps": 5, "max_steps": 686, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.601520842818978e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }