{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 728, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04823151125401929, "grad_norm": 12.448248888949518, "learning_rate": 2.191780821917808e-06, "loss": 1.0168, "loss_nan_ranks": 0, "loss_rank_avg": 0.3392058312892914, "step": 5, "valid_targets_mean": 7154.5, "valid_targets_min": 1632 }, { "epoch": 0.09646302250803858, "grad_norm": 7.284065798181604, "learning_rate": 4.931506849315069e-06, "loss": 0.9698, "loss_nan_ranks": 0, "loss_rank_avg": 0.2997957468032837, "step": 10, "valid_targets_mean": 6839.2, "valid_targets_min": 2692 }, { "epoch": 0.14469453376205788, "grad_norm": 2.616202122732068, "learning_rate": 7.671232876712329e-06, "loss": 0.8828, "loss_nan_ranks": 0, "loss_rank_avg": 0.28220462799072266, "step": 15, "valid_targets_mean": 6918.6, "valid_targets_min": 2206 }, { "epoch": 0.19292604501607716, "grad_norm": 1.699025397345384, "learning_rate": 1.0410958904109589e-05, "loss": 0.8296, "loss_nan_ranks": 0, "loss_rank_avg": 0.2794211804866791, "step": 20, "valid_targets_mean": 7609.6, "valid_targets_min": 2545 }, { "epoch": 0.24115755627009647, "grad_norm": 1.290348718584449, "learning_rate": 1.3150684931506849e-05, "loss": 0.7939, "loss_nan_ranks": 0, "loss_rank_avg": 0.2636311650276184, "step": 25, "valid_targets_mean": 7392.3, "valid_targets_min": 2631 }, { "epoch": 0.28938906752411575, "grad_norm": 0.8749707786912787, "learning_rate": 1.589041095890411e-05, "loss": 0.7572, "loss_nan_ranks": 0, "loss_rank_avg": 0.24677854776382446, "step": 30, "valid_targets_mean": 7459.0, "valid_targets_min": 2419 }, { "epoch": 0.33762057877813506, "grad_norm": 0.7052716822140082, "learning_rate": 1.863013698630137e-05, "loss": 0.7285, "loss_nan_ranks": 0, "loss_rank_avg": 0.24943570792675018, "step": 35, "valid_targets_mean": 7053.0, "valid_targets_min": 2482 }, { "epoch": 0.3858520900321543, "grad_norm": 0.5008032218431284, "learning_rate": 2.1369863013698632e-05, "loss": 0.6917, "loss_nan_ranks": 0, "loss_rank_avg": 0.21115031838417053, "step": 40, "valid_targets_mean": 6766.8, "valid_targets_min": 2431 }, { "epoch": 0.4340836012861736, "grad_norm": 0.43138890795771023, "learning_rate": 2.410958904109589e-05, "loss": 0.6598, "loss_nan_ranks": 0, "loss_rank_avg": 0.1905011534690857, "step": 45, "valid_targets_mean": 6303.5, "valid_targets_min": 2602 }, { "epoch": 0.48231511254019294, "grad_norm": 0.3849831894181003, "learning_rate": 2.6849315068493153e-05, "loss": 0.6235, "loss_nan_ranks": 0, "loss_rank_avg": 0.19090837240219116, "step": 50, "valid_targets_mean": 6993.3, "valid_targets_min": 2422 }, { "epoch": 0.5305466237942122, "grad_norm": 0.35114545580046064, "learning_rate": 2.958904109589041e-05, "loss": 0.6019, "loss_nan_ranks": 0, "loss_rank_avg": 0.207503080368042, "step": 55, "valid_targets_mean": 7105.0, "valid_targets_min": 2354 }, { "epoch": 0.5787781350482315, "grad_norm": 0.3193959336368111, "learning_rate": 3.2328767123287676e-05, "loss": 0.5868, "loss_nan_ranks": 0, "loss_rank_avg": 0.18717698752880096, "step": 60, "valid_targets_mean": 6514.3, "valid_targets_min": 1749 }, { "epoch": 0.6270096463022508, "grad_norm": 0.2918586604555415, "learning_rate": 3.506849315068493e-05, "loss": 0.5745, "loss_nan_ranks": 0, "loss_rank_avg": 0.19028222560882568, "step": 65, "valid_targets_mean": 7214.1, "valid_targets_min": 2845 }, { "epoch": 0.6752411575562701, "grad_norm": 0.32462856610800417, "learning_rate": 3.780821917808219e-05, "loss": 0.5558, "loss_nan_ranks": 0, "loss_rank_avg": 0.1807335764169693, "step": 70, "valid_targets_mean": 6766.1, "valid_targets_min": 1311 }, { "epoch": 0.7234726688102894, "grad_norm": 0.3108931341927116, "learning_rate": 3.999976995313839e-05, "loss": 0.5459, "loss_nan_ranks": 0, "loss_rank_avg": 0.1809534728527069, "step": 75, "valid_targets_mean": 7225.8, "valid_targets_min": 2735 }, { "epoch": 0.7717041800643086, "grad_norm": 0.30171119245949446, "learning_rate": 3.999171886864457e-05, "loss": 0.5318, "loss_nan_ranks": 0, "loss_rank_avg": 0.1711938977241516, "step": 80, "valid_targets_mean": 6957.0, "valid_targets_min": 2492 }, { "epoch": 0.819935691318328, "grad_norm": 0.3164513581760958, "learning_rate": 3.997217073267859e-05, "loss": 0.5179, "loss_nan_ranks": 0, "loss_rank_avg": 0.16384953260421753, "step": 85, "valid_targets_mean": 6966.7, "valid_targets_min": 2862 }, { "epoch": 0.8681672025723473, "grad_norm": 0.30738984615139586, "learning_rate": 3.9941136787191535e-05, "loss": 0.5178, "loss_nan_ranks": 0, "loss_rank_avg": 0.18157875537872314, "step": 90, "valid_targets_mean": 7276.3, "valid_targets_min": 2704 }, { "epoch": 0.9163987138263665, "grad_norm": 0.323295425342175, "learning_rate": 3.989863487951665e-05, "loss": 0.5085, "loss_nan_ranks": 0, "loss_rank_avg": 0.17602333426475525, "step": 95, "valid_targets_mean": 8148.8, "valid_targets_min": 4040 }, { "epoch": 0.9646302250803859, "grad_norm": 0.3421041448009979, "learning_rate": 3.984468945210548e-05, "loss": 0.5051, "loss_nan_ranks": 0, "loss_rank_avg": 0.15274131298065186, "step": 100, "valid_targets_mean": 6615.2, "valid_targets_min": 1450 }, { "epoch": 1.0096463022508038, "grad_norm": 0.30441217320888314, "learning_rate": 3.977933152847132e-05, "loss": 0.4967, "loss_nan_ranks": 0, "loss_rank_avg": 0.1587449163198471, "step": 105, "valid_targets_mean": 6899.7, "valid_targets_min": 1658 }, { "epoch": 1.0578778135048232, "grad_norm": 0.3011845985321456, "learning_rate": 3.9702598695347794e-05, "loss": 0.4907, "loss_nan_ranks": 0, "loss_rank_avg": 0.16292162239551544, "step": 110, "valid_targets_mean": 7234.9, "valid_targets_min": 2692 }, { "epoch": 1.1061093247588425, "grad_norm": 0.3049687116327277, "learning_rate": 3.961453508107314e-05, "loss": 0.4852, "loss_nan_ranks": 0, "loss_rank_avg": 0.14895367622375488, "step": 115, "valid_targets_mean": 6615.0, "valid_targets_min": 2490 }, { "epoch": 1.1543408360128617, "grad_norm": 0.3769508662707374, "learning_rate": 3.951519133021237e-05, "loss": 0.4785, "loss_nan_ranks": 0, "loss_rank_avg": 0.1452217400074005, "step": 120, "valid_targets_mean": 6637.8, "valid_targets_min": 1997 }, { "epoch": 1.202572347266881, "grad_norm": 0.3181671579805256, "learning_rate": 3.94046245744321e-05, "loss": 0.4809, "loss_nan_ranks": 0, "loss_rank_avg": 0.16087602078914642, "step": 125, "valid_targets_mean": 7021.9, "valid_targets_min": 1534 }, { "epoch": 1.2508038585209003, "grad_norm": 0.3207028910807402, "learning_rate": 3.928289839964459e-05, "loss": 0.4764, "loss_nan_ranks": 0, "loss_rank_avg": 0.1502930223941803, "step": 130, "valid_targets_mean": 7277.9, "valid_targets_min": 1007 }, { "epoch": 1.2990353697749195, "grad_norm": 0.32515712076357406, "learning_rate": 3.915008280944014e-05, "loss": 0.4767, "loss_nan_ranks": 0, "loss_rank_avg": 0.1590278148651123, "step": 135, "valid_targets_mean": 7355.5, "valid_targets_min": 2678 }, { "epoch": 1.347266881028939, "grad_norm": 0.2998255755095527, "learning_rate": 3.900625418482867e-05, "loss": 0.4751, "loss_nan_ranks": 0, "loss_rank_avg": 0.16431793570518494, "step": 140, "valid_targets_mean": 8011.2, "valid_targets_min": 1749 }, { "epoch": 1.3954983922829582, "grad_norm": 0.3085173284169989, "learning_rate": 3.885149524031366e-05, "loss": 0.4704, "loss_nan_ranks": 0, "loss_rank_avg": 0.14795272052288055, "step": 145, "valid_targets_mean": 6751.4, "valid_targets_min": 2326 }, { "epoch": 1.4437299035369775, "grad_norm": 0.3401175885992734, "learning_rate": 3.868589497632388e-05, "loss": 0.4664, "loss_nan_ranks": 0, "loss_rank_avg": 0.15111419558525085, "step": 150, "valid_targets_mean": 6837.7, "valid_targets_min": 2630 }, { "epoch": 1.4919614147909968, "grad_norm": 0.3862018866759018, "learning_rate": 3.850954862803001e-05, "loss": 0.4595, "loss_nan_ranks": 0, "loss_rank_avg": 0.15462824702262878, "step": 155, "valid_targets_mean": 7058.6, "valid_targets_min": 2845 }, { "epoch": 1.540192926045016, "grad_norm": 0.30996367649676787, "learning_rate": 3.8322557610575826e-05, "loss": 0.4695, "loss_nan_ranks": 0, "loss_rank_avg": 0.1570480465888977, "step": 160, "valid_targets_mean": 7273.3, "valid_targets_min": 2307 }, { "epoch": 1.5884244372990355, "grad_norm": 0.3412824337507056, "learning_rate": 3.812502946075527e-05, "loss": 0.4623, "loss_nan_ranks": 0, "loss_rank_avg": 0.16225996613502502, "step": 165, "valid_targets_mean": 7718.8, "valid_targets_min": 1085 }, { "epoch": 1.6366559485530545, "grad_norm": 0.3077515658368193, "learning_rate": 3.791707777516904e-05, "loss": 0.4611, "loss_nan_ranks": 0, "loss_rank_avg": 0.14422276616096497, "step": 170, "valid_targets_mean": 6652.2, "valid_targets_min": 2034 }, { "epoch": 1.684887459807074, "grad_norm": 0.33129820149764216, "learning_rate": 3.769882214489626e-05, "loss": 0.4593, "loss_nan_ranks": 0, "loss_rank_avg": 0.15183745324611664, "step": 175, "valid_targets_mean": 6814.9, "valid_targets_min": 1305 }, { "epoch": 1.7331189710610932, "grad_norm": 0.34548902207170956, "learning_rate": 3.7470388086718745e-05, "loss": 0.457, "loss_nan_ranks": 0, "loss_rank_avg": 0.1502872258424759, "step": 180, "valid_targets_mean": 6735.3, "valid_targets_min": 1814 }, { "epoch": 1.7813504823151125, "grad_norm": 0.3328552197812335, "learning_rate": 3.7231906970937464e-05, "loss": 0.4552, "loss_nan_ranks": 0, "loss_rank_avg": 0.15311968326568604, "step": 185, "valid_targets_mean": 7155.6, "valid_targets_min": 3210 }, { "epoch": 1.829581993569132, "grad_norm": 0.31935109111420523, "learning_rate": 3.6983515945822736e-05, "loss": 0.4525, "loss_nan_ranks": 0, "loss_rank_avg": 0.1549065113067627, "step": 190, "valid_targets_mean": 7431.0, "valid_targets_min": 2888 }, { "epoch": 1.877813504823151, "grad_norm": 0.3326363124007988, "learning_rate": 3.672535785874148e-05, "loss": 0.4459, "loss_nan_ranks": 0, "loss_rank_avg": 0.1505606472492218, "step": 195, "valid_targets_mean": 6710.3, "valid_targets_min": 2333 }, { "epoch": 1.9260450160771705, "grad_norm": 0.28380996162159844, "learning_rate": 3.64575811740071e-05, "loss": 0.4481, "loss_nan_ranks": 0, "loss_rank_avg": 0.1508420705795288, "step": 200, "valid_targets_mean": 6827.8, "valid_targets_min": 2018 }, { "epoch": 1.9742765273311897, "grad_norm": 0.30735752308501874, "learning_rate": 3.6180339887498953e-05, "loss": 0.4473, "loss_nan_ranks": 0, "loss_rank_avg": 0.1658451110124588, "step": 205, "valid_targets_mean": 7271.1, "valid_targets_min": 1928 }, { "epoch": 2.0192926045016075, "grad_norm": 0.37189914099274535, "learning_rate": 3.589379343810083e-05, "loss": 0.445, "loss_nan_ranks": 0, "loss_rank_avg": 0.1301526576280594, "step": 210, "valid_targets_mean": 6578.2, "valid_targets_min": 1877 }, { "epoch": 2.067524115755627, "grad_norm": 0.3977228836472767, "learning_rate": 3.559810661600907e-05, "loss": 0.4391, "loss_nan_ranks": 0, "loss_rank_avg": 0.14161312580108643, "step": 215, "valid_targets_mean": 6960.3, "valid_targets_min": 1300 }, { "epoch": 2.1157556270096465, "grad_norm": 0.37183924478533426, "learning_rate": 3.529344946796333e-05, "loss": 0.4439, "loss_nan_ranks": 0, "loss_rank_avg": 0.15209510922431946, "step": 220, "valid_targets_mean": 7089.7, "valid_targets_min": 3873 }, { "epoch": 2.1639871382636655, "grad_norm": 0.3408414017109924, "learning_rate": 3.4979997199454195e-05, "loss": 0.4375, "loss_nan_ranks": 0, "loss_rank_avg": 0.15320946276187897, "step": 225, "valid_targets_mean": 7269.2, "valid_targets_min": 2639 }, { "epoch": 2.212218649517685, "grad_norm": 0.30731758497614714, "learning_rate": 3.465793007396421e-05, "loss": 0.4348, "loss_nan_ranks": 0, "loss_rank_avg": 0.14076238870620728, "step": 230, "valid_targets_mean": 6917.7, "valid_targets_min": 1785 }, { "epoch": 2.260450160771704, "grad_norm": 0.372487228314555, "learning_rate": 3.4327433309299986e-05, "loss": 0.4405, "loss_nan_ranks": 0, "loss_rank_avg": 0.14385536313056946, "step": 235, "valid_targets_mean": 6961.8, "valid_targets_min": 2545 }, { "epoch": 2.3086816720257235, "grad_norm": 0.3312561822173933, "learning_rate": 3.398869697107517e-05, "loss": 0.4402, "loss_nan_ranks": 0, "loss_rank_avg": 0.144433856010437, "step": 240, "valid_targets_mean": 6751.5, "valid_targets_min": 2018 }, { "epoch": 2.356913183279743, "grad_norm": 0.35314218176188583, "learning_rate": 3.3641915863405486e-05, "loss": 0.4427, "loss_nan_ranks": 0, "loss_rank_avg": 0.13870275020599365, "step": 245, "valid_targets_mean": 7063.4, "valid_targets_min": 2156 }, { "epoch": 2.405144694533762, "grad_norm": 0.2725162697916855, "learning_rate": 3.328728941687871e-05, "loss": 0.4288, "loss_nan_ranks": 0, "loss_rank_avg": 0.14645615220069885, "step": 250, "valid_targets_mean": 7152.3, "valid_targets_min": 2078 }, { "epoch": 2.4533762057877815, "grad_norm": 0.30613274466141416, "learning_rate": 3.292502157386397e-05, "loss": 0.4357, "loss_nan_ranks": 0, "loss_rank_avg": 0.1441894918680191, "step": 255, "valid_targets_mean": 7401.5, "valid_targets_min": 2283 }, { "epoch": 2.5016077170418005, "grad_norm": 0.30760789389816223, "learning_rate": 3.2555320671226405e-05, "loss": 0.4327, "loss_nan_ranks": 0, "loss_rank_avg": 0.14768892526626587, "step": 260, "valid_targets_mean": 6794.9, "valid_targets_min": 1650 }, { "epoch": 2.54983922829582, "grad_norm": 0.29539485332618615, "learning_rate": 3.217839932051457e-05, "loss": 0.4325, "loss_nan_ranks": 0, "loss_rank_avg": 0.1291293054819107, "step": 265, "valid_targets_mean": 6756.1, "valid_targets_min": 2595 }, { "epoch": 2.598070739549839, "grad_norm": 0.2844011979442231, "learning_rate": 3.179447428568952e-05, "loss": 0.4306, "loss_nan_ranks": 0, "loss_rank_avg": 0.13877539336681366, "step": 270, "valid_targets_mean": 6815.4, "valid_targets_min": 2346 }, { "epoch": 2.6463022508038585, "grad_norm": 0.2939840972561895, "learning_rate": 3.1403766358465833e-05, "loss": 0.4321, "loss_nan_ranks": 0, "loss_rank_avg": 0.14215871691703796, "step": 275, "valid_targets_mean": 6699.9, "valid_targets_min": 2662 }, { "epoch": 2.694533762057878, "grad_norm": 0.33704883535034413, "learning_rate": 3.100650023133643e-05, "loss": 0.4293, "loss_nan_ranks": 0, "loss_rank_avg": 0.15095305442810059, "step": 280, "valid_targets_mean": 6936.8, "valid_targets_min": 3262 }, { "epoch": 2.742765273311897, "grad_norm": 0.30435858181327363, "learning_rate": 3.060290436835392e-05, "loss": 0.434, "loss_nan_ranks": 0, "loss_rank_avg": 0.14832210540771484, "step": 285, "valid_targets_mean": 7377.8, "valid_targets_min": 3814 }, { "epoch": 2.7909967845659165, "grad_norm": 0.3038922987615049, "learning_rate": 3.019321087374313e-05, "loss": 0.4355, "loss_nan_ranks": 0, "loss_rank_avg": 0.1452721804380417, "step": 290, "valid_targets_mean": 7521.8, "valid_targets_min": 3620 }, { "epoch": 2.839228295819936, "grad_norm": 0.2917713716711792, "learning_rate": 2.977765535842007e-05, "loss": 0.4329, "loss_nan_ranks": 0, "loss_rank_avg": 0.14556115865707397, "step": 295, "valid_targets_mean": 7321.2, "valid_targets_min": 2823 }, { "epoch": 2.887459807073955, "grad_norm": 0.29939071791258476, "learning_rate": 2.9356476804494306e-05, "loss": 0.4286, "loss_nan_ranks": 0, "loss_rank_avg": 0.14382721483707428, "step": 300, "valid_targets_mean": 7452.7, "valid_targets_min": 3738 }, { "epoch": 2.935691318327974, "grad_norm": 0.31181832384197283, "learning_rate": 2.892991742783259e-05, "loss": 0.4296, "loss_nan_ranks": 0, "loss_rank_avg": 0.13677389919757843, "step": 305, "valid_targets_mean": 6679.7, "valid_targets_min": 1975 }, { "epoch": 2.9839228295819935, "grad_norm": 0.29910249575281106, "learning_rate": 2.8498222538762737e-05, "loss": 0.4283, "loss_nan_ranks": 0, "loss_rank_avg": 0.1349104940891266, "step": 310, "valid_targets_mean": 7041.6, "valid_targets_min": 1534 }, { "epoch": 3.0289389067524115, "grad_norm": 0.30659296462604446, "learning_rate": 2.8061640400997966e-05, "loss": 0.4285, "loss_nan_ranks": 0, "loss_rank_avg": 0.12650442123413086, "step": 315, "valid_targets_mean": 6438.3, "valid_targets_min": 1711 }, { "epoch": 3.077170418006431, "grad_norm": 0.31032562331811075, "learning_rate": 2.7620422088862736e-05, "loss": 0.4192, "loss_nan_ranks": 0, "loss_rank_avg": 0.13721013069152832, "step": 320, "valid_targets_mean": 6582.1, "valid_targets_min": 2016 }, { "epoch": 3.12540192926045, "grad_norm": 0.298343154549212, "learning_rate": 2.7174821342902234e-05, "loss": 0.4203, "loss_nan_ranks": 0, "loss_rank_avg": 0.1312987208366394, "step": 325, "valid_targets_mean": 6630.8, "valid_targets_min": 1708 }, { "epoch": 3.1736334405144695, "grad_norm": 0.32270907072518445, "learning_rate": 2.6725094423958574e-05, "loss": 0.4257, "loss_nan_ranks": 0, "loss_rank_avg": 0.12568901479244232, "step": 330, "valid_targets_mean": 6373.9, "valid_targets_min": 2413 }, { "epoch": 3.221864951768489, "grad_norm": 0.33685989702154473, "learning_rate": 2.6271499965797532e-05, "loss": 0.4209, "loss_nan_ranks": 0, "loss_rank_avg": 0.14049115777015686, "step": 335, "valid_targets_mean": 6983.8, "valid_targets_min": 1906 }, { "epoch": 3.270096463022508, "grad_norm": 0.3224358346310272, "learning_rate": 2.5814298826370702e-05, "loss": 0.4234, "loss_nan_ranks": 0, "loss_rank_avg": 0.1452215611934662, "step": 340, "valid_targets_mean": 7346.3, "valid_targets_min": 1632 }, { "epoch": 3.3183279742765275, "grad_norm": 0.3190857528054468, "learning_rate": 2.5353753937798527e-05, "loss": 0.4197, "loss_nan_ranks": 0, "loss_rank_avg": 0.14950251579284668, "step": 345, "valid_targets_mean": 7413.2, "valid_targets_min": 3670 }, { "epoch": 3.3665594855305465, "grad_norm": 0.27960704518791396, "learning_rate": 2.4890130155160427e-05, "loss": 0.4199, "loss_nan_ranks": 0, "loss_rank_avg": 0.14225219190120697, "step": 350, "valid_targets_mean": 7716.9, "valid_targets_min": 2063 }, { "epoch": 3.414790996784566, "grad_norm": 0.2806138098599385, "learning_rate": 2.4423694104179176e-05, "loss": 0.4195, "loss_nan_ranks": 0, "loss_rank_avg": 0.14240960776805878, "step": 355, "valid_targets_mean": 6942.1, "valid_targets_min": 2201 }, { "epoch": 3.463022508038585, "grad_norm": 0.3184400082207043, "learning_rate": 2.3954714027886904e-05, "loss": 0.4137, "loss_nan_ranks": 0, "loss_rank_avg": 0.14079201221466064, "step": 360, "valid_targets_mean": 7222.4, "valid_targets_min": 2599 }, { "epoch": 3.5112540192926045, "grad_norm": 0.3007136487008627, "learning_rate": 2.3483459632361e-05, "loss": 0.4151, "loss_nan_ranks": 0, "loss_rank_avg": 0.13650323450565338, "step": 365, "valid_targets_mean": 6989.0, "valid_targets_min": 3489 }, { "epoch": 3.559485530546624, "grad_norm": 0.3265478005060877, "learning_rate": 2.3010201931618696e-05, "loss": 0.4159, "loss_nan_ranks": 0, "loss_rank_avg": 0.1319654881954193, "step": 370, "valid_targets_mean": 7079.9, "valid_targets_min": 2563 }, { "epoch": 3.607717041800643, "grad_norm": 0.3948009132445448, "learning_rate": 2.2535213091759404e-05, "loss": 0.4141, "loss_nan_ranks": 0, "loss_rank_avg": 0.13655099272727966, "step": 375, "valid_targets_mean": 6693.5, "valid_targets_min": 2016 }, { "epoch": 3.6559485530546625, "grad_norm": 0.3019824650671563, "learning_rate": 2.205876627444452e-05, "loss": 0.4213, "loss_nan_ranks": 0, "loss_rank_avg": 0.13880327343940735, "step": 380, "valid_targets_mean": 7379.9, "valid_targets_min": 2742 }, { "epoch": 3.7041800643086815, "grad_norm": 0.2737001441559615, "learning_rate": 2.1581135479804735e-05, "loss": 0.42, "loss_nan_ranks": 0, "loss_rank_avg": 0.14117969572544098, "step": 385, "valid_targets_mean": 7399.1, "valid_targets_min": 2818 }, { "epoch": 3.752411575562701, "grad_norm": 0.2638773595163275, "learning_rate": 2.1102595388865054e-05, "loss": 0.4152, "loss_nan_ranks": 0, "loss_rank_avg": 0.14235882461071014, "step": 390, "valid_targets_mean": 7069.0, "valid_targets_min": 2084 }, { "epoch": 3.80064308681672, "grad_norm": 0.28040665496341327, "learning_rate": 2.062342120557834e-05, "loss": 0.42, "loss_nan_ranks": 0, "loss_rank_avg": 0.13389542698860168, "step": 395, "valid_targets_mean": 7465.1, "valid_targets_min": 2377 }, { "epoch": 3.8488745980707395, "grad_norm": 0.31446821190054974, "learning_rate": 2.0143888498558046e-05, "loss": 0.415, "loss_nan_ranks": 0, "loss_rank_avg": 0.14552685618400574, "step": 400, "valid_targets_mean": 7290.2, "valid_targets_min": 2307 }, { "epoch": 3.897106109324759, "grad_norm": 0.279723657397229, "learning_rate": 1.9664273042601302e-05, "loss": 0.4243, "loss_nan_ranks": 0, "loss_rank_avg": 0.13550975918769836, "step": 405, "valid_targets_mean": 6575.1, "valid_targets_min": 3659 }, { "epoch": 3.945337620578778, "grad_norm": 0.30048258496006014, "learning_rate": 1.918485066009338e-05, "loss": 0.4175, "loss_nan_ranks": 0, "loss_rank_avg": 0.13949520885944366, "step": 410, "valid_targets_mean": 6686.4, "valid_targets_min": 2947 }, { "epoch": 3.9935691318327975, "grad_norm": 0.2867474230478217, "learning_rate": 1.87058970623848e-05, "loss": 0.4122, "loss_nan_ranks": 0, "loss_rank_avg": 0.13645842671394348, "step": 415, "valid_targets_mean": 7611.5, "valid_targets_min": 4001 }, { "epoch": 4.038585209003215, "grad_norm": 0.32425630072343237, "learning_rate": 1.8227687691232322e-05, "loss": 0.4168, "loss_nan_ranks": 0, "loss_rank_avg": 0.14129070937633514, "step": 420, "valid_targets_mean": 7769.8, "valid_targets_min": 2240 }, { "epoch": 4.086816720257235, "grad_norm": 0.2748205187855077, "learning_rate": 1.7750497560394918e-05, "loss": 0.4123, "loss_nan_ranks": 0, "loss_rank_avg": 0.13103340566158295, "step": 425, "valid_targets_mean": 7042.3, "valid_targets_min": 2721 }, { "epoch": 4.135048231511254, "grad_norm": 0.31593879087572435, "learning_rate": 1.7274601097475957e-05, "loss": 0.4148, "loss_nan_ranks": 0, "loss_rank_avg": 0.12848451733589172, "step": 430, "valid_targets_mean": 7032.7, "valid_targets_min": 620 }, { "epoch": 4.183279742765273, "grad_norm": 0.2928706432235063, "learning_rate": 1.6800271986102418e-05, "loss": 0.4111, "loss_nan_ranks": 0, "loss_rank_avg": 0.13245472311973572, "step": 435, "valid_targets_mean": 6995.4, "valid_targets_min": 2509 }, { "epoch": 4.231511254019293, "grad_norm": 0.294138005369505, "learning_rate": 1.6327783008532e-05, "loss": 0.4115, "loss_nan_ranks": 0, "loss_rank_avg": 0.1609172821044922, "step": 440, "valid_targets_mean": 7456.9, "valid_targets_min": 2912 }, { "epoch": 4.279742765273312, "grad_norm": 0.26343752394612546, "learning_rate": 1.5857405888778568e-05, "loss": 0.4048, "loss_nan_ranks": 0, "loss_rank_avg": 0.13041119277477264, "step": 445, "valid_targets_mean": 6549.4, "valid_targets_min": 2315 }, { "epoch": 4.327974276527331, "grad_norm": 0.26515284362959823, "learning_rate": 1.5389411136346225e-05, "loss": 0.4165, "loss_nan_ranks": 0, "loss_rank_avg": 0.1363140195608139, "step": 450, "valid_targets_mean": 6877.8, "valid_targets_min": 2060 }, { "epoch": 4.37620578778135, "grad_norm": 0.3341389590190934, "learning_rate": 1.4924067890661778e-05, "loss": 0.4085, "loss_nan_ranks": 0, "loss_rank_avg": 0.14212492108345032, "step": 455, "valid_targets_mean": 7122.2, "valid_targets_min": 2552 }, { "epoch": 4.42443729903537, "grad_norm": 0.305739315850788, "learning_rate": 1.4461643766295196e-05, "loss": 0.4115, "loss_nan_ranks": 0, "loss_rank_avg": 0.13609100878238678, "step": 460, "valid_targets_mean": 7076.4, "valid_targets_min": 2084 }, { "epoch": 4.472668810289389, "grad_norm": 0.3425663444220552, "learning_rate": 1.4002404699056946e-05, "loss": 0.4088, "loss_nan_ranks": 0, "loss_rank_avg": 0.14678135514259338, "step": 465, "valid_targets_mean": 7620.5, "valid_targets_min": 3262 }, { "epoch": 4.520900321543408, "grad_norm": 0.28442860046969476, "learning_rate": 1.3546614793060757e-05, "loss": 0.4089, "loss_nan_ranks": 0, "loss_rank_avg": 0.12935858964920044, "step": 470, "valid_targets_mean": 6770.5, "valid_targets_min": 1312 }, { "epoch": 4.569131832797428, "grad_norm": 0.27599575997018294, "learning_rate": 1.3094536168839853e-05, "loss": 0.4053, "loss_nan_ranks": 0, "loss_rank_avg": 0.1286410689353943, "step": 475, "valid_targets_mean": 6730.8, "valid_targets_min": 2512 }, { "epoch": 4.617363344051447, "grad_norm": 0.2847734396443323, "learning_rate": 1.2646428812603838e-05, "loss": 0.4039, "loss_nan_ranks": 0, "loss_rank_avg": 0.1336403489112854, "step": 480, "valid_targets_mean": 6643.8, "valid_targets_min": 616 }, { "epoch": 4.665594855305466, "grad_norm": 0.28455785481267665, "learning_rate": 1.2202550426723053e-05, "loss": 0.4041, "loss_nan_ranks": 0, "loss_rank_avg": 0.12907546758651733, "step": 485, "valid_targets_mean": 6958.6, "valid_targets_min": 1876 }, { "epoch": 4.713826366559486, "grad_norm": 0.2584978256745778, "learning_rate": 1.1763156281526348e-05, "loss": 0.4106, "loss_nan_ranks": 0, "loss_rank_avg": 0.1278737336397171, "step": 490, "valid_targets_mean": 6800.9, "valid_targets_min": 2535 }, { "epoch": 4.762057877813505, "grad_norm": 0.2641322558149948, "learning_rate": 1.1328499068497478e-05, "loss": 0.4102, "loss_nan_ranks": 0, "loss_rank_avg": 0.1252935379743576, "step": 495, "valid_targets_mean": 6629.9, "valid_targets_min": 1450 }, { "epoch": 4.810289389067524, "grad_norm": 0.2418848744577093, "learning_rate": 1.0898828754954618e-05, "loss": 0.4073, "loss_nan_ranks": 0, "loss_rank_avg": 0.1492045819759369, "step": 500, "valid_targets_mean": 7692.5, "valid_targets_min": 2454 }, { "epoch": 4.858520900321543, "grad_norm": 0.2609084515860585, "learning_rate": 1.047439244029642e-05, "loss": 0.4088, "loss_nan_ranks": 0, "loss_rank_avg": 0.13392722606658936, "step": 505, "valid_targets_mean": 6826.6, "valid_targets_min": 2617 }, { "epoch": 4.906752411575563, "grad_norm": 0.25321438392795964, "learning_rate": 1.0055434213897529e-05, "loss": 0.4047, "loss_nan_ranks": 0, "loss_rank_avg": 0.1320946365594864, "step": 510, "valid_targets_mean": 7162.3, "valid_targets_min": 2273 }, { "epoch": 4.954983922829582, "grad_norm": 0.2717500389466323, "learning_rate": 9.642195014734972e-06, "loss": 0.4104, "loss_nan_ranks": 0, "loss_rank_avg": 0.1381416916847229, "step": 515, "valid_targets_mean": 7403.7, "valid_targets_min": 3487 }, { "epoch": 5.0, "grad_norm": 0.30446110389596137, "learning_rate": 9.234912492826454e-06, "loss": 0.4062, "loss_nan_ranks": 0, "loss_rank_avg": 0.2185996174812317, "step": 520, "valid_targets_mean": 7307.2, "valid_targets_min": 2603 }, { "epoch": 5.048231511254019, "grad_norm": 0.2607207892888904, "learning_rate": 8.833820872560035e-06, "loss": 0.404, "loss_nan_ranks": 0, "loss_rank_avg": 0.12400539219379425, "step": 525, "valid_targets_mean": 6264.1, "valid_targets_min": 1360 }, { "epoch": 5.096463022508039, "grad_norm": 0.2449802169204654, "learning_rate": 8.439150817993836e-06, "loss": 0.4039, "loss_nan_ranks": 0, "loss_rank_avg": 0.13310736417770386, "step": 530, "valid_targets_mean": 7255.7, "valid_targets_min": 3579 }, { "epoch": 5.144694533762058, "grad_norm": 0.26180442969567413, "learning_rate": 8.051129300203324e-06, "loss": 0.4042, "loss_nan_ranks": 0, "loss_rank_avg": 0.13548702001571655, "step": 535, "valid_targets_mean": 7463.6, "valid_targets_min": 2512 }, { "epoch": 5.192926045016077, "grad_norm": 0.2536175353972523, "learning_rate": 7.669979466752322e-06, "loss": 0.4096, "loss_nan_ranks": 0, "loss_rank_avg": 0.14918869733810425, "step": 540, "valid_targets_mean": 7186.4, "valid_targets_min": 1709 }, { "epoch": 5.241157556270096, "grad_norm": 0.29261182327763363, "learning_rate": 7.295920513362957e-06, "loss": 0.4025, "loss_nan_ranks": 0, "loss_rank_avg": 0.14041003584861755, "step": 545, "valid_targets_mean": 7639.5, "valid_targets_min": 3656 }, { "epoch": 5.289389067524116, "grad_norm": 0.24399791711337562, "learning_rate": 6.92916755785821e-06, "loss": 0.4075, "loss_nan_ranks": 0, "loss_rank_avg": 0.13997626304626465, "step": 550, "valid_targets_mean": 6806.3, "valid_targets_min": 2858 }, { "epoch": 5.337620578778135, "grad_norm": 0.24929427616550545, "learning_rate": 6.5699315164496635e-06, "loss": 0.4026, "loss_nan_ranks": 0, "loss_rank_avg": 0.1325225830078125, "step": 555, "valid_targets_mean": 6992.0, "valid_targets_min": 784 }, { "epoch": 5.385852090032154, "grad_norm": 0.26276669481415915, "learning_rate": 6.2184189824415855e-06, "loss": 0.4059, "loss_nan_ranks": 0, "loss_rank_avg": 0.13438841700553894, "step": 560, "valid_targets_mean": 7446.2, "valid_targets_min": 3927 }, { "epoch": 5.434083601286174, "grad_norm": 0.2402023567433913, "learning_rate": 5.87483210742098e-06, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.1273195445537567, "step": 565, "valid_targets_mean": 6891.7, "valid_targets_min": 2511 }, { "epoch": 5.482315112540193, "grad_norm": 0.24563573813966255, "learning_rate": 5.539368485002161e-06, "loss": 0.4, "loss_nan_ranks": 0, "loss_rank_avg": 0.14178407192230225, "step": 570, "valid_targets_mean": 7711.9, "valid_targets_min": 2782 }, { "epoch": 5.530546623794212, "grad_norm": 0.2469427497751035, "learning_rate": 5.21222103719244e-06, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.1396293342113495, "step": 575, "valid_targets_mean": 6963.2, "valid_targets_min": 2701 }, { "epoch": 5.578778135048232, "grad_norm": 0.23786230223199137, "learning_rate": 4.893577903444524e-06, "loss": 0.4023, "loss_nan_ranks": 0, "loss_rank_avg": 0.12411302328109741, "step": 580, "valid_targets_mean": 6821.6, "valid_targets_min": 2945 }, { "epoch": 5.627009646302251, "grad_norm": 0.24637334341544995, "learning_rate": 4.58362233245923e-06, "loss": 0.3995, "loss_nan_ranks": 0, "loss_rank_avg": 0.14157408475875854, "step": 585, "valid_targets_mean": 7340.6, "valid_targets_min": 2735 }, { "epoch": 5.67524115755627, "grad_norm": 0.24411500634429661, "learning_rate": 4.2825325768008905e-06, "loss": 0.4042, "loss_nan_ranks": 0, "loss_rank_avg": 0.1333349496126175, "step": 590, "valid_targets_mean": 7271.8, "valid_targets_min": 2419 }, { "epoch": 5.723472668810289, "grad_norm": 0.23800985861462287, "learning_rate": 3.990481790385963e-06, "loss": 0.4002, "loss_nan_ranks": 0, "loss_rank_avg": 0.12876483798027039, "step": 595, "valid_targets_mean": 6977.3, "valid_targets_min": 1671 }, { "epoch": 5.771704180064309, "grad_norm": 0.24538130692836058, "learning_rate": 3.7076379289037755e-06, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.12505599856376648, "step": 600, "valid_targets_mean": 6450.1, "valid_targets_min": 2535 }, { "epoch": 5.819935691318328, "grad_norm": 0.2281308139817817, "learning_rate": 3.4341636532268476e-06, "loss": 0.399, "loss_nan_ranks": 0, "loss_rank_avg": 0.14104759693145752, "step": 605, "valid_targets_mean": 7660.5, "valid_targets_min": 2583 }, { "epoch": 5.868167202572347, "grad_norm": 0.22646861694596795, "learning_rate": 3.170216235866075e-06, "loss": 0.403, "loss_nan_ranks": 0, "loss_rank_avg": 0.1284317672252655, "step": 610, "valid_targets_mean": 7120.9, "valid_targets_min": 2428 }, { "epoch": 5.916398713826366, "grad_norm": 0.23622946188916152, "learning_rate": 2.9159474705248093e-06, "loss": 0.4056, "loss_nan_ranks": 0, "loss_rank_avg": 0.1416313201189041, "step": 615, "valid_targets_mean": 7431.3, "valid_targets_min": 3551 }, { "epoch": 5.964630225080386, "grad_norm": 0.2415011553269361, "learning_rate": 2.6715035848036962e-06, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.14078471064567566, "step": 620, "valid_targets_mean": 7541.7, "valid_targets_min": 3135 }, { "epoch": 6.009646302250804, "grad_norm": 0.2988974462268002, "learning_rate": 2.4370251561065363e-06, "loss": 0.4043, "loss_nan_ranks": 0, "loss_rank_avg": 0.1267852485179901, "step": 625, "valid_targets_mean": 7117.3, "valid_targets_min": 2271 }, { "epoch": 6.057877813504823, "grad_norm": 0.22684489644445052, "learning_rate": 2.2126470307955515e-06, "loss": 0.4, "loss_nan_ranks": 0, "loss_rank_avg": 0.14647862315177917, "step": 630, "valid_targets_mean": 7404.9, "valid_targets_min": 3196 }, { "epoch": 6.106109324758842, "grad_norm": 0.2318713634251901, "learning_rate": 1.998498246642464e-06, "loss": 0.4046, "loss_nan_ranks": 0, "loss_rank_avg": 0.14780129492282867, "step": 635, "valid_targets_mean": 7828.9, "valid_targets_min": 2325 }, { "epoch": 6.154340836012862, "grad_norm": 0.2517163898615774, "learning_rate": 1.7947019586201152e-06, "loss": 0.402, "loss_nan_ranks": 0, "loss_rank_avg": 0.14310380816459656, "step": 640, "valid_targets_mean": 7404.0, "valid_targets_min": 2411 }, { "epoch": 6.202572347266881, "grad_norm": 0.23081203575236892, "learning_rate": 1.6013753680771493e-06, "loss": 0.4019, "loss_nan_ranks": 0, "loss_rank_avg": 0.14563211798667908, "step": 645, "valid_targets_mean": 7396.6, "valid_targets_min": 688 }, { "epoch": 6.2508038585209, "grad_norm": 0.232476920350471, "learning_rate": 1.4186296553366274e-06, "loss": 0.4073, "loss_nan_ranks": 0, "loss_rank_avg": 0.13480687141418457, "step": 650, "valid_targets_mean": 7193.0, "valid_targets_min": 2283 }, { "epoch": 6.29903536977492, "grad_norm": 0.32208230826818174, "learning_rate": 1.246569915757263e-06, "loss": 0.4038, "loss_nan_ranks": 0, "loss_rank_avg": 0.1413307934999466, "step": 655, "valid_targets_mean": 6981.1, "valid_targets_min": 1723 }, { "epoch": 6.347266881028939, "grad_norm": 0.24094886248637287, "learning_rate": 1.0852950992940415e-06, "loss": 0.4024, "loss_nan_ranks": 0, "loss_rank_avg": 0.1421469748020172, "step": 660, "valid_targets_mean": 7026.6, "valid_targets_min": 3719 }, { "epoch": 6.395498392282958, "grad_norm": 0.2336825012086835, "learning_rate": 9.348979535930391e-07, "loss": 0.3998, "loss_nan_ranks": 0, "loss_rank_avg": 0.14628173410892487, "step": 665, "valid_targets_mean": 7186.3, "valid_targets_min": 2563 }, { "epoch": 6.443729903536978, "grad_norm": 0.23225672605953232, "learning_rate": 7.95464970653106e-07, "loss": 0.4027, "loss_nan_ranks": 0, "loss_rank_avg": 0.14211036264896393, "step": 670, "valid_targets_mean": 7271.8, "valid_targets_min": 1463 }, { "epoch": 6.491961414790997, "grad_norm": 0.22536427823739902, "learning_rate": 6.670763370851241e-07, "loss": 0.3971, "loss_nan_ranks": 0, "loss_rank_avg": 0.13998502492904663, "step": 675, "valid_targets_mean": 7128.0, "valid_targets_min": 2704 }, { "epoch": 6.540192926045016, "grad_norm": 0.23238632103019738, "learning_rate": 5.4980588799743e-07, "loss": 0.3984, "loss_nan_ranks": 0, "loss_rank_avg": 0.13454963266849518, "step": 680, "valid_targets_mean": 6974.1, "valid_targets_min": 3551 }, { "epoch": 6.588424437299035, "grad_norm": 0.22827166361639628, "learning_rate": 4.4372106453394405e-07, "loss": 0.4015, "loss_nan_ranks": 0, "loss_rank_avg": 0.11654944717884064, "step": 685, "valid_targets_mean": 6038.2, "valid_targets_min": 828 }, { "epoch": 6.636655948553055, "grad_norm": 0.21370459518374998, "learning_rate": 3.48882875089378e-07, "loss": 0.3977, "loss_nan_ranks": 0, "loss_rank_avg": 0.12826097011566162, "step": 690, "valid_targets_mean": 6884.0, "valid_targets_min": 2654 }, { "epoch": 6.684887459807074, "grad_norm": 0.22138782542292237, "learning_rate": 2.653458602238845e-07, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.12427161633968353, "step": 695, "valid_targets_mean": 6643.1, "valid_targets_min": 2901 }, { "epoch": 6.733118971061093, "grad_norm": 0.21954025300724495, "learning_rate": 1.931580612972983e-07, "loss": 0.4057, "loss_nan_ranks": 0, "loss_rank_avg": 0.13527044653892517, "step": 700, "valid_targets_mean": 6924.3, "valid_targets_min": 2375 }, { "epoch": 6.781350482315112, "grad_norm": 0.22594474295726047, "learning_rate": 1.3236099284097415e-07, "loss": 0.4041, "loss_nan_ranks": 0, "loss_rank_avg": 0.14596149325370789, "step": 705, "valid_targets_mean": 7521.6, "valid_targets_min": 2823 }, { "epoch": 6.829581993569132, "grad_norm": 0.2337034847019303, "learning_rate": 8.298961868318689e-08, "loss": 0.3964, "loss_nan_ranks": 0, "loss_rank_avg": 0.1245846375823021, "step": 710, "valid_targets_mean": 6552.6, "valid_targets_min": 1658 }, { "epoch": 6.877813504823151, "grad_norm": 0.22405182437817006, "learning_rate": 4.507233184174675e-08, "loss": 0.4001, "loss_nan_ranks": 0, "loss_rank_avg": 0.12990376353263855, "step": 715, "valid_targets_mean": 7562.9, "valid_targets_min": 3556 }, { "epoch": 6.92604501607717, "grad_norm": 0.22599182393237854, "learning_rate": 1.863093819545192e-08, "loss": 0.3993, "loss_nan_ranks": 0, "loss_rank_avg": 0.13830092549324036, "step": 720, "valid_targets_mean": 7532.1, "valid_targets_min": 3375 }, { "epoch": 6.97427652733119, "grad_norm": 0.22933841268273067, "learning_rate": 3.680643943708706e-09, "loss": 0.4013, "loss_nan_ranks": 0, "loss_rank_avg": 0.13168975710868835, "step": 725, "valid_targets_mean": 7441.9, "valid_targets_min": 3674 }, { "epoch": 7.0, "step": 728, "total_flos": 2.6866574226798674e+18, "train_loss": 0.0, "train_runtime": 0.825, "train_samples_per_second": 84297.403, "train_steps_per_second": 882.429 } ], "logging_steps": 5, "max_steps": 728, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.6866574226798674e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }