{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1141, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.030800821355236138, "grad_norm": 10.78951034545919, "learning_rate": 1.391304347826087e-06, "loss": 0.8755, "loss_nan_ranks": 0, "loss_rank_avg": 0.2676461935043335, "step": 5, "valid_targets_mean": 8280.3, "valid_targets_min": 373 }, { "epoch": 0.061601642710472276, "grad_norm": 8.246410041446428, "learning_rate": 3.130434782608696e-06, "loss": 0.8526, "loss_nan_ranks": 0, "loss_rank_avg": 0.24272847175598145, "step": 10, "valid_targets_mean": 8654.6, "valid_targets_min": 1049 }, { "epoch": 0.09240246406570841, "grad_norm": 3.9672877405460905, "learning_rate": 4.869565217391305e-06, "loss": 0.7895, "loss_nan_ranks": 0, "loss_rank_avg": 0.24913868308067322, "step": 15, "valid_targets_mean": 10789.0, "valid_targets_min": 410 }, { "epoch": 0.12320328542094455, "grad_norm": 1.8110786022210992, "learning_rate": 6.6086956521739135e-06, "loss": 0.7282, "loss_nan_ranks": 0, "loss_rank_avg": 0.23902273178100586, "step": 20, "valid_targets_mean": 10279.2, "valid_targets_min": 2263 }, { "epoch": 0.1540041067761807, "grad_norm": 1.410577563759979, "learning_rate": 8.347826086956522e-06, "loss": 0.7136, "loss_nan_ranks": 0, "loss_rank_avg": 0.23549094796180725, "step": 25, "valid_targets_mean": 9453.8, "valid_targets_min": 440 }, { "epoch": 0.18480492813141683, "grad_norm": 1.053306513462499, "learning_rate": 1.008695652173913e-05, "loss": 0.6777, "loss_nan_ranks": 0, "loss_rank_avg": 0.22287440299987793, "step": 30, "valid_targets_mean": 9982.4, "valid_targets_min": 2824 }, { "epoch": 0.21560574948665298, "grad_norm": 0.6869651188569896, "learning_rate": 1.182608695652174e-05, "loss": 0.6437, "loss_nan_ranks": 0, "loss_rank_avg": 0.22780485451221466, "step": 35, "valid_targets_mean": 11011.3, "valid_targets_min": 2403 }, { "epoch": 0.2464065708418891, "grad_norm": 0.5729076954739157, "learning_rate": 1.3565217391304348e-05, "loss": 0.6324, "loss_nan_ranks": 0, "loss_rank_avg": 0.20696070790290833, "step": 40, "valid_targets_mean": 9971.0, "valid_targets_min": 1883 }, { "epoch": 0.27720739219712526, "grad_norm": 0.4883774957300283, "learning_rate": 1.5304347826086958e-05, "loss": 0.6134, "loss_nan_ranks": 0, "loss_rank_avg": 0.2091033160686493, "step": 45, "valid_targets_mean": 10380.5, "valid_targets_min": 3726 }, { "epoch": 0.3080082135523614, "grad_norm": 0.3598292149158693, "learning_rate": 1.7043478260869566e-05, "loss": 0.5959, "loss_nan_ranks": 0, "loss_rank_avg": 0.17776153981685638, "step": 50, "valid_targets_mean": 9879.5, "valid_targets_min": 3528 }, { "epoch": 0.33880903490759756, "grad_norm": 0.389165260388212, "learning_rate": 1.8782608695652175e-05, "loss": 0.5784, "loss_nan_ranks": 0, "loss_rank_avg": 0.20730958878993988, "step": 55, "valid_targets_mean": 10501.6, "valid_targets_min": 3010 }, { "epoch": 0.36960985626283366, "grad_norm": 0.2988363102174585, "learning_rate": 2.0521739130434787e-05, "loss": 0.5533, "loss_nan_ranks": 0, "loss_rank_avg": 0.18783965706825256, "step": 60, "valid_targets_mean": 9842.8, "valid_targets_min": 2947 }, { "epoch": 0.4004106776180698, "grad_norm": 0.2683287295477794, "learning_rate": 2.2260869565217392e-05, "loss": 0.5336, "loss_nan_ranks": 0, "loss_rank_avg": 0.18711069226264954, "step": 65, "valid_targets_mean": 10642.9, "valid_targets_min": 4707 }, { "epoch": 0.43121149897330596, "grad_norm": 0.2404329744552859, "learning_rate": 2.4e-05, "loss": 0.5327, "loss_nan_ranks": 0, "loss_rank_avg": 0.1542397290468216, "step": 70, "valid_targets_mean": 8437.6, "valid_targets_min": 2555 }, { "epoch": 0.4620123203285421, "grad_norm": 0.25251403548284934, "learning_rate": 2.573913043478261e-05, "loss": 0.5241, "loss_nan_ranks": 0, "loss_rank_avg": 0.1736878752708435, "step": 75, "valid_targets_mean": 11317.5, "valid_targets_min": 3937 }, { "epoch": 0.4928131416837782, "grad_norm": 0.22646760827022988, "learning_rate": 2.747826086956522e-05, "loss": 0.5111, "loss_nan_ranks": 0, "loss_rank_avg": 0.17943963408470154, "step": 80, "valid_targets_mean": 10079.2, "valid_targets_min": 1047 }, { "epoch": 0.5236139630390144, "grad_norm": 0.22168009683076434, "learning_rate": 2.921739130434783e-05, "loss": 0.5127, "loss_nan_ranks": 0, "loss_rank_avg": 0.1879035234451294, "step": 85, "valid_targets_mean": 10592.5, "valid_targets_min": 3842 }, { "epoch": 0.5544147843942505, "grad_norm": 0.24822610653380095, "learning_rate": 3.0956521739130435e-05, "loss": 0.5028, "loss_nan_ranks": 0, "loss_rank_avg": 0.1684880405664444, "step": 90, "valid_targets_mean": 9292.6, "valid_targets_min": 3273 }, { "epoch": 0.5852156057494866, "grad_norm": 0.2398489128545002, "learning_rate": 3.269565217391305e-05, "loss": 0.4953, "loss_nan_ranks": 0, "loss_rank_avg": 0.20545238256454468, "step": 95, "valid_targets_mean": 11001.3, "valid_targets_min": 2392 }, { "epoch": 0.6160164271047228, "grad_norm": 0.22960412483563136, "learning_rate": 3.443478260869566e-05, "loss": 0.49, "loss_nan_ranks": 0, "loss_rank_avg": 0.17050573229789734, "step": 100, "valid_targets_mean": 10364.2, "valid_targets_min": 3913 }, { "epoch": 0.6468172484599589, "grad_norm": 0.223134537440273, "learning_rate": 3.617391304347826e-05, "loss": 0.4811, "loss_nan_ranks": 0, "loss_rank_avg": 0.1758454144001007, "step": 105, "valid_targets_mean": 10234.9, "valid_targets_min": 3548 }, { "epoch": 0.6776180698151951, "grad_norm": 0.23753676152704173, "learning_rate": 3.791304347826087e-05, "loss": 0.4674, "loss_nan_ranks": 0, "loss_rank_avg": 0.1549699902534485, "step": 110, "valid_targets_mean": 10514.7, "valid_targets_min": 1669 }, { "epoch": 0.7084188911704312, "grad_norm": 0.24288587071503348, "learning_rate": 3.9652173913043484e-05, "loss": 0.472, "loss_nan_ranks": 0, "loss_rank_avg": 0.12784242630004883, "step": 115, "valid_targets_mean": 8451.4, "valid_targets_min": 2031 }, { "epoch": 0.7392197125256673, "grad_norm": 0.2500614038071991, "learning_rate": 3.9998499902188776e-05, "loss": 0.4739, "loss_nan_ranks": 0, "loss_rank_avg": 0.16550754010677338, "step": 120, "valid_targets_mean": 10390.9, "valid_targets_min": 2598 }, { "epoch": 0.7700205338809035, "grad_norm": 0.26501606669707445, "learning_rate": 3.999240614049903e-05, "loss": 0.4708, "loss_nan_ranks": 0, "loss_rank_avg": 0.15240433812141418, "step": 125, "valid_targets_mean": 9886.8, "valid_targets_min": 3444 }, { "epoch": 0.8008213552361396, "grad_norm": 0.23844861174775395, "learning_rate": 3.998162638602725e-05, "loss": 0.4676, "loss_nan_ranks": 0, "loss_rank_avg": 0.16705986857414246, "step": 130, "valid_targets_mean": 10793.2, "valid_targets_min": 4591 }, { "epoch": 0.8316221765913757, "grad_norm": 0.22087987371786275, "learning_rate": 3.996616316542537e-05, "loss": 0.4537, "loss_nan_ranks": 0, "loss_rank_avg": 0.15392373502254486, "step": 135, "valid_targets_mean": 10568.0, "valid_targets_min": 4286 }, { "epoch": 0.8624229979466119, "grad_norm": 0.260373661733574, "learning_rate": 3.994602010309655e-05, "loss": 0.4657, "loss_nan_ranks": 0, "loss_rank_avg": 0.16058529913425446, "step": 140, "valid_targets_mean": 10719.8, "valid_targets_min": 444 }, { "epoch": 0.893223819301848, "grad_norm": 0.2408597877120169, "learning_rate": 3.992120192034568e-05, "loss": 0.4571, "loss_nan_ranks": 0, "loss_rank_avg": 0.1405722200870514, "step": 145, "valid_targets_mean": 9775.9, "valid_targets_min": 248 }, { "epoch": 0.9240246406570842, "grad_norm": 0.2257527377603397, "learning_rate": 3.989171443427273e-05, "loss": 0.4493, "loss_nan_ranks": 0, "loss_rank_avg": 0.14079983532428741, "step": 150, "valid_targets_mean": 9406.4, "valid_targets_min": 1978 }, { "epoch": 0.9548254620123203, "grad_norm": 0.2639637342672226, "learning_rate": 3.98575645564093e-05, "loss": 0.4496, "loss_nan_ranks": 0, "loss_rank_avg": 0.14828550815582275, "step": 155, "valid_targets_mean": 9482.4, "valid_targets_min": 1574 }, { "epoch": 0.9856262833675564, "grad_norm": 0.2526261707928796, "learning_rate": 3.981876029109865e-05, "loss": 0.4477, "loss_nan_ranks": 0, "loss_rank_avg": 0.13988760113716125, "step": 160, "valid_targets_mean": 8873.9, "valid_targets_min": 3430 }, { "epoch": 1.0123203285420945, "grad_norm": 0.25498967898811375, "learning_rate": 3.9775310733619544e-05, "loss": 0.443, "loss_nan_ranks": 0, "loss_rank_avg": 0.1458635777235031, "step": 165, "valid_targets_mean": 10038.5, "valid_targets_min": 3952 }, { "epoch": 1.0431211498973305, "grad_norm": 0.2654670242620403, "learning_rate": 3.972722606805445e-05, "loss": 0.4445, "loss_nan_ranks": 0, "loss_rank_avg": 0.1620415598154068, "step": 170, "valid_targets_mean": 11246.2, "valid_targets_min": 4395 }, { "epoch": 1.0739219712525667, "grad_norm": 0.24113536439654604, "learning_rate": 3.967451756490248e-05, "loss": 0.435, "loss_nan_ranks": 0, "loss_rank_avg": 0.15797394514083862, "step": 175, "valid_targets_mean": 11217.4, "valid_targets_min": 3344 }, { "epoch": 1.104722792607803, "grad_norm": 0.2740746014261754, "learning_rate": 3.961719757843773e-05, "loss": 0.4434, "loss_nan_ranks": 0, "loss_rank_avg": 0.150252103805542, "step": 180, "valid_targets_mean": 9786.6, "valid_targets_min": 1441 }, { "epoch": 1.1355236139630391, "grad_norm": 0.258490919264763, "learning_rate": 3.955527954381359e-05, "loss": 0.4431, "loss_nan_ranks": 0, "loss_rank_avg": 0.13429267704486847, "step": 185, "valid_targets_mean": 9314.9, "valid_targets_min": 357 }, { "epoch": 1.1663244353182751, "grad_norm": 0.25135094678661335, "learning_rate": 3.948877797391365e-05, "loss": 0.4411, "loss_nan_ranks": 0, "loss_rank_avg": 0.15129579603672028, "step": 190, "valid_targets_mean": 9681.8, "valid_targets_min": 3462 }, { "epoch": 1.1971252566735113, "grad_norm": 0.2241624236803852, "learning_rate": 3.941770845595009e-05, "loss": 0.4318, "loss_nan_ranks": 0, "loss_rank_avg": 0.14005209505558014, "step": 195, "valid_targets_mean": 10555.8, "valid_targets_min": 1958 }, { "epoch": 1.2279260780287475, "grad_norm": 0.23528151967814984, "learning_rate": 3.934208764781022e-05, "loss": 0.432, "loss_nan_ranks": 0, "loss_rank_avg": 0.13930237293243408, "step": 200, "valid_targets_mean": 10503.7, "valid_targets_min": 894 }, { "epoch": 1.2587268993839835, "grad_norm": 0.27002466162348915, "learning_rate": 3.9261933274152006e-05, "loss": 0.4309, "loss_nan_ranks": 0, "loss_rank_avg": 0.13504661619663239, "step": 205, "valid_targets_mean": 9799.6, "valid_targets_min": 382 }, { "epoch": 1.2895277207392197, "grad_norm": 0.21563006501429327, "learning_rate": 3.917726412224967e-05, "loss": 0.4412, "loss_nan_ranks": 0, "loss_rank_avg": 0.15858903527259827, "step": 210, "valid_targets_mean": 10780.6, "valid_targets_min": 2033 }, { "epoch": 1.320328542094456, "grad_norm": 0.2394461528414203, "learning_rate": 3.908810003759015e-05, "loss": 0.4368, "loss_nan_ranks": 0, "loss_rank_avg": 0.1396717131137848, "step": 215, "valid_targets_mean": 10120.3, "valid_targets_min": 3639 }, { "epoch": 1.351129363449692, "grad_norm": 0.2506700840081252, "learning_rate": 3.8994461919221514e-05, "loss": 0.4267, "loss_nan_ranks": 0, "loss_rank_avg": 0.1430070549249649, "step": 220, "valid_targets_mean": 10176.3, "valid_targets_min": 3021 }, { "epoch": 1.3819301848049281, "grad_norm": 0.23480839371747675, "learning_rate": 3.889637171485449e-05, "loss": 0.437, "loss_nan_ranks": 0, "loss_rank_avg": 0.15943750739097595, "step": 225, "valid_targets_mean": 11043.3, "valid_targets_min": 5157 }, { "epoch": 1.4127310061601643, "grad_norm": 0.23589975763379348, "learning_rate": 3.879385241571817e-05, "loss": 0.4281, "loss_nan_ranks": 0, "loss_rank_avg": 0.14591297507286072, "step": 230, "valid_targets_mean": 10345.9, "valid_targets_min": 3046 }, { "epoch": 1.4435318275154003, "grad_norm": 0.26204876497809454, "learning_rate": 3.868692805117113e-05, "loss": 0.4288, "loss_nan_ranks": 0, "loss_rank_avg": 0.13737617433071136, "step": 235, "valid_targets_mean": 10190.5, "valid_targets_min": 3107 }, { "epoch": 1.4743326488706365, "grad_norm": 0.2711456241860061, "learning_rate": 3.8575623683069195e-05, "loss": 0.4329, "loss_nan_ranks": 0, "loss_rank_avg": 0.13510125875473022, "step": 240, "valid_targets_mean": 10149.4, "valid_targets_min": 473 }, { "epoch": 1.5051334702258727, "grad_norm": 0.25547982444880957, "learning_rate": 3.845996539989126e-05, "loss": 0.4236, "loss_nan_ranks": 0, "loss_rank_avg": 0.13566002249717712, "step": 245, "valid_targets_mean": 9541.6, "valid_targets_min": 2086 }, { "epoch": 1.5359342915811087, "grad_norm": 0.2480976978769594, "learning_rate": 3.833998031062442e-05, "loss": 0.4201, "loss_nan_ranks": 0, "loss_rank_avg": 0.13639628887176514, "step": 250, "valid_targets_mean": 9303.5, "valid_targets_min": 3090 }, { "epoch": 1.566735112936345, "grad_norm": 0.24221709972181102, "learning_rate": 3.821569653840995e-05, "loss": 0.4277, "loss_nan_ranks": 0, "loss_rank_avg": 0.14368358254432678, "step": 255, "valid_targets_mean": 8807.1, "valid_targets_min": 1766 }, { "epoch": 1.5975359342915811, "grad_norm": 0.27099806998138243, "learning_rate": 3.808714321395155e-05, "loss": 0.4288, "loss_nan_ranks": 0, "loss_rank_avg": 0.16085977852344513, "step": 260, "valid_targets_mean": 10484.7, "valid_targets_min": 3249 }, { "epoch": 1.6283367556468171, "grad_norm": 0.2377874625941885, "learning_rate": 3.795435046868745e-05, "loss": 0.4234, "loss_nan_ranks": 0, "loss_rank_avg": 0.1193472146987915, "step": 265, "valid_targets_mean": 8244.7, "valid_targets_min": 2560 }, { "epoch": 1.6591375770020536, "grad_norm": 0.2541397790886549, "learning_rate": 3.78173494277279e-05, "loss": 0.4159, "loss_nan_ranks": 0, "loss_rank_avg": 0.10338842868804932, "step": 270, "valid_targets_mean": 7190.8, "valid_targets_min": 475 }, { "epoch": 1.6899383983572895, "grad_norm": 0.2524891350308471, "learning_rate": 3.767617220255989e-05, "loss": 0.4214, "loss_nan_ranks": 0, "loss_rank_avg": 0.1306953728199005, "step": 275, "valid_targets_mean": 8843.7, "valid_targets_min": 1938 }, { "epoch": 1.7207392197125255, "grad_norm": 0.31674080289717166, "learning_rate": 3.753085188352047e-05, "loss": 0.4285, "loss_nan_ranks": 0, "loss_rank_avg": 0.15276293456554413, "step": 280, "valid_targets_mean": 11124.9, "valid_targets_min": 2379 }, { "epoch": 1.751540041067762, "grad_norm": 0.271075195175284, "learning_rate": 3.738142253204086e-05, "loss": 0.4143, "loss_nan_ranks": 0, "loss_rank_avg": 0.1333828717470169, "step": 285, "valid_targets_mean": 10931.6, "valid_targets_min": 278 }, { "epoch": 1.782340862422998, "grad_norm": 0.27464953772893264, "learning_rate": 3.722791917266273e-05, "loss": 0.4243, "loss_nan_ranks": 0, "loss_rank_avg": 0.13427472114562988, "step": 290, "valid_targets_mean": 10074.6, "valid_targets_min": 2476 }, { "epoch": 1.813141683778234, "grad_norm": 0.2453667768754085, "learning_rate": 3.707037778482892e-05, "loss": 0.426, "loss_nan_ranks": 0, "loss_rank_avg": 0.13184840977191925, "step": 295, "valid_targets_mean": 9288.8, "valid_targets_min": 3084 }, { "epoch": 1.8439425051334704, "grad_norm": 0.2517748348569757, "learning_rate": 3.690883529445028e-05, "loss": 0.4163, "loss_nan_ranks": 0, "loss_rank_avg": 0.13088074326515198, "step": 300, "valid_targets_mean": 9480.1, "valid_targets_min": 228 }, { "epoch": 1.8747433264887063, "grad_norm": 0.2468601020156896, "learning_rate": 3.6743329565250575e-05, "loss": 0.4142, "loss_nan_ranks": 0, "loss_rank_avg": 0.14135316014289856, "step": 305, "valid_targets_mean": 9932.8, "valid_targets_min": 3640 }, { "epoch": 1.9055441478439425, "grad_norm": 0.24890855608847368, "learning_rate": 3.657389938989173e-05, "loss": 0.4178, "loss_nan_ranks": 0, "loss_rank_avg": 0.13682851195335388, "step": 310, "valid_targets_mean": 9554.7, "valid_targets_min": 2220 }, { "epoch": 1.9363449691991788, "grad_norm": 0.21662618528514496, "learning_rate": 3.6400584480881246e-05, "loss": 0.42, "loss_nan_ranks": 0, "loss_rank_avg": 0.12407977133989334, "step": 315, "valid_targets_mean": 9492.4, "valid_targets_min": 1918 }, { "epoch": 1.9671457905544147, "grad_norm": 0.2665236408311185, "learning_rate": 3.622342546126405e-05, "loss": 0.425, "loss_nan_ranks": 0, "loss_rank_avg": 0.16727863252162933, "step": 320, "valid_targets_mean": 10798.4, "valid_targets_min": 2302 }, { "epoch": 1.997946611909651, "grad_norm": 0.24445750361586888, "learning_rate": 3.604246385510088e-05, "loss": 0.419, "loss_nan_ranks": 0, "loss_rank_avg": 0.13143767416477203, "step": 325, "valid_targets_mean": 9471.8, "valid_targets_min": 2411 }, { "epoch": 2.024640657084189, "grad_norm": 0.2617499382018992, "learning_rate": 3.585774207773557e-05, "loss": 0.4142, "loss_nan_ranks": 0, "loss_rank_avg": 0.15601074695587158, "step": 330, "valid_targets_mean": 10741.7, "valid_targets_min": 497 }, { "epoch": 2.055441478439425, "grad_norm": 0.25390431014432446, "learning_rate": 3.5669303425853325e-05, "loss": 0.408, "loss_nan_ranks": 0, "loss_rank_avg": 0.14050063490867615, "step": 335, "valid_targets_mean": 10453.0, "valid_targets_min": 1313 }, { "epoch": 2.086242299794661, "grad_norm": 0.24784537187252367, "learning_rate": 3.547719206733252e-05, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.13861119747161865, "step": 340, "valid_targets_mean": 10057.9, "valid_targets_min": 319 }, { "epoch": 2.1170431211498975, "grad_norm": 0.2426581217742219, "learning_rate": 3.528145303089221e-05, "loss": 0.4123, "loss_nan_ranks": 0, "loss_rank_avg": 0.1406959593296051, "step": 345, "valid_targets_mean": 10138.0, "valid_targets_min": 4464 }, { "epoch": 2.1478439425051334, "grad_norm": 0.24675637675967238, "learning_rate": 3.508213219553793e-05, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.14610439538955688, "step": 350, "valid_targets_mean": 10286.1, "valid_targets_min": 4512 }, { "epoch": 2.1786447638603694, "grad_norm": 0.2440931373764094, "learning_rate": 3.4879276279808163e-05, "loss": 0.4098, "loss_nan_ranks": 0, "loss_rank_avg": 0.13264574110507965, "step": 355, "valid_targets_mean": 8596.7, "valid_targets_min": 483 }, { "epoch": 2.209445585215606, "grad_norm": 0.245025185185539, "learning_rate": 3.4672932830824056e-05, "loss": 0.4081, "loss_nan_ranks": 0, "loss_rank_avg": 0.1417609453201294, "step": 360, "valid_targets_mean": 10969.4, "valid_targets_min": 1437 }, { "epoch": 2.240246406570842, "grad_norm": 0.24876551307938133, "learning_rate": 3.446315021314494e-05, "loss": 0.4129, "loss_nan_ranks": 0, "loss_rank_avg": 0.14342427253723145, "step": 365, "valid_targets_mean": 9465.1, "valid_targets_min": 2103 }, { "epoch": 2.2710472279260783, "grad_norm": 0.26795367396469427, "learning_rate": 3.424997759743217e-05, "loss": 0.4013, "loss_nan_ranks": 0, "loss_rank_avg": 0.12778609991073608, "step": 370, "valid_targets_mean": 9634.5, "valid_targets_min": 1785 }, { "epoch": 2.3018480492813143, "grad_norm": 0.23421853162542677, "learning_rate": 3.403346494892411e-05, "loss": 0.4083, "loss_nan_ranks": 0, "loss_rank_avg": 0.13451623916625977, "step": 375, "valid_targets_mean": 9650.5, "valid_targets_min": 305 }, { "epoch": 2.3326488706365502, "grad_norm": 0.2693457105084275, "learning_rate": 3.381366301572489e-05, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.12630873918533325, "step": 380, "valid_targets_mean": 10074.8, "valid_targets_min": 421 }, { "epoch": 2.3634496919917867, "grad_norm": 0.2328615075804371, "learning_rate": 3.359062331690956e-05, "loss": 0.4074, "loss_nan_ranks": 0, "loss_rank_avg": 0.13336540758609772, "step": 385, "valid_targets_mean": 9906.7, "valid_targets_min": 246 }, { "epoch": 2.3942505133470227, "grad_norm": 0.23998410697733752, "learning_rate": 3.336439813044861e-05, "loss": 0.41, "loss_nan_ranks": 0, "loss_rank_avg": 0.14154762029647827, "step": 390, "valid_targets_mean": 9435.9, "valid_targets_min": 2754 }, { "epoch": 2.4250513347022586, "grad_norm": 0.24295648286477237, "learning_rate": 3.313504048095469e-05, "loss": 0.4112, "loss_nan_ranks": 0, "loss_rank_avg": 0.13870443403720856, "step": 395, "valid_targets_mean": 9355.4, "valid_targets_min": 254 }, { "epoch": 2.455852156057495, "grad_norm": 0.25647709210106184, "learning_rate": 3.2902604127254134e-05, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.14127278327941895, "step": 400, "valid_targets_mean": 10922.0, "valid_targets_min": 317 }, { "epoch": 2.486652977412731, "grad_norm": 0.24788114923573965, "learning_rate": 3.266714354978657e-05, "loss": 0.4055, "loss_nan_ranks": 0, "loss_rank_avg": 0.11631830036640167, "step": 405, "valid_targets_mean": 9566.3, "valid_targets_min": 422 }, { "epoch": 2.517453798767967, "grad_norm": 0.22799816622722477, "learning_rate": 3.24287139378353e-05, "loss": 0.4012, "loss_nan_ranks": 0, "loss_rank_avg": 0.1390194594860077, "step": 410, "valid_targets_mean": 10234.0, "valid_targets_min": 1691 }, { "epoch": 2.5482546201232035, "grad_norm": 0.2484370468858101, "learning_rate": 3.2187371176591574e-05, "loss": 0.4036, "loss_nan_ranks": 0, "loss_rank_avg": 0.12273897975683212, "step": 415, "valid_targets_mean": 9935.5, "valid_targets_min": 391 }, { "epoch": 2.5790554414784395, "grad_norm": 0.2791429635859173, "learning_rate": 3.194317183405573e-05, "loss": 0.4032, "loss_nan_ranks": 0, "loss_rank_avg": 0.13965481519699097, "step": 420, "valid_targets_mean": 10705.2, "valid_targets_min": 2372 }, { "epoch": 2.6098562628336754, "grad_norm": 0.2444287470019243, "learning_rate": 3.169617314777828e-05, "loss": 0.4062, "loss_nan_ranks": 0, "loss_rank_avg": 0.1428462415933609, "step": 425, "valid_targets_mean": 10718.3, "valid_targets_min": 2588 }, { "epoch": 2.640657084188912, "grad_norm": 0.2791873926431355, "learning_rate": 3.14464330114441e-05, "loss": 0.41, "loss_nan_ranks": 0, "loss_rank_avg": 0.15584325790405273, "step": 430, "valid_targets_mean": 10506.5, "valid_targets_min": 313 }, { "epoch": 2.671457905544148, "grad_norm": 0.24786042312840947, "learning_rate": 3.119400996130281e-05, "loss": 0.4118, "loss_nan_ranks": 0, "loss_rank_avg": 0.13824349641799927, "step": 435, "valid_targets_mean": 9737.3, "valid_targets_min": 355 }, { "epoch": 2.702258726899384, "grad_norm": 0.2188588147632267, "learning_rate": 3.093896316244855e-05, "loss": 0.398, "loss_nan_ranks": 0, "loss_rank_avg": 0.11751606315374374, "step": 440, "valid_targets_mean": 8301.4, "valid_targets_min": 2539 }, { "epoch": 2.7330595482546203, "grad_norm": 0.25311777351511694, "learning_rate": 3.0681352394952276e-05, "loss": 0.4098, "loss_nan_ranks": 0, "loss_rank_avg": 0.14293605089187622, "step": 445, "valid_targets_mean": 10999.3, "valid_targets_min": 3311 }, { "epoch": 2.7638603696098563, "grad_norm": 0.2507509543463196, "learning_rate": 3.0421238039850132e-05, "loss": 0.4014, "loss_nan_ranks": 0, "loss_rank_avg": 0.1273411512374878, "step": 450, "valid_targets_mean": 9301.3, "valid_targets_min": 614 }, { "epoch": 2.7946611909650922, "grad_norm": 0.20025231318009487, "learning_rate": 3.0158681064990688e-05, "loss": 0.3973, "loss_nan_ranks": 0, "loss_rank_avg": 0.13788363337516785, "step": 455, "valid_targets_mean": 10190.0, "valid_targets_min": 1776 }, { "epoch": 2.8254620123203287, "grad_norm": 0.22451287983352627, "learning_rate": 2.9893743010744853e-05, "loss": 0.4045, "loss_nan_ranks": 0, "loss_rank_avg": 0.13523007929325104, "step": 460, "valid_targets_mean": 10681.0, "valid_targets_min": 3050 }, { "epoch": 2.8562628336755647, "grad_norm": 0.24527472770616648, "learning_rate": 2.962648597558147e-05, "loss": 0.3936, "loss_nan_ranks": 0, "loss_rank_avg": 0.14209523797035217, "step": 465, "valid_targets_mean": 11035.8, "valid_targets_min": 3838 }, { "epoch": 2.8870636550308006, "grad_norm": 0.2040471862649505, "learning_rate": 2.935697260151216e-05, "loss": 0.405, "loss_nan_ranks": 0, "loss_rank_avg": 0.13653475046157837, "step": 470, "valid_targets_mean": 10036.0, "valid_targets_min": 424 }, { "epoch": 2.917864476386037, "grad_norm": 0.24970399223730452, "learning_rate": 2.9085266059408734e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.11486883461475372, "step": 475, "valid_targets_mean": 8802.4, "valid_targets_min": 690 }, { "epoch": 2.948665297741273, "grad_norm": 0.24585352363230156, "learning_rate": 2.8811430034196667e-05, "loss": 0.4016, "loss_nan_ranks": 0, "loss_rank_avg": 0.14084793627262115, "step": 480, "valid_targets_mean": 10247.7, "valid_targets_min": 2463 }, { "epoch": 2.979466119096509, "grad_norm": 0.2284209488306044, "learning_rate": 2.8535528709928076e-05, "loss": 0.4004, "loss_nan_ranks": 0, "loss_rank_avg": 0.12997475266456604, "step": 485, "valid_targets_mean": 10478.2, "valid_targets_min": 511 }, { "epoch": 3.0061601642710474, "grad_norm": 0.28764503180445666, "learning_rate": 2.8257626754737703e-05, "loss": 0.4081, "loss_nan_ranks": 0, "loss_rank_avg": 0.12351834774017334, "step": 490, "valid_targets_mean": 10007.4, "valid_targets_min": 2277 }, { "epoch": 3.0369609856262834, "grad_norm": 0.2259345401970643, "learning_rate": 2.7977789305685424e-05, "loss": 0.3923, "loss_nan_ranks": 0, "loss_rank_avg": 0.13150466978549957, "step": 495, "valid_targets_mean": 9635.6, "valid_targets_min": 3197 }, { "epoch": 3.0677618069815193, "grad_norm": 0.21654883339869027, "learning_rate": 2.7696081953488917e-05, "loss": 0.3982, "loss_nan_ranks": 0, "loss_rank_avg": 0.12963591516017914, "step": 500, "valid_targets_mean": 10897.4, "valid_targets_min": 1408 }, { "epoch": 3.0985626283367558, "grad_norm": 0.2516668390746283, "learning_rate": 2.7412570727149864e-05, "loss": 0.393, "loss_nan_ranks": 0, "loss_rank_avg": 0.13977795839309692, "step": 505, "valid_targets_mean": 9534.0, "valid_targets_min": 1574 }, { "epoch": 3.1293634496919918, "grad_norm": 0.22820277821157164, "learning_rate": 2.712732207847757e-05, "loss": 0.3902, "loss_nan_ranks": 0, "loss_rank_avg": 0.10845097154378891, "step": 510, "valid_targets_mean": 8855.2, "valid_targets_min": 277 }, { "epoch": 3.1601642710472277, "grad_norm": 0.24753089464915212, "learning_rate": 2.684040286651338e-05, "loss": 0.4033, "loss_nan_ranks": 0, "loss_rank_avg": 0.11616842448711395, "step": 515, "valid_targets_mean": 8280.0, "valid_targets_min": 424 }, { "epoch": 3.190965092402464, "grad_norm": 0.3045771848951917, "learning_rate": 2.655188034185969e-05, "loss": 0.4001, "loss_nan_ranks": 0, "loss_rank_avg": 0.1244233027100563, "step": 520, "valid_targets_mean": 8514.0, "valid_targets_min": 1325 }, { "epoch": 3.2217659137577, "grad_norm": 0.24513235456822977, "learning_rate": 2.626182213091717e-05, "loss": 0.399, "loss_nan_ranks": 0, "loss_rank_avg": 0.13471511006355286, "step": 525, "valid_targets_mean": 10048.8, "valid_targets_min": 1367 }, { "epoch": 3.2525667351129366, "grad_norm": 0.22384412934757097, "learning_rate": 2.5970296220033894e-05, "loss": 0.3957, "loss_nan_ranks": 0, "loss_rank_avg": 0.12419713288545609, "step": 530, "valid_targets_mean": 9873.7, "valid_targets_min": 2721 }, { "epoch": 3.2833675564681726, "grad_norm": 0.32631256865781927, "learning_rate": 2.5677370939570142e-05, "loss": 0.3978, "loss_nan_ranks": 0, "loss_rank_avg": 0.14734040200710297, "step": 535, "valid_targets_mean": 11067.4, "valid_targets_min": 5211 }, { "epoch": 3.3141683778234086, "grad_norm": 0.2682949436111079, "learning_rate": 2.538311494788249e-05, "loss": 0.4032, "loss_nan_ranks": 0, "loss_rank_avg": 0.14371266961097717, "step": 540, "valid_targets_mean": 9686.2, "valid_targets_min": 473 }, { "epoch": 3.344969199178645, "grad_norm": 0.213074341400513, "learning_rate": 2.508759721523113e-05, "loss": 0.3902, "loss_nan_ranks": 0, "loss_rank_avg": 0.1292104721069336, "step": 545, "valid_targets_mean": 10185.6, "valid_targets_min": 2656 }, { "epoch": 3.375770020533881, "grad_norm": 0.22979047325988197, "learning_rate": 2.479088700761398e-05, "loss": 0.4028, "loss_nan_ranks": 0, "loss_rank_avg": 0.13095468282699585, "step": 550, "valid_targets_mean": 9349.8, "valid_targets_min": 2665 }, { "epoch": 3.406570841889117, "grad_norm": 0.2317226978815973, "learning_rate": 2.4493053870531534e-05, "loss": 0.3902, "loss_nan_ranks": 0, "loss_rank_avg": 0.12155196070671082, "step": 555, "valid_targets_mean": 9454.3, "valid_targets_min": 309 }, { "epoch": 3.4373716632443534, "grad_norm": 0.2373730952557148, "learning_rate": 2.4194167612686208e-05, "loss": 0.3902, "loss_nan_ranks": 0, "loss_rank_avg": 0.13594003021717072, "step": 560, "valid_targets_mean": 10032.6, "valid_targets_min": 2078 }, { "epoch": 3.4681724845995894, "grad_norm": 0.24854976213675622, "learning_rate": 2.389429828961994e-05, "loss": 0.3902, "loss_nan_ranks": 0, "loss_rank_avg": 0.12389802187681198, "step": 565, "valid_targets_mean": 10126.2, "valid_targets_min": 2513 }, { "epoch": 3.4989733059548254, "grad_norm": 0.20719983612273074, "learning_rate": 2.3593516187293978e-05, "loss": 0.3971, "loss_nan_ranks": 0, "loss_rank_avg": 0.13517609238624573, "step": 570, "valid_targets_mean": 11068.8, "valid_targets_min": 2148 }, { "epoch": 3.529774127310062, "grad_norm": 0.2161162868640456, "learning_rate": 2.329189180561468e-05, "loss": 0.3921, "loss_nan_ranks": 0, "loss_rank_avg": 0.12443984299898148, "step": 575, "valid_targets_mean": 10574.7, "valid_targets_min": 2453 }, { "epoch": 3.560574948665298, "grad_norm": 0.22111992371927122, "learning_rate": 2.298949584190909e-05, "loss": 0.3841, "loss_nan_ranks": 0, "loss_rank_avg": 0.13051998615264893, "step": 580, "valid_targets_mean": 10700.1, "valid_targets_min": 2985 }, { "epoch": 3.5913757700205338, "grad_norm": 0.21764605005096452, "learning_rate": 2.2686399174354313e-05, "loss": 0.3966, "loss_nan_ranks": 0, "loss_rank_avg": 0.12670505046844482, "step": 585, "valid_targets_mean": 10135.7, "valid_targets_min": 2080 }, { "epoch": 3.62217659137577, "grad_norm": 0.21319458178377276, "learning_rate": 2.2382672845364474e-05, "loss": 0.3935, "loss_nan_ranks": 0, "loss_rank_avg": 0.14208604395389557, "step": 590, "valid_targets_mean": 12013.0, "valid_targets_min": 4995 }, { "epoch": 3.652977412731006, "grad_norm": 0.26122804480818995, "learning_rate": 2.2078388044939193e-05, "loss": 0.3984, "loss_nan_ranks": 0, "loss_rank_avg": 0.12731271982192993, "step": 595, "valid_targets_mean": 10844.6, "valid_targets_min": 2639 }, { "epoch": 3.683778234086242, "grad_norm": 0.22781179096787188, "learning_rate": 2.1773616093977433e-05, "loss": 0.393, "loss_nan_ranks": 0, "loss_rank_avg": 0.12237831950187683, "step": 600, "valid_targets_mean": 9943.8, "valid_targets_min": 2118 }, { "epoch": 3.7145790554414786, "grad_norm": 0.22728502345335605, "learning_rate": 2.146842842756071e-05, "loss": 0.3896, "loss_nan_ranks": 0, "loss_rank_avg": 0.1511116325855255, "step": 605, "valid_targets_mean": 11172.6, "valid_targets_min": 2785 }, { "epoch": 3.7453798767967146, "grad_norm": 0.22097946670719984, "learning_rate": 2.1162896578209517e-05, "loss": 0.396, "loss_nan_ranks": 0, "loss_rank_avg": 0.14141415059566498, "step": 610, "valid_targets_mean": 10525.3, "valid_targets_min": 2828 }, { "epoch": 3.7761806981519506, "grad_norm": 0.2295100609487947, "learning_rate": 2.0857092159116927e-05, "loss": 0.3922, "loss_nan_ranks": 0, "loss_rank_avg": 0.12614914774894714, "step": 615, "valid_targets_mean": 9766.1, "valid_targets_min": 2992 }, { "epoch": 3.806981519507187, "grad_norm": 0.22721432401492533, "learning_rate": 2.0551086847363245e-05, "loss": 0.3964, "loss_nan_ranks": 0, "loss_rank_avg": 0.13179969787597656, "step": 620, "valid_targets_mean": 10949.6, "valid_targets_min": 4792 }, { "epoch": 3.837782340862423, "grad_norm": 0.22078763844825, "learning_rate": 2.024495236711574e-05, "loss": 0.3852, "loss_nan_ranks": 0, "loss_rank_avg": 0.12299636006355286, "step": 625, "valid_targets_mean": 9665.4, "valid_targets_min": 1134 }, { "epoch": 3.868583162217659, "grad_norm": 0.22914339313844653, "learning_rate": 1.993876047281731e-05, "loss": 0.394, "loss_nan_ranks": 0, "loss_rank_avg": 0.12025485187768936, "step": 630, "valid_targets_mean": 8838.8, "valid_targets_min": 3422 }, { "epoch": 3.8993839835728954, "grad_norm": 0.2302518030075612, "learning_rate": 1.963258293236805e-05, "loss": 0.3937, "loss_nan_ranks": 0, "loss_rank_avg": 0.123673215508461, "step": 635, "valid_targets_mean": 10010.0, "valid_targets_min": 3800 }, { "epoch": 3.9301848049281314, "grad_norm": 0.21544041332713842, "learning_rate": 1.9326491510303694e-05, "loss": 0.3922, "loss_nan_ranks": 0, "loss_rank_avg": 0.12482661008834839, "step": 640, "valid_targets_mean": 9535.8, "valid_targets_min": 317 }, { "epoch": 3.9609856262833674, "grad_norm": 0.2015648862338233, "learning_rate": 1.902055795097477e-05, "loss": 0.3889, "loss_nan_ranks": 0, "loss_rank_avg": 0.14611949026584625, "step": 645, "valid_targets_mean": 11163.8, "valid_targets_min": 5062 }, { "epoch": 3.991786447638604, "grad_norm": 0.22187245313872425, "learning_rate": 1.87148539617306e-05, "loss": 0.3918, "loss_nan_ranks": 0, "loss_rank_avg": 0.12460841238498688, "step": 650, "valid_targets_mean": 8963.2, "valid_targets_min": 357 }, { "epoch": 4.018480492813142, "grad_norm": 0.22476334343242654, "learning_rate": 1.840945119611188e-05, "loss": 0.3893, "loss_nan_ranks": 0, "loss_rank_avg": 0.16152283549308777, "step": 655, "valid_targets_mean": 11521.0, "valid_targets_min": 3049 }, { "epoch": 4.049281314168378, "grad_norm": 0.21839067338296778, "learning_rate": 1.8104421237055933e-05, "loss": 0.3881, "loss_nan_ranks": 0, "loss_rank_avg": 0.1331462264060974, "step": 660, "valid_targets_mean": 9382.9, "valid_targets_min": 2223 }, { "epoch": 4.080082135523614, "grad_norm": 0.24094322681397756, "learning_rate": 1.7799835580118416e-05, "loss": 0.3865, "loss_nan_ranks": 0, "loss_rank_avg": 0.12497024983167648, "step": 665, "valid_targets_mean": 8761.0, "valid_targets_min": 2050 }, { "epoch": 4.11088295687885, "grad_norm": 0.22795248591570674, "learning_rate": 1.7495765616715656e-05, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.1485755890607834, "step": 670, "valid_targets_mean": 10701.9, "valid_targets_min": 2090 }, { "epoch": 4.1416837782340865, "grad_norm": 0.20397983901648906, "learning_rate": 1.719228261739126e-05, "loss": 0.3865, "loss_nan_ranks": 0, "loss_rank_avg": 0.12895135581493378, "step": 675, "valid_targets_mean": 10493.6, "valid_targets_min": 2308 }, { "epoch": 4.172484599589322, "grad_norm": 0.2114871061106328, "learning_rate": 1.6889457715111144e-05, "loss": 0.3911, "loss_nan_ranks": 0, "loss_rank_avg": 0.12499860674142838, "step": 680, "valid_targets_mean": 10399.1, "valid_targets_min": 354 }, { "epoch": 4.2032854209445585, "grad_norm": 0.22161472927911963, "learning_rate": 1.6587361888590718e-05, "loss": 0.385, "loss_nan_ranks": 0, "loss_rank_avg": 0.11722412705421448, "step": 685, "valid_targets_mean": 9381.0, "valid_targets_min": 3469 }, { "epoch": 4.234086242299795, "grad_norm": 0.22760003186393898, "learning_rate": 1.6286065945658368e-05, "loss": 0.3831, "loss_nan_ranks": 0, "loss_rank_avg": 0.12102054059505463, "step": 690, "valid_targets_mean": 9244.2, "valid_targets_min": 216 }, { "epoch": 4.2648870636550305, "grad_norm": 0.19333947497396198, "learning_rate": 1.598564050665882e-05, "loss": 0.3861, "loss_nan_ranks": 0, "loss_rank_avg": 0.13309797644615173, "step": 695, "valid_targets_mean": 10669.2, "valid_targets_min": 2158 }, { "epoch": 4.295687885010267, "grad_norm": 0.241912003794477, "learning_rate": 1.5686155987900604e-05, "loss": 0.3782, "loss_nan_ranks": 0, "loss_rank_avg": 0.11429376900196075, "step": 700, "valid_targets_mean": 9385.2, "valid_targets_min": 4513 }, { "epoch": 4.326488706365503, "grad_norm": 0.20300102211827217, "learning_rate": 1.53876825851512e-05, "loss": 0.3915, "loss_nan_ranks": 0, "loss_rank_avg": 0.12321654707193375, "step": 705, "valid_targets_mean": 8934.8, "valid_targets_min": 3695 }, { "epoch": 4.357289527720739, "grad_norm": 0.2217694923947379, "learning_rate": 1.5090290257184019e-05, "loss": 0.3925, "loss_nan_ranks": 0, "loss_rank_avg": 0.13099467754364014, "step": 710, "valid_targets_mean": 9030.1, "valid_targets_min": 280 }, { "epoch": 4.388090349075975, "grad_norm": 0.48428449706252485, "learning_rate": 1.4794048709380816e-05, "loss": 0.3849, "loss_nan_ranks": 0, "loss_rank_avg": 0.13166169822216034, "step": 715, "valid_targets_mean": 10115.8, "valid_targets_min": 3423 }, { "epoch": 4.418891170431212, "grad_norm": 0.1935843270430586, "learning_rate": 1.4499027377393571e-05, "loss": 0.3827, "loss_nan_ranks": 0, "loss_rank_avg": 0.13026298582553864, "step": 720, "valid_targets_mean": 10494.2, "valid_targets_min": 2821 }, { "epoch": 4.449691991786447, "grad_norm": 0.22093960967485268, "learning_rate": 1.420529541086951e-05, "loss": 0.3845, "loss_nan_ranks": 0, "loss_rank_avg": 0.14177194237709045, "step": 725, "valid_targets_mean": 10633.6, "valid_targets_min": 1912 }, { "epoch": 4.480492813141684, "grad_norm": 0.2115501533825174, "learning_rate": 1.3912921657243282e-05, "loss": 0.3941, "loss_nan_ranks": 0, "loss_rank_avg": 0.11877399682998657, "step": 730, "valid_targets_mean": 9276.1, "valid_targets_min": 2562 }, { "epoch": 4.51129363449692, "grad_norm": 0.1915086121574694, "learning_rate": 1.3621974645599854e-05, "loss": 0.3877, "loss_nan_ranks": 0, "loss_rank_avg": 0.12440060824155807, "step": 735, "valid_targets_mean": 10263.1, "valid_targets_min": 2476 }, { "epoch": 4.5420944558521565, "grad_norm": 0.19955698666287677, "learning_rate": 1.3332522570612097e-05, "loss": 0.3902, "loss_nan_ranks": 0, "loss_rank_avg": 0.11496643722057343, "step": 740, "valid_targets_mean": 8955.9, "valid_targets_min": 1880 }, { "epoch": 4.572895277207392, "grad_norm": 0.21278349081356462, "learning_rate": 1.3044633276556695e-05, "loss": 0.385, "loss_nan_ranks": 0, "loss_rank_avg": 0.12588758766651154, "step": 745, "valid_targets_mean": 10313.9, "valid_targets_min": 4151 }, { "epoch": 4.6036960985626285, "grad_norm": 0.23757114734271179, "learning_rate": 1.2758374241412276e-05, "loss": 0.3904, "loss_nan_ranks": 0, "loss_rank_avg": 0.12705953419208527, "step": 750, "valid_targets_mean": 8989.1, "valid_targets_min": 1365 }, { "epoch": 4.634496919917865, "grad_norm": 0.23378779608166858, "learning_rate": 1.2473812561043293e-05, "loss": 0.3866, "loss_nan_ranks": 0, "loss_rank_avg": 0.1282326877117157, "step": 755, "valid_targets_mean": 9478.4, "valid_targets_min": 1609 }, { "epoch": 4.6652977412731005, "grad_norm": 0.1876324920782784, "learning_rate": 1.2191014933473526e-05, "loss": 0.3801, "loss_nan_ranks": 0, "loss_rank_avg": 0.11655007302761078, "step": 760, "valid_targets_mean": 9860.2, "valid_targets_min": 3490 }, { "epoch": 4.696098562628337, "grad_norm": 0.2024605395439328, "learning_rate": 1.1910047643252822e-05, "loss": 0.3846, "loss_nan_ranks": 0, "loss_rank_avg": 0.12183412909507751, "step": 765, "valid_targets_mean": 9874.6, "valid_targets_min": 460 }, { "epoch": 4.726899383983573, "grad_norm": 0.2240778795739287, "learning_rate": 1.1630976545920777e-05, "loss": 0.379, "loss_nan_ranks": 0, "loss_rank_avg": 0.13441401720046997, "step": 770, "valid_targets_mean": 11569.1, "valid_targets_min": 3648 }, { "epoch": 4.757700205338809, "grad_norm": 0.19443047557328758, "learning_rate": 1.1353867052570915e-05, "loss": 0.3832, "loss_nan_ranks": 0, "loss_rank_avg": 0.1342424750328064, "step": 775, "valid_targets_mean": 10856.6, "valid_targets_min": 1264 }, { "epoch": 4.788501026694045, "grad_norm": 0.2271657443086, "learning_rate": 1.1078784114519072e-05, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.13600531220436096, "step": 780, "valid_targets_mean": 10584.8, "valid_targets_min": 1545 }, { "epoch": 4.819301848049282, "grad_norm": 0.21153458923939814, "learning_rate": 1.0805792208079553e-05, "loss": 0.3824, "loss_nan_ranks": 0, "loss_rank_avg": 0.11498244106769562, "step": 785, "valid_targets_mean": 9707.3, "valid_targets_min": 334 }, { "epoch": 4.850102669404517, "grad_norm": 0.19691943715186438, "learning_rate": 1.0534955319452638e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.14030292630195618, "step": 790, "valid_targets_mean": 11372.9, "valid_targets_min": 3355 }, { "epoch": 4.880903490759754, "grad_norm": 0.20881508297490065, "learning_rate": 1.0266336929726899e-05, "loss": 0.3919, "loss_nan_ranks": 0, "loss_rank_avg": 0.13601678609848022, "step": 795, "valid_targets_mean": 10858.4, "valid_targets_min": 2310 }, { "epoch": 4.91170431211499, "grad_norm": 0.23817907360477347, "learning_rate": 1.0000000000000006e-05, "loss": 0.3847, "loss_nan_ranks": 0, "loss_rank_avg": 0.13049688935279846, "step": 800, "valid_targets_mean": 9990.5, "valid_targets_min": 3354 }, { "epoch": 4.942505133470226, "grad_norm": 0.21302030580553924, "learning_rate": 9.736006956621302e-06, "loss": 0.3879, "loss_nan_ranks": 0, "loss_rank_avg": 0.14719609916210175, "step": 805, "valid_targets_mean": 11068.6, "valid_targets_min": 2399 }, { "epoch": 4.973305954825462, "grad_norm": 0.20406255452776648, "learning_rate": 9.474419676559846e-06, "loss": 0.3907, "loss_nan_ranks": 0, "loss_rank_avg": 0.11775431036949158, "step": 810, "valid_targets_mean": 9648.8, "valid_targets_min": 3376 }, { "epoch": 5.0, "grad_norm": 0.31881592188028646, "learning_rate": 9.215299472901078e-06, "loss": 0.3903, "loss_nan_ranks": 0, "loss_rank_avg": 0.4071720242500305, "step": 815, "valid_targets_mean": 10948.0, "valid_targets_min": 4411 }, { "epoch": 5.030800821355236, "grad_norm": 0.21230100597619836, "learning_rate": 8.958707080475806e-06, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.11637009680271149, "step": 820, "valid_targets_mean": 9163.8, "valid_targets_min": 906 }, { "epoch": 5.061601642710472, "grad_norm": 0.20804828235766026, "learning_rate": 8.704702641624581e-06, "loss": 0.3875, "loss_nan_ranks": 0, "loss_rank_avg": 0.1325514316558838, "step": 825, "valid_targets_mean": 9673.5, "valid_targets_min": 620 }, { "epoch": 5.092402464065708, "grad_norm": 0.19984821989380258, "learning_rate": 8.453345692101076e-06, "loss": 0.3863, "loss_nan_ranks": 0, "loss_rank_avg": 0.14904968440532684, "step": 830, "valid_targets_mean": 10810.3, "valid_targets_min": 3846 }, { "epoch": 5.123203285420945, "grad_norm": 0.1970466702581415, "learning_rate": 8.204695147117527e-06, "loss": 0.3818, "loss_nan_ranks": 0, "loss_rank_avg": 0.1161932498216629, "step": 835, "valid_targets_mean": 9579.4, "valid_targets_min": 4289 }, { "epoch": 5.15400410677618, "grad_norm": 0.2100897675843508, "learning_rate": 7.958809287535741e-06, "loss": 0.3794, "loss_nan_ranks": 0, "loss_rank_avg": 0.13574275374412537, "step": 840, "valid_targets_mean": 10023.3, "valid_targets_min": 2726 }, { "epoch": 5.184804928131417, "grad_norm": 0.21910142685280182, "learning_rate": 7.715745746206644e-06, "loss": 0.389, "loss_nan_ranks": 0, "loss_rank_avg": 0.1291072815656662, "step": 845, "valid_targets_mean": 10159.6, "valid_targets_min": 2555 }, { "epoch": 5.215605749486653, "grad_norm": 0.1897236498106727, "learning_rate": 7.475561494461882e-06, "loss": 0.3856, "loss_nan_ranks": 0, "loss_rank_avg": 0.12237901240587234, "step": 850, "valid_targets_mean": 9590.3, "valid_targets_min": 375 }, { "epoch": 5.246406570841889, "grad_norm": 0.19705449280585383, "learning_rate": 7.238312828760312e-06, "loss": 0.3866, "loss_nan_ranks": 0, "loss_rank_avg": 0.13103556632995605, "step": 855, "valid_targets_mean": 10539.4, "valid_targets_min": 554 }, { "epoch": 5.277207392197125, "grad_norm": 0.20548584664482955, "learning_rate": 7.0040553574928115e-06, "loss": 0.3776, "loss_nan_ranks": 0, "loss_rank_avg": 0.12963303923606873, "step": 860, "valid_targets_mean": 9509.7, "valid_targets_min": 2001 }, { "epoch": 5.308008213552362, "grad_norm": 0.20230787548228713, "learning_rate": 6.772843987948259e-06, "loss": 0.3775, "loss_nan_ranks": 0, "loss_rank_avg": 0.12533414363861084, "step": 865, "valid_targets_mean": 10205.4, "valid_targets_min": 1049 }, { "epoch": 5.338809034907597, "grad_norm": 0.19960934159328975, "learning_rate": 6.544732913443925e-06, "loss": 0.3749, "loss_nan_ranks": 0, "loss_rank_avg": 0.12100720405578613, "step": 870, "valid_targets_mean": 9763.3, "valid_targets_min": 1776 }, { "epoch": 5.369609856262834, "grad_norm": 0.22208580453128474, "learning_rate": 6.319775600623139e-06, "loss": 0.3778, "loss_nan_ranks": 0, "loss_rank_avg": 0.11866745352745056, "step": 875, "valid_targets_mean": 9620.2, "valid_targets_min": 309 }, { "epoch": 5.40041067761807, "grad_norm": 0.20031614807080989, "learning_rate": 6.098024776923359e-06, "loss": 0.3838, "loss_nan_ranks": 0, "loss_rank_avg": 0.10915479809045792, "step": 880, "valid_targets_mean": 9155.7, "valid_targets_min": 2445 }, { "epoch": 5.431211498973306, "grad_norm": 0.2001139189820283, "learning_rate": 5.87953241821741e-06, "loss": 0.3823, "loss_nan_ranks": 0, "loss_rank_avg": 0.13553708791732788, "step": 885, "valid_targets_mean": 10978.1, "valid_targets_min": 2277 }, { "epoch": 5.462012320328542, "grad_norm": 0.19077714182439787, "learning_rate": 5.664349736630979e-06, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.12424496561288834, "step": 890, "valid_targets_mean": 9724.1, "valid_targets_min": 2192 }, { "epoch": 5.492813141683778, "grad_norm": 0.220365025104229, "learning_rate": 5.452527168539026e-06, "loss": 0.3847, "loss_nan_ranks": 0, "loss_rank_avg": 0.12921196222305298, "step": 895, "valid_targets_mean": 11099.1, "valid_targets_min": 297 }, { "epoch": 5.523613963039015, "grad_norm": 0.20329801535781664, "learning_rate": 5.244114362744126e-06, "loss": 0.3765, "loss_nan_ranks": 0, "loss_rank_avg": 0.12272413074970245, "step": 900, "valid_targets_mean": 9506.2, "valid_targets_min": 3223 }, { "epoch": 5.55441478439425, "grad_norm": 0.19835171577454092, "learning_rate": 5.039160168839292e-06, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.13050030171871185, "step": 905, "valid_targets_mean": 9120.8, "valid_targets_min": 356 }, { "epoch": 5.585215605749487, "grad_norm": 0.19273836833844, "learning_rate": 4.837712625758251e-06, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.11816704273223877, "step": 910, "valid_targets_mean": 9898.2, "valid_targets_min": 3404 }, { "epoch": 5.616016427104723, "grad_norm": 0.2135071040731185, "learning_rate": 4.639818950515598e-06, "loss": 0.3714, "loss_nan_ranks": 0, "loss_rank_avg": 0.13075876235961914, "step": 915, "valid_targets_mean": 10097.0, "valid_targets_min": 2298 }, { "epoch": 5.646817248459959, "grad_norm": 0.20145187233343603, "learning_rate": 4.445525527139725e-06, "loss": 0.3888, "loss_nan_ranks": 0, "loss_rank_avg": 0.1326562911272049, "step": 920, "valid_targets_mean": 10346.2, "valid_targets_min": 2579 }, { "epoch": 5.677618069815195, "grad_norm": 0.2026728553310299, "learning_rate": 4.2548778958008795e-06, "loss": 0.3833, "loss_nan_ranks": 0, "loss_rank_avg": 0.12102033942937851, "step": 925, "valid_targets_mean": 10037.0, "valid_targets_min": 2033 }, { "epoch": 5.708418891170432, "grad_norm": 0.22942543241242366, "learning_rate": 4.067920742137115e-06, "loss": 0.3788, "loss_nan_ranks": 0, "loss_rank_avg": 0.12767393887043, "step": 930, "valid_targets_mean": 9976.0, "valid_targets_min": 769 }, { "epoch": 5.739219712525667, "grad_norm": 0.2107947822476004, "learning_rate": 3.884697886780437e-06, "loss": 0.3763, "loss_nan_ranks": 0, "loss_rank_avg": 0.13081158697605133, "step": 935, "valid_targets_mean": 10241.8, "valid_targets_min": 1520 }, { "epoch": 5.770020533880904, "grad_norm": 0.20468361463207382, "learning_rate": 3.705252275085791e-06, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.1312338411808014, "step": 940, "valid_targets_mean": 9789.3, "valid_targets_min": 3126 }, { "epoch": 5.80082135523614, "grad_norm": 0.2077142192995124, "learning_rate": 3.5296259670651177e-06, "loss": 0.38, "loss_nan_ranks": 0, "loss_rank_avg": 0.12859410047531128, "step": 945, "valid_targets_mean": 9755.1, "valid_targets_min": 3721 }, { "epoch": 5.831622176591376, "grad_norm": 0.2293598580227486, "learning_rate": 3.357860127529e-06, "loss": 0.3814, "loss_nan_ranks": 0, "loss_rank_avg": 0.11473749577999115, "step": 950, "valid_targets_mean": 9844.6, "valid_targets_min": 2569 }, { "epoch": 5.862422997946612, "grad_norm": 0.22647676832017485, "learning_rate": 3.1899950164380677e-06, "loss": 0.3842, "loss_nan_ranks": 0, "loss_rank_avg": 0.13261717557907104, "step": 955, "valid_targets_mean": 9990.8, "valid_targets_min": 2856 }, { "epoch": 5.8932238193018485, "grad_norm": 0.21079531555994593, "learning_rate": 3.0260699794665527e-06, "loss": 0.3878, "loss_nan_ranks": 0, "loss_rank_avg": 0.12958693504333496, "step": 960, "valid_targets_mean": 9744.6, "valid_targets_min": 3064 }, { "epoch": 5.924024640657084, "grad_norm": 0.19047352276450594, "learning_rate": 2.866123438780073e-06, "loss": 0.3832, "loss_nan_ranks": 0, "loss_rank_avg": 0.10699409246444702, "step": 965, "valid_targets_mean": 9628.2, "valid_targets_min": 382 }, { "epoch": 5.95482546201232, "grad_norm": 0.20209054048874064, "learning_rate": 2.710192884029954e-06, "loss": 0.3781, "loss_nan_ranks": 0, "loss_rank_avg": 0.11523480713367462, "step": 970, "valid_targets_mean": 9445.2, "valid_targets_min": 365 }, { "epoch": 5.985626283367557, "grad_norm": 0.2019234767006319, "learning_rate": 2.558314863566043e-06, "loss": 0.381, "loss_nan_ranks": 0, "loss_rank_avg": 0.11367767304182053, "step": 975, "valid_targets_mean": 9772.4, "valid_targets_min": 3046 }, { "epoch": 6.012320328542095, "grad_norm": 0.1900395292909151, "learning_rate": 2.410524975870221e-06, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.10480041801929474, "step": 980, "valid_targets_mean": 9407.6, "valid_targets_min": 2045 }, { "epoch": 6.04312114989733, "grad_norm": 0.19218447130290486, "learning_rate": 2.266857861212499e-06, "loss": 0.3828, "loss_nan_ranks": 0, "loss_rank_avg": 0.1330980658531189, "step": 985, "valid_targets_mean": 10623.0, "valid_targets_min": 2736 }, { "epoch": 6.073921971252567, "grad_norm": 0.199658834668884, "learning_rate": 2.127347193531757e-06, "loss": 0.3788, "loss_nan_ranks": 0, "loss_rank_avg": 0.11302483081817627, "step": 990, "valid_targets_mean": 8684.1, "valid_targets_min": 779 }, { "epoch": 6.104722792607803, "grad_norm": 0.18466431044606102, "learning_rate": 1.9920256725429275e-06, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.12558288872241974, "step": 995, "valid_targets_mean": 9945.8, "valid_targets_min": 2175 }, { "epoch": 6.135523613963039, "grad_norm": 0.19451755215714164, "learning_rate": 1.8609250160725877e-06, "loss": 0.3755, "loss_nan_ranks": 0, "loss_rank_avg": 0.12043657898902893, "step": 1000, "valid_targets_mean": 9261.3, "valid_targets_min": 3221 }, { "epoch": 6.166324435318275, "grad_norm": 0.1989609973315876, "learning_rate": 1.7340759526246254e-06, "loss": 0.3761, "loss_nan_ranks": 0, "loss_rank_avg": 0.1296187788248062, "step": 1005, "valid_targets_mean": 10305.9, "valid_targets_min": 313 }, { "epoch": 6.1971252566735116, "grad_norm": 0.20176202225188247, "learning_rate": 1.6115082141778459e-06, "loss": 0.3827, "loss_nan_ranks": 0, "loss_rank_avg": 0.13769292831420898, "step": 1010, "valid_targets_mean": 10433.3, "valid_targets_min": 277 }, { "epoch": 6.227926078028747, "grad_norm": 0.18970780271253246, "learning_rate": 1.4932505292171407e-06, "loss": 0.3743, "loss_nan_ranks": 0, "loss_rank_avg": 0.13758821785449982, "step": 1015, "valid_targets_mean": 10472.3, "valid_targets_min": 3634 }, { "epoch": 6.2587268993839835, "grad_norm": 0.19962437444228312, "learning_rate": 1.3793306159998498e-06, "loss": 0.383, "loss_nan_ranks": 0, "loss_rank_avg": 0.1420556902885437, "step": 1020, "valid_targets_mean": 10590.9, "valid_targets_min": 2254 }, { "epoch": 6.28952772073922, "grad_norm": 0.18873687421623273, "learning_rate": 1.2697751760589072e-06, "loss": 0.379, "loss_nan_ranks": 0, "loss_rank_avg": 0.13322848081588745, "step": 1025, "valid_targets_mean": 10306.2, "valid_targets_min": 2599 }, { "epoch": 6.3203285420944555, "grad_norm": 0.1935512888145092, "learning_rate": 1.1646098879443124e-06, "loss": 0.3827, "loss_nan_ranks": 0, "loss_rank_avg": 0.14393621683120728, "step": 1030, "valid_targets_mean": 10398.6, "valid_targets_min": 373 }, { "epoch": 6.351129363449692, "grad_norm": 0.18545682928793727, "learning_rate": 1.0638594012043834e-06, "loss": 0.3849, "loss_nan_ranks": 0, "loss_rank_avg": 0.11467485129833221, "step": 1035, "valid_targets_mean": 9599.2, "valid_targets_min": 307 }, { "epoch": 6.381930184804928, "grad_norm": 0.2022710371779159, "learning_rate": 9.67547330608165e-07, "loss": 0.3803, "loss_nan_ranks": 0, "loss_rank_avg": 0.10918845981359482, "step": 1040, "valid_targets_mean": 9210.8, "valid_targets_min": 2726 }, { "epoch": 6.412731006160164, "grad_norm": 0.2206043061190306, "learning_rate": 8.756962506103983e-07, "loss": 0.3821, "loss_nan_ranks": 0, "loss_rank_avg": 0.15293879806995392, "step": 1045, "valid_targets_mean": 11501.6, "valid_targets_min": 1882 }, { "epoch": 6.4435318275154, "grad_norm": 0.18582939615049404, "learning_rate": 7.883276900603288e-07, "loss": 0.3762, "loss_nan_ranks": 0, "loss_rank_avg": 0.11088518053293228, "step": 1050, "valid_targets_mean": 9877.3, "valid_targets_min": 3126 }, { "epoch": 6.474332648870637, "grad_norm": 0.1972521643922487, "learning_rate": 7.054621271555917e-07, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.11362037062644958, "step": 1055, "valid_targets_mean": 9118.0, "valid_targets_min": 3093 }, { "epoch": 6.505133470225873, "grad_norm": 0.17586915711860904, "learning_rate": 6.271189846423543e-07, "loss": 0.384, "loss_nan_ranks": 0, "loss_rank_avg": 0.12249845266342163, "step": 1060, "valid_targets_mean": 10817.7, "valid_targets_min": 2099 }, { "epoch": 6.535934291581109, "grad_norm": 0.18416484921166754, "learning_rate": 5.533166252628319e-07, "loss": 0.3782, "loss_nan_ranks": 0, "loss_rank_avg": 0.14194294810295105, "step": 1065, "valid_targets_mean": 11814.9, "valid_targets_min": 3005 }, { "epoch": 6.566735112936345, "grad_norm": 0.22274241611101864, "learning_rate": 4.840723474512876e-07, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.11658580601215363, "step": 1070, "valid_targets_mean": 8806.6, "valid_targets_min": 2184 }, { "epoch": 6.597535934291582, "grad_norm": 0.18488977562314957, "learning_rate": 4.1940238127946785e-07, "loss": 0.3772, "loss_nan_ranks": 0, "loss_rank_avg": 0.13311666250228882, "step": 1075, "valid_targets_mean": 10068.2, "valid_targets_min": 1276 }, { "epoch": 6.628336755646817, "grad_norm": 0.1827839387461324, "learning_rate": 3.593218846524571e-07, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.13171571493148804, "step": 1080, "valid_targets_mean": 10453.6, "valid_targets_min": 2963 }, { "epoch": 6.6591375770020536, "grad_norm": 0.1928136072465324, "learning_rate": 3.038449397558396e-07, "loss": 0.384, "loss_nan_ranks": 0, "loss_rank_avg": 0.12778720259666443, "step": 1085, "valid_targets_mean": 9230.7, "valid_targets_min": 2828 }, { "epoch": 6.68993839835729, "grad_norm": 0.1870233805513281, "learning_rate": 2.52984549754991e-07, "loss": 0.3799, "loss_nan_ranks": 0, "loss_rank_avg": 0.12494305521249771, "step": 1090, "valid_targets_mean": 9985.8, "valid_targets_min": 3213 }, { "epoch": 6.7207392197125255, "grad_norm": 0.1918146360124604, "learning_rate": 2.0675263574729376e-07, "loss": 0.3756, "loss_nan_ranks": 0, "loss_rank_avg": 0.1374993622303009, "step": 1095, "valid_targets_mean": 10922.1, "valid_targets_min": 3853 }, { "epoch": 6.751540041067762, "grad_norm": 0.18053030537193954, "learning_rate": 1.6516003396795489e-07, "loss": 0.3819, "loss_nan_ranks": 0, "loss_rank_avg": 0.13119381666183472, "step": 1100, "valid_targets_mean": 10285.2, "valid_targets_min": 1441 }, { "epoch": 6.782340862422998, "grad_norm": 0.18115468859329534, "learning_rate": 1.2821649325012396e-07, "loss": 0.3817, "loss_nan_ranks": 0, "loss_rank_avg": 0.12020154297351837, "step": 1105, "valid_targets_mean": 10192.8, "valid_targets_min": 3135 }, { "epoch": 6.813141683778234, "grad_norm": 0.1815765782306925, "learning_rate": 9.593067273987456e-08, "loss": 0.3827, "loss_nan_ranks": 0, "loss_rank_avg": 0.12480635195970535, "step": 1110, "valid_targets_mean": 9938.3, "valid_targets_min": 280 }, { "epoch": 6.84394250513347, "grad_norm": 0.18701855589480754, "learning_rate": 6.831013986660307e-08, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.12354324758052826, "step": 1115, "valid_targets_mean": 9370.7, "valid_targets_min": 394 }, { "epoch": 6.874743326488707, "grad_norm": 0.18257135141110437, "learning_rate": 4.5361368569301064e-08, "loss": 0.3858, "loss_nan_ranks": 0, "loss_rank_avg": 0.11373500525951385, "step": 1120, "valid_targets_mean": 9538.7, "valid_targets_min": 769 }, { "epoch": 6.905544147843942, "grad_norm": 0.18518035371992592, "learning_rate": 2.7089737779142365e-08, "loss": 0.3768, "loss_nan_ranks": 0, "loss_rank_avg": 0.11957018822431564, "step": 1125, "valid_targets_mean": 10110.7, "valid_targets_min": 511 }, { "epoch": 6.936344969199179, "grad_norm": 0.1931229420258479, "learning_rate": 1.349953015872707e-08, "loss": 0.3827, "loss_nan_ranks": 0, "loss_rank_avg": 0.13230648636817932, "step": 1130, "valid_targets_mean": 10297.8, "valid_targets_min": 3331 }, { "epoch": 6.967145790554415, "grad_norm": 0.19107387956856198, "learning_rate": 4.593931098262338e-09, "loss": 0.375, "loss_nan_ranks": 0, "loss_rank_avg": 0.13124366104602814, "step": 1135, "valid_targets_mean": 10772.5, "valid_targets_min": 3633 }, { "epoch": 6.997946611909651, "grad_norm": 0.19309149811821025, "learning_rate": 3.7502796895516615e-10, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.12663644552230835, "step": 1140, "valid_targets_mean": 9650.2, "valid_targets_min": 304 }, { "epoch": 7.0, "step": 1141, "total_flos": 4.616861268529644e+18, "train_loss": 0.0, "train_runtime": 12.7361, "train_samples_per_second": 8551.494, "train_steps_per_second": 89.588 } ], "logging_steps": 5, "max_steps": 1141, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.616861268529644e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }