{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1345, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0186219739292365, "grad_norm": 21.642117448143836, "learning_rate": 1.1851851851851854e-06, "loss": 0.8194, "loss_nan_ranks": 0, "loss_rank_avg": 0.38135290145874023, "step": 5, "valid_targets_mean": 6579.2, "valid_targets_min": 1651 }, { "epoch": 0.037243947858473, "grad_norm": 4.087277364340966, "learning_rate": 2.666666666666667e-06, "loss": 0.7007, "loss_nan_ranks": 0, "loss_rank_avg": 0.29667899012565613, "step": 10, "valid_targets_mean": 6844.8, "valid_targets_min": 2928 }, { "epoch": 0.055865921787709494, "grad_norm": 1.3577392450412775, "learning_rate": 4.1481481481481485e-06, "loss": 0.5605, "loss_nan_ranks": 0, "loss_rank_avg": 0.29086098074913025, "step": 15, "valid_targets_mean": 6970.2, "valid_targets_min": 1816 }, { "epoch": 0.074487895716946, "grad_norm": 0.7405497195075723, "learning_rate": 5.62962962962963e-06, "loss": 0.5243, "loss_nan_ranks": 0, "loss_rank_avg": 0.2757507264614105, "step": 20, "valid_targets_mean": 7589.0, "valid_targets_min": 2058 }, { "epoch": 0.0931098696461825, "grad_norm": 0.5342294038072763, "learning_rate": 7.111111111111112e-06, "loss": 0.4723, "loss_nan_ranks": 0, "loss_rank_avg": 0.23901812732219696, "step": 25, "valid_targets_mean": 7366.7, "valid_targets_min": 3293 }, { "epoch": 0.11173184357541899, "grad_norm": 0.3675025735899647, "learning_rate": 8.592592592592593e-06, "loss": 0.4134, "loss_nan_ranks": 0, "loss_rank_avg": 0.20101700723171234, "step": 30, "valid_targets_mean": 6763.6, "valid_targets_min": 2844 }, { "epoch": 0.1303538175046555, "grad_norm": 0.28406425939888624, "learning_rate": 1.0074074074074074e-05, "loss": 0.4061, "loss_nan_ranks": 0, "loss_rank_avg": 0.2010914832353592, "step": 35, "valid_targets_mean": 6678.9, "valid_targets_min": 2465 }, { "epoch": 0.148975791433892, "grad_norm": 0.24209059303362065, "learning_rate": 1.1555555555555556e-05, "loss": 0.3775, "loss_nan_ranks": 0, "loss_rank_avg": 0.1803324669599533, "step": 40, "valid_targets_mean": 7128.3, "valid_targets_min": 2482 }, { "epoch": 0.16759776536312848, "grad_norm": 0.2198398559021447, "learning_rate": 1.303703703703704e-05, "loss": 0.3596, "loss_nan_ranks": 0, "loss_rank_avg": 0.17511887848377228, "step": 45, "valid_targets_mean": 6365.0, "valid_targets_min": 1594 }, { "epoch": 0.186219739292365, "grad_norm": 0.23483233838383402, "learning_rate": 1.4518518518518521e-05, "loss": 0.3496, "loss_nan_ranks": 0, "loss_rank_avg": 0.17358100414276123, "step": 50, "valid_targets_mean": 6929.4, "valid_targets_min": 2099 }, { "epoch": 0.2048417132216015, "grad_norm": 0.23317500582763062, "learning_rate": 1.6000000000000003e-05, "loss": 0.3422, "loss_nan_ranks": 0, "loss_rank_avg": 0.14544479548931122, "step": 55, "valid_targets_mean": 6168.6, "valid_targets_min": 1277 }, { "epoch": 0.22346368715083798, "grad_norm": 0.22927400488720545, "learning_rate": 1.7481481481481483e-05, "loss": 0.3347, "loss_nan_ranks": 0, "loss_rank_avg": 0.14921344816684723, "step": 60, "valid_targets_mean": 6729.7, "valid_targets_min": 2910 }, { "epoch": 0.24208566108007448, "grad_norm": 0.6336152567068057, "learning_rate": 1.8962962962962966e-05, "loss": 0.3681, "loss_nan_ranks": 0, "loss_rank_avg": 0.24772310256958008, "step": 65, "valid_targets_mean": 4494.7, "valid_targets_min": 1555 }, { "epoch": 0.260707635009311, "grad_norm": 0.3813258332534016, "learning_rate": 2.0444444444444446e-05, "loss": 0.4898, "loss_nan_ranks": 0, "loss_rank_avg": 0.23514796793460846, "step": 70, "valid_targets_mean": 4495.6, "valid_targets_min": 1665 }, { "epoch": 0.27932960893854747, "grad_norm": 0.35453929366021575, "learning_rate": 2.192592592592593e-05, "loss": 0.4593, "loss_nan_ranks": 0, "loss_rank_avg": 0.22719019651412964, "step": 75, "valid_targets_mean": 4864.8, "valid_targets_min": 1754 }, { "epoch": 0.297951582867784, "grad_norm": 0.3727662336939942, "learning_rate": 2.3407407407407406e-05, "loss": 0.4488, "loss_nan_ranks": 0, "loss_rank_avg": 0.204828143119812, "step": 80, "valid_targets_mean": 4095.2, "valid_targets_min": 1680 }, { "epoch": 0.3165735567970205, "grad_norm": 0.3292902280153117, "learning_rate": 2.4888888888888893e-05, "loss": 0.4398, "loss_nan_ranks": 0, "loss_rank_avg": 0.23676007986068726, "step": 85, "valid_targets_mean": 4658.2, "valid_targets_min": 1772 }, { "epoch": 0.33519553072625696, "grad_norm": 0.30524766317944435, "learning_rate": 2.637037037037037e-05, "loss": 0.4274, "loss_nan_ranks": 0, "loss_rank_avg": 0.22691433131694794, "step": 90, "valid_targets_mean": 4365.0, "valid_targets_min": 1554 }, { "epoch": 0.3538175046554935, "grad_norm": 0.2951571873200663, "learning_rate": 2.7851851851851856e-05, "loss": 0.4306, "loss_nan_ranks": 0, "loss_rank_avg": 0.22553832828998566, "step": 95, "valid_targets_mean": 4563.3, "valid_targets_min": 1295 }, { "epoch": 0.37243947858473, "grad_norm": 0.36390699372185614, "learning_rate": 2.9333333333333333e-05, "loss": 0.4196, "loss_nan_ranks": 0, "loss_rank_avg": 0.21875639259815216, "step": 100, "valid_targets_mean": 4605.9, "valid_targets_min": 1345 }, { "epoch": 0.39106145251396646, "grad_norm": 0.3165125581017608, "learning_rate": 3.0814814814814816e-05, "loss": 0.4131, "loss_nan_ranks": 0, "loss_rank_avg": 0.20045466721057892, "step": 105, "valid_targets_mean": 4428.8, "valid_targets_min": 1371 }, { "epoch": 0.409683426443203, "grad_norm": 0.29704668214079993, "learning_rate": 3.22962962962963e-05, "loss": 0.4105, "loss_nan_ranks": 0, "loss_rank_avg": 0.2023867964744568, "step": 110, "valid_targets_mean": 4861.6, "valid_targets_min": 2146 }, { "epoch": 0.42830540037243947, "grad_norm": 0.3129499722925657, "learning_rate": 3.377777777777778e-05, "loss": 0.4089, "loss_nan_ranks": 0, "loss_rank_avg": 0.19118505716323853, "step": 115, "valid_targets_mean": 4324.2, "valid_targets_min": 1820 }, { "epoch": 0.44692737430167595, "grad_norm": 0.29818090952293125, "learning_rate": 3.5259259259259266e-05, "loss": 0.4142, "loss_nan_ranks": 0, "loss_rank_avg": 0.2282976657152176, "step": 120, "valid_targets_mean": 4377.3, "valid_targets_min": 1703 }, { "epoch": 0.4655493482309125, "grad_norm": 0.318290618063663, "learning_rate": 3.674074074074074e-05, "loss": 0.4009, "loss_nan_ranks": 0, "loss_rank_avg": 0.18989060819149017, "step": 125, "valid_targets_mean": 4578.9, "valid_targets_min": 1563 }, { "epoch": 0.48417132216014896, "grad_norm": 0.3245155267672114, "learning_rate": 3.8222222222222226e-05, "loss": 0.4124, "loss_nan_ranks": 0, "loss_rank_avg": 0.19848603010177612, "step": 130, "valid_targets_mean": 4194.0, "valid_targets_min": 1500 }, { "epoch": 0.5027932960893855, "grad_norm": 0.28543573898331964, "learning_rate": 3.970370370370371e-05, "loss": 0.4016, "loss_nan_ranks": 0, "loss_rank_avg": 0.23786954581737518, "step": 135, "valid_targets_mean": 5342.8, "valid_targets_min": 1610 }, { "epoch": 0.521415270018622, "grad_norm": 0.28802986563270455, "learning_rate": 3.999892143807746e-05, "loss": 0.401, "loss_nan_ranks": 0, "loss_rank_avg": 0.20227207243442535, "step": 140, "valid_targets_mean": 4513.6, "valid_targets_min": 1404 }, { "epoch": 0.5400372439478585, "grad_norm": 0.30386893239084545, "learning_rate": 3.9994539979639836e-05, "loss": 0.405, "loss_nan_ranks": 0, "loss_rank_avg": 0.18734104931354523, "step": 145, "valid_targets_mean": 4314.5, "valid_targets_min": 2202 }, { "epoch": 0.5586592178770949, "grad_norm": 0.3383103695230139, "learning_rate": 3.99867889523818e-05, "loss": 0.4053, "loss_nan_ranks": 0, "loss_rank_avg": 0.18549704551696777, "step": 150, "valid_targets_mean": 4813.7, "valid_targets_min": 2071 }, { "epoch": 0.5772811918063314, "grad_norm": 0.2904479068591015, "learning_rate": 3.997566966254095e-05, "loss": 0.3914, "loss_nan_ranks": 0, "loss_rank_avg": 0.1885179728269577, "step": 155, "valid_targets_mean": 4968.1, "valid_targets_min": 2122 }, { "epoch": 0.595903165735568, "grad_norm": 0.25874316987433926, "learning_rate": 3.996118398398948e-05, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.18153317272663116, "step": 160, "valid_targets_mean": 4805.8, "valid_targets_min": 2169 }, { "epoch": 0.6145251396648045, "grad_norm": 0.24000968016526364, "learning_rate": 3.9943334357918374e-05, "loss": 0.3737, "loss_nan_ranks": 0, "loss_rank_avg": 0.17824988067150116, "step": 165, "valid_targets_mean": 5026.0, "valid_targets_min": 2262 }, { "epoch": 0.633147113594041, "grad_norm": 0.2622565490667829, "learning_rate": 3.992212379242601e-05, "loss": 0.365, "loss_nan_ranks": 0, "loss_rank_avg": 0.1855766922235489, "step": 170, "valid_targets_mean": 5019.6, "valid_targets_min": 1638 }, { "epoch": 0.6517690875232774, "grad_norm": 0.2813347193628216, "learning_rate": 3.989755586201125e-05, "loss": 0.3753, "loss_nan_ranks": 0, "loss_rank_avg": 0.21819989383220673, "step": 175, "valid_targets_mean": 5755.8, "valid_targets_min": 1629 }, { "epoch": 0.6703910614525139, "grad_norm": 0.35070472645512296, "learning_rate": 3.9869634706971e-05, "loss": 0.3622, "loss_nan_ranks": 0, "loss_rank_avg": 0.17688250541687012, "step": 180, "valid_targets_mean": 4580.4, "valid_targets_min": 1779 }, { "epoch": 0.6890130353817505, "grad_norm": 0.2455164839195718, "learning_rate": 3.983836503270254e-05, "loss": 0.3633, "loss_nan_ranks": 0, "loss_rank_avg": 0.16190500557422638, "step": 185, "valid_targets_mean": 4942.4, "valid_targets_min": 1684 }, { "epoch": 0.707635009310987, "grad_norm": 0.2517618296962006, "learning_rate": 3.9803752108910435e-05, "loss": 0.3062, "loss_nan_ranks": 0, "loss_rank_avg": 0.1465204805135727, "step": 190, "valid_targets_mean": 5849.2, "valid_targets_min": 1692 }, { "epoch": 0.7262569832402235, "grad_norm": 0.21962381487947016, "learning_rate": 3.9765801768718606e-05, "loss": 0.2655, "loss_nan_ranks": 0, "loss_rank_avg": 0.1271316260099411, "step": 195, "valid_targets_mean": 5372.0, "valid_targets_min": 1636 }, { "epoch": 0.74487895716946, "grad_norm": 0.22504863778674275, "learning_rate": 3.972452040768718e-05, "loss": 0.2684, "loss_nan_ranks": 0, "loss_rank_avg": 0.1281280517578125, "step": 200, "valid_targets_mean": 5450.6, "valid_targets_min": 1560 }, { "epoch": 0.7635009310986964, "grad_norm": 0.20034946812157745, "learning_rate": 3.9679914982734765e-05, "loss": 0.2578, "loss_nan_ranks": 0, "loss_rank_avg": 0.13299940526485443, "step": 205, "valid_targets_mean": 5811.3, "valid_targets_min": 3371 }, { "epoch": 0.7821229050279329, "grad_norm": 0.21586328587416592, "learning_rate": 3.9631993010966e-05, "loss": 0.2544, "loss_nan_ranks": 0, "loss_rank_avg": 0.12790830433368683, "step": 210, "valid_targets_mean": 5681.6, "valid_targets_min": 2255 }, { "epoch": 0.8007448789571695, "grad_norm": 0.19845096912413016, "learning_rate": 3.958076256840472e-05, "loss": 0.254, "loss_nan_ranks": 0, "loss_rank_avg": 0.13116233050823212, "step": 215, "valid_targets_mean": 5787.3, "valid_targets_min": 1115 }, { "epoch": 0.819366852886406, "grad_norm": 0.20961259827538878, "learning_rate": 3.952623228863301e-05, "loss": 0.2465, "loss_nan_ranks": 0, "loss_rank_avg": 0.13585473597049713, "step": 220, "valid_targets_mean": 5298.0, "valid_targets_min": 2592 }, { "epoch": 0.8379888268156425, "grad_norm": 0.20265238224585524, "learning_rate": 3.946841136133619e-05, "loss": 0.2481, "loss_nan_ranks": 0, "loss_rank_avg": 0.11897260695695877, "step": 225, "valid_targets_mean": 5558.0, "valid_targets_min": 1887 }, { "epoch": 0.8566108007448789, "grad_norm": 0.2308437781547286, "learning_rate": 3.940730953075414e-05, "loss": 0.2521, "loss_nan_ranks": 0, "loss_rank_avg": 0.11843226104974747, "step": 230, "valid_targets_mean": 5469.3, "valid_targets_min": 1983 }, { "epoch": 0.8752327746741154, "grad_norm": 0.1901545351670762, "learning_rate": 3.934293709403915e-05, "loss": 0.2419, "loss_nan_ranks": 0, "loss_rank_avg": 0.12269283086061478, "step": 235, "valid_targets_mean": 6020.0, "valid_targets_min": 2484 }, { "epoch": 0.8938547486033519, "grad_norm": 0.20390172292403214, "learning_rate": 3.9275304899520595e-05, "loss": 0.2353, "loss_nan_ranks": 0, "loss_rank_avg": 0.12945251166820526, "step": 240, "valid_targets_mean": 5460.2, "valid_targets_min": 2296 }, { "epoch": 0.9124767225325885, "grad_norm": 0.2018105761072753, "learning_rate": 3.920442434487676e-05, "loss": 0.2391, "loss_nan_ranks": 0, "loss_rank_avg": 0.11418928951025009, "step": 245, "valid_targets_mean": 5293.8, "valid_targets_min": 2419 }, { "epoch": 0.931098696461825, "grad_norm": 0.2113162885633886, "learning_rate": 3.913030737521401e-05, "loss": 0.2391, "loss_nan_ranks": 0, "loss_rank_avg": 0.13120897114276886, "step": 250, "valid_targets_mean": 6017.7, "valid_targets_min": 1306 }, { "epoch": 0.9497206703910615, "grad_norm": 0.20370322196516943, "learning_rate": 3.905296648105379e-05, "loss": 0.2295, "loss_nan_ranks": 0, "loss_rank_avg": 0.11392738670110703, "step": 255, "valid_targets_mean": 5287.6, "valid_targets_min": 2147 }, { "epoch": 0.9683426443202979, "grad_norm": 0.2091507978308365, "learning_rate": 3.8972414696227606e-05, "loss": 0.2339, "loss_nan_ranks": 0, "loss_rank_avg": 0.11450576782226562, "step": 260, "valid_targets_mean": 6001.4, "valid_targets_min": 3237 }, { "epoch": 0.9869646182495344, "grad_norm": 0.19044462299382905, "learning_rate": 3.888866559568056e-05, "loss": 0.2371, "loss_nan_ranks": 0, "loss_rank_avg": 0.12079953402280807, "step": 265, "valid_targets_mean": 5754.3, "valid_targets_min": 2672 }, { "epoch": 1.0037243947858474, "grad_norm": 0.23254152478175197, "learning_rate": 3.880173329318363e-05, "loss": 0.2601, "loss_nan_ranks": 0, "loss_rank_avg": 0.1569366604089737, "step": 270, "valid_targets_mean": 7176.8, "valid_targets_min": 2475 }, { "epoch": 1.0223463687150838, "grad_norm": 0.23126772971993143, "learning_rate": 3.871163243895514e-05, "loss": 0.3195, "loss_nan_ranks": 0, "loss_rank_avg": 0.16103023290634155, "step": 275, "valid_targets_mean": 7152.2, "valid_targets_min": 1817 }, { "epoch": 1.0409683426443204, "grad_norm": 0.20910745368528044, "learning_rate": 3.861837821719184e-05, "loss": 0.3114, "loss_nan_ranks": 0, "loss_rank_avg": 0.1427866518497467, "step": 280, "valid_targets_mean": 6413.1, "valid_targets_min": 2694 }, { "epoch": 1.0595903165735567, "grad_norm": 0.21857108825645447, "learning_rate": 3.852198634351002e-05, "loss": 0.3012, "loss_nan_ranks": 0, "loss_rank_avg": 0.15423469245433807, "step": 285, "valid_targets_mean": 6742.4, "valid_targets_min": 2529 }, { "epoch": 1.0782122905027933, "grad_norm": 0.24290514151500678, "learning_rate": 3.8422473062297e-05, "loss": 0.3037, "loss_nan_ranks": 0, "loss_rank_avg": 0.13684120774269104, "step": 290, "valid_targets_mean": 6891.3, "valid_targets_min": 2165 }, { "epoch": 1.0968342644320297, "grad_norm": 0.2381895141979772, "learning_rate": 3.831985514397363e-05, "loss": 0.2963, "loss_nan_ranks": 0, "loss_rank_avg": 0.1476469486951828, "step": 295, "valid_targets_mean": 6714.7, "valid_targets_min": 1734 }, { "epoch": 1.1154562383612663, "grad_norm": 0.2204640407674478, "learning_rate": 3.8214149882167973e-05, "loss": 0.2813, "loss_nan_ranks": 0, "loss_rank_avg": 0.14708954095840454, "step": 300, "valid_targets_mean": 7105.4, "valid_targets_min": 1845 }, { "epoch": 1.1340782122905029, "grad_norm": 0.20271638221684102, "learning_rate": 3.810537509080096e-05, "loss": 0.2861, "loss_nan_ranks": 0, "loss_rank_avg": 0.1319805383682251, "step": 305, "valid_targets_mean": 6475.6, "valid_targets_min": 2297 }, { "epoch": 1.1527001862197392, "grad_norm": 0.19882669398564898, "learning_rate": 3.79935491010843e-05, "loss": 0.2812, "loss_nan_ranks": 0, "loss_rank_avg": 0.14669251441955566, "step": 310, "valid_targets_mean": 6744.1, "valid_targets_min": 2793 }, { "epoch": 1.1713221601489758, "grad_norm": 0.19672205460405268, "learning_rate": 3.787869075843124e-05, "loss": 0.2742, "loss_nan_ranks": 0, "loss_rank_avg": 0.1271461695432663, "step": 315, "valid_targets_mean": 6725.4, "valid_targets_min": 2520 }, { "epoch": 1.1899441340782122, "grad_norm": 0.21185057390384063, "learning_rate": 3.77608194192806e-05, "loss": 0.2768, "loss_nan_ranks": 0, "loss_rank_avg": 0.1285659223794937, "step": 320, "valid_targets_mean": 6286.6, "valid_targets_min": 2643 }, { "epoch": 1.2085661080074488, "grad_norm": 0.19816569561566144, "learning_rate": 3.76399549478348e-05, "loss": 0.278, "loss_nan_ranks": 0, "loss_rank_avg": 0.13813738524913788, "step": 325, "valid_targets_mean": 6647.1, "valid_targets_min": 2218 }, { "epoch": 1.2271880819366854, "grad_norm": 0.2196810339151432, "learning_rate": 3.75161177127122e-05, "loss": 0.2762, "loss_nan_ranks": 0, "loss_rank_avg": 0.14068274199962616, "step": 330, "valid_targets_mean": 6991.6, "valid_targets_min": 3271 }, { "epoch": 1.2458100558659218, "grad_norm": 0.3172944180604243, "learning_rate": 3.7389328583514554e-05, "loss": 0.3238, "loss_nan_ranks": 0, "loss_rank_avg": 0.2535311281681061, "step": 335, "valid_targets_mean": 5593.6, "valid_targets_min": 1902 }, { "epoch": 1.2644320297951583, "grad_norm": 0.6100509748963104, "learning_rate": 3.725960892730991e-05, "loss": 0.3686, "loss_nan_ranks": 0, "loss_rank_avg": 0.18261736631393433, "step": 340, "valid_targets_mean": 4560.6, "valid_targets_min": 1726 }, { "epoch": 1.2830540037243947, "grad_norm": 0.4772667013712657, "learning_rate": 3.712698060503178e-05, "loss": 0.3539, "loss_nan_ranks": 0, "loss_rank_avg": 0.18201421201229095, "step": 345, "valid_targets_mean": 4523.4, "valid_targets_min": 1801 }, { "epoch": 1.3016759776536313, "grad_norm": 0.273306718239659, "learning_rate": 3.699146596779501e-05, "loss": 0.3495, "loss_nan_ranks": 0, "loss_rank_avg": 0.16353599727153778, "step": 350, "valid_targets_mean": 4178.9, "valid_targets_min": 1509 }, { "epoch": 1.3202979515828677, "grad_norm": 0.24477989967733027, "learning_rate": 3.6853087853129076e-05, "loss": 0.3471, "loss_nan_ranks": 0, "loss_rank_avg": 0.1676838994026184, "step": 355, "valid_targets_mean": 4510.9, "valid_targets_min": 1718 }, { "epoch": 1.3389199255121043, "grad_norm": 0.2650679861397239, "learning_rate": 3.6711869581129436e-05, "loss": 0.3419, "loss_nan_ranks": 0, "loss_rank_avg": 0.1828334778547287, "step": 360, "valid_targets_mean": 5023.4, "valid_targets_min": 1480 }, { "epoch": 1.3575418994413408, "grad_norm": 0.25430947626656825, "learning_rate": 3.6567834950527463e-05, "loss": 0.3288, "loss_nan_ranks": 0, "loss_rank_avg": 0.14467652142047882, "step": 365, "valid_targets_mean": 4319.0, "valid_targets_min": 1555 }, { "epoch": 1.3761638733705772, "grad_norm": 0.28579601865971616, "learning_rate": 3.6421008234679834e-05, "loss": 0.3391, "loss_nan_ranks": 0, "loss_rank_avg": 0.2052292674779892, "step": 370, "valid_targets_mean": 5234.1, "valid_targets_min": 1586 }, { "epoch": 1.3947858472998138, "grad_norm": 0.33454042737790046, "learning_rate": 3.627141417747783e-05, "loss": 0.3213, "loss_nan_ranks": 0, "loss_rank_avg": 0.16307473182678223, "step": 375, "valid_targets_mean": 4537.7, "valid_targets_min": 1861 }, { "epoch": 1.4134078212290504, "grad_norm": 0.31924227832532165, "learning_rate": 3.611907798917743e-05, "loss": 0.3218, "loss_nan_ranks": 0, "loss_rank_avg": 0.17609496414661407, "step": 380, "valid_targets_mean": 4638.2, "valid_targets_min": 1752 }, { "epoch": 1.4320297951582868, "grad_norm": 0.3108323026088264, "learning_rate": 3.596402534215074e-05, "loss": 0.3201, "loss_nan_ranks": 0, "loss_rank_avg": 0.1662149280309677, "step": 385, "valid_targets_mean": 3677.5, "valid_targets_min": 1683 }, { "epoch": 1.4506517690875234, "grad_norm": 0.3400629224739461, "learning_rate": 3.580628236655955e-05, "loss": 0.3241, "loss_nan_ranks": 0, "loss_rank_avg": 0.1697298288345337, "step": 390, "valid_targets_mean": 4185.1, "valid_targets_min": 1365 }, { "epoch": 1.4692737430167597, "grad_norm": 0.2692076563805843, "learning_rate": 3.564587564595182e-05, "loss": 0.3105, "loss_nan_ranks": 0, "loss_rank_avg": 0.14816735684871674, "step": 395, "valid_targets_mean": 4103.6, "valid_targets_min": 1774 }, { "epoch": 1.4878957169459963, "grad_norm": 0.2938902558984829, "learning_rate": 3.5482832212781655e-05, "loss": 0.3195, "loss_nan_ranks": 0, "loss_rank_avg": 0.14214275777339935, "step": 400, "valid_targets_mean": 4546.2, "valid_targets_min": 1444 }, { "epoch": 1.5065176908752327, "grad_norm": 0.2779669217184944, "learning_rate": 3.5317179543853676e-05, "loss": 0.3116, "loss_nan_ranks": 0, "loss_rank_avg": 0.17869967222213745, "step": 405, "valid_targets_mean": 4563.6, "valid_targets_min": 1730 }, { "epoch": 1.5251396648044693, "grad_norm": 0.3519340618992221, "learning_rate": 3.514894555569255e-05, "loss": 0.3168, "loss_nan_ranks": 0, "loss_rank_avg": 0.1719929575920105, "step": 410, "valid_targets_mean": 5359.0, "valid_targets_min": 1927 }, { "epoch": 1.5437616387337059, "grad_norm": 0.2895011222215137, "learning_rate": 3.497815859983831e-05, "loss": 0.3164, "loss_nan_ranks": 0, "loss_rank_avg": 0.16711927950382233, "step": 415, "valid_targets_mean": 5427.6, "valid_targets_min": 1828 }, { "epoch": 1.5623836126629422, "grad_norm": 0.30331945108732455, "learning_rate": 3.4804847458068504e-05, "loss": 0.3082, "loss_nan_ranks": 0, "loss_rank_avg": 0.1559574156999588, "step": 420, "valid_targets_mean": 4616.0, "valid_targets_min": 1635 }, { "epoch": 1.5810055865921788, "grad_norm": 0.24000502620110592, "learning_rate": 3.462904133754767e-05, "loss": 0.3025, "loss_nan_ranks": 0, "loss_rank_avg": 0.13982824981212616, "step": 425, "valid_targets_mean": 5337.4, "valid_targets_min": 2186 }, { "epoch": 1.5996275605214154, "grad_norm": 0.26300498381054566, "learning_rate": 3.445076986590531e-05, "loss": 0.2898, "loss_nan_ranks": 0, "loss_rank_avg": 0.15317204594612122, "step": 430, "valid_targets_mean": 5250.1, "valid_targets_min": 2190 }, { "epoch": 1.6182495344506518, "grad_norm": 0.2445293076857602, "learning_rate": 3.427006308624282e-05, "loss": 0.2912, "loss_nan_ranks": 0, "loss_rank_avg": 0.13379313051700592, "step": 435, "valid_targets_mean": 5520.8, "valid_targets_min": 1685 }, { "epoch": 1.6368715083798882, "grad_norm": 0.291611256050332, "learning_rate": 3.408695145207058e-05, "loss": 0.2914, "loss_nan_ranks": 0, "loss_rank_avg": 0.14052408933639526, "step": 440, "valid_targets_mean": 4284.8, "valid_targets_min": 1917 }, { "epoch": 1.6554934823091247, "grad_norm": 0.26344185927865893, "learning_rate": 3.390146582217572e-05, "loss": 0.3026, "loss_nan_ranks": 0, "loss_rank_avg": 0.14575421810150146, "step": 445, "valid_targets_mean": 4883.9, "valid_targets_min": 2201 }, { "epoch": 1.6741154562383613, "grad_norm": 0.27491339530871106, "learning_rate": 3.3713637455421694e-05, "loss": 0.296, "loss_nan_ranks": 0, "loss_rank_avg": 0.13534614443778992, "step": 450, "valid_targets_mean": 4947.1, "valid_targets_min": 2005 }, { "epoch": 1.6927374301675977, "grad_norm": 0.23557643948797238, "learning_rate": 3.352349800548039e-05, "loss": 0.2856, "loss_nan_ranks": 0, "loss_rank_avg": 0.12307044118642807, "step": 455, "valid_targets_mean": 4546.2, "valid_targets_min": 1287 }, { "epoch": 1.7113594040968343, "grad_norm": 0.23755658794564719, "learning_rate": 3.333107951549773e-05, "loss": 0.2116, "loss_nan_ranks": 0, "loss_rank_avg": 0.09926241636276245, "step": 460, "valid_targets_mean": 5677.3, "valid_targets_min": 2077 }, { "epoch": 1.7299813780260709, "grad_norm": 0.24052245151458515, "learning_rate": 3.313641441269361e-05, "loss": 0.194, "loss_nan_ranks": 0, "loss_rank_avg": 0.09152091294527054, "step": 465, "valid_targets_mean": 5346.2, "valid_targets_min": 1922 }, { "epoch": 1.7486033519553073, "grad_norm": 0.20262692112304034, "learning_rate": 3.2939535502897075e-05, "loss": 0.1991, "loss_nan_ranks": 0, "loss_rank_avg": 0.10032620280981064, "step": 470, "valid_targets_mean": 5635.6, "valid_targets_min": 2843 }, { "epoch": 1.7672253258845436, "grad_norm": 0.2082760355908499, "learning_rate": 3.27404759650178e-05, "loss": 0.1914, "loss_nan_ranks": 0, "loss_rank_avg": 0.10044640302658081, "step": 475, "valid_targets_mean": 5877.1, "valid_targets_min": 1542 }, { "epoch": 1.7858472998137802, "grad_norm": 0.18839411974053805, "learning_rate": 3.253926934545459e-05, "loss": 0.1902, "loss_nan_ranks": 0, "loss_rank_avg": 0.09364160150289536, "step": 480, "valid_targets_mean": 5282.3, "valid_targets_min": 1863 }, { "epoch": 1.8044692737430168, "grad_norm": 0.2078067971535603, "learning_rate": 3.233594955244202e-05, "loss": 0.1863, "loss_nan_ranks": 0, "loss_rank_avg": 0.08583924919366837, "step": 485, "valid_targets_mean": 5719.1, "valid_targets_min": 3505 }, { "epoch": 1.8230912476722532, "grad_norm": 0.2067606733374613, "learning_rate": 3.213055085033607e-05, "loss": 0.1869, "loss_nan_ranks": 0, "loss_rank_avg": 0.09971226006746292, "step": 490, "valid_targets_mean": 5770.4, "valid_targets_min": 3200 }, { "epoch": 1.8417132216014898, "grad_norm": 0.22001102128513125, "learning_rate": 3.192310785383967e-05, "loss": 0.1879, "loss_nan_ranks": 0, "loss_rank_avg": 0.09830441325902939, "step": 495, "valid_targets_mean": 5594.3, "valid_targets_min": 2835 }, { "epoch": 1.8603351955307263, "grad_norm": 0.20769566942714865, "learning_rate": 3.1713655522169396e-05, "loss": 0.1922, "loss_nan_ranks": 0, "loss_rank_avg": 0.09986946731805801, "step": 500, "valid_targets_mean": 5947.1, "valid_targets_min": 2162 }, { "epoch": 1.8789571694599627, "grad_norm": 0.20824203625139057, "learning_rate": 3.15022291531639e-05, "loss": 0.1777, "loss_nan_ranks": 0, "loss_rank_avg": 0.09173315018415451, "step": 505, "valid_targets_mean": 5633.9, "valid_targets_min": 962 }, { "epoch": 1.8975791433891993, "grad_norm": 0.1895160390922454, "learning_rate": 3.128886437733539e-05, "loss": 0.1804, "loss_nan_ranks": 0, "loss_rank_avg": 0.09285343438386917, "step": 510, "valid_targets_mean": 5799.0, "valid_targets_min": 1817 }, { "epoch": 1.916201117318436, "grad_norm": 0.2073882558654316, "learning_rate": 3.1073597151865e-05, "loss": 0.1807, "loss_nan_ranks": 0, "loss_rank_avg": 0.08280744403600693, "step": 515, "valid_targets_mean": 5286.2, "valid_targets_min": 1184 }, { "epoch": 1.9348230912476723, "grad_norm": 0.19321114943812392, "learning_rate": 3.085646375454317e-05, "loss": 0.184, "loss_nan_ranks": 0, "loss_rank_avg": 0.09179046005010605, "step": 520, "valid_targets_mean": 5621.4, "valid_targets_min": 1620 }, { "epoch": 1.9534450651769086, "grad_norm": 0.20782347219627428, "learning_rate": 3.0637500777655886e-05, "loss": 0.1757, "loss_nan_ranks": 0, "loss_rank_avg": 0.09478215128183365, "step": 525, "valid_targets_mean": 5574.6, "valid_targets_min": 1495 }, { "epoch": 1.9720670391061452, "grad_norm": 0.1995258445427204, "learning_rate": 3.0416745121818062e-05, "loss": 0.1826, "loss_nan_ranks": 0, "loss_rank_avg": 0.10429661720991135, "step": 530, "valid_targets_mean": 5663.5, "valid_targets_min": 3093 }, { "epoch": 1.9906890130353818, "grad_norm": 0.19212440943218573, "learning_rate": 3.019423398975481e-05, "loss": 0.1796, "loss_nan_ranks": 0, "loss_rank_avg": 0.0856226310133934, "step": 535, "valid_targets_mean": 5346.0, "valid_targets_min": 1972 }, { "epoch": 2.007448789571695, "grad_norm": 0.2640487831217621, "learning_rate": 2.9970004880031918e-05, "loss": 0.2138, "loss_nan_ranks": 0, "loss_rank_avg": 0.1243792250752449, "step": 540, "valid_targets_mean": 6556.2, "valid_targets_min": 1732 }, { "epoch": 2.026070763500931, "grad_norm": 0.22297988123685433, "learning_rate": 2.974409558073641e-05, "loss": 0.2688, "loss_nan_ranks": 0, "loss_rank_avg": 0.1313825100660324, "step": 545, "valid_targets_mean": 6745.2, "valid_targets_min": 2595 }, { "epoch": 2.0446927374301676, "grad_norm": 0.20812569653815188, "learning_rate": 2.9516544163108335e-05, "loss": 0.2521, "loss_nan_ranks": 0, "loss_rank_avg": 0.1266535073518753, "step": 550, "valid_targets_mean": 6414.2, "valid_targets_min": 1804 }, { "epoch": 2.063314711359404, "grad_norm": 0.21892809836030894, "learning_rate": 2.928738897512481e-05, "loss": 0.2519, "loss_nan_ranks": 0, "loss_rank_avg": 0.1333746314048767, "step": 555, "valid_targets_mean": 7147.8, "valid_targets_min": 2654 }, { "epoch": 2.0819366852886407, "grad_norm": 0.2105458445254568, "learning_rate": 2.90566686350375e-05, "loss": 0.257, "loss_nan_ranks": 0, "loss_rank_avg": 0.12962865829467773, "step": 560, "valid_targets_mean": 7391.1, "valid_targets_min": 2506 }, { "epoch": 2.100558659217877, "grad_norm": 0.23229526744342008, "learning_rate": 2.8824422024864427e-05, "loss": 0.2446, "loss_nan_ranks": 0, "loss_rank_avg": 0.0939510241150856, "step": 565, "valid_targets_mean": 5653.9, "valid_targets_min": 2050 }, { "epoch": 2.1191806331471135, "grad_norm": 0.21648288691431336, "learning_rate": 2.859068828383747e-05, "loss": 0.2375, "loss_nan_ranks": 0, "loss_rank_avg": 0.11028096079826355, "step": 570, "valid_targets_mean": 6577.6, "valid_targets_min": 2180 }, { "epoch": 2.1378026070763503, "grad_norm": 0.24028170421075354, "learning_rate": 2.8355506801806392e-05, "loss": 0.2403, "loss_nan_ranks": 0, "loss_rank_avg": 0.1300063580274582, "step": 575, "valid_targets_mean": 7243.6, "valid_targets_min": 1462 }, { "epoch": 2.1564245810055866, "grad_norm": 0.21050370735020552, "learning_rate": 2.8118917212600715e-05, "loss": 0.229, "loss_nan_ranks": 0, "loss_rank_avg": 0.09919723123311996, "step": 580, "valid_targets_mean": 5979.6, "valid_targets_min": 2261 }, { "epoch": 2.175046554934823, "grad_norm": 0.2074491330303158, "learning_rate": 2.7880959387350458e-05, "loss": 0.2248, "loss_nan_ranks": 0, "loss_rank_avg": 0.10763826221227646, "step": 585, "valid_targets_mean": 6526.0, "valid_targets_min": 2889 }, { "epoch": 2.1936685288640594, "grad_norm": 0.23033689702375226, "learning_rate": 2.7641673427766847e-05, "loss": 0.2338, "loss_nan_ranks": 0, "loss_rank_avg": 0.11596024036407471, "step": 590, "valid_targets_mean": 6637.4, "valid_targets_min": 2198 }, { "epoch": 2.212290502793296, "grad_norm": 0.32105132805524217, "learning_rate": 2.740109965938423e-05, "loss": 0.2302, "loss_nan_ranks": 0, "loss_rank_avg": 0.11476022750139236, "step": 595, "valid_targets_mean": 6630.0, "valid_targets_min": 2447 }, { "epoch": 2.2309124767225326, "grad_norm": 0.22409633977119747, "learning_rate": 2.715927862476421e-05, "loss": 0.2317, "loss_nan_ranks": 0, "loss_rank_avg": 0.10554740577936172, "step": 600, "valid_targets_mean": 6784.3, "valid_targets_min": 2018 }, { "epoch": 2.249534450651769, "grad_norm": 0.2699928714484659, "learning_rate": 2.6916251076663252e-05, "loss": 0.274, "loss_nan_ranks": 0, "loss_rank_avg": 0.1415012925863266, "step": 605, "valid_targets_mean": 4402.8, "valid_targets_min": 1761 }, { "epoch": 2.2681564245810057, "grad_norm": 0.3721989864420147, "learning_rate": 2.667205797116484e-05, "loss": 0.2939, "loss_nan_ranks": 0, "loss_rank_avg": 0.13999386131763458, "step": 610, "valid_targets_mean": 4164.7, "valid_targets_min": 1572 }, { "epoch": 2.286778398510242, "grad_norm": 0.3223866147310536, "learning_rate": 2.642674046077737e-05, "loss": 0.2744, "loss_nan_ranks": 0, "loss_rank_avg": 0.12863564491271973, "step": 615, "valid_targets_mean": 4361.9, "valid_targets_min": 1692 }, { "epoch": 2.3054003724394785, "grad_norm": 0.31856607726998415, "learning_rate": 2.618033988749895e-05, "loss": 0.2649, "loss_nan_ranks": 0, "loss_rank_avg": 0.13214845955371857, "step": 620, "valid_targets_mean": 4420.2, "valid_targets_min": 1890 }, { "epoch": 2.3240223463687153, "grad_norm": 0.32666505138818375, "learning_rate": 2.5932897775850276e-05, "loss": 0.2591, "loss_nan_ranks": 0, "loss_rank_avg": 0.12275639921426773, "step": 625, "valid_targets_mean": 4191.0, "valid_targets_min": 1736 }, { "epoch": 2.3426443202979517, "grad_norm": 0.35120524548526266, "learning_rate": 2.568445582587672e-05, "loss": 0.2585, "loss_nan_ranks": 0, "loss_rank_avg": 0.1437913030385971, "step": 630, "valid_targets_mean": 5161.3, "valid_targets_min": 1889 }, { "epoch": 2.361266294227188, "grad_norm": 0.31946963446152915, "learning_rate": 2.5435055906120837e-05, "loss": 0.2389, "loss_nan_ranks": 0, "loss_rank_avg": 0.11732056736946106, "step": 635, "valid_targets_mean": 4429.2, "valid_targets_min": 1278 }, { "epoch": 2.3798882681564244, "grad_norm": 0.3289667519465328, "learning_rate": 2.5184740046566537e-05, "loss": 0.2651, "loss_nan_ranks": 0, "loss_rank_avg": 0.12818960845470428, "step": 640, "valid_targets_mean": 4688.0, "valid_targets_min": 1434 }, { "epoch": 2.398510242085661, "grad_norm": 0.33406726170658674, "learning_rate": 2.4933550431555973e-05, "loss": 0.2483, "loss_nan_ranks": 0, "loss_rank_avg": 0.14254990220069885, "step": 645, "valid_targets_mean": 4809.8, "valid_targets_min": 1383 }, { "epoch": 2.4171322160148976, "grad_norm": 0.3212253397329, "learning_rate": 2.468152939268044e-05, "loss": 0.2535, "loss_nan_ranks": 0, "loss_rank_avg": 0.12575781345367432, "step": 650, "valid_targets_mean": 4856.3, "valid_targets_min": 1622 }, { "epoch": 2.435754189944134, "grad_norm": 0.4193177434942788, "learning_rate": 2.4428719401646494e-05, "loss": 0.2432, "loss_nan_ranks": 0, "loss_rank_avg": 0.13185112178325653, "step": 655, "valid_targets_mean": 4368.7, "valid_targets_min": 1635 }, { "epoch": 2.4543761638733708, "grad_norm": 0.3218928559582912, "learning_rate": 2.4175163063118416e-05, "loss": 0.2479, "loss_nan_ranks": 0, "loss_rank_avg": 0.11782893538475037, "step": 660, "valid_targets_mean": 4609.1, "valid_targets_min": 1411 }, { "epoch": 2.472998137802607, "grad_norm": 0.39047332217518843, "learning_rate": 2.392090310753829e-05, "loss": 0.2326, "loss_nan_ranks": 0, "loss_rank_avg": 0.11099883168935776, "step": 665, "valid_targets_mean": 3889.4, "valid_targets_min": 1389 }, { "epoch": 2.4916201117318435, "grad_norm": 0.35689901255313794, "learning_rate": 2.366598238392487e-05, "loss": 0.2395, "loss_nan_ranks": 0, "loss_rank_avg": 0.11990585923194885, "step": 670, "valid_targets_mean": 4407.1, "valid_targets_min": 1660 }, { "epoch": 2.51024208566108, "grad_norm": 0.3515897774313373, "learning_rate": 2.341044385265248e-05, "loss": 0.2448, "loss_nan_ranks": 0, "loss_rank_avg": 0.12714000046253204, "step": 675, "valid_targets_mean": 4711.8, "valid_targets_min": 1891 }, { "epoch": 2.5288640595903167, "grad_norm": 0.32159496474421606, "learning_rate": 2.315433057821113e-05, "loss": 0.2418, "loss_nan_ranks": 0, "loss_rank_avg": 0.1311168670654297, "step": 680, "valid_targets_mean": 5252.6, "valid_targets_min": 2010 }, { "epoch": 2.547486033519553, "grad_norm": 0.2942349994028514, "learning_rate": 2.289768572194913e-05, "loss": 0.265, "loss_nan_ranks": 0, "loss_rank_avg": 0.14049138128757477, "step": 685, "valid_targets_mean": 5910.4, "valid_targets_min": 2167 }, { "epoch": 2.5661080074487894, "grad_norm": 0.2829531013214266, "learning_rate": 2.26405525347993e-05, "loss": 0.2454, "loss_nan_ranks": 0, "loss_rank_avg": 0.10512509196996689, "step": 690, "valid_targets_mean": 4626.5, "valid_targets_min": 2055 }, { "epoch": 2.5847299813780262, "grad_norm": 0.29066488817804154, "learning_rate": 2.238297434999016e-05, "loss": 0.2397, "loss_nan_ranks": 0, "loss_rank_avg": 0.12394720315933228, "step": 695, "valid_targets_mean": 5047.3, "valid_targets_min": 1900 }, { "epoch": 2.6033519553072626, "grad_norm": 0.31069669583595805, "learning_rate": 2.212499457574321e-05, "loss": 0.2313, "loss_nan_ranks": 0, "loss_rank_avg": 0.12280986458063126, "step": 700, "valid_targets_mean": 4993.0, "valid_targets_min": 1957 }, { "epoch": 2.621973929236499, "grad_norm": 0.2847297085592331, "learning_rate": 2.1866656687957607e-05, "loss": 0.2297, "loss_nan_ranks": 0, "loss_rank_avg": 0.12000254541635513, "step": 705, "valid_targets_mean": 4902.9, "valid_targets_min": 2061 }, { "epoch": 2.6405959031657353, "grad_norm": 0.2907401533736448, "learning_rate": 2.160800422288338e-05, "loss": 0.233, "loss_nan_ranks": 0, "loss_rank_avg": 0.1304769217967987, "step": 710, "valid_targets_mean": 5751.9, "valid_targets_min": 2160 }, { "epoch": 2.659217877094972, "grad_norm": 0.2859926818666239, "learning_rate": 2.134908076978452e-05, "loss": 0.2397, "loss_nan_ranks": 0, "loss_rank_avg": 0.11222833395004272, "step": 715, "valid_targets_mean": 5082.8, "valid_targets_min": 1988 }, { "epoch": 2.6778398510242085, "grad_norm": 0.27206168365644984, "learning_rate": 2.1089929963593126e-05, "loss": 0.2315, "loss_nan_ranks": 0, "loss_rank_avg": 0.101720429956913, "step": 720, "valid_targets_mean": 5274.8, "valid_targets_min": 1647 }, { "epoch": 2.6964618249534453, "grad_norm": 0.3043304137788501, "learning_rate": 2.0830595477555864e-05, "loss": 0.225, "loss_nan_ranks": 0, "loss_rank_avg": 0.07684291154146194, "step": 725, "valid_targets_mean": 5933.8, "valid_targets_min": 3347 }, { "epoch": 2.7150837988826817, "grad_norm": 0.2402351751603468, "learning_rate": 2.0571121015873924e-05, "loss": 0.1535, "loss_nan_ranks": 0, "loss_rank_avg": 0.07310352474451065, "step": 730, "valid_targets_mean": 5624.6, "valid_targets_min": 2049 }, { "epoch": 2.733705772811918, "grad_norm": 0.24128869777290585, "learning_rate": 2.031155030633784e-05, "loss": 0.1558, "loss_nan_ranks": 0, "loss_rank_avg": 0.07828167825937271, "step": 735, "valid_targets_mean": 5613.7, "valid_targets_min": 1873 }, { "epoch": 2.7523277467411544, "grad_norm": 0.2424343463905981, "learning_rate": 2.005192709295824e-05, "loss": 0.15, "loss_nan_ranks": 0, "loss_rank_avg": 0.07069131731987, "step": 740, "valid_targets_mean": 5804.0, "valid_targets_min": 2140 }, { "epoch": 2.770949720670391, "grad_norm": 0.23424931942920957, "learning_rate": 1.979229512859395e-05, "loss": 0.1465, "loss_nan_ranks": 0, "loss_rank_avg": 0.0725865438580513, "step": 745, "valid_targets_mean": 5391.5, "valid_targets_min": 2179 }, { "epoch": 2.7895716945996276, "grad_norm": 0.21095459317824178, "learning_rate": 1.953269816757853e-05, "loss": 0.141, "loss_nan_ranks": 0, "loss_rank_avg": 0.060710322111845016, "step": 750, "valid_targets_mean": 5356.8, "valid_targets_min": 2059 }, { "epoch": 2.808193668528864, "grad_norm": 0.20598792165943527, "learning_rate": 1.9273179958346646e-05, "loss": 0.1421, "loss_nan_ranks": 0, "loss_rank_avg": 0.07347074151039124, "step": 755, "valid_targets_mean": 5635.1, "valid_targets_min": 3283 }, { "epoch": 2.826815642458101, "grad_norm": 0.22337773162145105, "learning_rate": 1.9013784236061337e-05, "loss": 0.1466, "loss_nan_ranks": 0, "loss_rank_avg": 0.08201978355646133, "step": 760, "valid_targets_mean": 5853.5, "valid_targets_min": 2873 }, { "epoch": 2.845437616387337, "grad_norm": 0.2198077783372861, "learning_rate": 1.875455471524362e-05, "loss": 0.1435, "loss_nan_ranks": 0, "loss_rank_avg": 0.06954735517501831, "step": 765, "valid_targets_mean": 5576.1, "valid_targets_min": 2043 }, { "epoch": 2.8640595903165735, "grad_norm": 0.20508966703816744, "learning_rate": 1.8495535082405476e-05, "loss": 0.1461, "loss_nan_ranks": 0, "loss_rank_avg": 0.06442942470312119, "step": 770, "valid_targets_mean": 5982.4, "valid_targets_min": 2448 }, { "epoch": 2.88268156424581, "grad_norm": 0.23390173228904518, "learning_rate": 1.8236768988687665e-05, "loss": 0.1345, "loss_nan_ranks": 0, "loss_rank_avg": 0.06609448045492172, "step": 775, "valid_targets_mean": 5523.9, "valid_targets_min": 2423 }, { "epoch": 2.9013035381750467, "grad_norm": 0.2498082897395668, "learning_rate": 1.797830004250338e-05, "loss": 0.1395, "loss_nan_ranks": 0, "loss_rank_avg": 0.07419461756944656, "step": 780, "valid_targets_mean": 5512.2, "valid_targets_min": 1837 }, { "epoch": 2.919925512104283, "grad_norm": 0.23855742502441213, "learning_rate": 1.772017180218919e-05, "loss": 0.1342, "loss_nan_ranks": 0, "loss_rank_avg": 0.06143729388713837, "step": 785, "valid_targets_mean": 5664.5, "valid_targets_min": 2283 }, { "epoch": 2.9385474860335195, "grad_norm": 0.2311888603873581, "learning_rate": 1.746242776866441e-05, "loss": 0.1369, "loss_nan_ranks": 0, "loss_rank_avg": 0.06190266087651253, "step": 790, "valid_targets_mean": 5318.5, "valid_targets_min": 2009 }, { "epoch": 2.9571694599627563, "grad_norm": 0.2385347395394115, "learning_rate": 1.7205111378100097e-05, "loss": 0.1344, "loss_nan_ranks": 0, "loss_rank_avg": 0.0655934065580368, "step": 795, "valid_targets_mean": 5128.0, "valid_targets_min": 2322 }, { "epoch": 2.9757914338919926, "grad_norm": 0.22210726395021782, "learning_rate": 1.6948265994599042e-05, "loss": 0.1346, "loss_nan_ranks": 0, "loss_rank_avg": 0.06535261124372482, "step": 800, "valid_targets_mean": 5618.6, "valid_targets_min": 2370 }, { "epoch": 2.994413407821229, "grad_norm": 0.22478461073835102, "learning_rate": 1.669193490288781e-05, "loss": 0.1364, "loss_nan_ranks": 0, "loss_rank_avg": 0.07586240768432617, "step": 805, "valid_targets_mean": 5800.1, "valid_targets_min": 1139 }, { "epoch": 3.011173184357542, "grad_norm": 0.3923546745207127, "learning_rate": 1.6436161301022215e-05, "loss": 0.1839, "loss_nan_ranks": 0, "loss_rank_avg": 0.12482082843780518, "step": 810, "valid_targets_mean": 7708.5, "valid_targets_min": 2114 }, { "epoch": 3.0297951582867784, "grad_norm": 0.2769532328786158, "learning_rate": 1.618098829310744e-05, "loss": 0.2342, "loss_nan_ranks": 0, "loss_rank_avg": 0.11922207474708557, "step": 815, "valid_targets_mean": 7049.1, "valid_targets_min": 1582 }, { "epoch": 3.0484171322160147, "grad_norm": 0.2181540258492701, "learning_rate": 1.5926458882033876e-05, "loss": 0.2119, "loss_nan_ranks": 0, "loss_rank_avg": 0.11395502835512161, "step": 820, "valid_targets_mean": 6744.9, "valid_targets_min": 2415 }, { "epoch": 3.0670391061452515, "grad_norm": 0.2193962929759941, "learning_rate": 1.567261596223011e-05, "loss": 0.2119, "loss_nan_ranks": 0, "loss_rank_avg": 0.12162897735834122, "step": 825, "valid_targets_mean": 7223.4, "valid_targets_min": 1917 }, { "epoch": 3.085661080074488, "grad_norm": 0.20490169049334603, "learning_rate": 1.5419502312434177e-05, "loss": 0.2146, "loss_nan_ranks": 0, "loss_rank_avg": 0.10778558254241943, "step": 830, "valid_targets_mean": 6793.6, "valid_targets_min": 2699 }, { "epoch": 3.1042830540037243, "grad_norm": 0.2229350850738388, "learning_rate": 1.5167160588484287e-05, "loss": 0.2006, "loss_nan_ranks": 0, "loss_rank_avg": 0.08931157737970352, "step": 835, "valid_targets_mean": 6319.4, "valid_targets_min": 2331 }, { "epoch": 3.122905027932961, "grad_norm": 0.22327323724804218, "learning_rate": 1.4915633316130267e-05, "loss": 0.2012, "loss_nan_ranks": 0, "loss_rank_avg": 0.09623245149850845, "step": 840, "valid_targets_mean": 6464.3, "valid_targets_min": 2301 }, { "epoch": 3.1415270018621975, "grad_norm": 0.2268340495199551, "learning_rate": 1.4664962883866936e-05, "loss": 0.1997, "loss_nan_ranks": 0, "loss_rank_avg": 0.0771939679980278, "step": 845, "valid_targets_mean": 5905.9, "valid_targets_min": 1789 }, { "epoch": 3.160148975791434, "grad_norm": 0.2326664565154385, "learning_rate": 1.4415191535790605e-05, "loss": 0.1908, "loss_nan_ranks": 0, "loss_rank_avg": 0.09210383892059326, "step": 850, "valid_targets_mean": 6846.8, "valid_targets_min": 2291 }, { "epoch": 3.17877094972067, "grad_norm": 0.23061554940146764, "learning_rate": 1.4166361364479946e-05, "loss": 0.1819, "loss_nan_ranks": 0, "loss_rank_avg": 0.09809307008981705, "step": 855, "valid_targets_mean": 6919.4, "valid_targets_min": 1723 }, { "epoch": 3.197392923649907, "grad_norm": 0.24843559414613484, "learning_rate": 1.3918514303902307e-05, "loss": 0.1955, "loss_nan_ranks": 0, "loss_rank_avg": 0.10550827533006668, "step": 860, "valid_targets_mean": 7691.2, "valid_targets_min": 2510 }, { "epoch": 3.2160148975791434, "grad_norm": 0.22805182583679512, "learning_rate": 1.3671692122346843e-05, "loss": 0.1935, "loss_nan_ranks": 0, "loss_rank_avg": 0.09740543365478516, "step": 865, "valid_targets_mean": 6916.7, "valid_targets_min": 2234 }, { "epoch": 3.2346368715083798, "grad_norm": 0.22827913971204664, "learning_rate": 1.3425936415385557e-05, "loss": 0.1911, "loss_nan_ranks": 0, "loss_rank_avg": 0.10098141431808472, "step": 870, "valid_targets_mean": 6807.6, "valid_targets_min": 2726 }, { "epoch": 3.2532588454376166, "grad_norm": 0.31207084774804683, "learning_rate": 1.318128859886339e-05, "loss": 0.2223, "loss_nan_ranks": 0, "loss_rank_avg": 0.12020784616470337, "step": 875, "valid_targets_mean": 4461.2, "valid_targets_min": 1463 }, { "epoch": 3.271880819366853, "grad_norm": 0.3393354652534016, "learning_rate": 1.2937789901918671e-05, "loss": 0.2344, "loss_nan_ranks": 0, "loss_rank_avg": 0.1211516484618187, "step": 880, "valid_targets_mean": 4668.2, "valid_targets_min": 1743 }, { "epoch": 3.2905027932960893, "grad_norm": 0.3629688521529729, "learning_rate": 1.2695481360034978e-05, "loss": 0.2131, "loss_nan_ranks": 0, "loss_rank_avg": 0.10855499655008316, "step": 885, "valid_targets_mean": 4609.4, "valid_targets_min": 1633 }, { "epoch": 3.3091247672253257, "grad_norm": 0.3504360151541152, "learning_rate": 1.245440380812566e-05, "loss": 0.2012, "loss_nan_ranks": 0, "loss_rank_avg": 0.10300320386886597, "step": 890, "valid_targets_mean": 4465.0, "valid_targets_min": 2085 }, { "epoch": 3.3277467411545625, "grad_norm": 0.32251798545786803, "learning_rate": 1.2214597873652172e-05, "loss": 0.1936, "loss_nan_ranks": 0, "loss_rank_avg": 0.0905773714184761, "step": 895, "valid_targets_mean": 4838.2, "valid_targets_min": 1799 }, { "epoch": 3.346368715083799, "grad_norm": 0.3336622000100127, "learning_rate": 1.1976103969777336e-05, "loss": 0.1995, "loss_nan_ranks": 0, "loss_rank_avg": 0.09432216733694077, "step": 900, "valid_targets_mean": 4426.6, "valid_targets_min": 1714 }, { "epoch": 3.364990689013035, "grad_norm": 0.3929612418575331, "learning_rate": 1.1738962288554745e-05, "loss": 0.1841, "loss_nan_ranks": 0, "loss_rank_avg": 0.0892234817147255, "step": 905, "valid_targets_mean": 4027.8, "valid_targets_min": 1946 }, { "epoch": 3.383612662942272, "grad_norm": 0.3446626418867246, "learning_rate": 1.1503212794155406e-05, "loss": 0.1979, "loss_nan_ranks": 0, "loss_rank_avg": 0.0939427986741066, "step": 910, "valid_targets_mean": 4722.4, "valid_targets_min": 1696 }, { "epoch": 3.4022346368715084, "grad_norm": 0.35774264340118445, "learning_rate": 1.1268895216132818e-05, "loss": 0.1957, "loss_nan_ranks": 0, "loss_rank_avg": 0.11662470549345016, "step": 915, "valid_targets_mean": 4746.3, "valid_targets_min": 1884 }, { "epoch": 3.4208566108007448, "grad_norm": 0.34279937816191636, "learning_rate": 1.1036049042727557e-05, "loss": 0.185, "loss_nan_ranks": 0, "loss_rank_avg": 0.08681117743253708, "step": 920, "valid_targets_mean": 4164.0, "valid_targets_min": 1738 }, { "epoch": 3.439478584729981, "grad_norm": 0.3763744700811407, "learning_rate": 1.0804713514212554e-05, "loss": 0.1833, "loss_nan_ranks": 0, "loss_rank_avg": 0.09903687238693237, "step": 925, "valid_targets_mean": 4449.6, "valid_targets_min": 2173 }, { "epoch": 3.458100558659218, "grad_norm": 0.35837853241908835, "learning_rate": 1.0574927616280139e-05, "loss": 0.1858, "loss_nan_ranks": 0, "loss_rank_avg": 0.09481087327003479, "step": 930, "valid_targets_mean": 4874.1, "valid_targets_min": 2117 }, { "epoch": 3.4767225325884543, "grad_norm": 0.40429914472262846, "learning_rate": 1.0346730073471993e-05, "loss": 0.1807, "loss_nan_ranks": 0, "loss_rank_avg": 0.11803603172302246, "step": 935, "valid_targets_mean": 5221.1, "valid_targets_min": 2154 }, { "epoch": 3.4953445065176907, "grad_norm": 0.3711739680040434, "learning_rate": 1.0120159342653153e-05, "loss": 0.1773, "loss_nan_ranks": 0, "loss_rank_avg": 0.08699009567499161, "step": 940, "valid_targets_mean": 4491.3, "valid_targets_min": 1967 }, { "epoch": 3.5139664804469275, "grad_norm": 0.37363867967479586, "learning_rate": 9.895253606531038e-06, "loss": 0.1831, "loss_nan_ranks": 0, "loss_rank_avg": 0.08354979753494263, "step": 945, "valid_targets_mean": 4046.6, "valid_targets_min": 1432 }, { "epoch": 3.532588454376164, "grad_norm": 0.44263358663951935, "learning_rate": 9.672050767220765e-06, "loss": 0.2038, "loss_nan_ranks": 0, "loss_rank_avg": 0.11277249455451965, "step": 950, "valid_targets_mean": 5051.2, "valid_targets_min": 1855 }, { "epoch": 3.5512104283054002, "grad_norm": 0.3336089325731764, "learning_rate": 9.450588439857697e-06, "loss": 0.2213, "loss_nan_ranks": 0, "loss_rank_avg": 0.11583083122968674, "step": 955, "valid_targets_mean": 5088.0, "valid_targets_min": 1865 }, { "epoch": 3.5698324022346366, "grad_norm": 0.29491303529078194, "learning_rate": 9.230903946258391e-06, "loss": 0.1964, "loss_nan_ranks": 0, "loss_rank_avg": 0.11058211326599121, "step": 960, "valid_targets_mean": 5648.7, "valid_targets_min": 1663 }, { "epoch": 3.5884543761638734, "grad_norm": 0.27522508073944885, "learning_rate": 9.013034308630945e-06, "loss": 0.1878, "loss_nan_ranks": 0, "loss_rank_avg": 0.08331070095300674, "step": 965, "valid_targets_mean": 4394.6, "valid_targets_min": 2021 }, { "epoch": 3.60707635009311, "grad_norm": 0.30907108754902735, "learning_rate": 8.79701624333585e-06, "loss": 0.1864, "loss_nan_ranks": 0, "loss_rank_avg": 0.09504259377717972, "step": 970, "valid_targets_mean": 4734.1, "valid_targets_min": 2147 }, { "epoch": 3.6256983240223466, "grad_norm": 0.3078382923065214, "learning_rate": 8.582886154698407e-06, "loss": 0.1727, "loss_nan_ranks": 0, "loss_rank_avg": 0.08389315009117126, "step": 975, "valid_targets_mean": 4702.8, "valid_targets_min": 1900 }, { "epoch": 3.644320297951583, "grad_norm": 0.2927314109800218, "learning_rate": 8.370680128873679e-06, "loss": 0.1825, "loss_nan_ranks": 0, "loss_rank_avg": 0.08893483877182007, "step": 980, "valid_targets_mean": 5112.3, "valid_targets_min": 2065 }, { "epoch": 3.6629422718808193, "grad_norm": 0.3049682447465429, "learning_rate": 8.160433927765097e-06, "loss": 0.1907, "loss_nan_ranks": 0, "loss_rank_avg": 0.08556380122900009, "step": 985, "valid_targets_mean": 5058.2, "valid_targets_min": 2522 }, { "epoch": 3.6815642458100557, "grad_norm": 0.2770762255480885, "learning_rate": 7.952182982997743e-06, "loss": 0.1779, "loss_nan_ranks": 0, "loss_rank_avg": 0.09861546754837036, "step": 990, "valid_targets_mean": 5131.3, "valid_targets_min": 2161 }, { "epoch": 3.7001862197392925, "grad_norm": 0.3549830142853625, "learning_rate": 7.745962389947195e-06, "loss": 0.1668, "loss_nan_ranks": 0, "loss_rank_avg": 0.06256135553121567, "step": 995, "valid_targets_mean": 5891.1, "valid_targets_min": 1641 }, { "epoch": 3.718808193668529, "grad_norm": 0.28452100847613737, "learning_rate": 7.541806901825141e-06, "loss": 0.1246, "loss_nan_ranks": 0, "loss_rank_avg": 0.06664276123046875, "step": 1000, "valid_targets_mean": 6142.4, "valid_targets_min": 2831 }, { "epoch": 3.7374301675977653, "grad_norm": 0.2744053870236639, "learning_rate": 7.339750923822595e-06, "loss": 0.1267, "loss_nan_ranks": 0, "loss_rank_avg": 0.06239338591694832, "step": 1005, "valid_targets_mean": 5398.8, "valid_targets_min": 2088 }, { "epoch": 3.756052141527002, "grad_norm": 0.23916132063233655, "learning_rate": 7.139828507311792e-06, "loss": 0.1192, "loss_nan_ranks": 0, "loss_rank_avg": 0.06390925496816635, "step": 1010, "valid_targets_mean": 5946.6, "valid_targets_min": 1659 }, { "epoch": 3.7746741154562384, "grad_norm": 0.23032557623436256, "learning_rate": 6.942073344107682e-06, "loss": 0.1109, "loss_nan_ranks": 0, "loss_rank_avg": 0.05467592179775238, "step": 1015, "valid_targets_mean": 5572.2, "valid_targets_min": 1718 }, { "epoch": 3.793296089385475, "grad_norm": 0.24678198189145398, "learning_rate": 6.746518760790071e-06, "loss": 0.1107, "loss_nan_ranks": 0, "loss_rank_avg": 0.06743580102920532, "step": 1020, "valid_targets_mean": 5919.4, "valid_targets_min": 1512 }, { "epoch": 3.811918063314711, "grad_norm": 0.2381354700325367, "learning_rate": 6.553197713087227e-06, "loss": 0.1094, "loss_nan_ranks": 0, "loss_rank_avg": 0.0567602775990963, "step": 1025, "valid_targets_mean": 5579.0, "valid_targets_min": 3195 }, { "epoch": 3.830540037243948, "grad_norm": 0.2276090324661052, "learning_rate": 6.3621427803220735e-06, "loss": 0.1102, "loss_nan_ranks": 0, "loss_rank_avg": 0.05728829279541969, "step": 1030, "valid_targets_mean": 5613.9, "valid_targets_min": 2897 }, { "epoch": 3.8491620111731844, "grad_norm": 0.21642846836817936, "learning_rate": 6.173386159921766e-06, "loss": 0.1133, "loss_nan_ranks": 0, "loss_rank_avg": 0.06424218416213989, "step": 1035, "valid_targets_mean": 5535.1, "valid_targets_min": 1371 }, { "epoch": 3.8677839851024207, "grad_norm": 0.23163902556844526, "learning_rate": 5.98695966199163e-06, "loss": 0.1126, "loss_nan_ranks": 0, "loss_rank_avg": 0.048331666737794876, "step": 1040, "valid_targets_mean": 5154.7, "valid_targets_min": 2315 }, { "epoch": 3.8864059590316575, "grad_norm": 0.21070714474137134, "learning_rate": 5.802894703954382e-06, "loss": 0.1032, "loss_nan_ranks": 0, "loss_rank_avg": 0.05012443661689758, "step": 1045, "valid_targets_mean": 5605.2, "valid_targets_min": 1984 }, { "epoch": 3.905027932960894, "grad_norm": 0.22853410654463693, "learning_rate": 5.621222305255554e-06, "loss": 0.11, "loss_nan_ranks": 0, "loss_rank_avg": 0.05816968157887459, "step": 1050, "valid_targets_mean": 6049.4, "valid_targets_min": 2764 }, { "epoch": 3.9236499068901303, "grad_norm": 0.2379166167854269, "learning_rate": 5.441973082135907e-06, "loss": 0.1028, "loss_nan_ranks": 0, "loss_rank_avg": 0.05406898260116577, "step": 1055, "valid_targets_mean": 5502.0, "valid_targets_min": 2260 }, { "epoch": 3.9422718808193666, "grad_norm": 0.20973140421298134, "learning_rate": 5.265177242471899e-06, "loss": 0.1041, "loss_nan_ranks": 0, "loss_rank_avg": 0.05013309791684151, "step": 1060, "valid_targets_mean": 5478.0, "valid_targets_min": 1855 }, { "epoch": 3.9608938547486034, "grad_norm": 0.22236062413735788, "learning_rate": 5.09086458068488e-06, "loss": 0.1007, "loss_nan_ranks": 0, "loss_rank_avg": 0.05143527686595917, "step": 1065, "valid_targets_mean": 5612.8, "valid_targets_min": 2468 }, { "epoch": 3.97951582867784, "grad_norm": 0.21206442494809297, "learning_rate": 4.919064472720014e-06, "loss": 0.1031, "loss_nan_ranks": 0, "loss_rank_avg": 0.05190473794937134, "step": 1070, "valid_targets_mean": 5816.8, "valid_targets_min": 1834 }, { "epoch": 3.998137802607076, "grad_norm": 0.20629239879718422, "learning_rate": 4.749805871095732e-06, "loss": 0.1029, "loss_nan_ranks": 0, "loss_rank_avg": 0.046846091747283936, "step": 1075, "valid_targets_mean": 5753.0, "valid_targets_min": 1235 }, { "epoch": 4.01489757914339, "grad_norm": 0.5308448838529755, "learning_rate": 4.5831173000245e-06, "loss": 0.1836, "loss_nan_ranks": 0, "loss_rank_avg": 0.12533260881900787, "step": 1080, "valid_targets_mean": 7661.5, "valid_targets_min": 2439 }, { "epoch": 4.033519553072626, "grad_norm": 0.24098689788800592, "learning_rate": 4.4190268506058074e-06, "loss": 0.2018, "loss_nan_ranks": 0, "loss_rank_avg": 0.09956145286560059, "step": 1085, "valid_targets_mean": 6907.5, "valid_targets_min": 2738 }, { "epoch": 4.052141527001862, "grad_norm": 0.24383435515867613, "learning_rate": 4.257562176092127e-06, "loss": 0.1818, "loss_nan_ranks": 0, "loss_rank_avg": 0.0828259289264679, "step": 1090, "valid_targets_mean": 6410.7, "valid_targets_min": 2445 }, { "epoch": 4.070763500931099, "grad_norm": 0.2163950525445711, "learning_rate": 4.098750487228653e-06, "loss": 0.1885, "loss_nan_ranks": 0, "loss_rank_avg": 0.08502789586782455, "step": 1095, "valid_targets_mean": 6399.4, "valid_targets_min": 1929 }, { "epoch": 4.089385474860335, "grad_norm": 0.21083241045721227, "learning_rate": 3.942618547667656e-06, "loss": 0.1849, "loss_nan_ranks": 0, "loss_rank_avg": 0.07837125658988953, "step": 1100, "valid_targets_mean": 6164.7, "valid_targets_min": 1823 }, { "epoch": 4.1080074487895715, "grad_norm": 0.20466030093676923, "learning_rate": 3.7891926694581216e-06, "loss": 0.1652, "loss_nan_ranks": 0, "loss_rank_avg": 0.07821375131607056, "step": 1105, "valid_targets_mean": 6921.8, "valid_targets_min": 2809 }, { "epoch": 4.126629422718808, "grad_norm": 0.22208208771018614, "learning_rate": 3.6384987086115353e-06, "loss": 0.1719, "loss_nan_ranks": 0, "loss_rank_avg": 0.09721019864082336, "step": 1110, "valid_targets_mean": 6955.7, "valid_targets_min": 2131 }, { "epoch": 4.145251396648045, "grad_norm": 0.2135551173088567, "learning_rate": 3.49056206074452e-06, "loss": 0.171, "loss_nan_ranks": 0, "loss_rank_avg": 0.08980279415845871, "step": 1115, "valid_targets_mean": 7009.1, "valid_targets_min": 1790 }, { "epoch": 4.1638733705772815, "grad_norm": 0.22700578037823518, "learning_rate": 3.345407656799058e-06, "loss": 0.158, "loss_nan_ranks": 0, "loss_rank_avg": 0.07464779168367386, "step": 1120, "valid_targets_mean": 6647.1, "valid_targets_min": 1889 }, { "epoch": 4.182495344506518, "grad_norm": 0.2492117954904908, "learning_rate": 3.203059958840999e-06, "loss": 0.1538, "loss_nan_ranks": 0, "loss_rank_avg": 0.08767087012529373, "step": 1125, "valid_targets_mean": 7243.6, "valid_targets_min": 2074 }, { "epoch": 4.201117318435754, "grad_norm": 0.25428629048582396, "learning_rate": 3.063542955937615e-06, "loss": 0.1654, "loss_nan_ranks": 0, "loss_rank_avg": 0.09225225448608398, "step": 1130, "valid_targets_mean": 7255.7, "valid_targets_min": 1549 }, { "epoch": 4.219739292364991, "grad_norm": 0.25094232783984594, "learning_rate": 2.9268801601148555e-06, "loss": 0.1654, "loss_nan_ranks": 0, "loss_rank_avg": 0.07944104820489883, "step": 1135, "valid_targets_mean": 7046.5, "valid_targets_min": 1799 }, { "epoch": 4.238361266294227, "grad_norm": 0.24866583116101532, "learning_rate": 2.793094602395008e-06, "loss": 0.1565, "loss_nan_ranks": 0, "loss_rank_avg": 0.06737985461950302, "step": 1140, "valid_targets_mean": 5729.9, "valid_targets_min": 1609 }, { "epoch": 4.256983240223463, "grad_norm": 0.39758422193173765, "learning_rate": 2.6622088289153804e-06, "loss": 0.1931, "loss_nan_ranks": 0, "loss_rank_avg": 0.09932229667901993, "step": 1145, "valid_targets_mean": 4044.7, "valid_targets_min": 1709 }, { "epoch": 4.275605214152701, "grad_norm": 0.3591756238366576, "learning_rate": 2.534244897128748e-06, "loss": 0.1886, "loss_nan_ranks": 0, "loss_rank_avg": 0.0849931463599205, "step": 1150, "valid_targets_mean": 4355.0, "valid_targets_min": 1978 }, { "epoch": 4.294227188081937, "grad_norm": 0.31402517638422384, "learning_rate": 2.4092243720861276e-06, "loss": 0.1809, "loss_nan_ranks": 0, "loss_rank_avg": 0.12222952395677567, "step": 1155, "valid_targets_mean": 5516.1, "valid_targets_min": 1489 }, { "epoch": 4.312849162011173, "grad_norm": 0.33093787158415533, "learning_rate": 2.287168322802533e-06, "loss": 0.1612, "loss_nan_ranks": 0, "loss_rank_avg": 0.09058675169944763, "step": 1160, "valid_targets_mean": 4852.4, "valid_targets_min": 2071 }, { "epoch": 4.33147113594041, "grad_norm": 0.29044590186071456, "learning_rate": 2.1680973187063415e-06, "loss": 0.1524, "loss_nan_ranks": 0, "loss_rank_avg": 0.07333887368440628, "step": 1165, "valid_targets_mean": 4316.4, "valid_targets_min": 1565 }, { "epoch": 4.350093109869646, "grad_norm": 0.310482032598639, "learning_rate": 2.0520314261728357e-06, "loss": 0.1617, "loss_nan_ranks": 0, "loss_rank_avg": 0.07164377719163895, "step": 1170, "valid_targets_mean": 4140.9, "valid_targets_min": 1878 }, { "epoch": 4.368715083798882, "grad_norm": 0.29736588517519846, "learning_rate": 1.938990205142526e-06, "loss": 0.1449, "loss_nan_ranks": 0, "loss_rank_avg": 0.07602109760046005, "step": 1175, "valid_targets_mean": 4135.1, "valid_targets_min": 1695 }, { "epoch": 4.387337057728119, "grad_norm": 0.3160724687733261, "learning_rate": 1.8289927058248325e-06, "loss": 0.1615, "loss_nan_ranks": 0, "loss_rank_avg": 0.07650075107812881, "step": 1180, "valid_targets_mean": 4301.7, "valid_targets_min": 1418 }, { "epoch": 4.405959031657356, "grad_norm": 0.28031984965761936, "learning_rate": 1.7220574654876453e-06, "loss": 0.1528, "loss_nan_ranks": 0, "loss_rank_avg": 0.07221689075231552, "step": 1185, "valid_targets_mean": 4130.4, "valid_targets_min": 1475 }, { "epoch": 4.424581005586592, "grad_norm": 0.3076875162786684, "learning_rate": 1.6182025053333595e-06, "loss": 0.145, "loss_nan_ranks": 0, "loss_rank_avg": 0.06376394629478455, "step": 1190, "valid_targets_mean": 3774.1, "valid_targets_min": 1587 }, { "epoch": 4.443202979515829, "grad_norm": 0.2836853100448112, "learning_rate": 1.5174453274618416e-06, "loss": 0.144, "loss_nan_ranks": 0, "loss_rank_avg": 0.0705125704407692, "step": 1195, "valid_targets_mean": 4669.5, "valid_targets_min": 1641 }, { "epoch": 4.461824953445065, "grad_norm": 0.31779131062906657, "learning_rate": 1.4198029119209112e-06, "loss": 0.1476, "loss_nan_ranks": 0, "loss_rank_avg": 0.06056446209549904, "step": 1200, "valid_targets_mean": 3839.8, "valid_targets_min": 1878 }, { "epoch": 4.4804469273743015, "grad_norm": 0.2857165674407037, "learning_rate": 1.325291713844785e-06, "loss": 0.144, "loss_nan_ranks": 0, "loss_rank_avg": 0.06362733244895935, "step": 1205, "valid_targets_mean": 4157.1, "valid_targets_min": 1513 }, { "epoch": 4.499068901303538, "grad_norm": 0.29043499467358436, "learning_rate": 1.2339276606809824e-06, "loss": 0.142, "loss_nan_ranks": 0, "loss_rank_avg": 0.07273241877555847, "step": 1210, "valid_targets_mean": 4532.7, "valid_targets_min": 2162 }, { "epoch": 4.517690875232775, "grad_norm": 0.27187077336242454, "learning_rate": 1.145726149506161e-06, "loss": 0.1403, "loss_nan_ranks": 0, "loss_rank_avg": 0.058025311678647995, "step": 1215, "valid_targets_mean": 3950.7, "valid_targets_min": 1411 }, { "epoch": 4.5363128491620115, "grad_norm": 0.4861947344266717, "learning_rate": 1.0607020444313431e-06, "loss": 0.1827, "loss_nan_ranks": 0, "loss_rank_avg": 0.0979158952832222, "step": 1220, "valid_targets_mean": 5058.6, "valid_targets_min": 1870 }, { "epoch": 4.554934823091248, "grad_norm": 0.32419104546438365, "learning_rate": 9.788696740969295e-07, "loss": 0.1901, "loss_nan_ranks": 0, "loss_rank_avg": 0.0731668546795845, "step": 1225, "valid_targets_mean": 4897.2, "valid_targets_min": 1573 }, { "epoch": 4.573556797020484, "grad_norm": 0.29019151441664376, "learning_rate": 9.002428292579912e-07, "loss": 0.1748, "loss_nan_ranks": 0, "loss_rank_avg": 0.0815751776099205, "step": 1230, "valid_targets_mean": 4843.5, "valid_targets_min": 2196 }, { "epoch": 4.592178770949721, "grad_norm": 0.28619939519110477, "learning_rate": 8.248347604601803e-07, "loss": 0.1565, "loss_nan_ranks": 0, "loss_rank_avg": 0.06282777339220047, "step": 1235, "valid_targets_mean": 4174.7, "valid_targets_min": 1942 }, { "epoch": 4.610800744878957, "grad_norm": 0.24908229050606268, "learning_rate": 7.526581758066931e-07, "loss": 0.1613, "loss_nan_ranks": 0, "loss_rank_avg": 0.06715219467878342, "step": 1240, "valid_targets_mean": 4759.8, "valid_targets_min": 1873 }, { "epoch": 4.629422718808193, "grad_norm": 0.25880591606207554, "learning_rate": 6.837252388166416e-07, "loss": 0.1443, "loss_nan_ranks": 0, "loss_rank_avg": 0.07651010155677795, "step": 1245, "valid_targets_mean": 5333.6, "valid_targets_min": 2002 }, { "epoch": 4.648044692737431, "grad_norm": 0.2497181360656177, "learning_rate": 6.180475663752106e-07, "loss": 0.1587, "loss_nan_ranks": 0, "loss_rank_avg": 0.07430170476436615, "step": 1250, "valid_targets_mean": 4628.3, "valid_targets_min": 1927 }, { "epoch": 4.666666666666667, "grad_norm": 0.23020108011760998, "learning_rate": 5.556362267759153e-07, "loss": 0.1574, "loss_nan_ranks": 0, "loss_rank_avg": 0.07936949282884598, "step": 1255, "valid_targets_mean": 5335.1, "valid_targets_min": 1892 }, { "epoch": 4.685288640595903, "grad_norm": 0.2454009759799112, "learning_rate": 4.965017378553349e-07, "loss": 0.1524, "loss_nan_ranks": 0, "loss_rank_avg": 0.09208754450082779, "step": 1260, "valid_targets_mean": 5511.2, "valid_targets_min": 1992 }, { "epoch": 4.70391061452514, "grad_norm": 0.3235628120281909, "learning_rate": 4.4065406522059374e-07, "loss": 0.1284, "loss_nan_ranks": 0, "loss_rank_avg": 0.05675048008561134, "step": 1265, "valid_targets_mean": 5457.9, "valid_targets_min": 1944 }, { "epoch": 4.722532588454376, "grad_norm": 0.3334445932900592, "learning_rate": 3.8810262056991676e-07, "loss": 0.1087, "loss_nan_ranks": 0, "loss_rank_avg": 0.056893110275268555, "step": 1270, "valid_targets_mean": 6020.2, "valid_targets_min": 3332 }, { "epoch": 4.741154562383612, "grad_norm": 0.25685520224274744, "learning_rate": 3.3885626010652153e-07, "loss": 0.1112, "loss_nan_ranks": 0, "loss_rank_avg": 0.049848925322294235, "step": 1275, "valid_targets_mean": 5805.6, "valid_targets_min": 2057 }, { "epoch": 4.759776536312849, "grad_norm": 0.2188109546053078, "learning_rate": 2.929232830461404e-07, "loss": 0.1028, "loss_nan_ranks": 0, "loss_rank_avg": 0.04564082622528076, "step": 1280, "valid_targets_mean": 5415.1, "valid_targets_min": 3408 }, { "epoch": 4.778398510242086, "grad_norm": 0.2187677112583894, "learning_rate": 2.503114302183951e-07, "loss": 0.0952, "loss_nan_ranks": 0, "loss_rank_avg": 0.04444780945777893, "step": 1285, "valid_targets_mean": 5522.8, "valid_targets_min": 1887 }, { "epoch": 4.797020484171322, "grad_norm": 0.22208851636510005, "learning_rate": 2.110278827622758e-07, "loss": 0.0982, "loss_nan_ranks": 0, "loss_rank_avg": 0.05595209822058678, "step": 1290, "valid_targets_mean": 5780.2, "valid_targets_min": 2491 }, { "epoch": 4.815642458100559, "grad_norm": 0.21060016822581026, "learning_rate": 1.7507926091594685e-07, "loss": 0.0918, "loss_nan_ranks": 0, "loss_rank_avg": 0.04811148717999458, "step": 1295, "valid_targets_mean": 5595.6, "valid_targets_min": 2959 }, { "epoch": 4.834264432029795, "grad_norm": 0.19833964961713638, "learning_rate": 1.4247162290107697e-07, "loss": 0.0965, "loss_nan_ranks": 0, "loss_rank_avg": 0.05214523896574974, "step": 1300, "valid_targets_mean": 5988.6, "valid_targets_min": 3215 }, { "epoch": 4.8528864059590315, "grad_norm": 0.2990227082543972, "learning_rate": 1.1321046390187385e-07, "loss": 0.0998, "loss_nan_ranks": 0, "loss_rank_avg": 0.05182049795985222, "step": 1305, "valid_targets_mean": 5927.0, "valid_targets_min": 2563 }, { "epoch": 4.871508379888268, "grad_norm": 0.2015079045157211, "learning_rate": 8.730071513901594e-08, "loss": 0.0948, "loss_nan_ranks": 0, "loss_rank_avg": 0.045056309551000595, "step": 1310, "valid_targets_mean": 5677.5, "valid_targets_min": 2261 }, { "epoch": 4.890130353817504, "grad_norm": 0.20975848200966107, "learning_rate": 6.474674303862172e-08, "loss": 0.09, "loss_nan_ranks": 0, "loss_rank_avg": 0.04260837659239769, "step": 1315, "valid_targets_mean": 5403.8, "valid_targets_min": 2460 }, { "epoch": 4.9087523277467415, "grad_norm": 0.19303968844504368, "learning_rate": 4.555234849639823e-08, "loss": 0.0965, "loss_nan_ranks": 0, "loss_rank_avg": 0.04524529352784157, "step": 1320, "valid_targets_mean": 5690.6, "valid_targets_min": 2096 }, { "epoch": 4.927374301675978, "grad_norm": 0.19699741738755241, "learning_rate": 2.9720766237095745e-08, "loss": 0.0904, "loss_nan_ranks": 0, "loss_rank_avg": 0.05184662342071533, "step": 1325, "valid_targets_mean": 5530.8, "valid_targets_min": 2383 }, { "epoch": 4.945996275605214, "grad_norm": 0.18957844995965004, "learning_rate": 1.7254664269381604e-08, "loss": 0.0894, "loss_nan_ranks": 0, "loss_rank_avg": 0.03667488321661949, "step": 1330, "valid_targets_mean": 5664.4, "valid_targets_min": 2242 }, { "epoch": 4.964618249534451, "grad_norm": 0.19863766435376254, "learning_rate": 8.156143436215403e-09, "loss": 0.0918, "loss_nan_ranks": 0, "loss_rank_avg": 0.049284886568784714, "step": 1335, "valid_targets_mean": 5771.7, "valid_targets_min": 2131 }, { "epoch": 4.983240223463687, "grad_norm": 0.2042161969158567, "learning_rate": 2.426737060798878e-09, "loss": 0.0893, "loss_nan_ranks": 0, "loss_rank_avg": 0.052975624799728394, "step": 1340, "valid_targets_mean": 5368.6, "valid_targets_min": 1541 }, { "epoch": 5.0, "grad_norm": 0.34259468317161895, "learning_rate": 6.741068818261198e-11, "loss": 0.0868, "loss_nan_ranks": 0, "loss_rank_avg": 0.08489986509084702, "step": 1345, "valid_targets_mean": 7030.6, "valid_targets_min": 2798 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.08489986509084702, "step": 1345, "total_flos": 3.418313925563777e+18, "train_loss": 0.2346252706192683, "train_runtime": 20740.2268, "train_samples_per_second": 6.206, "train_steps_per_second": 0.065, "valid_targets_mean": 7030.6, "valid_targets_min": 2798 } ], "logging_steps": 5, "max_steps": 1345, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 750, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.418313925563777e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }