{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1841, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019011406844106463, "grad_norm": 18.768985433269826, "learning_rate": 8.64864864864865e-07, "loss": 0.8865, "loss_nan_ranks": 0, "loss_rank_avg": 0.45824751257896423, "step": 5, "valid_targets_mean": 4315.6, "valid_targets_min": 1507 }, { "epoch": 0.03802281368821293, "grad_norm": 4.161641756419172, "learning_rate": 1.945945945945946e-06, "loss": 0.7829, "loss_nan_ranks": 0, "loss_rank_avg": 0.32671618461608887, "step": 10, "valid_targets_mean": 4631.1, "valid_targets_min": 1345 }, { "epoch": 0.057034220532319393, "grad_norm": 1.6968311772903435, "learning_rate": 3.0270270270270274e-06, "loss": 0.6528, "loss_nan_ranks": 0, "loss_rank_avg": 0.3004853129386902, "step": 15, "valid_targets_mean": 4061.9, "valid_targets_min": 1929 }, { "epoch": 0.07604562737642585, "grad_norm": 0.9491466398184562, "learning_rate": 4.108108108108108e-06, "loss": 0.5875, "loss_nan_ranks": 0, "loss_rank_avg": 0.27897870540618896, "step": 20, "valid_targets_mean": 4492.1, "valid_targets_min": 1512 }, { "epoch": 0.09505703422053231, "grad_norm": 0.5548287452373166, "learning_rate": 5.18918918918919e-06, "loss": 0.5445, "loss_nan_ranks": 0, "loss_rank_avg": 0.2667747437953949, "step": 25, "valid_targets_mean": 4387.7, "valid_targets_min": 1324 }, { "epoch": 0.11406844106463879, "grad_norm": 0.49652319636920533, "learning_rate": 6.270270270270271e-06, "loss": 0.4983, "loss_nan_ranks": 0, "loss_rank_avg": 0.2525099217891693, "step": 30, "valid_targets_mean": 4403.1, "valid_targets_min": 2036 }, { "epoch": 0.13307984790874525, "grad_norm": 0.40788904142748184, "learning_rate": 7.3513513513513525e-06, "loss": 0.4857, "loss_nan_ranks": 0, "loss_rank_avg": 0.23482461273670197, "step": 35, "valid_targets_mean": 4255.0, "valid_targets_min": 2083 }, { "epoch": 0.1520912547528517, "grad_norm": 0.3460099864906639, "learning_rate": 8.432432432432434e-06, "loss": 0.464, "loss_nan_ranks": 0, "loss_rank_avg": 0.21213467419147491, "step": 40, "valid_targets_mean": 4031.0, "valid_targets_min": 1761 }, { "epoch": 0.17110266159695817, "grad_norm": 0.3294697867452768, "learning_rate": 9.513513513513514e-06, "loss": 0.4457, "loss_nan_ranks": 0, "loss_rank_avg": 0.20797699689865112, "step": 45, "valid_targets_mean": 4213.6, "valid_targets_min": 1827 }, { "epoch": 0.19011406844106463, "grad_norm": 0.3007798207728767, "learning_rate": 1.0594594594594597e-05, "loss": 0.3791, "loss_nan_ranks": 0, "loss_rank_avg": 0.1763700693845749, "step": 50, "valid_targets_mean": 5610.1, "valid_targets_min": 2840 }, { "epoch": 0.20912547528517111, "grad_norm": 0.3135648808425615, "learning_rate": 1.1675675675675677e-05, "loss": 0.3193, "loss_nan_ranks": 0, "loss_rank_avg": 0.15119099617004395, "step": 55, "valid_targets_mean": 4945.7, "valid_targets_min": 1853 }, { "epoch": 0.22813688212927757, "grad_norm": 0.3001927364971795, "learning_rate": 1.2756756756756758e-05, "loss": 0.3044, "loss_nan_ranks": 0, "loss_rank_avg": 0.13437247276306152, "step": 60, "valid_targets_mean": 5207.0, "valid_targets_min": 2341 }, { "epoch": 0.24714828897338403, "grad_norm": 0.26327896731667505, "learning_rate": 1.383783783783784e-05, "loss": 0.2891, "loss_nan_ranks": 0, "loss_rank_avg": 0.14838241040706635, "step": 65, "valid_targets_mean": 5175.4, "valid_targets_min": 2093 }, { "epoch": 0.2661596958174905, "grad_norm": 0.23565637422517188, "learning_rate": 1.491891891891892e-05, "loss": 0.2835, "loss_nan_ranks": 0, "loss_rank_avg": 0.1273541897535324, "step": 70, "valid_targets_mean": 5396.7, "valid_targets_min": 1885 }, { "epoch": 0.28517110266159695, "grad_norm": 0.23292525842968348, "learning_rate": 1.6000000000000003e-05, "loss": 0.2734, "loss_nan_ranks": 0, "loss_rank_avg": 0.13985608518123627, "step": 75, "valid_targets_mean": 5246.9, "valid_targets_min": 2203 }, { "epoch": 0.3041825095057034, "grad_norm": 0.21583976978780783, "learning_rate": 1.7081081081081083e-05, "loss": 0.2672, "loss_nan_ranks": 0, "loss_rank_avg": 0.13641823828220367, "step": 80, "valid_targets_mean": 4957.9, "valid_targets_min": 1708 }, { "epoch": 0.3231939163498099, "grad_norm": 0.21703167466244688, "learning_rate": 1.8162162162162164e-05, "loss": 0.2635, "loss_nan_ranks": 0, "loss_rank_avg": 0.12753139436244965, "step": 85, "valid_targets_mean": 4951.1, "valid_targets_min": 1826 }, { "epoch": 0.34220532319391633, "grad_norm": 0.21798199551190492, "learning_rate": 1.9243243243243244e-05, "loss": 0.2607, "loss_nan_ranks": 0, "loss_rank_avg": 0.13001735508441925, "step": 90, "valid_targets_mean": 5112.8, "valid_targets_min": 2968 }, { "epoch": 0.3612167300380228, "grad_norm": 0.23287202361449336, "learning_rate": 2.0324324324324328e-05, "loss": 0.2588, "loss_nan_ranks": 0, "loss_rank_avg": 0.13635805249214172, "step": 95, "valid_targets_mean": 5321.8, "valid_targets_min": 2382 }, { "epoch": 0.38022813688212925, "grad_norm": 0.2771402834224665, "learning_rate": 2.1405405405405405e-05, "loss": 0.2552, "loss_nan_ranks": 0, "loss_rank_avg": 0.12471064180135727, "step": 100, "valid_targets_mean": 4986.9, "valid_targets_min": 2576 }, { "epoch": 0.39923954372623577, "grad_norm": 0.22487804577069473, "learning_rate": 2.248648648648649e-05, "loss": 0.2503, "loss_nan_ranks": 0, "loss_rank_avg": 0.1233469620347023, "step": 105, "valid_targets_mean": 5325.8, "valid_targets_min": 2928 }, { "epoch": 0.41825095057034223, "grad_norm": 0.22857152253610444, "learning_rate": 2.356756756756757e-05, "loss": 0.2445, "loss_nan_ranks": 0, "loss_rank_avg": 0.1288713663816452, "step": 110, "valid_targets_mean": 5161.7, "valid_targets_min": 1862 }, { "epoch": 0.4372623574144487, "grad_norm": 0.24338824886805402, "learning_rate": 2.4648648648648654e-05, "loss": 0.239, "loss_nan_ranks": 0, "loss_rank_avg": 0.11510897427797318, "step": 115, "valid_targets_mean": 4941.3, "valid_targets_min": 1507 }, { "epoch": 0.45627376425855515, "grad_norm": 0.24437893301310154, "learning_rate": 2.572972972972973e-05, "loss": 0.2539, "loss_nan_ranks": 0, "loss_rank_avg": 0.12720325589179993, "step": 120, "valid_targets_mean": 5260.5, "valid_targets_min": 2625 }, { "epoch": 0.4752851711026616, "grad_norm": 0.3917084342269015, "learning_rate": 2.6810810810810815e-05, "loss": 0.2547, "loss_nan_ranks": 0, "loss_rank_avg": 0.17161251604557037, "step": 125, "valid_targets_mean": 4271.5, "valid_targets_min": 1373 }, { "epoch": 0.49429657794676807, "grad_norm": 0.4677580966290995, "learning_rate": 2.7891891891891892e-05, "loss": 0.4642, "loss_nan_ranks": 0, "loss_rank_avg": 0.2109134942293167, "step": 130, "valid_targets_mean": 3648.6, "valid_targets_min": 1151 }, { "epoch": 0.5133079847908745, "grad_norm": 0.37088030476958894, "learning_rate": 2.8972972972972976e-05, "loss": 0.4581, "loss_nan_ranks": 0, "loss_rank_avg": 0.2217126041650772, "step": 135, "valid_targets_mean": 3692.1, "valid_targets_min": 1583 }, { "epoch": 0.532319391634981, "grad_norm": 0.34834316541712207, "learning_rate": 3.0054054054054056e-05, "loss": 0.4374, "loss_nan_ranks": 0, "loss_rank_avg": 0.21792852878570557, "step": 140, "valid_targets_mean": 4084.4, "valid_targets_min": 1601 }, { "epoch": 0.5513307984790875, "grad_norm": 0.3764288574103832, "learning_rate": 3.113513513513514e-05, "loss": 0.4365, "loss_nan_ranks": 0, "loss_rank_avg": 0.22260896861553192, "step": 145, "valid_targets_mean": 3740.4, "valid_targets_min": 1825 }, { "epoch": 0.5703422053231939, "grad_norm": 0.30664676408221175, "learning_rate": 3.221621621621622e-05, "loss": 0.4129, "loss_nan_ranks": 0, "loss_rank_avg": 0.20132417976856232, "step": 150, "valid_targets_mean": 3882.0, "valid_targets_min": 1821 }, { "epoch": 0.5893536121673004, "grad_norm": 0.335985881141292, "learning_rate": 3.3297297297297305e-05, "loss": 0.4231, "loss_nan_ranks": 0, "loss_rank_avg": 0.21901392936706543, "step": 155, "valid_targets_mean": 4136.7, "valid_targets_min": 1743 }, { "epoch": 0.6083650190114068, "grad_norm": 0.3447522223458868, "learning_rate": 3.437837837837838e-05, "loss": 0.4275, "loss_nan_ranks": 0, "loss_rank_avg": 0.22367675602436066, "step": 160, "valid_targets_mean": 7437.8, "valid_targets_min": 591 }, { "epoch": 0.6273764258555133, "grad_norm": 0.29372893235049985, "learning_rate": 3.5459459459459466e-05, "loss": 0.4306, "loss_nan_ranks": 0, "loss_rank_avg": 0.2104015201330185, "step": 165, "valid_targets_mean": 7844.2, "valid_targets_min": 715 }, { "epoch": 0.6463878326996197, "grad_norm": 0.29079875739620076, "learning_rate": 3.654054054054054e-05, "loss": 0.4102, "loss_nan_ranks": 0, "loss_rank_avg": 0.21834035217761993, "step": 170, "valid_targets_mean": 7960.9, "valid_targets_min": 733 }, { "epoch": 0.6653992395437263, "grad_norm": 0.27135578893433554, "learning_rate": 3.762162162162163e-05, "loss": 0.4087, "loss_nan_ranks": 0, "loss_rank_avg": 0.22146134078502655, "step": 175, "valid_targets_mean": 8483.1, "valid_targets_min": 759 }, { "epoch": 0.6844106463878327, "grad_norm": 0.2744187448490044, "learning_rate": 3.8702702702702704e-05, "loss": 0.3907, "loss_nan_ranks": 0, "loss_rank_avg": 0.19432826340198517, "step": 180, "valid_targets_mean": 7507.5, "valid_targets_min": 702 }, { "epoch": 0.7034220532319392, "grad_norm": 0.3048374023380346, "learning_rate": 3.978378378378379e-05, "loss": 0.3923, "loss_nan_ranks": 0, "loss_rank_avg": 0.18287861347198486, "step": 185, "valid_targets_mean": 6985.9, "valid_targets_min": 647 }, { "epoch": 0.7224334600760456, "grad_norm": 0.2649862681572022, "learning_rate": 3.999942416643093e-05, "loss": 0.3985, "loss_nan_ranks": 0, "loss_rank_avg": 0.20548956096172333, "step": 190, "valid_targets_mean": 7685.7, "valid_targets_min": 592 }, { "epoch": 0.7414448669201521, "grad_norm": 0.2992563120534786, "learning_rate": 3.999708489938559e-05, "loss": 0.3871, "loss_nan_ranks": 0, "loss_rank_avg": 0.17796725034713745, "step": 195, "valid_targets_mean": 7207.1, "valid_targets_min": 682 }, { "epoch": 0.7604562737642585, "grad_norm": 0.2616409196519949, "learning_rate": 3.999294641957663e-05, "loss": 0.3852, "loss_nan_ranks": 0, "loss_rank_avg": 0.19631274044513702, "step": 200, "valid_targets_mean": 7820.0, "valid_targets_min": 747 }, { "epoch": 0.779467680608365, "grad_norm": 0.2641716839624123, "learning_rate": 3.998700909935863e-05, "loss": 0.3833, "loss_nan_ranks": 0, "loss_rank_avg": 0.21066488325595856, "step": 205, "valid_targets_mean": 8022.3, "valid_targets_min": 727 }, { "epoch": 0.7984790874524715, "grad_norm": 0.24998064714538126, "learning_rate": 3.9979273472934556e-05, "loss": 0.3774, "loss_nan_ranks": 0, "loss_rank_avg": 0.17630921304225922, "step": 210, "valid_targets_mean": 7562.2, "valid_targets_min": 661 }, { "epoch": 0.8174904942965779, "grad_norm": 0.261840054565909, "learning_rate": 3.9969740236307746e-05, "loss": 0.3786, "loss_nan_ranks": 0, "loss_rank_avg": 0.20479388535022736, "step": 215, "valid_targets_mean": 8467.6, "valid_targets_min": 409 }, { "epoch": 0.8365019011406845, "grad_norm": 0.2526323676493677, "learning_rate": 3.9958410247219265e-05, "loss": 0.3701, "loss_nan_ranks": 0, "loss_rank_avg": 0.20014218986034393, "step": 220, "valid_targets_mean": 7542.6, "valid_targets_min": 781 }, { "epoch": 0.8555133079847909, "grad_norm": 0.5173558603802552, "learning_rate": 3.994528452507076e-05, "loss": 0.3121, "loss_nan_ranks": 0, "loss_rank_avg": 0.12122941017150879, "step": 225, "valid_targets_mean": 5823.9, "valid_targets_min": 1136 }, { "epoch": 0.8745247148288974, "grad_norm": 0.28893785124827975, "learning_rate": 3.993036425083269e-05, "loss": 0.2407, "loss_nan_ranks": 0, "loss_rank_avg": 0.12518875300884247, "step": 230, "valid_targets_mean": 6544.8, "valid_targets_min": 1446 }, { "epoch": 0.8935361216730038, "grad_norm": 0.26737459303895006, "learning_rate": 3.9913650766938115e-05, "loss": 0.2334, "loss_nan_ranks": 0, "loss_rank_avg": 0.12098179012537003, "step": 235, "valid_targets_mean": 6424.3, "valid_targets_min": 1291 }, { "epoch": 0.9125475285171103, "grad_norm": 0.2234023108439984, "learning_rate": 3.98951455771619e-05, "loss": 0.2243, "loss_nan_ranks": 0, "loss_rank_avg": 0.10649286955595016, "step": 240, "valid_targets_mean": 5882.0, "valid_targets_min": 979 }, { "epoch": 0.9315589353612167, "grad_norm": 0.22867133634974954, "learning_rate": 3.987485034648541e-05, "loss": 0.2219, "loss_nan_ranks": 0, "loss_rank_avg": 0.11067407578229904, "step": 245, "valid_targets_mean": 6166.6, "valid_targets_min": 933 }, { "epoch": 0.9505703422053232, "grad_norm": 0.18046687603251277, "learning_rate": 3.985276690094669e-05, "loss": 0.2183, "loss_nan_ranks": 0, "loss_rank_avg": 0.11107806116342545, "step": 250, "valid_targets_mean": 5971.0, "valid_targets_min": 1196 }, { "epoch": 0.9695817490494296, "grad_norm": 0.22379606980948552, "learning_rate": 3.982889722747621e-05, "loss": 0.2129, "loss_nan_ranks": 0, "loss_rank_avg": 0.1035776138305664, "step": 255, "valid_targets_mean": 6159.1, "valid_targets_min": 1238 }, { "epoch": 0.9885931558935361, "grad_norm": 0.17909010829485558, "learning_rate": 3.980324347371806e-05, "loss": 0.211, "loss_nan_ranks": 0, "loss_rank_avg": 0.09941697865724564, "step": 260, "valid_targets_mean": 6163.3, "valid_targets_min": 1300 }, { "epoch": 1.0076045627376427, "grad_norm": 0.33780995576565503, "learning_rate": 3.977580794783672e-05, "loss": 0.2922, "loss_nan_ranks": 0, "loss_rank_avg": 0.18427270650863647, "step": 265, "valid_targets_mean": 4252.4, "valid_targets_min": 2118 }, { "epoch": 1.026615969581749, "grad_norm": 0.2883511493855013, "learning_rate": 3.97465931183094e-05, "loss": 0.3966, "loss_nan_ranks": 0, "loss_rank_avg": 0.19222092628479004, "step": 270, "valid_targets_mean": 4523.0, "valid_targets_min": 1550 }, { "epoch": 1.0456273764258555, "grad_norm": 0.3124414858734432, "learning_rate": 3.971560161370393e-05, "loss": 0.3725, "loss_nan_ranks": 0, "loss_rank_avg": 0.178070530295372, "step": 275, "valid_targets_mean": 4048.7, "valid_targets_min": 2023 }, { "epoch": 1.064638783269962, "grad_norm": 0.29980707122304473, "learning_rate": 3.968283622244229e-05, "loss": 0.3666, "loss_nan_ranks": 0, "loss_rank_avg": 0.17472650110721588, "step": 280, "valid_targets_mean": 4060.6, "valid_targets_min": 1807 }, { "epoch": 1.0836501901140685, "grad_norm": 0.2855083601218697, "learning_rate": 3.9648299892549654e-05, "loss": 0.3522, "loss_nan_ranks": 0, "loss_rank_avg": 0.1766706258058548, "step": 285, "valid_targets_mean": 4122.0, "valid_targets_min": 2089 }, { "epoch": 1.102661596958175, "grad_norm": 0.26544618383492363, "learning_rate": 3.961199573138923e-05, "loss": 0.3541, "loss_nan_ranks": 0, "loss_rank_avg": 0.17382532358169556, "step": 290, "valid_targets_mean": 4094.8, "valid_targets_min": 1660 }, { "epoch": 1.1216730038022813, "grad_norm": 0.27395793149443254, "learning_rate": 3.957392700538261e-05, "loss": 0.3462, "loss_nan_ranks": 0, "loss_rank_avg": 0.17056365311145782, "step": 295, "valid_targets_mean": 3793.4, "valid_targets_min": 2184 }, { "epoch": 1.1406844106463878, "grad_norm": 0.2694202154601782, "learning_rate": 3.9534097139715926e-05, "loss": 0.3407, "loss_nan_ranks": 0, "loss_rank_avg": 0.17225591838359833, "step": 300, "valid_targets_mean": 4053.2, "valid_targets_min": 1891 }, { "epoch": 1.1596958174904943, "grad_norm": 0.2640197797512894, "learning_rate": 3.9492509718031645e-05, "loss": 0.3386, "loss_nan_ranks": 0, "loss_rank_avg": 0.17667798697948456, "step": 305, "valid_targets_mean": 4274.1, "valid_targets_min": 1885 }, { "epoch": 1.1787072243346008, "grad_norm": 0.25361187076817665, "learning_rate": 3.944916848210614e-05, "loss": 0.3374, "loss_nan_ranks": 0, "loss_rank_avg": 0.17618222534656525, "step": 310, "valid_targets_mean": 4422.8, "valid_targets_min": 1908 }, { "epoch": 1.1977186311787071, "grad_norm": 0.2338076023050613, "learning_rate": 3.9404077331513044e-05, "loss": 0.2354, "loss_nan_ranks": 0, "loss_rank_avg": 0.11924882978200912, "step": 315, "valid_targets_mean": 5303.7, "valid_targets_min": 2553 }, { "epoch": 1.2167300380228137, "grad_norm": 0.20686129841395856, "learning_rate": 3.9357240323272367e-05, "loss": 0.2326, "loss_nan_ranks": 0, "loss_rank_avg": 0.1147601380944252, "step": 320, "valid_targets_mean": 5254.6, "valid_targets_min": 2168 }, { "epoch": 1.2357414448669202, "grad_norm": 0.21736762319432792, "learning_rate": 3.930866167148549e-05, "loss": 0.2193, "loss_nan_ranks": 0, "loss_rank_avg": 0.10893138498067856, "step": 325, "valid_targets_mean": 4881.5, "valid_targets_min": 1899 }, { "epoch": 1.2547528517110267, "grad_norm": 0.20801984857584946, "learning_rate": 3.925834574695599e-05, "loss": 0.2205, "loss_nan_ranks": 0, "loss_rank_avg": 0.11135486513376236, "step": 330, "valid_targets_mean": 5599.6, "valid_targets_min": 2212 }, { "epoch": 1.2737642585551332, "grad_norm": 0.19325104513890953, "learning_rate": 3.920629707679641e-05, "loss": 0.2151, "loss_nan_ranks": 0, "loss_rank_avg": 0.10907386988401413, "step": 335, "valid_targets_mean": 4711.3, "valid_targets_min": 1625 }, { "epoch": 1.2927756653992395, "grad_norm": 0.18491316478382414, "learning_rate": 3.915252034402089e-05, "loss": 0.2132, "loss_nan_ranks": 0, "loss_rank_avg": 0.11171620339155197, "step": 340, "valid_targets_mean": 5566.0, "valid_targets_min": 1935 }, { "epoch": 1.311787072243346, "grad_norm": 0.19249485970179556, "learning_rate": 3.9097020387123876e-05, "loss": 0.2043, "loss_nan_ranks": 0, "loss_rank_avg": 0.08895137161016464, "step": 345, "valid_targets_mean": 4913.7, "valid_targets_min": 2127 }, { "epoch": 1.3307984790874525, "grad_norm": 0.1976193837635321, "learning_rate": 3.903980219964474e-05, "loss": 0.2084, "loss_nan_ranks": 0, "loss_rank_avg": 0.0967416986823082, "step": 350, "valid_targets_mean": 4946.9, "valid_targets_min": 2188 }, { "epoch": 1.3498098859315588, "grad_norm": 0.18884045418843914, "learning_rate": 3.898087092971851e-05, "loss": 0.2063, "loss_nan_ranks": 0, "loss_rank_avg": 0.10309580713510513, "step": 355, "valid_targets_mean": 5133.6, "valid_targets_min": 1787 }, { "epoch": 1.3688212927756653, "grad_norm": 0.2150636741987042, "learning_rate": 3.892023187961268e-05, "loss": 0.2071, "loss_nan_ranks": 0, "loss_rank_avg": 0.0983353927731514, "step": 360, "valid_targets_mean": 5151.9, "valid_targets_min": 2159 }, { "epoch": 1.3878326996197718, "grad_norm": 0.18862534780965, "learning_rate": 3.8857890505250103e-05, "loss": 0.2035, "loss_nan_ranks": 0, "loss_rank_avg": 0.09774443507194519, "step": 365, "valid_targets_mean": 4959.3, "valid_targets_min": 1217 }, { "epoch": 1.4068441064638784, "grad_norm": 0.18583559102022223, "learning_rate": 3.879385241571817e-05, "loss": 0.2001, "loss_nan_ranks": 0, "loss_rank_avg": 0.09217417240142822, "step": 370, "valid_targets_mean": 5263.2, "valid_targets_min": 2115 }, { "epoch": 1.4258555133079849, "grad_norm": 0.23173843670661723, "learning_rate": 3.8728123372764085e-05, "loss": 0.1968, "loss_nan_ranks": 0, "loss_rank_avg": 0.09147930145263672, "step": 375, "valid_targets_mean": 5157.2, "valid_targets_min": 1833 }, { "epoch": 1.4448669201520912, "grad_norm": 0.2160702092104236, "learning_rate": 3.866070929027647e-05, "loss": 0.1932, "loss_nan_ranks": 0, "loss_rank_avg": 0.10929874330759048, "step": 380, "valid_targets_mean": 5271.4, "valid_targets_min": 2529 }, { "epoch": 1.4638783269961977, "grad_norm": 0.19288064073755715, "learning_rate": 3.85916162337533e-05, "loss": 0.2053, "loss_nan_ranks": 0, "loss_rank_avg": 0.10217779874801636, "step": 385, "valid_targets_mean": 5138.1, "valid_targets_min": 1934 }, { "epoch": 1.4828897338403042, "grad_norm": 0.37858755377369957, "learning_rate": 3.8520850419756104e-05, "loss": 0.2644, "loss_nan_ranks": 0, "loss_rank_avg": 0.1849764585494995, "step": 390, "valid_targets_mean": 3941.6, "valid_targets_min": 1710 }, { "epoch": 1.5019011406844105, "grad_norm": 0.35470045423494073, "learning_rate": 3.8448418215350726e-05, "loss": 0.3449, "loss_nan_ranks": 0, "loss_rank_avg": 0.1646745651960373, "step": 395, "valid_targets_mean": 4081.7, "valid_targets_min": 1714 }, { "epoch": 1.5209125475285172, "grad_norm": 0.2876074912459836, "learning_rate": 3.837432613753438e-05, "loss": 0.3386, "loss_nan_ranks": 0, "loss_rank_avg": 0.1480315774679184, "step": 400, "valid_targets_mean": 3830.1, "valid_targets_min": 1672 }, { "epoch": 1.5399239543726235, "grad_norm": 0.3611749486973579, "learning_rate": 3.8298580852649316e-05, "loss": 0.3332, "loss_nan_ranks": 0, "loss_rank_avg": 0.16189919412136078, "step": 405, "valid_targets_mean": 3843.3, "valid_targets_min": 1308 }, { "epoch": 1.55893536121673, "grad_norm": 0.3206203251498053, "learning_rate": 3.822118917578304e-05, "loss": 0.3258, "loss_nan_ranks": 0, "loss_rank_avg": 0.17144060134887695, "step": 410, "valid_targets_mean": 3924.0, "valid_targets_min": 1308 }, { "epoch": 1.5779467680608366, "grad_norm": 0.3741047714354632, "learning_rate": 3.814215807015511e-05, "loss": 0.3159, "loss_nan_ranks": 0, "loss_rank_avg": 0.1597537249326706, "step": 415, "valid_targets_mean": 3620.2, "valid_targets_min": 1428 }, { "epoch": 1.5969581749049429, "grad_norm": 0.37521631376088815, "learning_rate": 3.806149464649066e-05, "loss": 0.3184, "loss_nan_ranks": 0, "loss_rank_avg": 0.14377222955226898, "step": 420, "valid_targets_mean": 3715.8, "valid_targets_min": 1498 }, { "epoch": 1.6159695817490496, "grad_norm": 0.26671798721350176, "learning_rate": 3.797920616238058e-05, "loss": 0.3558, "loss_nan_ranks": 0, "loss_rank_avg": 0.1722474843263626, "step": 425, "valid_targets_mean": 7975.3, "valid_targets_min": 657 }, { "epoch": 1.6349809885931559, "grad_norm": 0.27985449043512695, "learning_rate": 3.789530002162856e-05, "loss": 0.3507, "loss_nan_ranks": 0, "loss_rank_avg": 0.16311007738113403, "step": 430, "valid_targets_mean": 7661.3, "valid_targets_min": 711 }, { "epoch": 1.6539923954372624, "grad_norm": 0.24727092770252987, "learning_rate": 3.780978377358493e-05, "loss": 0.3425, "loss_nan_ranks": 0, "loss_rank_avg": 0.17998184263706207, "step": 435, "valid_targets_mean": 7740.2, "valid_targets_min": 588 }, { "epoch": 1.673003802281369, "grad_norm": 0.22934434129529802, "learning_rate": 3.77226651124674e-05, "loss": 0.3389, "loss_nan_ranks": 0, "loss_rank_avg": 0.16208817064762115, "step": 440, "valid_targets_mean": 7938.4, "valid_targets_min": 531 }, { "epoch": 1.6920152091254752, "grad_norm": 0.2257662037497451, "learning_rate": 3.7633951876668826e-05, "loss": 0.3333, "loss_nan_ranks": 0, "loss_rank_avg": 0.1535138487815857, "step": 445, "valid_targets_mean": 6955.9, "valid_targets_min": 674 }, { "epoch": 1.7110266159695817, "grad_norm": 0.2300354581824267, "learning_rate": 3.754365204805189e-05, "loss": 0.339, "loss_nan_ranks": 0, "loss_rank_avg": 0.17140650749206543, "step": 450, "valid_targets_mean": 7178.0, "valid_targets_min": 605 }, { "epoch": 1.7300380228136882, "grad_norm": 0.25849307514203357, "learning_rate": 3.745177375123101e-05, "loss": 0.3361, "loss_nan_ranks": 0, "loss_rank_avg": 0.17827041447162628, "step": 455, "valid_targets_mean": 8065.9, "valid_targets_min": 693 }, { "epoch": 1.7490494296577945, "grad_norm": 0.23634004384778767, "learning_rate": 3.7358325252841326e-05, "loss": 0.3325, "loss_nan_ranks": 0, "loss_rank_avg": 0.15933071076869965, "step": 460, "valid_targets_mean": 7800.7, "valid_targets_min": 413 }, { "epoch": 1.7680608365019013, "grad_norm": 0.2360782519286374, "learning_rate": 3.726331496079486e-05, "loss": 0.3411, "loss_nan_ranks": 0, "loss_rank_avg": 0.17583046853542328, "step": 465, "valid_targets_mean": 7847.2, "valid_targets_min": 695 }, { "epoch": 1.7870722433460076, "grad_norm": 0.23430829246800822, "learning_rate": 3.716675142352411e-05, "loss": 0.3246, "loss_nan_ranks": 0, "loss_rank_avg": 0.15302903950214386, "step": 470, "valid_targets_mean": 7130.6, "valid_targets_min": 614 }, { "epoch": 1.806083650190114, "grad_norm": 0.22227998277895408, "learning_rate": 3.706864332921285e-05, "loss": 0.333, "loss_nan_ranks": 0, "loss_rank_avg": 0.16763727366924286, "step": 475, "valid_targets_mean": 7978.8, "valid_targets_min": 598 }, { "epoch": 1.8250950570342206, "grad_norm": 0.27022623582106853, "learning_rate": 3.696899950501447e-05, "loss": 0.325, "loss_nan_ranks": 0, "loss_rank_avg": 0.16360555589199066, "step": 480, "valid_targets_mean": 7157.9, "valid_targets_min": 589 }, { "epoch": 1.8441064638783269, "grad_norm": 0.24101847996901093, "learning_rate": 3.686782891625772e-05, "loss": 0.3253, "loss_nan_ranks": 0, "loss_rank_avg": 0.16875922679901123, "step": 485, "valid_targets_mean": 7234.4, "valid_targets_min": 593 }, { "epoch": 1.8631178707224336, "grad_norm": 0.26624531145633745, "learning_rate": 3.676514066564009e-05, "loss": 0.1908, "loss_nan_ranks": 0, "loss_rank_avg": 0.08150222152471542, "step": 490, "valid_targets_mean": 6154.4, "valid_targets_min": 1363 }, { "epoch": 1.88212927756654, "grad_norm": 0.25942794427129934, "learning_rate": 3.6660943992408817e-05, "loss": 0.1785, "loss_nan_ranks": 0, "loss_rank_avg": 0.08557536453008652, "step": 495, "valid_targets_mean": 5865.2, "valid_targets_min": 970 }, { "epoch": 1.9011406844106464, "grad_norm": 0.24051565314409892, "learning_rate": 3.6555248271529554e-05, "loss": 0.177, "loss_nan_ranks": 0, "loss_rank_avg": 0.09305544942617416, "step": 500, "valid_targets_mean": 6350.6, "valid_targets_min": 972 }, { "epoch": 1.920152091254753, "grad_norm": 0.2415618705323024, "learning_rate": 3.644806301284293e-05, "loss": 0.1741, "loss_nan_ranks": 0, "loss_rank_avg": 0.08921092748641968, "step": 505, "valid_targets_mean": 5735.5, "valid_targets_min": 959 }, { "epoch": 1.9391634980988592, "grad_norm": 0.19616487082311124, "learning_rate": 3.633939786020884e-05, "loss": 0.1729, "loss_nan_ranks": 0, "loss_rank_avg": 0.09038201719522476, "step": 510, "valid_targets_mean": 6206.8, "valid_targets_min": 1281 }, { "epoch": 1.9581749049429658, "grad_norm": 0.21845612335525297, "learning_rate": 3.622926259063883e-05, "loss": 0.1702, "loss_nan_ranks": 0, "loss_rank_avg": 0.08562606573104858, "step": 515, "valid_targets_mean": 6104.3, "valid_targets_min": 1326 }, { "epoch": 1.9771863117870723, "grad_norm": 0.19802351112521951, "learning_rate": 3.611766711341636e-05, "loss": 0.1709, "loss_nan_ranks": 0, "loss_rank_avg": 0.08315548300743103, "step": 520, "valid_targets_mean": 6447.5, "valid_targets_min": 1353 }, { "epoch": 1.9961977186311786, "grad_norm": 0.19485390745124273, "learning_rate": 3.600462146920525e-05, "loss": 0.17, "loss_nan_ranks": 0, "loss_rank_avg": 0.08430048078298569, "step": 525, "valid_targets_mean": 5833.1, "valid_targets_min": 1011 }, { "epoch": 2.0152091254752853, "grad_norm": 0.3136192928564486, "learning_rate": 3.5890135829146294e-05, "loss": 0.2736, "loss_nan_ranks": 0, "loss_rank_avg": 0.1510065495967865, "step": 530, "valid_targets_mean": 4625.7, "valid_targets_min": 1421 }, { "epoch": 2.0342205323193916, "grad_norm": 0.31107755227200234, "learning_rate": 3.577422049394212e-05, "loss": 0.2852, "loss_nan_ranks": 0, "loss_rank_avg": 0.1280999481678009, "step": 535, "valid_targets_mean": 3831.7, "valid_targets_min": 1240 }, { "epoch": 2.053231939163498, "grad_norm": 0.32564709358200533, "learning_rate": 3.5656885892930376e-05, "loss": 0.2803, "loss_nan_ranks": 0, "loss_rank_avg": 0.15929754078388214, "step": 540, "valid_targets_mean": 4451.9, "valid_targets_min": 2180 }, { "epoch": 2.0722433460076046, "grad_norm": 0.30492913163946755, "learning_rate": 3.5538142583145395e-05, "loss": 0.2741, "loss_nan_ranks": 0, "loss_rank_avg": 0.1532464176416397, "step": 545, "valid_targets_mean": 4672.5, "valid_targets_min": 1869 }, { "epoch": 2.091254752851711, "grad_norm": 0.2959882307284956, "learning_rate": 3.5418001248368324e-05, "loss": 0.2689, "loss_nan_ranks": 0, "loss_rank_avg": 0.13413457572460175, "step": 550, "valid_targets_mean": 4185.8, "valid_targets_min": 1891 }, { "epoch": 2.1102661596958177, "grad_norm": 0.27731170863500637, "learning_rate": 3.5296472698165856e-05, "loss": 0.2684, "loss_nan_ranks": 0, "loss_rank_avg": 0.13876162469387054, "step": 555, "valid_targets_mean": 4360.1, "valid_targets_min": 1878 }, { "epoch": 2.129277566539924, "grad_norm": 0.2607950148321866, "learning_rate": 3.5173567866917664e-05, "loss": 0.2643, "loss_nan_ranks": 0, "loss_rank_avg": 0.12989114224910736, "step": 560, "valid_targets_mean": 4039.0, "valid_targets_min": 2212 }, { "epoch": 2.1482889733840302, "grad_norm": 0.2501785431734494, "learning_rate": 3.504929781283259e-05, "loss": 0.2645, "loss_nan_ranks": 0, "loss_rank_avg": 0.13388948142528534, "step": 565, "valid_targets_mean": 4414.8, "valid_targets_min": 1740 }, { "epoch": 2.167300380228137, "grad_norm": 0.2590008497305306, "learning_rate": 3.4923673716953717e-05, "loss": 0.2606, "loss_nan_ranks": 0, "loss_rank_avg": 0.1188698410987854, "step": 570, "valid_targets_mean": 3860.4, "valid_targets_min": 1987 }, { "epoch": 2.1863117870722433, "grad_norm": 0.22763236836652495, "learning_rate": 3.4796706882152304e-05, "loss": 0.2317, "loss_nan_ranks": 0, "loss_rank_avg": 0.09271648526191711, "step": 575, "valid_targets_mean": 5169.8, "valid_targets_min": 2230 }, { "epoch": 2.20532319391635, "grad_norm": 0.2243687505654757, "learning_rate": 3.4668408732110915e-05, "loss": 0.1786, "loss_nan_ranks": 0, "loss_rank_avg": 0.08923256397247314, "step": 580, "valid_targets_mean": 5347.7, "valid_targets_min": 2590 }, { "epoch": 2.2243346007604563, "grad_norm": 0.20192534845911445, "learning_rate": 3.453879081029552e-05, "loss": 0.1705, "loss_nan_ranks": 0, "loss_rank_avg": 0.08735809475183487, "step": 585, "valid_targets_mean": 5252.0, "valid_targets_min": 2739 }, { "epoch": 2.2433460076045626, "grad_norm": 0.21897534747327288, "learning_rate": 3.440786477891691e-05, "loss": 0.1663, "loss_nan_ranks": 0, "loss_rank_avg": 0.07201861590147018, "step": 590, "valid_targets_mean": 4841.5, "valid_targets_min": 2438 }, { "epoch": 2.2623574144486693, "grad_norm": 0.202393296923521, "learning_rate": 3.42756424178814e-05, "loss": 0.17, "loss_nan_ranks": 0, "loss_rank_avg": 0.08041936904191971, "step": 595, "valid_targets_mean": 5269.5, "valid_targets_min": 2799 }, { "epoch": 2.2813688212927756, "grad_norm": 0.1982442046754712, "learning_rate": 3.4142135623730954e-05, "loss": 0.1598, "loss_nan_ranks": 0, "loss_rank_avg": 0.08316317200660706, "step": 600, "valid_targets_mean": 4994.9, "valid_targets_min": 2925 }, { "epoch": 2.3003802281368824, "grad_norm": 0.1902314107786306, "learning_rate": 3.40073564085728e-05, "loss": 0.1613, "loss_nan_ranks": 0, "loss_rank_avg": 0.07204687595367432, "step": 605, "valid_targets_mean": 5089.9, "valid_targets_min": 1798 }, { "epoch": 2.3193916349809887, "grad_norm": 0.18912526794105103, "learning_rate": 3.387131689899866e-05, "loss": 0.1583, "loss_nan_ranks": 0, "loss_rank_avg": 0.0887041762471199, "step": 610, "valid_targets_mean": 5424.5, "valid_targets_min": 1688 }, { "epoch": 2.338403041825095, "grad_norm": 0.20318427904092756, "learning_rate": 3.3734029334993675e-05, "loss": 0.1571, "loss_nan_ranks": 0, "loss_rank_avg": 0.08025332540273666, "step": 615, "valid_targets_mean": 5056.5, "valid_targets_min": 1818 }, { "epoch": 2.3574144486692017, "grad_norm": 0.21260147977373586, "learning_rate": 3.359550606883511e-05, "loss": 0.1567, "loss_nan_ranks": 0, "loss_rank_avg": 0.08076399564743042, "step": 620, "valid_targets_mean": 5039.8, "valid_targets_min": 2218 }, { "epoch": 2.376425855513308, "grad_norm": 0.2005739212333494, "learning_rate": 3.3455759563981025e-05, "loss": 0.1576, "loss_nan_ranks": 0, "loss_rank_avg": 0.08449053764343262, "step": 625, "valid_targets_mean": 4937.0, "valid_targets_min": 2290 }, { "epoch": 2.3954372623574143, "grad_norm": 0.1939665480340307, "learning_rate": 3.331480239394881e-05, "loss": 0.1532, "loss_nan_ranks": 0, "loss_rank_avg": 0.08675984293222427, "step": 630, "valid_targets_mean": 5262.6, "valid_targets_min": 2744 }, { "epoch": 2.414448669201521, "grad_norm": 0.2067313139807621, "learning_rate": 3.317264724118399e-05, "loss": 0.151, "loss_nan_ranks": 0, "loss_rank_avg": 0.07771243900060654, "step": 635, "valid_targets_mean": 5317.8, "valid_targets_min": 2213 }, { "epoch": 2.4334600760456273, "grad_norm": 0.24438817055982565, "learning_rate": 3.3029306895919056e-05, "loss": 0.1468, "loss_nan_ranks": 0, "loss_rank_avg": 0.07272510975599289, "step": 640, "valid_targets_mean": 4976.9, "valid_targets_min": 1959 }, { "epoch": 2.4524714828897336, "grad_norm": 0.21947954421689864, "learning_rate": 3.288479425502273e-05, "loss": 0.1549, "loss_nan_ranks": 0, "loss_rank_avg": 0.08339103311300278, "step": 645, "valid_targets_mean": 5133.1, "valid_targets_min": 2289 }, { "epoch": 2.4714828897338403, "grad_norm": 0.2361515652158053, "learning_rate": 3.2739122320839567e-05, "loss": 0.1514, "loss_nan_ranks": 0, "loss_rank_avg": 0.07445741444826126, "step": 650, "valid_targets_mean": 4922.0, "valid_targets_min": 1899 }, { "epoch": 2.4904942965779466, "grad_norm": 0.4845308707427055, "learning_rate": 3.25923042000201e-05, "loss": 0.2337, "loss_nan_ranks": 0, "loss_rank_avg": 0.11655815690755844, "step": 655, "valid_targets_mean": 3755.8, "valid_targets_min": 1436 }, { "epoch": 2.5095057034220534, "grad_norm": 0.5041883033987095, "learning_rate": 3.244435310234156e-05, "loss": 0.2444, "loss_nan_ranks": 0, "loss_rank_avg": 0.12083180993795395, "step": 660, "valid_targets_mean": 4149.8, "valid_targets_min": 2001 }, { "epoch": 2.5285171102661597, "grad_norm": 0.4293037233161345, "learning_rate": 3.229528233951935e-05, "loss": 0.2334, "loss_nan_ranks": 0, "loss_rank_avg": 0.12094869464635849, "step": 665, "valid_targets_mean": 4153.3, "valid_targets_min": 1894 }, { "epoch": 2.5475285171102664, "grad_norm": 0.4341690333179762, "learning_rate": 3.214510532400939e-05, "loss": 0.2389, "loss_nan_ranks": 0, "loss_rank_avg": 0.1278359293937683, "step": 670, "valid_targets_mean": 4260.3, "valid_targets_min": 1553 }, { "epoch": 2.5665399239543727, "grad_norm": 0.4497609978338215, "learning_rate": 3.1993835567801266e-05, "loss": 0.2236, "loss_nan_ranks": 0, "loss_rank_avg": 0.1090053841471672, "step": 675, "valid_targets_mean": 3701.6, "valid_targets_min": 1071 }, { "epoch": 2.585551330798479, "grad_norm": 0.36831043021057513, "learning_rate": 3.184148668120253e-05, "loss": 0.2238, "loss_nan_ranks": 0, "loss_rank_avg": 0.10701311379671097, "step": 680, "valid_targets_mean": 3952.4, "valid_targets_min": 1854 }, { "epoch": 2.6045627376425857, "grad_norm": 0.40504235757567003, "learning_rate": 3.16880723716142e-05, "loss": 0.256, "loss_nan_ranks": 0, "loss_rank_avg": 0.1709039807319641, "step": 685, "valid_targets_mean": 7155.4, "valid_targets_min": 483 }, { "epoch": 2.623574144486692, "grad_norm": 0.3013409218707254, "learning_rate": 3.153360644229735e-05, "loss": 0.3231, "loss_nan_ranks": 0, "loss_rank_avg": 0.16763341426849365, "step": 690, "valid_targets_mean": 7369.4, "valid_targets_min": 631 }, { "epoch": 2.6425855513307983, "grad_norm": 0.26444934651735047, "learning_rate": 3.137810279113125e-05, "loss": 0.3117, "loss_nan_ranks": 0, "loss_rank_avg": 0.15889973938465118, "step": 695, "valid_targets_mean": 7592.1, "valid_targets_min": 806 }, { "epoch": 2.661596958174905, "grad_norm": 0.22352602705602617, "learning_rate": 3.122157540936288e-05, "loss": 0.3003, "loss_nan_ranks": 0, "loss_rank_avg": 0.15156026184558868, "step": 700, "valid_targets_mean": 7414.5, "valid_targets_min": 724 }, { "epoch": 2.6806083650190113, "grad_norm": 0.24463272584732818, "learning_rate": 3.106403838034815e-05, "loss": 0.296, "loss_nan_ranks": 0, "loss_rank_avg": 0.15007655322551727, "step": 705, "valid_targets_mean": 7829.4, "valid_targets_min": 495 }, { "epoch": 2.6996197718631176, "grad_norm": 0.21946530753675053, "learning_rate": 3.090550587828466e-05, "loss": 0.2931, "loss_nan_ranks": 0, "loss_rank_avg": 0.14451086521148682, "step": 710, "valid_targets_mean": 7851.2, "valid_targets_min": 660 }, { "epoch": 2.7186311787072244, "grad_norm": 0.22157971443490984, "learning_rate": 3.0745992166936484e-05, "loss": 0.2986, "loss_nan_ranks": 0, "loss_rank_avg": 0.14042143523693085, "step": 715, "valid_targets_mean": 6916.8, "valid_targets_min": 499 }, { "epoch": 2.7376425855513307, "grad_norm": 0.21721619776557882, "learning_rate": 3.058551159835078e-05, "loss": 0.2942, "loss_nan_ranks": 0, "loss_rank_avg": 0.15121476352214813, "step": 720, "valid_targets_mean": 7713.7, "valid_targets_min": 679 }, { "epoch": 2.7566539923954374, "grad_norm": 0.2516514737751596, "learning_rate": 3.0424078611566484e-05, "loss": 0.2884, "loss_nan_ranks": 0, "loss_rank_avg": 0.15188945829868317, "step": 725, "valid_targets_mean": 7996.9, "valid_targets_min": 739 }, { "epoch": 2.7756653992395437, "grad_norm": 0.2378457691057772, "learning_rate": 3.026170773131516e-05, "loss": 0.2947, "loss_nan_ranks": 0, "loss_rank_avg": 0.13439372181892395, "step": 730, "valid_targets_mean": 7238.1, "valid_targets_min": 604 }, { "epoch": 2.7946768060836504, "grad_norm": 0.25213487345273705, "learning_rate": 3.0098413566714165e-05, "loss": 0.2846, "loss_nan_ranks": 0, "loss_rank_avg": 0.14057187736034393, "step": 735, "valid_targets_mean": 7777.8, "valid_targets_min": 678 }, { "epoch": 2.8136882129277567, "grad_norm": 0.24989480394630467, "learning_rate": 2.9934210809952216e-05, "loss": 0.2831, "loss_nan_ranks": 0, "loss_rank_avg": 0.12304911762475967, "step": 740, "valid_targets_mean": 6190.7, "valid_targets_min": 755 }, { "epoch": 2.832699619771863, "grad_norm": 0.2689521231941662, "learning_rate": 2.9769114234967486e-05, "loss": 0.2805, "loss_nan_ranks": 0, "loss_rank_avg": 0.12372348457574844, "step": 745, "valid_targets_mean": 6905.2, "valid_targets_min": 538 }, { "epoch": 2.8517110266159698, "grad_norm": 0.20927078109216926, "learning_rate": 2.9603138696118315e-05, "loss": 0.2343, "loss_nan_ranks": 0, "loss_rank_avg": 0.07049879431724548, "step": 750, "valid_targets_mean": 5756.4, "valid_targets_min": 1271 }, { "epoch": 2.870722433460076, "grad_norm": 0.2675987794700805, "learning_rate": 2.9436299126846726e-05, "loss": 0.1416, "loss_nan_ranks": 0, "loss_rank_avg": 0.07208818197250366, "step": 755, "valid_targets_mean": 6551.0, "valid_targets_min": 1261 }, { "epoch": 2.8897338403041823, "grad_norm": 0.2651422160068339, "learning_rate": 2.92686105383348e-05, "loss": 0.1403, "loss_nan_ranks": 0, "loss_rank_avg": 0.06887445598840714, "step": 760, "valid_targets_mean": 5875.1, "valid_targets_min": 1343 }, { "epoch": 2.908745247148289, "grad_norm": 0.28796721511055345, "learning_rate": 2.910008801815406e-05, "loss": 0.1355, "loss_nan_ranks": 0, "loss_rank_avg": 0.06859635561704636, "step": 765, "valid_targets_mean": 6292.9, "valid_targets_min": 1329 }, { "epoch": 2.9277566539923954, "grad_norm": 0.23162244917131206, "learning_rate": 2.8930746728908002e-05, "loss": 0.1361, "loss_nan_ranks": 0, "loss_rank_avg": 0.07203914225101471, "step": 770, "valid_targets_mean": 6265.1, "valid_targets_min": 1092 }, { "epoch": 2.9467680608365017, "grad_norm": 0.22762426423237933, "learning_rate": 2.876060190686784e-05, "loss": 0.1368, "loss_nan_ranks": 0, "loss_rank_avg": 0.06779056042432785, "step": 775, "valid_targets_mean": 6037.4, "valid_targets_min": 1063 }, { "epoch": 2.9657794676806084, "grad_norm": 0.2342043607080278, "learning_rate": 2.8589668860601643e-05, "loss": 0.1331, "loss_nan_ranks": 0, "loss_rank_avg": 0.06987350434064865, "step": 780, "valid_targets_mean": 6279.7, "valid_targets_min": 1052 }, { "epoch": 2.9847908745247147, "grad_norm": 0.23814802337043967, "learning_rate": 2.8417962969596976e-05, "loss": 0.1349, "loss_nan_ranks": 0, "loss_rank_avg": 0.07049954682588577, "step": 785, "valid_targets_mean": 6157.9, "valid_targets_min": 787 }, { "epoch": 3.0038022813688214, "grad_norm": 0.3462432064505725, "learning_rate": 2.8245499682877152e-05, "loss": 0.1522, "loss_nan_ranks": 0, "loss_rank_avg": 0.11840838193893433, "step": 790, "valid_targets_mean": 4474.1, "valid_targets_min": 2202 }, { "epoch": 3.0228136882129277, "grad_norm": 0.387684012161189, "learning_rate": 2.8072294517611208e-05, "loss": 0.2226, "loss_nan_ranks": 0, "loss_rank_avg": 0.11450818926095963, "step": 795, "valid_targets_mean": 4430.6, "valid_targets_min": 1899 }, { "epoch": 3.041825095057034, "grad_norm": 0.4289500795878552, "learning_rate": 2.7898363057717786e-05, "loss": 0.2082, "loss_nan_ranks": 0, "loss_rank_avg": 0.10947731137275696, "step": 800, "valid_targets_mean": 4507.6, "valid_targets_min": 2374 }, { "epoch": 3.0608365019011408, "grad_norm": 0.38143942347647636, "learning_rate": 2.772372095246297e-05, "loss": 0.2094, "loss_nan_ranks": 0, "loss_rank_avg": 0.10378646850585938, "step": 805, "valid_targets_mean": 4175.6, "valid_targets_min": 1694 }, { "epoch": 3.079847908745247, "grad_norm": 0.35205316942486764, "learning_rate": 2.7548383915052287e-05, "loss": 0.2033, "loss_nan_ranks": 0, "loss_rank_avg": 0.09711352735757828, "step": 810, "valid_targets_mean": 4604.5, "valid_targets_min": 2300 }, { "epoch": 3.098859315589354, "grad_norm": 0.34334350946854836, "learning_rate": 2.7372367721216915e-05, "loss": 0.2016, "loss_nan_ranks": 0, "loss_rank_avg": 0.10052075982093811, "step": 815, "valid_targets_mean": 4258.6, "valid_targets_min": 1637 }, { "epoch": 3.11787072243346, "grad_norm": 0.31906834116059535, "learning_rate": 2.7195688207794277e-05, "loss": 0.1994, "loss_nan_ranks": 0, "loss_rank_avg": 0.09429528564214706, "step": 820, "valid_targets_mean": 4176.6, "valid_targets_min": 2221 }, { "epoch": 3.1368821292775664, "grad_norm": 0.33079871629370033, "learning_rate": 2.701836127130314e-05, "loss": 0.1948, "loss_nan_ranks": 0, "loss_rank_avg": 0.0990242138504982, "step": 825, "valid_targets_mean": 4614.5, "valid_targets_min": 1976 }, { "epoch": 3.155893536121673, "grad_norm": 0.31986706842847734, "learning_rate": 2.684040286651338e-05, "loss": 0.196, "loss_nan_ranks": 0, "loss_rank_avg": 0.09625053405761719, "step": 830, "valid_targets_mean": 4079.5, "valid_targets_min": 1191 }, { "epoch": 3.1749049429657794, "grad_norm": 0.32660199144818897, "learning_rate": 2.666182900501042e-05, "loss": 0.1909, "loss_nan_ranks": 0, "loss_rank_avg": 0.09521114081144333, "step": 835, "valid_targets_mean": 3921.7, "valid_targets_min": 1790 }, { "epoch": 3.1939163498098857, "grad_norm": 0.2789356519847509, "learning_rate": 2.6482655753754657e-05, "loss": 0.1535, "loss_nan_ranks": 0, "loss_rank_avg": 0.07062631845474243, "step": 840, "valid_targets_mean": 5405.6, "valid_targets_min": 1854 }, { "epoch": 3.2129277566539924, "grad_norm": 0.2638806676304068, "learning_rate": 2.6302899233635803e-05, "loss": 0.141, "loss_nan_ranks": 0, "loss_rank_avg": 0.07542570680379868, "step": 845, "valid_targets_mean": 5283.7, "valid_targets_min": 2215 }, { "epoch": 3.2319391634980987, "grad_norm": 0.26528690947633593, "learning_rate": 2.6122575618022487e-05, "loss": 0.1271, "loss_nan_ranks": 0, "loss_rank_avg": 0.0592070035636425, "step": 850, "valid_targets_mean": 5144.5, "valid_targets_min": 2072 }, { "epoch": 3.2509505703422055, "grad_norm": 0.2360500251447195, "learning_rate": 2.594170113130703e-05, "loss": 0.1289, "loss_nan_ranks": 0, "loss_rank_avg": 0.06634698063135147, "step": 855, "valid_targets_mean": 5086.4, "valid_targets_min": 3113 }, { "epoch": 3.2699619771863118, "grad_norm": 0.2257716074443295, "learning_rate": 2.57602920474457e-05, "loss": 0.1258, "loss_nan_ranks": 0, "loss_rank_avg": 0.06248977407813072, "step": 860, "valid_targets_mean": 4885.1, "valid_targets_min": 1735 }, { "epoch": 3.288973384030418, "grad_norm": 0.23619639963838657, "learning_rate": 2.5578364688494475e-05, "loss": 0.1223, "loss_nan_ranks": 0, "loss_rank_avg": 0.05550096556544304, "step": 865, "valid_targets_mean": 4916.5, "valid_targets_min": 1659 }, { "epoch": 3.307984790874525, "grad_norm": 0.2255346409587296, "learning_rate": 2.5395935423140487e-05, "loss": 0.1228, "loss_nan_ranks": 0, "loss_rank_avg": 0.0695202425122261, "step": 870, "valid_targets_mean": 5242.5, "valid_targets_min": 2397 }, { "epoch": 3.326996197718631, "grad_norm": 0.20444394075732844, "learning_rate": 2.5213020665229274e-05, "loss": 0.1154, "loss_nan_ranks": 0, "loss_rank_avg": 0.055293649435043335, "step": 875, "valid_targets_mean": 4999.2, "valid_targets_min": 1210 }, { "epoch": 3.346007604562738, "grad_norm": 0.23554324333284074, "learning_rate": 2.5029636872287953e-05, "loss": 0.1168, "loss_nan_ranks": 0, "loss_rank_avg": 0.060592859983444214, "step": 880, "valid_targets_mean": 5145.7, "valid_targets_min": 1139 }, { "epoch": 3.365019011406844, "grad_norm": 0.24624770501083576, "learning_rate": 2.4845800544044483e-05, "loss": 0.116, "loss_nan_ranks": 0, "loss_rank_avg": 0.05072854831814766, "step": 885, "valid_targets_mean": 5084.9, "valid_targets_min": 1197 }, { "epoch": 3.3840304182509504, "grad_norm": 0.2386065391922119, "learning_rate": 2.4661528220943134e-05, "loss": 0.1153, "loss_nan_ranks": 0, "loss_rank_avg": 0.0552067756652832, "step": 890, "valid_targets_mean": 5369.3, "valid_targets_min": 2995 }, { "epoch": 3.403041825095057, "grad_norm": 0.23227509902254379, "learning_rate": 2.4476836482656257e-05, "loss": 0.1142, "loss_nan_ranks": 0, "loss_rank_avg": 0.052853602916002274, "step": 895, "valid_targets_mean": 5159.8, "valid_targets_min": 1840 }, { "epoch": 3.4220532319391634, "grad_norm": 0.24229052309272628, "learning_rate": 2.4291741946592575e-05, "loss": 0.1113, "loss_nan_ranks": 0, "loss_rank_avg": 0.05704169347882271, "step": 900, "valid_targets_mean": 4962.0, "valid_targets_min": 1021 }, { "epoch": 3.4410646387832697, "grad_norm": 0.2474659051806386, "learning_rate": 2.4106261266402023e-05, "loss": 0.1084, "loss_nan_ranks": 0, "loss_rank_avg": 0.05357471480965614, "step": 905, "valid_targets_mean": 5149.4, "valid_targets_min": 1377 }, { "epoch": 3.4600760456273765, "grad_norm": 0.2350749738396892, "learning_rate": 2.392041113047737e-05, "loss": 0.1169, "loss_nan_ranks": 0, "loss_rank_avg": 0.05722544714808464, "step": 910, "valid_targets_mean": 4918.9, "valid_targets_min": 1444 }, { "epoch": 3.4790874524714828, "grad_norm": 0.4541902536996299, "learning_rate": 2.3734208260452727e-05, "loss": 0.1288, "loss_nan_ranks": 0, "loss_rank_avg": 0.08960682153701782, "step": 915, "valid_targets_mean": 4053.5, "valid_targets_min": 2113 }, { "epoch": 3.4980988593155895, "grad_norm": 0.5243330478051801, "learning_rate": 2.354766940969899e-05, "loss": 0.1682, "loss_nan_ranks": 0, "loss_rank_avg": 0.09234726428985596, "step": 920, "valid_targets_mean": 4056.7, "valid_targets_min": 1784 }, { "epoch": 3.517110266159696, "grad_norm": 0.6150184733706467, "learning_rate": 2.3360811361816525e-05, "loss": 0.1757, "loss_nan_ranks": 0, "loss_rank_avg": 0.07612651586532593, "step": 925, "valid_targets_mean": 3331.2, "valid_targets_min": 1107 }, { "epoch": 3.5361216730038025, "grad_norm": 0.5165218060285858, "learning_rate": 2.317365092912503e-05, "loss": 0.1707, "loss_nan_ranks": 0, "loss_rank_avg": 0.08710366487503052, "step": 930, "valid_targets_mean": 4188.5, "valid_targets_min": 1299 }, { "epoch": 3.555133079847909, "grad_norm": 0.4638634963858916, "learning_rate": 2.2986204951150926e-05, "loss": 0.1692, "loss_nan_ranks": 0, "loss_rank_avg": 0.07611557096242905, "step": 935, "valid_targets_mean": 3755.5, "valid_targets_min": 1590 }, { "epoch": 3.574144486692015, "grad_norm": 0.4507258054785201, "learning_rate": 2.2798490293112216e-05, "loss": 0.1581, "loss_nan_ranks": 0, "loss_rank_avg": 0.08385371416807175, "step": 940, "valid_targets_mean": 3787.1, "valid_targets_min": 1520 }, { "epoch": 3.593155893536122, "grad_norm": 0.4753708125962138, "learning_rate": 2.261052384440104e-05, "loss": 0.1576, "loss_nan_ranks": 0, "loss_rank_avg": 0.08039424568414688, "step": 945, "valid_targets_mean": 3581.0, "valid_targets_min": 2196 }, { "epoch": 3.612167300380228, "grad_norm": 0.30946760254853556, "learning_rate": 2.2422322517064084e-05, "loss": 0.2454, "loss_nan_ranks": 0, "loss_rank_avg": 0.1446402221918106, "step": 950, "valid_targets_mean": 7765.3, "valid_targets_min": 434 }, { "epoch": 3.6311787072243344, "grad_norm": 0.26912647329408734, "learning_rate": 2.2233903244280977e-05, "loss": 0.2918, "loss_nan_ranks": 0, "loss_rank_avg": 0.1367168426513672, "step": 955, "valid_targets_mean": 7428.0, "valid_targets_min": 583 }, { "epoch": 3.650190114068441, "grad_norm": 0.25673057743212085, "learning_rate": 2.2045282978840684e-05, "loss": 0.2806, "loss_nan_ranks": 0, "loss_rank_avg": 0.14741817116737366, "step": 960, "valid_targets_mean": 7663.1, "valid_targets_min": 667 }, { "epoch": 3.6692015209125475, "grad_norm": 0.24695457622644892, "learning_rate": 2.1856478691616262e-05, "loss": 0.2674, "loss_nan_ranks": 0, "loss_rank_avg": 0.12863211333751678, "step": 965, "valid_targets_mean": 7248.7, "valid_targets_min": 620 }, { "epoch": 3.6882129277566538, "grad_norm": 0.24848314595919005, "learning_rate": 2.166750737003787e-05, "loss": 0.2568, "loss_nan_ranks": 0, "loss_rank_avg": 0.12329714745283127, "step": 970, "valid_targets_mean": 7425.4, "valid_targets_min": 622 }, { "epoch": 3.7072243346007605, "grad_norm": 0.2382432971462893, "learning_rate": 2.1478386016564406e-05, "loss": 0.2603, "loss_nan_ranks": 0, "loss_rank_avg": 0.11289852857589722, "step": 975, "valid_targets_mean": 6589.8, "valid_targets_min": 705 }, { "epoch": 3.726235741444867, "grad_norm": 0.2471516002178032, "learning_rate": 2.1289131647153664e-05, "loss": 0.2567, "loss_nan_ranks": 0, "loss_rank_avg": 0.12679801881313324, "step": 980, "valid_targets_mean": 7816.3, "valid_targets_min": 471 }, { "epoch": 3.7452471482889735, "grad_norm": 0.24941306933409185, "learning_rate": 2.109976128973141e-05, "loss": 0.2557, "loss_nan_ranks": 0, "loss_rank_avg": 0.13337580859661102, "step": 985, "valid_targets_mean": 8164.5, "valid_targets_min": 752 }, { "epoch": 3.76425855513308, "grad_norm": 0.29226310695744656, "learning_rate": 2.0910291982659277e-05, "loss": 0.2559, "loss_nan_ranks": 0, "loss_rank_avg": 0.12007047981023788, "step": 990, "valid_targets_mean": 7631.5, "valid_targets_min": 575 }, { "epoch": 3.7832699619771866, "grad_norm": 0.28943535096600376, "learning_rate": 2.072074077320177e-05, "loss": 0.2475, "loss_nan_ranks": 0, "loss_rank_avg": 0.12613262236118317, "step": 995, "valid_targets_mean": 7756.3, "valid_targets_min": 590 }, { "epoch": 3.802281368821293, "grad_norm": 0.30211188723707383, "learning_rate": 2.053112471599245e-05, "loss": 0.2459, "loss_nan_ranks": 0, "loss_rank_avg": 0.1403399258852005, "step": 1000, "valid_targets_mean": 8261.5, "valid_targets_min": 575 }, { "epoch": 3.821292775665399, "grad_norm": 0.30427872241401077, "learning_rate": 2.03414608714995e-05, "loss": 0.2371, "loss_nan_ranks": 0, "loss_rank_avg": 0.1107184886932373, "step": 1005, "valid_targets_mean": 7423.7, "valid_targets_min": 632 }, { "epoch": 3.840304182509506, "grad_norm": 0.28349897038009053, "learning_rate": 2.0151766304490668e-05, "loss": 0.2374, "loss_nan_ranks": 0, "loss_rank_avg": 0.12353726476430893, "step": 1010, "valid_targets_mean": 7188.3, "valid_targets_min": 537 }, { "epoch": 3.859315589353612, "grad_norm": 0.29812579187843263, "learning_rate": 1.9962058082497944e-05, "loss": 0.1475, "loss_nan_ranks": 0, "loss_rank_avg": 0.055223409086465836, "step": 1015, "valid_targets_mean": 6371.1, "valid_targets_min": 1436 }, { "epoch": 3.8783269961977185, "grad_norm": 0.3305078447005314, "learning_rate": 1.9772353274281918e-05, "loss": 0.1138, "loss_nan_ranks": 0, "loss_rank_avg": 0.05937516316771507, "step": 1020, "valid_targets_mean": 6149.8, "valid_targets_min": 990 }, { "epoch": 3.897338403041825, "grad_norm": 0.26960355210295867, "learning_rate": 1.9582668948295998e-05, "loss": 0.11, "loss_nan_ranks": 0, "loss_rank_avg": 0.054687947034835815, "step": 1025, "valid_targets_mean": 6012.3, "valid_targets_min": 1088 }, { "epoch": 3.9163498098859315, "grad_norm": 0.23253306165339072, "learning_rate": 1.9393022171150755e-05, "loss": 0.1063, "loss_nan_ranks": 0, "loss_rank_avg": 0.05543830618262291, "step": 1030, "valid_targets_mean": 6088.7, "valid_targets_min": 1120 }, { "epoch": 3.935361216730038, "grad_norm": 0.2363002943662766, "learning_rate": 1.9203430006078348e-05, "loss": 0.1084, "loss_nan_ranks": 0, "loss_rank_avg": 0.054433271288871765, "step": 1035, "valid_targets_mean": 6360.9, "valid_targets_min": 1028 }, { "epoch": 3.9543726235741445, "grad_norm": 0.23732378066315013, "learning_rate": 1.9013909511397262e-05, "loss": 0.1071, "loss_nan_ranks": 0, "loss_rank_avg": 0.05254880711436272, "step": 1040, "valid_targets_mean": 6166.2, "valid_targets_min": 1260 }, { "epoch": 3.973384030418251, "grad_norm": 0.23101729553624345, "learning_rate": 1.882447773897755e-05, "loss": 0.1058, "loss_nan_ranks": 0, "loss_rank_avg": 0.054109666496515274, "step": 1045, "valid_targets_mean": 5871.1, "valid_targets_min": 1075 }, { "epoch": 3.9923954372623576, "grad_norm": 0.24036940107376528, "learning_rate": 1.8635151732706586e-05, "loss": 0.1062, "loss_nan_ranks": 0, "loss_rank_avg": 0.057960644364356995, "step": 1050, "valid_targets_mean": 6057.0, "valid_targets_min": 902 }, { "epoch": 4.011406844106464, "grad_norm": 0.3548766744558099, "learning_rate": 1.8445948526955555e-05, "loss": 0.1426, "loss_nan_ranks": 0, "loss_rank_avg": 0.07553637772798538, "step": 1055, "valid_targets_mean": 4033.6, "valid_targets_min": 1864 }, { "epoch": 4.030418250950571, "grad_norm": 0.43397890059403704, "learning_rate": 1.8256885145046837e-05, "loss": 0.1599, "loss_nan_ranks": 0, "loss_rank_avg": 0.08146659284830093, "step": 1060, "valid_targets_mean": 4246.9, "valid_targets_min": 1518 }, { "epoch": 4.0494296577946765, "grad_norm": 0.43481649208518697, "learning_rate": 1.8067978597722325e-05, "loss": 0.1487, "loss_nan_ranks": 0, "loss_rank_avg": 0.0713481679558754, "step": 1065, "valid_targets_mean": 4028.1, "valid_targets_min": 1955 }, { "epoch": 4.068441064638783, "grad_norm": 0.42609901166474995, "learning_rate": 1.787924588161291e-05, "loss": 0.1531, "loss_nan_ranks": 0, "loss_rank_avg": 0.0739174485206604, "step": 1070, "valid_targets_mean": 4385.1, "valid_targets_min": 1335 }, { "epoch": 4.08745247148289, "grad_norm": 0.3613193797257685, "learning_rate": 1.7690703977709248e-05, "loss": 0.1458, "loss_nan_ranks": 0, "loss_rank_avg": 0.06789538264274597, "step": 1075, "valid_targets_mean": 4030.6, "valid_targets_min": 1185 }, { "epoch": 4.106463878326996, "grad_norm": 0.36319797246159663, "learning_rate": 1.7502369849833908e-05, "loss": 0.1458, "loss_nan_ranks": 0, "loss_rank_avg": 0.07442685216665268, "step": 1080, "valid_targets_mean": 4733.2, "valid_targets_min": 2193 }, { "epoch": 4.1254752851711025, "grad_norm": 0.33464784259493086, "learning_rate": 1.7314260443115046e-05, "loss": 0.1421, "loss_nan_ranks": 0, "loss_rank_avg": 0.06767291575670242, "step": 1085, "valid_targets_mean": 4051.0, "valid_targets_min": 1738 }, { "epoch": 4.144486692015209, "grad_norm": 0.3452301476773902, "learning_rate": 1.712639268246184e-05, "loss": 0.1363, "loss_nan_ranks": 0, "loss_rank_avg": 0.06405071169137955, "step": 1090, "valid_targets_mean": 4000.5, "valid_targets_min": 1674 }, { "epoch": 4.163498098859316, "grad_norm": 0.3443207863514706, "learning_rate": 1.6938783471041647e-05, "loss": 0.1378, "loss_nan_ranks": 0, "loss_rank_avg": 0.06821317225694656, "step": 1095, "valid_targets_mean": 4199.9, "valid_targets_min": 1924 }, { "epoch": 4.182509505703422, "grad_norm": 0.3598105706642877, "learning_rate": 1.6751449688759194e-05, "loss": 0.1315, "loss_nan_ranks": 0, "loss_rank_avg": 0.05240233615040779, "step": 1100, "valid_targets_mean": 5206.1, "valid_targets_min": 2822 }, { "epoch": 4.201520912547529, "grad_norm": 0.3092390079700352, "learning_rate": 1.65644081907378e-05, "loss": 0.1107, "loss_nan_ranks": 0, "loss_rank_avg": 0.054483313113451004, "step": 1105, "valid_targets_mean": 5036.4, "valid_targets_min": 1421 }, { "epoch": 4.220532319391635, "grad_norm": 0.2763575400218225, "learning_rate": 1.6377675805802882e-05, "loss": 0.1094, "loss_nan_ranks": 0, "loss_rank_avg": 0.048552319407463074, "step": 1110, "valid_targets_mean": 5392.4, "valid_targets_min": 1778 }, { "epoch": 4.239543726235741, "grad_norm": 0.28272183808385165, "learning_rate": 1.6191269334967796e-05, "loss": 0.1004, "loss_nan_ranks": 0, "loss_rank_avg": 0.05730822682380676, "step": 1115, "valid_targets_mean": 5387.4, "valid_targets_min": 2268 }, { "epoch": 4.258555133079848, "grad_norm": 0.25657261472183496, "learning_rate": 1.6005205549922173e-05, "loss": 0.0997, "loss_nan_ranks": 0, "loss_rank_avg": 0.047337133437395096, "step": 1120, "valid_targets_mean": 5138.0, "valid_targets_min": 1548 }, { "epoch": 4.277566539923955, "grad_norm": 0.25086844602226965, "learning_rate": 1.5819501191522917e-05, "loss": 0.0918, "loss_nan_ranks": 0, "loss_rank_avg": 0.04738449677824974, "step": 1125, "valid_targets_mean": 4930.4, "valid_targets_min": 2597 }, { "epoch": 4.2965779467680605, "grad_norm": 0.23414973432884928, "learning_rate": 1.5634172968287974e-05, "loss": 0.0916, "loss_nan_ranks": 0, "loss_rank_avg": 0.039102550595998764, "step": 1130, "valid_targets_mean": 4864.0, "valid_targets_min": 2904 }, { "epoch": 4.315589353612167, "grad_norm": 0.22679376081285432, "learning_rate": 1.5449237554892997e-05, "loss": 0.0879, "loss_nan_ranks": 0, "loss_rank_avg": 0.046861957758665085, "step": 1135, "valid_targets_mean": 5411.9, "valid_targets_min": 2204 }, { "epoch": 4.334600760456274, "grad_norm": 0.27215497448339554, "learning_rate": 1.5264711590671067e-05, "loss": 0.0864, "loss_nan_ranks": 0, "loss_rank_avg": 0.04249775409698486, "step": 1140, "valid_targets_mean": 5154.8, "valid_targets_min": 1423 }, { "epoch": 4.35361216730038, "grad_norm": 0.23781571763344966, "learning_rate": 1.5080611678115585e-05, "loss": 0.0855, "loss_nan_ranks": 0, "loss_rank_avg": 0.04478433355689049, "step": 1145, "valid_targets_mean": 5145.1, "valid_targets_min": 3055 }, { "epoch": 4.3726235741444865, "grad_norm": 0.25822483894259435, "learning_rate": 1.4896954381386477e-05, "loss": 0.0844, "loss_nan_ranks": 0, "loss_rank_avg": 0.043877530843019485, "step": 1150, "valid_targets_mean": 5636.9, "valid_targets_min": 2620 }, { "epoch": 4.391634980988593, "grad_norm": 0.26714599071066003, "learning_rate": 1.4713756224819872e-05, "loss": 0.0829, "loss_nan_ranks": 0, "loss_rank_avg": 0.04205765202641487, "step": 1155, "valid_targets_mean": 5523.9, "valid_targets_min": 3190 }, { "epoch": 4.4106463878327, "grad_norm": 0.23416180161057334, "learning_rate": 1.453103369144134e-05, "loss": 0.0807, "loss_nan_ranks": 0, "loss_rank_avg": 0.03860076516866684, "step": 1160, "valid_targets_mean": 4908.3, "valid_targets_min": 2088 }, { "epoch": 4.429657794676806, "grad_norm": 0.24092755205006228, "learning_rate": 1.4348803221482828e-05, "loss": 0.0797, "loss_nan_ranks": 0, "loss_rank_avg": 0.03292986378073692, "step": 1165, "valid_targets_mean": 5170.9, "valid_targets_min": 2206 }, { "epoch": 4.448669201520913, "grad_norm": 0.2769123972364188, "learning_rate": 1.4167081210903501e-05, "loss": 0.0819, "loss_nan_ranks": 0, "loss_rank_avg": 0.04898636043071747, "step": 1170, "valid_targets_mean": 5348.1, "valid_targets_min": 2959 }, { "epoch": 4.467680608365019, "grad_norm": 0.2687335450112676, "learning_rate": 1.3985884009914542e-05, "loss": 0.0843, "loss_nan_ranks": 0, "loss_rank_avg": 0.04135872796177864, "step": 1175, "valid_targets_mean": 5146.4, "valid_targets_min": 1864 }, { "epoch": 4.486692015209125, "grad_norm": 0.44423160941024686, "learning_rate": 1.3805227921508018e-05, "loss": 0.1042, "loss_nan_ranks": 0, "loss_rank_avg": 0.05701296404004097, "step": 1180, "valid_targets_mean": 3920.1, "valid_targets_min": 2054 }, { "epoch": 4.505703422053232, "grad_norm": 0.4307059905800158, "learning_rate": 1.3625129199990083e-05, "loss": 0.1137, "loss_nan_ranks": 0, "loss_rank_avg": 0.06144963577389717, "step": 1185, "valid_targets_mean": 4123.0, "valid_targets_min": 1702 }, { "epoch": 4.524714828897339, "grad_norm": 0.4735169631189255, "learning_rate": 1.3445604049518503e-05, "loss": 0.1136, "loss_nan_ranks": 0, "loss_rank_avg": 0.058991312980651855, "step": 1190, "valid_targets_mean": 4134.7, "valid_targets_min": 1894 }, { "epoch": 4.5437262357414445, "grad_norm": 0.47226895596823215, "learning_rate": 1.3266668622644696e-05, "loss": 0.1083, "loss_nan_ranks": 0, "loss_rank_avg": 0.05097019299864769, "step": 1195, "valid_targets_mean": 3485.5, "valid_targets_min": 1581 }, { "epoch": 4.562737642585551, "grad_norm": 0.4233485609105754, "learning_rate": 1.3088339018860439e-05, "loss": 0.1067, "loss_nan_ranks": 0, "loss_rank_avg": 0.05011675879359245, "step": 1200, "valid_targets_mean": 3915.2, "valid_targets_min": 1647 }, { "epoch": 4.581749049429658, "grad_norm": 0.39098121916184353, "learning_rate": 1.291063128314934e-05, "loss": 0.0977, "loss_nan_ranks": 0, "loss_rank_avg": 0.04419676959514618, "step": 1205, "valid_targets_mean": 3678.2, "valid_targets_min": 1771 }, { "epoch": 4.600760456273765, "grad_norm": 0.555201081378785, "learning_rate": 1.2733561404543177e-05, "loss": 0.1111, "loss_nan_ranks": 0, "loss_rank_avg": 0.12111985683441162, "step": 1210, "valid_targets_mean": 6885.0, "valid_targets_min": 675 }, { "epoch": 4.619771863117871, "grad_norm": 0.36519466564325903, "learning_rate": 1.2557145314683364e-05, "loss": 0.2596, "loss_nan_ranks": 0, "loss_rank_avg": 0.12528151273727417, "step": 1215, "valid_targets_mean": 7251.2, "valid_targets_min": 511 }, { "epoch": 4.638783269961977, "grad_norm": 0.3187163291454204, "learning_rate": 1.2381398886387466e-05, "loss": 0.2636, "loss_nan_ranks": 0, "loss_rank_avg": 0.13393153250217438, "step": 1220, "valid_targets_mean": 7708.2, "valid_targets_min": 667 }, { "epoch": 4.657794676806084, "grad_norm": 0.2853507467833514, "learning_rate": 1.2206337932221094e-05, "loss": 0.2495, "loss_nan_ranks": 0, "loss_rank_avg": 0.11738086491823196, "step": 1225, "valid_targets_mean": 6611.8, "valid_targets_min": 377 }, { "epoch": 4.67680608365019, "grad_norm": 0.27103089684314385, "learning_rate": 1.2031978203075172e-05, "loss": 0.2356, "loss_nan_ranks": 0, "loss_rank_avg": 0.1208362951874733, "step": 1230, "valid_targets_mean": 7745.3, "valid_targets_min": 726 }, { "epoch": 4.695817490494297, "grad_norm": 0.26218276824066433, "learning_rate": 1.185833538674879e-05, "loss": 0.2291, "loss_nan_ranks": 0, "loss_rank_avg": 0.11319077014923096, "step": 1235, "valid_targets_mean": 7991.2, "valid_targets_min": 647 }, { "epoch": 4.714828897338403, "grad_norm": 0.2604914676003978, "learning_rate": 1.1685425106537688e-05, "loss": 0.2306, "loss_nan_ranks": 0, "loss_rank_avg": 0.12409955263137817, "step": 1240, "valid_targets_mean": 8260.1, "valid_targets_min": 762 }, { "epoch": 4.733840304182509, "grad_norm": 0.3533617817645549, "learning_rate": 1.1513262919828603e-05, "loss": 0.2205, "loss_nan_ranks": 0, "loss_rank_avg": 0.12882865965366364, "step": 1245, "valid_targets_mean": 9195.9, "valid_targets_min": 815 }, { "epoch": 4.752851711026616, "grad_norm": 0.273675157838543, "learning_rate": 1.1341864316699463e-05, "loss": 0.2165, "loss_nan_ranks": 0, "loss_rank_avg": 0.10350463539361954, "step": 1250, "valid_targets_mean": 6803.8, "valid_targets_min": 553 }, { "epoch": 4.771863117870723, "grad_norm": 0.3301627103857975, "learning_rate": 1.1171244718525726e-05, "loss": 0.2198, "loss_nan_ranks": 0, "loss_rank_avg": 0.09687048941850662, "step": 1255, "valid_targets_mean": 7715.6, "valid_targets_min": 686 }, { "epoch": 4.7908745247148286, "grad_norm": 0.3183754012435459, "learning_rate": 1.100141947659288e-05, "loss": 0.2085, "loss_nan_ranks": 0, "loss_rank_avg": 0.11653482168912888, "step": 1260, "valid_targets_mean": 7937.8, "valid_targets_min": 657 }, { "epoch": 4.809885931558935, "grad_norm": 0.305218952013167, "learning_rate": 1.0832403870715153e-05, "loss": 0.2085, "loss_nan_ranks": 0, "loss_rank_avg": 0.10665573924779892, "step": 1265, "valid_targets_mean": 7325.4, "valid_targets_min": 636 }, { "epoch": 4.828897338403042, "grad_norm": 0.37392762584119593, "learning_rate": 1.0664213107860827e-05, "loss": 0.2021, "loss_nan_ranks": 0, "loss_rank_avg": 0.10478193312883377, "step": 1270, "valid_targets_mean": 7820.8, "valid_targets_min": 527 }, { "epoch": 4.847908745247148, "grad_norm": 0.28454754566571105, "learning_rate": 1.0496862320783926e-05, "loss": 0.1835, "loss_nan_ranks": 0, "loss_rank_avg": 0.048708055168390274, "step": 1275, "valid_targets_mean": 6358.3, "valid_targets_min": 1151 }, { "epoch": 4.866920152091255, "grad_norm": 0.2808196037434785, "learning_rate": 1.033036656666272e-05, "loss": 0.0922, "loss_nan_ranks": 0, "loss_rank_avg": 0.04468848183751106, "step": 1280, "valid_targets_mean": 6175.8, "valid_targets_min": 1314 }, { "epoch": 4.885931558935361, "grad_norm": 0.26500582432214914, "learning_rate": 1.016474082574495e-05, "loss": 0.0927, "loss_nan_ranks": 0, "loss_rank_avg": 0.04400516673922539, "step": 1285, "valid_targets_mean": 5880.3, "valid_targets_min": 1151 }, { "epoch": 4.904942965779467, "grad_norm": 0.24664528337491692, "learning_rate": 1.0000000000000006e-05, "loss": 0.0871, "loss_nan_ranks": 0, "loss_rank_avg": 0.03696668520569801, "step": 1290, "valid_targets_mean": 5792.0, "valid_targets_min": 1054 }, { "epoch": 4.923954372623574, "grad_norm": 0.2286048807471563, "learning_rate": 9.836158911778132e-06, "loss": 0.0859, "loss_nan_ranks": 0, "loss_rank_avg": 0.04410147666931152, "step": 1295, "valid_targets_mean": 6356.2, "valid_targets_min": 980 }, { "epoch": 4.942965779467681, "grad_norm": 0.20614916293927305, "learning_rate": 9.673232302476819e-06, "loss": 0.0837, "loss_nan_ranks": 0, "loss_rank_avg": 0.039480239152908325, "step": 1300, "valid_targets_mean": 5821.0, "valid_targets_min": 905 }, { "epoch": 4.961977186311787, "grad_norm": 0.1928349932521555, "learning_rate": 9.511234831214464e-06, "loss": 0.0836, "loss_nan_ranks": 0, "loss_rank_avg": 0.03988807275891304, "step": 1305, "valid_targets_mean": 5897.6, "valid_targets_min": 1223 }, { "epoch": 4.980988593155893, "grad_norm": 0.18807335044788295, "learning_rate": 9.350181073511412e-06, "loss": 0.0814, "loss_nan_ranks": 0, "loss_rank_avg": 0.039488207548856735, "step": 1310, "valid_targets_mean": 6550.6, "valid_targets_min": 1179 }, { "epoch": 5.0, "grad_norm": 0.21533958753633334, "learning_rate": 9.190085519978575e-06, "loss": 0.0827, "loss_nan_ranks": 0, "loss_rank_avg": 0.03866899386048317, "step": 1315, "valid_targets_mean": 5367.0, "valid_targets_min": 1264 }, { "epoch": 5.019011406844107, "grad_norm": 0.35656357662267013, "learning_rate": 9.030962575013622e-06, "loss": 0.119, "loss_nan_ranks": 0, "loss_rank_avg": 0.058930665254592896, "step": 1320, "valid_targets_mean": 4315.6, "valid_targets_min": 1507 }, { "epoch": 5.038022813688213, "grad_norm": 0.3993925897181664, "learning_rate": 8.872826555505012e-06, "loss": 0.1081, "loss_nan_ranks": 0, "loss_rank_avg": 0.05182621255517006, "step": 1325, "valid_targets_mean": 4631.1, "valid_targets_min": 1345 }, { "epoch": 5.057034220532319, "grad_norm": 0.39173542137242245, "learning_rate": 8.715691689543761e-06, "loss": 0.1037, "loss_nan_ranks": 0, "loss_rank_avg": 0.049891237169504166, "step": 1330, "valid_targets_mean": 4061.9, "valid_targets_min": 1929 }, { "epoch": 5.076045627376426, "grad_norm": 0.3346676502787186, "learning_rate": 8.559572115143406e-06, "loss": 0.1027, "loss_nan_ranks": 0, "loss_rank_avg": 0.053807903081178665, "step": 1335, "valid_targets_mean": 4492.1, "valid_targets_min": 1512 }, { "epoch": 5.095057034220532, "grad_norm": 0.33046866192129126, "learning_rate": 8.404481878967848e-06, "loss": 0.0986, "loss_nan_ranks": 0, "loss_rank_avg": 0.04590759798884392, "step": 1340, "valid_targets_mean": 4387.7, "valid_targets_min": 1324 }, { "epoch": 5.114068441064639, "grad_norm": 0.2862525406517934, "learning_rate": 8.250434935067593e-06, "loss": 0.0988, "loss_nan_ranks": 0, "loss_rank_avg": 0.050703514367341995, "step": 1345, "valid_targets_mean": 4403.1, "valid_targets_min": 2036 }, { "epoch": 5.133079847908745, "grad_norm": 0.2830684273551724, "learning_rate": 8.09744514362421e-06, "loss": 0.0936, "loss_nan_ranks": 0, "loss_rank_avg": 0.0482289083302021, "step": 1350, "valid_targets_mean": 4255.0, "valid_targets_min": 2083 }, { "epoch": 5.152091254752852, "grad_norm": 0.2936949182763945, "learning_rate": 7.945526269703295e-06, "loss": 0.0897, "loss_nan_ranks": 0, "loss_rank_avg": 0.042581137269735336, "step": 1355, "valid_targets_mean": 4031.0, "valid_targets_min": 1761 }, { "epoch": 5.171102661596958, "grad_norm": 0.32369753015268776, "learning_rate": 7.794691982015991e-06, "loss": 0.0881, "loss_nan_ranks": 0, "loss_rank_avg": 0.04366425797343254, "step": 1360, "valid_targets_mean": 4213.6, "valid_targets_min": 1827 }, { "epoch": 5.190114068441065, "grad_norm": 0.3876166105871689, "learning_rate": 7.644955851689129e-06, "loss": 0.0875, "loss_nan_ranks": 0, "loss_rank_avg": 0.04470920190215111, "step": 1365, "valid_targets_mean": 5610.1, "valid_targets_min": 2840 }, { "epoch": 5.2091254752851714, "grad_norm": 0.26999235527739424, "learning_rate": 7.496331351044226e-06, "loss": 0.0848, "loss_nan_ranks": 0, "loss_rank_avg": 0.04106168821454048, "step": 1370, "valid_targets_mean": 4945.7, "valid_targets_min": 1853 }, { "epoch": 5.228136882129277, "grad_norm": 0.25109703607220174, "learning_rate": 7.348831852385265e-06, "loss": 0.082, "loss_nan_ranks": 0, "loss_rank_avg": 0.03378221392631531, "step": 1375, "valid_targets_mean": 5207.0, "valid_targets_min": 2341 }, { "epoch": 5.247148288973384, "grad_norm": 0.2350369927621042, "learning_rate": 7.202470626795626e-06, "loss": 0.076, "loss_nan_ranks": 0, "loss_rank_avg": 0.03785305097699165, "step": 1380, "valid_targets_mean": 5175.4, "valid_targets_min": 2093 }, { "epoch": 5.266159695817491, "grad_norm": 0.21697067917279458, "learning_rate": 7.057260842943949e-06, "loss": 0.0728, "loss_nan_ranks": 0, "loss_rank_avg": 0.031559597700834274, "step": 1385, "valid_targets_mean": 5396.7, "valid_targets_min": 1885 }, { "epoch": 5.285171102661597, "grad_norm": 0.22438779122417016, "learning_rate": 6.9132155658993785e-06, "loss": 0.0676, "loss_nan_ranks": 0, "loss_rank_avg": 0.03445998206734657, "step": 1390, "valid_targets_mean": 5246.9, "valid_targets_min": 2203 }, { "epoch": 5.304182509505703, "grad_norm": 0.2117622397099648, "learning_rate": 6.770347755955982e-06, "loss": 0.0668, "loss_nan_ranks": 0, "loss_rank_avg": 0.03348778560757637, "step": 1395, "valid_targets_mean": 4957.9, "valid_targets_min": 1708 }, { "epoch": 5.32319391634981, "grad_norm": 0.200452647832453, "learning_rate": 6.628670267466697e-06, "loss": 0.0626, "loss_nan_ranks": 0, "loss_rank_avg": 0.028821026906371117, "step": 1400, "valid_targets_mean": 4951.1, "valid_targets_min": 1826 }, { "epoch": 5.342205323193916, "grad_norm": 0.21955745199536397, "learning_rate": 6.488195847686795e-06, "loss": 0.061, "loss_nan_ranks": 0, "loss_rank_avg": 0.029862603172659874, "step": 1405, "valid_targets_mean": 5112.8, "valid_targets_min": 2968 }, { "epoch": 5.361216730038023, "grad_norm": 0.22826378564937264, "learning_rate": 6.348937135626922e-06, "loss": 0.0605, "loss_nan_ranks": 0, "loss_rank_avg": 0.0312732495367527, "step": 1410, "valid_targets_mean": 5321.8, "valid_targets_min": 2382 }, { "epoch": 5.380228136882129, "grad_norm": 0.20427947265228474, "learning_rate": 6.210906660915938e-06, "loss": 0.0574, "loss_nan_ranks": 0, "loss_rank_avg": 0.02499421499669552, "step": 1415, "valid_targets_mean": 4986.9, "valid_targets_min": 2576 }, { "epoch": 5.399239543726236, "grad_norm": 0.22020819639088524, "learning_rate": 6.074116842673585e-06, "loss": 0.0583, "loss_nan_ranks": 0, "loss_rank_avg": 0.0311068594455719, "step": 1420, "valid_targets_mean": 5325.8, "valid_targets_min": 2928 }, { "epoch": 5.418250950570342, "grad_norm": 0.22574926260061123, "learning_rate": 5.938579988393099e-06, "loss": 0.0562, "loss_nan_ranks": 0, "loss_rank_avg": 0.029961155727505684, "step": 1425, "valid_targets_mean": 5161.7, "valid_targets_min": 1862 }, { "epoch": 5.437262357414449, "grad_norm": 0.20400844513752284, "learning_rate": 5.80430829283382e-06, "loss": 0.0535, "loss_nan_ranks": 0, "loss_rank_avg": 0.02954328991472721, "step": 1430, "valid_targets_mean": 4941.3, "valid_targets_min": 1507 }, { "epoch": 5.4562737642585555, "grad_norm": 0.22004866648638607, "learning_rate": 5.671313836924039e-06, "loss": 0.0581, "loss_nan_ranks": 0, "loss_rank_avg": 0.029622698202729225, "step": 1435, "valid_targets_mean": 5260.5, "valid_targets_min": 2625 }, { "epoch": 5.475285171102661, "grad_norm": 0.23172139547283307, "learning_rate": 5.539608586673988e-06, "loss": 0.0581, "loss_nan_ranks": 0, "loss_rank_avg": 0.032255351543426514, "step": 1440, "valid_targets_mean": 4271.5, "valid_targets_min": 1373 }, { "epoch": 5.494296577946768, "grad_norm": 0.30348623919330886, "learning_rate": 5.409204392099224e-06, "loss": 0.071, "loss_nan_ranks": 0, "loss_rank_avg": 0.029614215716719627, "step": 1445, "valid_targets_mean": 3648.6, "valid_targets_min": 1151 }, { "epoch": 5.513307984790875, "grad_norm": 0.3446203794591457, "learning_rate": 5.280112986154462e-06, "loss": 0.0721, "loss_nan_ranks": 0, "loss_rank_avg": 0.03383517637848854, "step": 1450, "valid_targets_mean": 3692.1, "valid_targets_min": 1583 }, { "epoch": 5.532319391634981, "grad_norm": 0.304903377781796, "learning_rate": 5.152345983677866e-06, "loss": 0.067, "loss_nan_ranks": 0, "loss_rank_avg": 0.03009621985256672, "step": 1455, "valid_targets_mean": 4084.4, "valid_targets_min": 1601 }, { "epoch": 5.551330798479087, "grad_norm": 0.2991592969123906, "learning_rate": 5.02591488034609e-06, "loss": 0.0671, "loss_nan_ranks": 0, "loss_rank_avg": 0.03557578846812248, "step": 1460, "valid_targets_mean": 3740.4, "valid_targets_min": 1825 }, { "epoch": 5.570342205323194, "grad_norm": 0.29912614724436565, "learning_rate": 4.900831051639892e-06, "loss": 0.0599, "loss_nan_ranks": 0, "loss_rank_avg": 0.03164946287870407, "step": 1465, "valid_targets_mean": 3882.0, "valid_targets_min": 1821 }, { "epoch": 5.589353612167301, "grad_norm": 0.27811420728368913, "learning_rate": 4.777105751820708e-06, "loss": 0.0576, "loss_nan_ranks": 0, "loss_rank_avg": 0.03166574239730835, "step": 1470, "valid_targets_mean": 4136.7, "valid_targets_min": 1743 }, { "epoch": 5.608365019011407, "grad_norm": 0.9103501807156557, "learning_rate": 4.654750112918007e-06, "loss": 0.1307, "loss_nan_ranks": 0, "loss_rank_avg": 0.11657539755105972, "step": 1475, "valid_targets_mean": 7437.8, "valid_targets_min": 591 }, { "epoch": 5.6273764258555135, "grad_norm": 0.5130315310560948, "learning_rate": 4.533775143727748e-06, "loss": 0.2505, "loss_nan_ranks": 0, "loss_rank_avg": 0.1216835007071495, "step": 1480, "valid_targets_mean": 7844.2, "valid_targets_min": 715 }, { "epoch": 5.64638783269962, "grad_norm": 0.38916103997249035, "learning_rate": 4.414191728821838e-06, "loss": 0.2382, "loss_nan_ranks": 0, "loss_rank_avg": 0.12430062144994736, "step": 1485, "valid_targets_mean": 7960.9, "valid_targets_min": 733 }, { "epoch": 5.665399239543726, "grad_norm": 0.34346190346026684, "learning_rate": 4.296010627568823e-06, "loss": 0.2253, "loss_nan_ranks": 0, "loss_rank_avg": 0.1269465535879135, "step": 1490, "valid_targets_mean": 8483.1, "valid_targets_min": 759 }, { "epoch": 5.684410646387833, "grad_norm": 0.26852843419670336, "learning_rate": 4.17924247316585e-06, "loss": 0.2085, "loss_nan_ranks": 0, "loss_rank_avg": 0.1000281274318695, "step": 1495, "valid_targets_mean": 7507.5, "valid_targets_min": 702 }, { "epoch": 5.7034220532319395, "grad_norm": 0.23814374694727244, "learning_rate": 4.0638977716819105e-06, "loss": 0.2061, "loss_nan_ranks": 0, "loss_rank_avg": 0.09797212481498718, "step": 1500, "valid_targets_mean": 6985.9, "valid_targets_min": 647 }, { "epoch": 5.722433460076045, "grad_norm": 0.26229166135458226, "learning_rate": 3.949986901112608e-06, "loss": 0.197, "loss_nan_ranks": 0, "loss_rank_avg": 0.10039948672056198, "step": 1505, "valid_targets_mean": 7685.7, "valid_targets_min": 592 }, { "epoch": 5.741444866920152, "grad_norm": 0.24951933668512646, "learning_rate": 3.837520110446391e-06, "loss": 0.1978, "loss_nan_ranks": 0, "loss_rank_avg": 0.09159765392541885, "step": 1510, "valid_targets_mean": 7207.1, "valid_targets_min": 682 }, { "epoch": 5.760456273764259, "grad_norm": 0.28127344204747545, "learning_rate": 3.7265075187424373e-06, "loss": 0.1895, "loss_nan_ranks": 0, "loss_rank_avg": 0.10092798620462418, "step": 1515, "valid_targets_mean": 7820.0, "valid_targets_min": 747 }, { "epoch": 5.779467680608365, "grad_norm": 0.2563140724682205, "learning_rate": 3.616959114220162e-06, "loss": 0.1867, "loss_nan_ranks": 0, "loss_rank_avg": 0.10091284662485123, "step": 1520, "valid_targets_mean": 8022.3, "valid_targets_min": 727 }, { "epoch": 5.798479087452471, "grad_norm": 0.27238599596323204, "learning_rate": 3.508884753360593e-06, "loss": 0.1801, "loss_nan_ranks": 0, "loss_rank_avg": 0.08664939552545547, "step": 1525, "valid_targets_mean": 7562.2, "valid_targets_min": 661 }, { "epoch": 5.817490494296578, "grad_norm": 0.2922682553948556, "learning_rate": 3.402294160019499e-06, "loss": 0.1771, "loss_nan_ranks": 0, "loss_rank_avg": 0.09690836071968079, "step": 1530, "valid_targets_mean": 8467.6, "valid_targets_min": 409 }, { "epoch": 5.836501901140684, "grad_norm": 0.2878635630184777, "learning_rate": 3.2971969245525215e-06, "loss": 0.1682, "loss_nan_ranks": 0, "loss_rank_avg": 0.09080066531896591, "step": 1535, "valid_targets_mean": 7542.6, "valid_targets_min": 781 }, { "epoch": 5.855513307984791, "grad_norm": 0.23862395323068222, "learning_rate": 3.193602502952291e-06, "loss": 0.1197, "loss_nan_ranks": 0, "loss_rank_avg": 0.038949862122535706, "step": 1540, "valid_targets_mean": 5823.9, "valid_targets_min": 1136 }, { "epoch": 5.8745247148288975, "grad_norm": 0.2387967291215592, "learning_rate": 3.0915202159976453e-06, "loss": 0.0769, "loss_nan_ranks": 0, "loss_rank_avg": 0.042837340384721756, "step": 1545, "valid_targets_mean": 6544.8, "valid_targets_min": 1446 }, { "epoch": 5.893536121673003, "grad_norm": 0.1881656636618961, "learning_rate": 2.9909592484149795e-06, "loss": 0.0752, "loss_nan_ranks": 0, "loss_rank_avg": 0.0399971567094326, "step": 1550, "valid_targets_mean": 6424.3, "valid_targets_min": 1291 }, { "epoch": 5.91254752851711, "grad_norm": 0.17058202140819237, "learning_rate": 2.8919286480518803e-06, "loss": 0.07, "loss_nan_ranks": 0, "loss_rank_avg": 0.03311106935143471, "step": 1555, "valid_targets_mean": 5882.0, "valid_targets_min": 979 }, { "epoch": 5.931558935361217, "grad_norm": 0.1711908706695631, "learning_rate": 2.794437325063064e-06, "loss": 0.0703, "loss_nan_ranks": 0, "loss_rank_avg": 0.03513427823781967, "step": 1560, "valid_targets_mean": 6166.6, "valid_targets_min": 933 }, { "epoch": 5.9505703422053235, "grad_norm": 0.17206848728883706, "learning_rate": 2.6984940511086665e-06, "loss": 0.0689, "loss_nan_ranks": 0, "loss_rank_avg": 0.03399783745408058, "step": 1565, "valid_targets_mean": 5971.0, "valid_targets_min": 1196 }, { "epoch": 5.969581749049429, "grad_norm": 0.16216773283513025, "learning_rate": 2.604107458565066e-06, "loss": 0.0667, "loss_nan_ranks": 0, "loss_rank_avg": 0.03391076996922493, "step": 1570, "valid_targets_mean": 6159.1, "valid_targets_min": 1238 }, { "epoch": 5.988593155893536, "grad_norm": 0.16031421414918762, "learning_rate": 2.5112860397481553e-06, "loss": 0.0655, "loss_nan_ranks": 0, "loss_rank_avg": 0.032533951103687286, "step": 1575, "valid_targets_mean": 6163.3, "valid_targets_min": 1300 }, { "epoch": 6.007604562737643, "grad_norm": 0.3155276653530433, "learning_rate": 2.4200381461492817e-06, "loss": 0.0711, "loss_nan_ranks": 0, "loss_rank_avg": 0.03919023647904396, "step": 1580, "valid_targets_mean": 4252.4, "valid_targets_min": 2118 }, { "epoch": 6.026615969581749, "grad_norm": 0.28335210258711535, "learning_rate": 2.330371987683815e-06, "loss": 0.0839, "loss_nan_ranks": 0, "loss_rank_avg": 0.04079696908593178, "step": 1585, "valid_targets_mean": 4523.0, "valid_targets_min": 1550 }, { "epoch": 6.0456273764258555, "grad_norm": 0.3167321882363807, "learning_rate": 2.242295631952496e-06, "loss": 0.0764, "loss_nan_ranks": 0, "loss_rank_avg": 0.03657423332333565, "step": 1590, "valid_targets_mean": 4048.7, "valid_targets_min": 2023 }, { "epoch": 6.064638783269962, "grad_norm": 0.278519071520387, "learning_rate": 2.155817003515539e-06, "loss": 0.0716, "loss_nan_ranks": 0, "loss_rank_avg": 0.03454483672976494, "step": 1595, "valid_targets_mean": 4060.6, "valid_targets_min": 1807 }, { "epoch": 6.083650190114068, "grad_norm": 0.2629161013037092, "learning_rate": 2.0709438831796303e-06, "loss": 0.0686, "loss_nan_ranks": 0, "loss_rank_avg": 0.03606827184557915, "step": 1600, "valid_targets_mean": 4122.0, "valid_targets_min": 2089 }, { "epoch": 6.102661596958175, "grad_norm": 0.23665623570717814, "learning_rate": 1.987683907297888e-06, "loss": 0.0665, "loss_nan_ranks": 0, "loss_rank_avg": 0.03397540748119354, "step": 1605, "valid_targets_mean": 4094.8, "valid_targets_min": 1660 }, { "epoch": 6.1216730038022815, "grad_norm": 0.23188699309209682, "learning_rate": 1.9060445670827477e-06, "loss": 0.0678, "loss_nan_ranks": 0, "loss_rank_avg": 0.03223345801234245, "step": 1610, "valid_targets_mean": 3793.4, "valid_targets_min": 2184 }, { "epoch": 6.140684410646388, "grad_norm": 0.22419403701656915, "learning_rate": 1.826033207932001e-06, "loss": 0.0619, "loss_nan_ranks": 0, "loss_rank_avg": 0.027402834966778755, "step": 1615, "valid_targets_mean": 4053.2, "valid_targets_min": 1891 }, { "epoch": 6.159695817490494, "grad_norm": 0.22022828114085813, "learning_rate": 1.7476570287678396e-06, "loss": 0.0606, "loss_nan_ranks": 0, "loss_rank_avg": 0.02872721664607525, "step": 1620, "valid_targets_mean": 4274.1, "valid_targets_min": 1885 }, { "epoch": 6.178707224334601, "grad_norm": 0.21969595617322446, "learning_rate": 1.6709230813892042e-06, "loss": 0.0583, "loss_nan_ranks": 0, "loss_rank_avg": 0.03280468285083771, "step": 1625, "valid_targets_mean": 4422.8, "valid_targets_min": 1908 }, { "epoch": 6.197718631178708, "grad_norm": 0.35529496426467005, "learning_rate": 1.5958382698372644e-06, "loss": 0.0645, "loss_nan_ranks": 0, "loss_rank_avg": 0.033318690955638885, "step": 1630, "valid_targets_mean": 5303.7, "valid_targets_min": 2553 }, { "epoch": 6.216730038022813, "grad_norm": 0.27558919056523234, "learning_rate": 1.5224093497742654e-06, "loss": 0.0705, "loss_nan_ranks": 0, "loss_rank_avg": 0.035232577472925186, "step": 1635, "valid_targets_mean": 5254.6, "valid_targets_min": 2168 }, { "epoch": 6.23574144486692, "grad_norm": 0.22612201515640176, "learning_rate": 1.4506429278756672e-06, "loss": 0.0617, "loss_nan_ranks": 0, "loss_rank_avg": 0.029211828485131264, "step": 1640, "valid_targets_mean": 4881.5, "valid_targets_min": 1899 }, { "epoch": 6.254752851711027, "grad_norm": 0.20633288036705308, "learning_rate": 1.380545461235736e-06, "loss": 0.0607, "loss_nan_ranks": 0, "loss_rank_avg": 0.029766544699668884, "step": 1645, "valid_targets_mean": 5599.6, "valid_targets_min": 2212 }, { "epoch": 6.273764258555133, "grad_norm": 0.2016643153825409, "learning_rate": 1.3121232567865793e-06, "loss": 0.0542, "loss_nan_ranks": 0, "loss_rank_avg": 0.025733182206749916, "step": 1650, "valid_targets_mean": 4711.3, "valid_targets_min": 1625 }, { "epoch": 6.2927756653992395, "grad_norm": 0.18952082398350228, "learning_rate": 1.2453824707306628e-06, "loss": 0.0538, "loss_nan_ranks": 0, "loss_rank_avg": 0.030064953491091728, "step": 1655, "valid_targets_mean": 5566.0, "valid_targets_min": 1935 }, { "epoch": 6.311787072243346, "grad_norm": 0.16436076167259167, "learning_rate": 1.180329107986955e-06, "loss": 0.0504, "loss_nan_ranks": 0, "loss_rank_avg": 0.019878484308719635, "step": 1660, "valid_targets_mean": 4913.7, "valid_targets_min": 2127 }, { "epoch": 6.330798479087452, "grad_norm": 0.15922497867390237, "learning_rate": 1.1169690216505846e-06, "loss": 0.0477, "loss_nan_ranks": 0, "loss_rank_avg": 0.021651925519108772, "step": 1665, "valid_targets_mean": 4946.9, "valid_targets_min": 2188 }, { "epoch": 6.349809885931559, "grad_norm": 0.16748076773602358, "learning_rate": 1.0553079124662768e-06, "loss": 0.0476, "loss_nan_ranks": 0, "loss_rank_avg": 0.02340800315141678, "step": 1670, "valid_targets_mean": 5133.6, "valid_targets_min": 1787 }, { "epoch": 6.3688212927756656, "grad_norm": 0.16380388178076224, "learning_rate": 9.953513283153905e-07, "loss": 0.0454, "loss_nan_ranks": 0, "loss_rank_avg": 0.02057066187262535, "step": 1675, "valid_targets_mean": 5151.9, "valid_targets_min": 2159 }, { "epoch": 6.387832699619771, "grad_norm": 0.16325774776610502, "learning_rate": 9.371046637167835e-07, "loss": 0.0438, "loss_nan_ranks": 0, "loss_rank_avg": 0.023373225703835487, "step": 1680, "valid_targets_mean": 4959.3, "valid_targets_min": 1217 }, { "epoch": 6.406844106463878, "grad_norm": 0.16441190471893835, "learning_rate": 8.805731593414268e-07, "loss": 0.0442, "loss_nan_ranks": 0, "loss_rank_avg": 0.018379444256424904, "step": 1685, "valid_targets_mean": 5263.2, "valid_targets_min": 2115 }, { "epoch": 6.425855513307985, "grad_norm": 0.1638841692314567, "learning_rate": 8.25761901540889e-07, "loss": 0.043, "loss_nan_ranks": 0, "loss_rank_avg": 0.01931699924170971, "step": 1690, "valid_targets_mean": 5157.2, "valid_targets_min": 1833 }, { "epoch": 6.444866920152092, "grad_norm": 0.1735206177979583, "learning_rate": 7.726758218897079e-07, "loss": 0.0401, "loss_nan_ranks": 0, "loss_rank_avg": 0.02396308444440365, "step": 1695, "valid_targets_mean": 5271.4, "valid_targets_min": 2529 }, { "epoch": 6.4638783269961975, "grad_norm": 0.17296056006964222, "learning_rate": 7.213196967416624e-07, "loss": 0.0448, "loss_nan_ranks": 0, "loss_rank_avg": 0.02491677738726139, "step": 1700, "valid_targets_mean": 5138.1, "valid_targets_min": 1934 }, { "epoch": 6.482889733840304, "grad_norm": 0.25309281952445806, "learning_rate": 6.716981468000372e-07, "loss": 0.0453, "loss_nan_ranks": 0, "loss_rank_avg": 0.025967717170715332, "step": 1705, "valid_targets_mean": 3941.6, "valid_targets_min": 1710 }, { "epoch": 6.501901140684411, "grad_norm": 0.2599714526800321, "learning_rate": 6.238156367018744e-07, "loss": 0.0495, "loss_nan_ranks": 0, "loss_rank_avg": 0.025150617584586143, "step": 1710, "valid_targets_mean": 4081.7, "valid_targets_min": 1714 }, { "epoch": 6.520912547528517, "grad_norm": 0.2665439212567454, "learning_rate": 5.776764746162778e-07, "loss": 0.0516, "loss_nan_ranks": 0, "loss_rank_avg": 0.02574659138917923, "step": 1715, "valid_targets_mean": 3830.1, "valid_targets_min": 1672 }, { "epoch": 6.5399239543726235, "grad_norm": 0.23963977576827142, "learning_rate": 5.332848118567891e-07, "loss": 0.0464, "loss_nan_ranks": 0, "loss_rank_avg": 0.023752614855766296, "step": 1720, "valid_targets_mean": 3843.3, "valid_targets_min": 1308 }, { "epoch": 6.55893536121673, "grad_norm": 0.2133991211840642, "learning_rate": 4.906446425078782e-07, "loss": 0.046, "loss_nan_ranks": 0, "loss_rank_avg": 0.02487846277654171, "step": 1725, "valid_targets_mean": 3924.0, "valid_targets_min": 1308 }, { "epoch": 6.577946768060836, "grad_norm": 0.19507916434897585, "learning_rate": 4.497598030655814e-07, "loss": 0.0402, "loss_nan_ranks": 0, "loss_rank_avg": 0.018777957186102867, "step": 1730, "valid_targets_mean": 3620.2, "valid_targets_min": 1428 }, { "epoch": 6.596958174904943, "grad_norm": 0.18710157722199877, "learning_rate": 4.106339720923136e-07, "loss": 0.0375, "loss_nan_ranks": 0, "loss_rank_avg": 0.01577587239444256, "step": 1735, "valid_targets_mean": 3715.8, "valid_targets_min": 1498 }, { "epoch": 6.61596958174905, "grad_norm": 1.1221845368347991, "learning_rate": 3.732706698859012e-07, "loss": 0.1824, "loss_nan_ranks": 0, "loss_rank_avg": 0.11630845069885254, "step": 1740, "valid_targets_mean": 7975.3, "valid_targets_min": 657 }, { "epoch": 6.634980988593156, "grad_norm": 1.173169716414322, "learning_rate": 3.376732581628406e-07, "loss": 0.2483, "loss_nan_ranks": 0, "loss_rank_avg": 0.11798709630966187, "step": 1745, "valid_targets_mean": 7661.3, "valid_targets_min": 711 }, { "epoch": 6.653992395437262, "grad_norm": 0.7806879785562468, "learning_rate": 3.038449397558396e-07, "loss": 0.2357, "loss_nan_ranks": 0, "loss_rank_avg": 0.11574181169271469, "step": 1750, "valid_targets_mean": 7740.2, "valid_targets_min": 588 }, { "epoch": 6.673003802281369, "grad_norm": 0.6320222275437308, "learning_rate": 2.7178875832563734e-07, "loss": 0.216, "loss_nan_ranks": 0, "loss_rank_avg": 0.10704654455184937, "step": 1755, "valid_targets_mean": 7938.4, "valid_targets_min": 531 }, { "epoch": 6.692015209125476, "grad_norm": 0.45835070478875656, "learning_rate": 2.4150759808716283e-07, "loss": 0.199, "loss_nan_ranks": 0, "loss_rank_avg": 0.09011327475309372, "step": 1760, "valid_targets_mean": 6955.9, "valid_targets_min": 674 }, { "epoch": 6.7110266159695815, "grad_norm": 0.3833941721398251, "learning_rate": 2.1300418355002296e-07, "loss": 0.1962, "loss_nan_ranks": 0, "loss_rank_avg": 0.09300309419631958, "step": 1765, "valid_targets_mean": 7178.0, "valid_targets_min": 605 }, { "epoch": 6.730038022813688, "grad_norm": 0.3737621062405827, "learning_rate": 1.862810792733849e-07, "loss": 0.1848, "loss_nan_ranks": 0, "loss_rank_avg": 0.10089872032403946, "step": 1770, "valid_targets_mean": 8065.9, "valid_targets_min": 693 }, { "epoch": 6.749049429657795, "grad_norm": 0.337929681633567, "learning_rate": 1.6134068963520988e-07, "loss": 0.1869, "loss_nan_ranks": 0, "loss_rank_avg": 0.08463618904352188, "step": 1775, "valid_targets_mean": 7800.7, "valid_targets_min": 413 }, { "epoch": 6.768060836501901, "grad_norm": 0.3300710696811267, "learning_rate": 1.381852586159349e-07, "loss": 0.1839, "loss_nan_ranks": 0, "loss_rank_avg": 0.09186343103647232, "step": 1780, "valid_targets_mean": 7847.2, "valid_targets_min": 695 }, { "epoch": 6.787072243346008, "grad_norm": 0.30122393387768587, "learning_rate": 1.1681686959657879e-07, "loss": 0.1703, "loss_nan_ranks": 0, "loss_rank_avg": 0.07956251502037048, "step": 1785, "valid_targets_mean": 7130.6, "valid_targets_min": 614 }, { "epoch": 6.806083650190114, "grad_norm": 0.2834417787280566, "learning_rate": 9.723744517128098e-08, "loss": 0.1725, "loss_nan_ranks": 0, "loss_rank_avg": 0.08768697828054428, "step": 1790, "valid_targets_mean": 7978.8, "valid_targets_min": 598 }, { "epoch": 6.82509505703422, "grad_norm": 0.30669459692474976, "learning_rate": 7.944874697432436e-08, "loss": 0.1565, "loss_nan_ranks": 0, "loss_rank_avg": 0.07766545563936234, "step": 1795, "valid_targets_mean": 7157.9, "valid_targets_min": 589 }, { "epoch": 6.844106463878327, "grad_norm": 0.30499859672371843, "learning_rate": 6.345237552163541e-08, "loss": 0.1559, "loss_nan_ranks": 0, "loss_rank_avg": 0.08147227019071579, "step": 1800, "valid_targets_mean": 7234.4, "valid_targets_min": 593 }, { "epoch": 6.863117870722434, "grad_norm": 0.2567755838541607, "learning_rate": 4.9249770066777113e-08, "loss": 0.0762, "loss_nan_ranks": 0, "loss_rank_avg": 0.03280698135495186, "step": 1805, "valid_targets_mean": 6154.4, "valid_targets_min": 1363 }, { "epoch": 6.8821292775665395, "grad_norm": 0.22981178838040223, "learning_rate": 3.684220847145481e-08, "loss": 0.072, "loss_nan_ranks": 0, "loss_rank_avg": 0.03351025655865669, "step": 1810, "valid_targets_mean": 5865.2, "valid_targets_min": 970 }, { "epoch": 6.901140684410646, "grad_norm": 0.23048498681871035, "learning_rate": 2.623080709054149e-08, "loss": 0.0685, "loss_nan_ranks": 0, "loss_rank_avg": 0.03717043623328209, "step": 1815, "valid_targets_mean": 6350.6, "valid_targets_min": 972 }, { "epoch": 6.920152091254753, "grad_norm": 0.198959962331718, "learning_rate": 1.7416520671635905e-08, "loss": 0.0647, "loss_nan_ranks": 0, "loss_rank_avg": 0.03324683755636215, "step": 1820, "valid_targets_mean": 5735.5, "valid_targets_min": 959 }, { "epoch": 6.93916349809886, "grad_norm": 0.186783859845399, "learning_rate": 1.0400142269164637e-08, "loss": 0.0654, "loss_nan_ranks": 0, "loss_rank_avg": 0.03211604431271553, "step": 1825, "valid_targets_mean": 6206.8, "valid_targets_min": 1281 }, { "epoch": 6.9581749049429655, "grad_norm": 0.19629442490410282, "learning_rate": 5.182303173016934e-09, "loss": 0.0644, "loss_nan_ranks": 0, "loss_rank_avg": 0.03166944161057472, "step": 1830, "valid_targets_mean": 6104.3, "valid_targets_min": 1326 }, { "epoch": 6.977186311787072, "grad_norm": 0.1829179481431056, "learning_rate": 1.7634728517545996e-09, "loss": 0.0629, "loss_nan_ranks": 0, "loss_rank_avg": 0.029660126194357872, "step": 1835, "valid_targets_mean": 6447.5, "valid_targets_min": 1353 }, { "epoch": 6.996197718631179, "grad_norm": 0.18366914112103155, "learning_rate": 1.439589103724437e-10, "loss": 0.0618, "loss_nan_ranks": 0, "loss_rank_avg": 0.031334131956100464, "step": 1840, "valid_targets_mean": 5833.1, "valid_targets_min": 1011 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.022832343354821205, "step": 1841, "total_flos": 4.836413774367818e+18, "train_loss": 0.19618723029549, "train_runtime": 26839.4318, "train_samples_per_second": 6.577, "train_steps_per_second": 0.069, "valid_targets_mean": 5367.0, "valid_targets_min": 1264 } ], "logging_steps": 5, "max_steps": 1841, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 750, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.836413774367818e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }