{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1495, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016722408026755852, "grad_norm": 1.7245030312935132, "learning_rate": 1.066666666666667e-06, "loss": 0.6095, "loss_nan_ranks": 0, "loss_rank_avg": 0.61011803150177, "step": 5, "valid_targets_mean": 4391.3, "valid_targets_min": 2139 }, { "epoch": 0.033444816053511704, "grad_norm": 1.624140492424831, "learning_rate": 2.4000000000000003e-06, "loss": 0.6041, "loss_nan_ranks": 0, "loss_rank_avg": 0.6080305576324463, "step": 10, "valid_targets_mean": 4457.5, "valid_targets_min": 1499 }, { "epoch": 0.05016722408026756, "grad_norm": 0.7928560412243121, "learning_rate": 3.7333333333333337e-06, "loss": 0.5469, "loss_nan_ranks": 0, "loss_rank_avg": 0.5168237090110779, "step": 15, "valid_targets_mean": 5488.9, "valid_targets_min": 1859 }, { "epoch": 0.06688963210702341, "grad_norm": 0.7205074736901347, "learning_rate": 5.0666666666666676e-06, "loss": 0.5019, "loss_nan_ranks": 0, "loss_rank_avg": 0.4919741153717041, "step": 20, "valid_targets_mean": 5287.0, "valid_targets_min": 2052 }, { "epoch": 0.08361204013377926, "grad_norm": 0.534571821916365, "learning_rate": 6.4000000000000006e-06, "loss": 0.4857, "loss_nan_ranks": 0, "loss_rank_avg": 0.477079302072525, "step": 25, "valid_targets_mean": 5306.9, "valid_targets_min": 1972 }, { "epoch": 0.10033444816053512, "grad_norm": 0.44837090708345023, "learning_rate": 7.733333333333334e-06, "loss": 0.4705, "loss_nan_ranks": 0, "loss_rank_avg": 0.4555327892303467, "step": 30, "valid_targets_mean": 5720.4, "valid_targets_min": 2222 }, { "epoch": 0.11705685618729098, "grad_norm": 0.35788786516329824, "learning_rate": 9.066666666666667e-06, "loss": 0.4417, "loss_nan_ranks": 0, "loss_rank_avg": 0.44564685225486755, "step": 35, "valid_targets_mean": 5430.3, "valid_targets_min": 2203 }, { "epoch": 0.13377926421404682, "grad_norm": 0.31240093691004855, "learning_rate": 1.04e-05, "loss": 0.4382, "loss_nan_ranks": 0, "loss_rank_avg": 0.4372028410434723, "step": 40, "valid_targets_mean": 5302.0, "valid_targets_min": 1151 }, { "epoch": 0.1505016722408027, "grad_norm": 0.29715421306979134, "learning_rate": 1.1733333333333335e-05, "loss": 0.4157, "loss_nan_ranks": 0, "loss_rank_avg": 0.41423460841178894, "step": 45, "valid_targets_mean": 5404.0, "valid_targets_min": 1923 }, { "epoch": 0.16722408026755853, "grad_norm": 0.2763389239522811, "learning_rate": 1.3066666666666668e-05, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.41930079460144043, "step": 50, "valid_targets_mean": 4993.2, "valid_targets_min": 1994 }, { "epoch": 0.18394648829431437, "grad_norm": 0.3019811563327018, "learning_rate": 1.4400000000000001e-05, "loss": 0.3584, "loss_nan_ranks": 0, "loss_rank_avg": 0.29694250226020813, "step": 55, "valid_targets_mean": 6149.0, "valid_targets_min": 1519 }, { "epoch": 0.20066889632107024, "grad_norm": 0.2682535799475, "learning_rate": 1.5733333333333334e-05, "loss": 0.2838, "loss_nan_ranks": 0, "loss_rank_avg": 0.2889573276042938, "step": 60, "valid_targets_mean": 6292.9, "valid_targets_min": 3132 }, { "epoch": 0.21739130434782608, "grad_norm": 0.26850576964528255, "learning_rate": 1.706666666666667e-05, "loss": 0.2742, "loss_nan_ranks": 0, "loss_rank_avg": 0.28359195590019226, "step": 65, "valid_targets_mean": 6224.4, "valid_targets_min": 1257 }, { "epoch": 0.23411371237458195, "grad_norm": 0.2171336590087016, "learning_rate": 1.8400000000000003e-05, "loss": 0.2634, "loss_nan_ranks": 0, "loss_rank_avg": 0.2649698257446289, "step": 70, "valid_targets_mean": 6282.0, "valid_targets_min": 2275 }, { "epoch": 0.2508361204013378, "grad_norm": 0.21962923752273064, "learning_rate": 1.9733333333333336e-05, "loss": 0.2545, "loss_nan_ranks": 0, "loss_rank_avg": 0.2535257637500763, "step": 75, "valid_targets_mean": 6212.7, "valid_targets_min": 3362 }, { "epoch": 0.26755852842809363, "grad_norm": 0.3424384488222486, "learning_rate": 2.1066666666666666e-05, "loss": 0.3148, "loss_nan_ranks": 0, "loss_rank_avg": 0.4487169682979584, "step": 80, "valid_targets_mean": 6572.3, "valid_targets_min": 338 }, { "epoch": 0.2842809364548495, "grad_norm": 0.2506953591604314, "learning_rate": 2.2400000000000002e-05, "loss": 0.437, "loss_nan_ranks": 0, "loss_rank_avg": 0.4307152330875397, "step": 85, "valid_targets_mean": 7014.9, "valid_targets_min": 429 }, { "epoch": 0.3010033444816054, "grad_norm": 0.2576531612366157, "learning_rate": 2.373333333333334e-05, "loss": 0.4246, "loss_nan_ranks": 0, "loss_rank_avg": 0.415714830160141, "step": 90, "valid_targets_mean": 7358.1, "valid_targets_min": 610 }, { "epoch": 0.3177257525083612, "grad_norm": 0.2397054199288198, "learning_rate": 2.5066666666666672e-05, "loss": 0.409, "loss_nan_ranks": 0, "loss_rank_avg": 0.4056137502193451, "step": 95, "valid_targets_mean": 6830.7, "valid_targets_min": 258 }, { "epoch": 0.33444816053511706, "grad_norm": 0.24629116268464377, "learning_rate": 2.6400000000000005e-05, "loss": 0.2766, "loss_nan_ranks": 0, "loss_rank_avg": 0.22848910093307495, "step": 100, "valid_targets_mean": 6237.5, "valid_targets_min": 970 }, { "epoch": 0.3511705685618729, "grad_norm": 0.24910802980831465, "learning_rate": 2.7733333333333338e-05, "loss": 0.2289, "loss_nan_ranks": 0, "loss_rank_avg": 0.23061847686767578, "step": 105, "valid_targets_mean": 6085.3, "valid_targets_min": 1261 }, { "epoch": 0.36789297658862874, "grad_norm": 0.22223000690921413, "learning_rate": 2.906666666666667e-05, "loss": 0.2213, "loss_nan_ranks": 0, "loss_rank_avg": 0.22238491475582123, "step": 110, "valid_targets_mean": 5970.5, "valid_targets_min": 892 }, { "epoch": 0.38461538461538464, "grad_norm": 0.2001390317036958, "learning_rate": 3.0400000000000004e-05, "loss": 0.2084, "loss_nan_ranks": 0, "loss_rank_avg": 0.20133914053440094, "step": 115, "valid_targets_mean": 5825.3, "valid_targets_min": 946 }, { "epoch": 0.4013377926421405, "grad_norm": 0.19885004548254503, "learning_rate": 3.173333333333334e-05, "loss": 0.2068, "loss_nan_ranks": 0, "loss_rank_avg": 0.2087135761976242, "step": 120, "valid_targets_mean": 5954.2, "valid_targets_min": 1020 }, { "epoch": 0.4180602006688963, "grad_norm": 0.1992290882112083, "learning_rate": 3.3066666666666666e-05, "loss": 0.204, "loss_nan_ranks": 0, "loss_rank_avg": 0.20589184761047363, "step": 125, "valid_targets_mean": 6130.7, "valid_targets_min": 1136 }, { "epoch": 0.43478260869565216, "grad_norm": 0.20745021595110374, "learning_rate": 3.44e-05, "loss": 0.2026, "loss_nan_ranks": 0, "loss_rank_avg": 0.20443178713321686, "step": 130, "valid_targets_mean": 5785.0, "valid_targets_min": 1002 }, { "epoch": 0.451505016722408, "grad_norm": 0.19259367440321293, "learning_rate": 3.573333333333333e-05, "loss": 0.1976, "loss_nan_ranks": 0, "loss_rank_avg": 0.19859756529331207, "step": 135, "valid_targets_mean": 6841.9, "valid_targets_min": 997 }, { "epoch": 0.4682274247491639, "grad_norm": 0.21765254448892532, "learning_rate": 3.706666666666667e-05, "loss": 0.2016, "loss_nan_ranks": 0, "loss_rank_avg": 0.20548826456069946, "step": 140, "valid_targets_mean": 5637.8, "valid_targets_min": 980 }, { "epoch": 0.48494983277591974, "grad_norm": 0.34411406995359606, "learning_rate": 3.8400000000000005e-05, "loss": 0.2978, "loss_nan_ranks": 0, "loss_rank_avg": 0.4181755483150482, "step": 145, "valid_targets_mean": 4637.5, "valid_targets_min": 2037 }, { "epoch": 0.5016722408026756, "grad_norm": 0.3082956901441369, "learning_rate": 3.9733333333333335e-05, "loss": 0.4008, "loss_nan_ranks": 0, "loss_rank_avg": 0.4054408371448517, "step": 150, "valid_targets_mean": 4843.1, "valid_targets_min": 1913 }, { "epoch": 0.5183946488294314, "grad_norm": 0.2965599280874575, "learning_rate": 3.999912708491203e-05, "loss": 0.395, "loss_nan_ranks": 0, "loss_rank_avg": 0.38285741209983826, "step": 155, "valid_targets_mean": 4593.1, "valid_targets_min": 1828 }, { "epoch": 0.5351170568561873, "grad_norm": 0.26345274846839833, "learning_rate": 3.9995580997960116e-05, "loss": 0.3803, "loss_nan_ranks": 0, "loss_rank_avg": 0.3705311119556427, "step": 160, "valid_targets_mean": 4561.8, "valid_targets_min": 1614 }, { "epoch": 0.5518394648829431, "grad_norm": 0.26837189335189193, "learning_rate": 3.998930766523859e-05, "loss": 0.3711, "loss_nan_ranks": 0, "loss_rank_avg": 0.3551887571811676, "step": 165, "valid_targets_mean": 4529.3, "valid_targets_min": 1656 }, { "epoch": 0.568561872909699, "grad_norm": 0.284434934811865, "learning_rate": 3.9980307942382504e-05, "loss": 0.3632, "loss_nan_ranks": 0, "loss_rank_avg": 0.36333727836608887, "step": 170, "valid_targets_mean": 4488.3, "valid_targets_min": 1318 }, { "epoch": 0.5852842809364549, "grad_norm": 0.25676025193538143, "learning_rate": 3.9968583056885895e-05, "loss": 0.3698, "loss_nan_ranks": 0, "loss_rank_avg": 0.3708552420139313, "step": 175, "valid_targets_mean": 4814.0, "valid_targets_min": 1322 }, { "epoch": 0.6020066889632107, "grad_norm": 0.34519669738003705, "learning_rate": 3.995413460793431e-05, "loss": 0.354, "loss_nan_ranks": 0, "loss_rank_avg": 0.3409523069858551, "step": 180, "valid_targets_mean": 4526.5, "valid_targets_min": 2181 }, { "epoch": 0.6187290969899666, "grad_norm": 0.322672098987964, "learning_rate": 3.9936964566186735e-05, "loss": 0.3154, "loss_nan_ranks": 0, "loss_rank_avg": 0.318267285823822, "step": 185, "valid_targets_mean": 4525.8, "valid_targets_min": 1661 }, { "epoch": 0.6354515050167224, "grad_norm": 0.28803537408637997, "learning_rate": 3.991707527350679e-05, "loss": 0.3043, "loss_nan_ranks": 0, "loss_rank_avg": 0.29651132225990295, "step": 190, "valid_targets_mean": 4629.6, "valid_targets_min": 674 }, { "epoch": 0.6521739130434783, "grad_norm": 0.28093349762265185, "learning_rate": 3.989446944264332e-05, "loss": 0.2966, "loss_nan_ranks": 0, "loss_rank_avg": 0.2959485352039337, "step": 195, "valid_targets_mean": 4321.1, "valid_targets_min": 796 }, { "epoch": 0.6688963210702341, "grad_norm": 0.31127990832964925, "learning_rate": 3.98691501568604e-05, "loss": 0.2867, "loss_nan_ranks": 0, "loss_rank_avg": 0.27535346150398254, "step": 200, "valid_targets_mean": 4405.4, "valid_targets_min": 2306 }, { "epoch": 0.68561872909699, "grad_norm": 0.28207944703004695, "learning_rate": 3.9841120869516815e-05, "loss": 0.2822, "loss_nan_ranks": 0, "loss_rank_avg": 0.27812016010284424, "step": 205, "valid_targets_mean": 4333.9, "valid_targets_min": 1732 }, { "epoch": 0.7023411371237458, "grad_norm": 0.27736191150225886, "learning_rate": 3.9810385403595004e-05, "loss": 0.2888, "loss_nan_ranks": 0, "loss_rank_avg": 0.29039719700813293, "step": 210, "valid_targets_mean": 4402.1, "valid_targets_min": 383 }, { "epoch": 0.7190635451505016, "grad_norm": 0.3242898315315502, "learning_rate": 3.977694795117969e-05, "loss": 0.2944, "loss_nan_ranks": 0, "loss_rank_avg": 0.31549105048179626, "step": 215, "valid_targets_mean": 4771.9, "valid_targets_min": 488 }, { "epoch": 0.7357859531772575, "grad_norm": 0.24565322352847732, "learning_rate": 3.974081307288607e-05, "loss": 0.2843, "loss_nan_ranks": 0, "loss_rank_avg": 0.2681817412376404, "step": 220, "valid_targets_mean": 4539.5, "valid_targets_min": 322 }, { "epoch": 0.7525083612040134, "grad_norm": 0.4658927198258249, "learning_rate": 3.970198569723779e-05, "loss": 0.5155, "loss_nan_ranks": 0, "loss_rank_avg": 0.3206879794597626, "step": 225, "valid_targets_mean": 3351.5, "valid_targets_min": 236 }, { "epoch": 0.7692307692307693, "grad_norm": 0.3132650030949883, "learning_rate": 3.966047111999477e-05, "loss": 0.2903, "loss_nan_ranks": 0, "loss_rank_avg": 0.30487164855003357, "step": 230, "valid_targets_mean": 3843.1, "valid_targets_min": 403 }, { "epoch": 0.7859531772575251, "grad_norm": 0.6991339613682909, "learning_rate": 3.9616275003430836e-05, "loss": 0.5737, "loss_nan_ranks": 0, "loss_rank_avg": 0.3571525812149048, "step": 235, "valid_targets_mean": 2841.5, "valid_targets_min": 368 }, { "epoch": 0.802675585284281, "grad_norm": 0.39882258778738783, "learning_rate": 3.9569403375561475e-05, "loss": 0.2874, "loss_nan_ranks": 0, "loss_rank_avg": 0.2861405313014984, "step": 240, "valid_targets_mean": 4773.6, "valid_targets_min": 370 }, { "epoch": 0.8193979933110368, "grad_norm": 0.8547662584274843, "learning_rate": 3.9519862629321645e-05, "loss": 0.5134, "loss_nan_ranks": 0, "loss_rank_avg": 0.6095802187919617, "step": 245, "valid_targets_mean": 718.6, "valid_targets_min": 373 }, { "epoch": 0.8361204013377926, "grad_norm": 0.2961625998638665, "learning_rate": 3.9467659521693836e-05, "loss": 0.2983, "loss_nan_ranks": 0, "loss_rank_avg": 0.27202290296554565, "step": 250, "valid_targets_mean": 4782.0, "valid_targets_min": 406 }, { "epoch": 0.8528428093645485, "grad_norm": 1.011866205178202, "learning_rate": 3.9412801172786453e-05, "loss": 0.4749, "loss_nan_ranks": 0, "loss_rank_avg": 0.6505089402198792, "step": 255, "valid_targets_mean": 711.2, "valid_targets_min": 262 }, { "epoch": 0.8695652173913043, "grad_norm": 0.2791415574080233, "learning_rate": 3.935529506486272e-05, "loss": 0.3042, "loss_nan_ranks": 0, "loss_rank_avg": 0.27265265583992004, "step": 260, "valid_targets_mean": 4644.9, "valid_targets_min": 494 }, { "epoch": 0.8862876254180602, "grad_norm": 0.9130625473905867, "learning_rate": 3.9295149041320087e-05, "loss": 0.4796, "loss_nan_ranks": 0, "loss_rank_avg": 0.59287428855896, "step": 265, "valid_targets_mean": 735.2, "valid_targets_min": 241 }, { "epoch": 0.903010033444816, "grad_norm": 0.27384980992154423, "learning_rate": 3.923237130562054e-05, "loss": 0.3604, "loss_nan_ranks": 0, "loss_rank_avg": 0.29318317770957947, "step": 270, "valid_targets_mean": 4780.1, "valid_targets_min": 391 }, { "epoch": 0.919732441471572, "grad_norm": 1.2863030433776386, "learning_rate": 3.916697042017165e-05, "loss": 0.4278, "loss_nan_ranks": 0, "loss_rank_avg": 0.6375625133514404, "step": 275, "valid_targets_mean": 692.4, "valid_targets_min": 281 }, { "epoch": 0.9364548494983278, "grad_norm": 0.3633438212857499, "learning_rate": 3.909895530515874e-05, "loss": 0.4214, "loss_nan_ranks": 0, "loss_rank_avg": 0.29790613055229187, "step": 280, "valid_targets_mean": 4800.1, "valid_targets_min": 420 }, { "epoch": 0.9531772575250836, "grad_norm": 1.4192481716536165, "learning_rate": 3.902833523732824e-05, "loss": 0.3486, "loss_nan_ranks": 0, "loss_rank_avg": 0.6474001407623291, "step": 285, "valid_targets_mean": 671.4, "valid_targets_min": 175 }, { "epoch": 0.9698996655518395, "grad_norm": 0.40183371605769863, "learning_rate": 3.895511984872241e-05, "loss": 0.4674, "loss_nan_ranks": 0, "loss_rank_avg": 0.3006552457809448, "step": 290, "valid_targets_mean": 4780.8, "valid_targets_min": 617 }, { "epoch": 0.9866220735785953, "grad_norm": 0.267962660255704, "learning_rate": 3.887931912536561e-05, "loss": 0.2801, "loss_nan_ranks": 0, "loss_rank_avg": 0.28743860125541687, "step": 295, "valid_targets_mean": 3581.8, "valid_targets_min": 227 }, { "epoch": 1.0033444816053512, "grad_norm": 0.6093597417751938, "learning_rate": 3.8800943405902275e-05, "loss": 0.5388, "loss_nan_ranks": 0, "loss_rank_avg": 0.4148422181606293, "step": 300, "valid_targets_mean": 4744.1, "valid_targets_min": 1979 }, { "epoch": 1.020066889632107, "grad_norm": 0.4592272597393982, "learning_rate": 3.8720003380186785e-05, "loss": 0.4122, "loss_nan_ranks": 0, "loss_rank_avg": 0.4079246520996094, "step": 305, "valid_targets_mean": 4749.8, "valid_targets_min": 1498 }, { "epoch": 1.0367892976588629, "grad_norm": 0.2457894053645451, "learning_rate": 3.863651008782549e-05, "loss": 0.3912, "loss_nan_ranks": 0, "loss_rank_avg": 0.3785780966281891, "step": 310, "valid_targets_mean": 4249.1, "valid_targets_min": 2016 }, { "epoch": 1.0535117056856187, "grad_norm": 0.24947510777727397, "learning_rate": 3.855047491667094e-05, "loss": 0.3536, "loss_nan_ranks": 0, "loss_rank_avg": 0.3452471196651459, "step": 315, "valid_targets_mean": 4861.7, "valid_targets_min": 1145 }, { "epoch": 1.0702341137123745, "grad_norm": 0.21154619985515752, "learning_rate": 3.846190960126873e-05, "loss": 0.3336, "loss_nan_ranks": 0, "loss_rank_avg": 0.33000990748405457, "step": 320, "valid_targets_mean": 5420.3, "valid_targets_min": 2182 }, { "epoch": 1.0869565217391304, "grad_norm": 0.21992928939009868, "learning_rate": 3.837082622125694e-05, "loss": 0.3363, "loss_nan_ranks": 0, "loss_rank_avg": 0.3382793664932251, "step": 325, "valid_targets_mean": 5058.1, "valid_targets_min": 1799 }, { "epoch": 1.1036789297658862, "grad_norm": 0.22532191173117702, "learning_rate": 3.827723719971858e-05, "loss": 0.3296, "loss_nan_ranks": 0, "loss_rank_avg": 0.3184288442134857, "step": 330, "valid_targets_mean": 5303.0, "valid_targets_min": 1895 }, { "epoch": 1.120401337792642, "grad_norm": 0.2042403427663198, "learning_rate": 3.818115530148721e-05, "loss": 0.3249, "loss_nan_ranks": 0, "loss_rank_avg": 0.3313908278942108, "step": 335, "valid_targets_mean": 4888.6, "valid_targets_min": 1893 }, { "epoch": 1.137123745819398, "grad_norm": 0.2113989978626341, "learning_rate": 3.808259363140588e-05, "loss": 0.3258, "loss_nan_ranks": 0, "loss_rank_avg": 0.3202518820762634, "step": 340, "valid_targets_mean": 5227.7, "valid_targets_min": 1767 }, { "epoch": 1.1538461538461537, "grad_norm": 0.18252806016422507, "learning_rate": 3.7981565632539695e-05, "loss": 0.3152, "loss_nan_ranks": 0, "loss_rank_avg": 0.3117549419403076, "step": 345, "valid_targets_mean": 5713.1, "valid_targets_min": 2069 }, { "epoch": 1.1705685618729098, "grad_norm": 0.19139045064244864, "learning_rate": 3.7878085084342375e-05, "loss": 0.3178, "loss_nan_ranks": 0, "loss_rank_avg": 0.3228846490383148, "step": 350, "valid_targets_mean": 5431.2, "valid_targets_min": 2059 }, { "epoch": 1.1872909698996654, "grad_norm": 0.16601714910583143, "learning_rate": 3.777216610077676e-05, "loss": 0.2588, "loss_nan_ranks": 0, "loss_rank_avg": 0.21630533039569855, "step": 355, "valid_targets_mean": 6543.9, "valid_targets_min": 2505 }, { "epoch": 1.2040133779264215, "grad_norm": 0.19207578400249292, "learning_rate": 3.766382312838981e-05, "loss": 0.2193, "loss_nan_ranks": 0, "loss_rank_avg": 0.2166496068239212, "step": 360, "valid_targets_mean": 6602.0, "valid_targets_min": 3272 }, { "epoch": 1.2207357859531773, "grad_norm": 0.18069657462503616, "learning_rate": 3.75530709443422e-05, "loss": 0.2151, "loss_nan_ranks": 0, "loss_rank_avg": 0.20401179790496826, "step": 365, "valid_targets_mean": 6464.9, "valid_targets_min": 2446 }, { "epoch": 1.2374581939799332, "grad_norm": 0.20008780485704025, "learning_rate": 3.743992465439281e-05, "loss": 0.2121, "loss_nan_ranks": 0, "loss_rank_avg": 0.20383743941783905, "step": 370, "valid_targets_mean": 6352.6, "valid_targets_min": 1681 }, { "epoch": 1.254180602006689, "grad_norm": 0.2090526902380887, "learning_rate": 3.732439969083845e-05, "loss": 0.2114, "loss_nan_ranks": 0, "loss_rank_avg": 0.22135812044143677, "step": 375, "valid_targets_mean": 6400.7, "valid_targets_min": 2589 }, { "epoch": 1.2709030100334449, "grad_norm": 0.1919090529284783, "learning_rate": 3.7206511810408964e-05, "loss": 0.2855, "loss_nan_ranks": 0, "loss_rank_avg": 0.3602113723754883, "step": 380, "valid_targets_mean": 6548.3, "valid_targets_min": 343 }, { "epoch": 1.2876254180602007, "grad_norm": 0.24676327799469464, "learning_rate": 3.708627709211818e-05, "loss": 0.362, "loss_nan_ranks": 0, "loss_rank_avg": 0.3631698191165924, "step": 385, "valid_targets_mean": 7437.5, "valid_targets_min": 595 }, { "epoch": 1.3043478260869565, "grad_norm": 0.17717675885306233, "learning_rate": 3.6963711935070824e-05, "loss": 0.3543, "loss_nan_ranks": 0, "loss_rank_avg": 0.34016939997673035, "step": 390, "valid_targets_mean": 7530.9, "valid_targets_min": 608 }, { "epoch": 1.3210702341137124, "grad_norm": 0.18878808771817251, "learning_rate": 3.683883305622582e-05, "loss": 0.3483, "loss_nan_ranks": 0, "loss_rank_avg": 0.3341453969478607, "step": 395, "valid_targets_mean": 7109.4, "valid_targets_min": 541 }, { "epoch": 1.3377926421404682, "grad_norm": 0.19607794599883782, "learning_rate": 3.6711657488116185e-05, "loss": 0.1955, "loss_nan_ranks": 0, "loss_rank_avg": 0.18399910628795624, "step": 400, "valid_targets_mean": 6218.3, "valid_targets_min": 1286 }, { "epoch": 1.354515050167224, "grad_norm": 0.18217893369868915, "learning_rate": 3.6582202576526e-05, "loss": 0.184, "loss_nan_ranks": 0, "loss_rank_avg": 0.18440212309360504, "step": 405, "valid_targets_mean": 6090.0, "valid_targets_min": 974 }, { "epoch": 1.37123745819398, "grad_norm": 0.1857925270997747, "learning_rate": 3.6450485978124494e-05, "loss": 0.1785, "loss_nan_ranks": 0, "loss_rank_avg": 0.17455260455608368, "step": 410, "valid_targets_mean": 5632.2, "valid_targets_min": 912 }, { "epoch": 1.3879598662207357, "grad_norm": 0.16394718108592246, "learning_rate": 3.6316525658057876e-05, "loss": 0.1711, "loss_nan_ranks": 0, "loss_rank_avg": 0.17674054205417633, "step": 415, "valid_targets_mean": 5734.3, "valid_targets_min": 928 }, { "epoch": 1.4046822742474916, "grad_norm": 0.19453873887472128, "learning_rate": 3.6180339887498953e-05, "loss": 0.1719, "loss_nan_ranks": 0, "loss_rank_avg": 0.1782982349395752, "step": 420, "valid_targets_mean": 5851.3, "valid_targets_min": 1044 }, { "epoch": 1.4214046822742474, "grad_norm": 0.15308386340839525, "learning_rate": 3.604194724115515e-05, "loss": 0.1688, "loss_nan_ranks": 0, "loss_rank_avg": 0.16447855532169342, "step": 425, "valid_targets_mean": 5988.3, "valid_targets_min": 880 }, { "epoch": 1.4381270903010033, "grad_norm": 0.1638769012447472, "learning_rate": 3.590136659473502e-05, "loss": 0.1696, "loss_nan_ranks": 0, "loss_rank_avg": 0.15939190983772278, "step": 430, "valid_targets_mean": 5928.3, "valid_targets_min": 946 }, { "epoch": 1.4548494983277591, "grad_norm": 0.1865863393490602, "learning_rate": 3.575861712237375e-05, "loss": 0.1693, "loss_nan_ranks": 0, "loss_rank_avg": 0.16939140856266022, "step": 435, "valid_targets_mean": 6708.4, "valid_targets_min": 1262 }, { "epoch": 1.471571906354515, "grad_norm": 0.15924997227607846, "learning_rate": 3.561371829401796e-05, "loss": 0.171, "loss_nan_ranks": 0, "loss_rank_avg": 0.16857747733592987, "step": 440, "valid_targets_mean": 5901.7, "valid_targets_min": 905 }, { "epoch": 1.488294314381271, "grad_norm": 0.254097411212028, "learning_rate": 3.546668987277014e-05, "loss": 0.2767, "loss_nan_ranks": 0, "loss_rank_avg": 0.32115718722343445, "step": 445, "valid_targets_mean": 4722.6, "valid_targets_min": 1840 }, { "epoch": 1.5050167224080266, "grad_norm": 0.24044078707975364, "learning_rate": 3.531755191219312e-05, "loss": 0.3234, "loss_nan_ranks": 0, "loss_rank_avg": 0.31480270624160767, "step": 450, "valid_targets_mean": 4651.1, "valid_targets_min": 1681 }, { "epoch": 1.5217391304347827, "grad_norm": 0.2489736102838297, "learning_rate": 3.516632475357491e-05, "loss": 0.3212, "loss_nan_ranks": 0, "loss_rank_avg": 0.31182220578193665, "step": 455, "valid_targets_mean": 4644.5, "valid_targets_min": 1635 }, { "epoch": 1.5384615384615383, "grad_norm": 0.25278396588972096, "learning_rate": 3.501302902315432e-05, "loss": 0.3154, "loss_nan_ranks": 0, "loss_rank_avg": 0.320780485868454, "step": 460, "valid_targets_mean": 4504.0, "valid_targets_min": 1994 }, { "epoch": 1.5551839464882944, "grad_norm": 0.2272679305070563, "learning_rate": 3.4857685629307664e-05, "loss": 0.3068, "loss_nan_ranks": 0, "loss_rank_avg": 0.31168070435523987, "step": 465, "valid_targets_mean": 4584.9, "valid_targets_min": 1724 }, { "epoch": 1.57190635451505, "grad_norm": 0.24115211586713795, "learning_rate": 3.4700315759697045e-05, "loss": 0.3049, "loss_nan_ranks": 0, "loss_rank_avg": 0.3153783082962036, "step": 470, "valid_targets_mean": 4716.0, "valid_targets_min": 1668 }, { "epoch": 1.588628762541806, "grad_norm": 0.2337397996054974, "learning_rate": 3.454094087838051e-05, "loss": 0.3069, "loss_nan_ranks": 0, "loss_rank_avg": 0.29250749945640564, "step": 475, "valid_targets_mean": 4789.8, "valid_targets_min": 1328 }, { "epoch": 1.605351170568562, "grad_norm": 0.28525845944953254, "learning_rate": 3.4379582722884496e-05, "loss": 0.2958, "loss_nan_ranks": 0, "loss_rank_avg": 0.26603245735168457, "step": 480, "valid_targets_mean": 4673.9, "valid_targets_min": 1767 }, { "epoch": 1.6220735785953178, "grad_norm": 0.23964069161847545, "learning_rate": 3.4216263301239047e-05, "loss": 0.2601, "loss_nan_ranks": 0, "loss_rank_avg": 0.2560085952281952, "step": 485, "valid_targets_mean": 4649.5, "valid_targets_min": 1280 }, { "epoch": 1.6387959866220736, "grad_norm": 0.25242621271422044, "learning_rate": 3.405100488897603e-05, "loss": 0.257, "loss_nan_ranks": 0, "loss_rank_avg": 0.26679348945617676, "step": 490, "valid_targets_mean": 4470.8, "valid_targets_min": 2111 }, { "epoch": 1.6555183946488294, "grad_norm": 0.25301573935148786, "learning_rate": 3.388383002609093e-05, "loss": 0.2455, "loss_nan_ranks": 0, "loss_rank_avg": 0.2450329065322876, "step": 495, "valid_targets_mean": 4466.0, "valid_targets_min": 2003 }, { "epoch": 1.6722408026755853, "grad_norm": 0.21629002885715765, "learning_rate": 3.371476151396861e-05, "loss": 0.2437, "loss_nan_ranks": 0, "loss_rank_avg": 0.2431306391954422, "step": 500, "valid_targets_mean": 4503.0, "valid_targets_min": 1623 }, { "epoch": 1.6889632107023411, "grad_norm": 0.22238463082465948, "learning_rate": 3.354382241227332e-05, "loss": 0.2428, "loss_nan_ranks": 0, "loss_rank_avg": 0.2501046061515808, "step": 505, "valid_targets_mean": 4517.9, "valid_targets_min": 890 }, { "epoch": 1.705685618729097, "grad_norm": 0.20384227023451656, "learning_rate": 3.3371036035803576e-05, "loss": 0.2486, "loss_nan_ranks": 0, "loss_rank_avg": 0.24747498333454132, "step": 510, "valid_targets_mean": 4592.1, "valid_targets_min": 1266 }, { "epoch": 1.7224080267558528, "grad_norm": 0.20233756237500755, "learning_rate": 3.319642595131216e-05, "loss": 0.2521, "loss_nan_ranks": 0, "loss_rank_avg": 0.24641738831996918, "step": 515, "valid_targets_mean": 4745.8, "valid_targets_min": 347 }, { "epoch": 1.7391304347826086, "grad_norm": 0.294550220574233, "learning_rate": 3.3020015974291814e-05, "loss": 0.2447, "loss_nan_ranks": 0, "loss_rank_avg": 0.2826707065105438, "step": 520, "valid_targets_mean": 1939.9, "valid_targets_min": 153 }, { "epoch": 1.7558528428093645, "grad_norm": 0.2973009229821899, "learning_rate": 3.284183016572701e-05, "loss": 0.3846, "loss_nan_ranks": 0, "loss_rank_avg": 0.24068395793437958, "step": 525, "valid_targets_mean": 4975.5, "valid_targets_min": 384 }, { "epoch": 1.7725752508361206, "grad_norm": 1.0041281625390261, "learning_rate": 3.2661892828812155e-05, "loss": 0.3092, "loss_nan_ranks": 0, "loss_rank_avg": 0.5361660122871399, "step": 530, "valid_targets_mean": 674.2, "valid_targets_min": 283 }, { "epoch": 1.7892976588628762, "grad_norm": 0.43182735363141195, "learning_rate": 3.248022850563688e-05, "loss": 0.3994, "loss_nan_ranks": 0, "loss_rank_avg": 0.2646257281303406, "step": 535, "valid_targets_mean": 4872.7, "valid_targets_min": 370 }, { "epoch": 1.8060200668896322, "grad_norm": 0.2517809597933826, "learning_rate": 3.2296861973838644e-05, "loss": 0.2494, "loss_nan_ranks": 0, "loss_rank_avg": 0.2629435956478119, "step": 540, "valid_targets_mean": 4683.3, "valid_targets_min": 301 }, { "epoch": 1.8227424749163879, "grad_norm": 0.5081675116737157, "learning_rate": 3.211181824322328e-05, "loss": 0.4217, "loss_nan_ranks": 0, "loss_rank_avg": 0.28255191445350647, "step": 545, "valid_targets_mean": 3734.7, "valid_targets_min": 362 }, { "epoch": 1.839464882943144, "grad_norm": 0.31789874000689705, "learning_rate": 3.192512255235382e-05, "loss": 0.2532, "loss_nan_ranks": 0, "loss_rank_avg": 0.25019964575767517, "step": 550, "valid_targets_mean": 4794.3, "valid_targets_min": 350 }, { "epoch": 1.8561872909698995, "grad_norm": 0.4454745577819962, "learning_rate": 3.1736800365108176e-05, "loss": 0.4104, "loss_nan_ranks": 0, "loss_rank_avg": 0.3151957094669342, "step": 555, "valid_targets_mean": 1785.8, "valid_targets_min": 290 }, { "epoch": 1.8729096989966556, "grad_norm": 0.2585604191000923, "learning_rate": 3.1546877367206026e-05, "loss": 0.2524, "loss_nan_ranks": 0, "loss_rank_avg": 0.23732544481754303, "step": 560, "valid_targets_mean": 4309.1, "valid_targets_min": 277 }, { "epoch": 1.8896321070234112, "grad_norm": 0.8074572430964205, "learning_rate": 3.135537946270551e-05, "loss": 0.4531, "loss_nan_ranks": 0, "loss_rank_avg": 0.4839265048503876, "step": 565, "valid_targets_mean": 733.1, "valid_targets_min": 429 }, { "epoch": 1.9063545150501673, "grad_norm": 0.25392758442689983, "learning_rate": 3.116233277047008e-05, "loss": 0.2651, "loss_nan_ranks": 0, "loss_rank_avg": 0.24726684391498566, "step": 570, "valid_targets_mean": 4884.3, "valid_targets_min": 386 }, { "epoch": 1.9230769230769231, "grad_norm": 0.7132205103519725, "learning_rate": 3.096776362060612e-05, "loss": 0.4093, "loss_nan_ranks": 0, "loss_rank_avg": 0.4857273995876312, "step": 575, "valid_targets_mean": 727.4, "valid_targets_min": 282 }, { "epoch": 1.939799331103679, "grad_norm": 0.28030206808075175, "learning_rate": 3.0771698550871696e-05, "loss": 0.3092, "loss_nan_ranks": 0, "loss_rank_avg": 0.24414603412151337, "step": 580, "valid_targets_mean": 4414.5, "valid_targets_min": 280 }, { "epoch": 1.9565217391304348, "grad_norm": 1.0722643385299264, "learning_rate": 3.057416430305701e-05, "loss": 0.3698, "loss_nan_ranks": 0, "loss_rank_avg": 0.5436132550239563, "step": 585, "valid_targets_mean": 696.1, "valid_targets_min": 158 }, { "epoch": 1.9732441471571907, "grad_norm": 0.40778249296899327, "learning_rate": 3.0375187819337014e-05, "loss": 0.3482, "loss_nan_ranks": 0, "loss_rank_avg": 0.2578917443752289, "step": 590, "valid_targets_mean": 5053.5, "valid_targets_min": 379 }, { "epoch": 1.9899665551839465, "grad_norm": 1.0135715593568684, "learning_rate": 3.0174796238596733e-05, "loss": 0.3122, "loss_nan_ranks": 0, "loss_rank_avg": 0.5402693748474121, "step": 595, "valid_targets_mean": 668.8, "valid_targets_min": 373 }, { "epoch": 2.0066889632107023, "grad_norm": 0.37079905131169244, "learning_rate": 2.997301689272968e-05, "loss": 0.4223, "loss_nan_ranks": 0, "loss_rank_avg": 0.36734867095947266, "step": 600, "valid_targets_mean": 4411.8, "valid_targets_min": 1407 }, { "epoch": 2.0234113712374584, "grad_norm": 0.26895676064590374, "learning_rate": 2.9769877302910046e-05, "loss": 0.3494, "loss_nan_ranks": 0, "loss_rank_avg": 0.3412858247756958, "step": 605, "valid_targets_mean": 4468.2, "valid_targets_min": 1592 }, { "epoch": 2.040133779264214, "grad_norm": 0.2766472449540659, "learning_rate": 2.9565405175838968e-05, "loss": 0.3217, "loss_nan_ranks": 0, "loss_rank_avg": 0.3096095025539398, "step": 610, "valid_targets_mean": 5093.7, "valid_targets_min": 1886 }, { "epoch": 2.05685618729097, "grad_norm": 0.1882641912886419, "learning_rate": 2.9359628399965586e-05, "loss": 0.2964, "loss_nan_ranks": 0, "loss_rank_avg": 0.2830745279788971, "step": 615, "valid_targets_mean": 5645.9, "valid_targets_min": 2151 }, { "epoch": 2.0735785953177257, "grad_norm": 0.1957042236075367, "learning_rate": 2.915257504168324e-05, "loss": 0.2871, "loss_nan_ranks": 0, "loss_rank_avg": 0.29117295145988464, "step": 620, "valid_targets_mean": 5221.6, "valid_targets_min": 2132 }, { "epoch": 2.0903010033444818, "grad_norm": 0.20588483629956084, "learning_rate": 2.8944273341501454e-05, "loss": 0.2822, "loss_nan_ranks": 0, "loss_rank_avg": 0.28053486347198486, "step": 625, "valid_targets_mean": 5244.2, "valid_targets_min": 2153 }, { "epoch": 2.1070234113712374, "grad_norm": 0.2193935949331056, "learning_rate": 2.8734751710194118e-05, "loss": 0.2827, "loss_nan_ranks": 0, "loss_rank_avg": 0.28705260157585144, "step": 630, "valid_targets_mean": 5358.2, "valid_targets_min": 1929 }, { "epoch": 2.1237458193979935, "grad_norm": 0.2023089427188991, "learning_rate": 2.852403872492449e-05, "loss": 0.2737, "loss_nan_ranks": 0, "loss_rank_avg": 0.2726423442363739, "step": 635, "valid_targets_mean": 4957.5, "valid_targets_min": 1777 }, { "epoch": 2.140468227424749, "grad_norm": 0.19160273637535705, "learning_rate": 2.8312163125347485e-05, "loss": 0.2801, "loss_nan_ranks": 0, "loss_rank_avg": 0.27666404843330383, "step": 640, "valid_targets_mean": 5104.9, "valid_targets_min": 1745 }, { "epoch": 2.157190635451505, "grad_norm": 0.19046257097512967, "learning_rate": 2.8099153809689804e-05, "loss": 0.2712, "loss_nan_ranks": 0, "loss_rank_avg": 0.2816013693809509, "step": 645, "valid_targets_mean": 5555.8, "valid_targets_min": 1934 }, { "epoch": 2.1739130434782608, "grad_norm": 0.21148504544765329, "learning_rate": 2.7885039830808422e-05, "loss": 0.2758, "loss_nan_ranks": 0, "loss_rank_avg": 0.2803577184677124, "step": 650, "valid_targets_mean": 5031.8, "valid_targets_min": 2036 }, { "epoch": 2.190635451505017, "grad_norm": 0.18874296277514546, "learning_rate": 2.7669850392228023e-05, "loss": 0.1979, "loss_nan_ranks": 0, "loss_rank_avg": 0.17442499101161957, "step": 655, "valid_targets_mean": 6045.2, "valid_targets_min": 3341 }, { "epoch": 2.2073578595317724, "grad_norm": 0.16601951159980557, "learning_rate": 2.7453614844157842e-05, "loss": 0.1883, "loss_nan_ranks": 0, "loss_rank_avg": 0.1943759173154831, "step": 660, "valid_targets_mean": 6267.9, "valid_targets_min": 1288 }, { "epoch": 2.2240802675585285, "grad_norm": 0.15129041350767916, "learning_rate": 2.7236362679488514e-05, "loss": 0.1797, "loss_nan_ranks": 0, "loss_rank_avg": 0.18566341698169708, "step": 665, "valid_targets_mean": 6566.6, "valid_targets_min": 3266 }, { "epoch": 2.240802675585284, "grad_norm": 0.14262447168470901, "learning_rate": 2.7018123529769484e-05, "loss": 0.1754, "loss_nan_ranks": 0, "loss_rank_avg": 0.17142410576343536, "step": 670, "valid_targets_mean": 6732.1, "valid_targets_min": 2028 }, { "epoch": 2.25752508361204, "grad_norm": 0.15243264863957715, "learning_rate": 2.6798927161167485e-05, "loss": 0.1826, "loss_nan_ranks": 0, "loss_rank_avg": 0.18653543293476105, "step": 675, "valid_targets_mean": 6640.0, "valid_targets_min": 3057 }, { "epoch": 2.274247491638796, "grad_norm": 0.17591037990966985, "learning_rate": 2.6578803470406658e-05, "loss": 0.2828, "loss_nan_ranks": 0, "loss_rank_avg": 0.3433483839035034, "step": 680, "valid_targets_mean": 7170.9, "valid_targets_min": 557 }, { "epoch": 2.290969899665552, "grad_norm": 0.16368510902418054, "learning_rate": 2.6357782480690846e-05, "loss": 0.3271, "loss_nan_ranks": 0, "loss_rank_avg": 0.3333417475223541, "step": 685, "valid_targets_mean": 7125.2, "valid_targets_min": 530 }, { "epoch": 2.3076923076923075, "grad_norm": 0.1908597063091604, "learning_rate": 2.6135894337608686e-05, "loss": 0.3199, "loss_nan_ranks": 0, "loss_rank_avg": 0.3223503828048706, "step": 690, "valid_targets_mean": 6939.3, "valid_targets_min": 562 }, { "epoch": 2.3244147157190636, "grad_norm": 0.21925207504093933, "learning_rate": 2.5913169305021944e-05, "loss": 0.2947, "loss_nan_ranks": 0, "loss_rank_avg": 0.2101258486509323, "step": 695, "valid_targets_mean": 6239.0, "valid_targets_min": 791 }, { "epoch": 2.3411371237458196, "grad_norm": 0.16653728056313943, "learning_rate": 2.56896377609378e-05, "loss": 0.1621, "loss_nan_ranks": 0, "loss_rank_avg": 0.16172702610492706, "step": 700, "valid_targets_mean": 5994.6, "valid_targets_min": 972 }, { "epoch": 2.3578595317725752, "grad_norm": 0.16240152922087314, "learning_rate": 2.5465330193365483e-05, "loss": 0.1576, "loss_nan_ranks": 0, "loss_rank_avg": 0.15301357209682465, "step": 705, "valid_targets_mean": 5792.4, "valid_targets_min": 976 }, { "epoch": 2.374581939799331, "grad_norm": 0.15853636657516545, "learning_rate": 2.5240277196157947e-05, "loss": 0.1515, "loss_nan_ranks": 0, "loss_rank_avg": 0.14691826701164246, "step": 710, "valid_targets_mean": 6527.7, "valid_targets_min": 1213 }, { "epoch": 2.391304347826087, "grad_norm": 0.14813404734568322, "learning_rate": 2.5014509464839095e-05, "loss": 0.1451, "loss_nan_ranks": 0, "loss_rank_avg": 0.14025376737117767, "step": 715, "valid_targets_mean": 6059.1, "valid_targets_min": 1092 }, { "epoch": 2.408026755852843, "grad_norm": 0.14905714508058546, "learning_rate": 2.4788057792417147e-05, "loss": 0.147, "loss_nan_ranks": 0, "loss_rank_avg": 0.1445135623216629, "step": 720, "valid_targets_mean": 6294.4, "valid_targets_min": 902 }, { "epoch": 2.4247491638795986, "grad_norm": 0.15540934919273947, "learning_rate": 2.4560953065184724e-05, "loss": 0.1443, "loss_nan_ranks": 0, "loss_rank_avg": 0.14712664484977722, "step": 725, "valid_targets_mean": 6088.0, "valid_targets_min": 1271 }, { "epoch": 2.4414715719063547, "grad_norm": 0.15004313679530892, "learning_rate": 2.433322625850617e-05, "loss": 0.1454, "loss_nan_ranks": 0, "loss_rank_avg": 0.14438866078853607, "step": 730, "valid_targets_mean": 6253.1, "valid_targets_min": 1088 }, { "epoch": 2.4581939799331103, "grad_norm": 0.18665964891293943, "learning_rate": 2.4104908432592732e-05, "loss": 0.1476, "loss_nan_ranks": 0, "loss_rank_avg": 0.14549827575683594, "step": 735, "valid_targets_mean": 6431.6, "valid_targets_min": 928 }, { "epoch": 2.4749163879598663, "grad_norm": 0.16064268926124256, "learning_rate": 2.3876030728266235e-05, "loss": 0.1454, "loss_nan_ranks": 0, "loss_rank_avg": 0.14341790974140167, "step": 740, "valid_targets_mean": 5723.3, "valid_targets_min": 1001 }, { "epoch": 2.491638795986622, "grad_norm": 0.22407683696782663, "learning_rate": 2.3646624362711655e-05, "loss": 0.2709, "loss_nan_ranks": 0, "loss_rank_avg": 0.28306153416633606, "step": 745, "valid_targets_mean": 4555.0, "valid_targets_min": 1402 }, { "epoch": 2.508361204013378, "grad_norm": 0.22686391457978547, "learning_rate": 2.3416720625219373e-05, "loss": 0.2821, "loss_nan_ranks": 0, "loss_rank_avg": 0.2748595178127289, "step": 750, "valid_targets_mean": 4720.4, "valid_targets_min": 1679 }, { "epoch": 2.5250836120401337, "grad_norm": 0.23690652646070248, "learning_rate": 2.3186350872917514e-05, "loss": 0.2825, "loss_nan_ranks": 0, "loss_rank_avg": 0.2806766629219055, "step": 755, "valid_targets_mean": 4706.1, "valid_targets_min": 1820 }, { "epoch": 2.5418060200668897, "grad_norm": 0.21389857186446018, "learning_rate": 2.295554652649511e-05, "loss": 0.2693, "loss_nan_ranks": 0, "loss_rank_avg": 0.26098814606666565, "step": 760, "valid_targets_mean": 4222.0, "valid_targets_min": 1433 }, { "epoch": 2.5585284280936453, "grad_norm": 0.2212548146105914, "learning_rate": 2.272433906591652e-05, "loss": 0.2618, "loss_nan_ranks": 0, "loss_rank_avg": 0.25039681792259216, "step": 765, "valid_targets_mean": 4723.6, "valid_targets_min": 1592 }, { "epoch": 2.5752508361204014, "grad_norm": 0.22322400991786523, "learning_rate": 2.249276002612785e-05, "loss": 0.268, "loss_nan_ranks": 0, "loss_rank_avg": 0.26302745938301086, "step": 770, "valid_targets_mean": 4409.8, "valid_targets_min": 1626 }, { "epoch": 2.591973244147157, "grad_norm": 0.23478742092249213, "learning_rate": 2.2260840992755765e-05, "loss": 0.267, "loss_nan_ranks": 0, "loss_rank_avg": 0.26876962184906006, "step": 775, "valid_targets_mean": 4625.4, "valid_targets_min": 1353 }, { "epoch": 2.608695652173913, "grad_norm": 0.23417620201495953, "learning_rate": 2.202861359779949e-05, "loss": 0.2522, "loss_nan_ranks": 0, "loss_rank_avg": 0.2331017255783081, "step": 780, "valid_targets_mean": 4481.9, "valid_targets_min": 2429 }, { "epoch": 2.625418060200669, "grad_norm": 0.264301297317795, "learning_rate": 2.1796109515316445e-05, "loss": 0.2326, "loss_nan_ranks": 0, "loss_rank_avg": 0.2378367930650711, "step": 785, "valid_targets_mean": 4473.0, "valid_targets_min": 2169 }, { "epoch": 2.6421404682274248, "grad_norm": 0.2565866317091138, "learning_rate": 2.156336045710211e-05, "loss": 0.2255, "loss_nan_ranks": 0, "loss_rank_avg": 0.22258122265338898, "step": 790, "valid_targets_mean": 4505.5, "valid_targets_min": 1829 }, { "epoch": 2.6588628762541804, "grad_norm": 0.1940288155795933, "learning_rate": 2.1330398168364823e-05, "loss": 0.2172, "loss_nan_ranks": 0, "loss_rank_avg": 0.21678568422794342, "step": 795, "valid_targets_mean": 4446.6, "valid_targets_min": 437 }, { "epoch": 2.6755852842809364, "grad_norm": 0.18627970283420714, "learning_rate": 2.1097254423395937e-05, "loss": 0.2124, "loss_nan_ranks": 0, "loss_rank_avg": 0.20801906287670135, "step": 800, "valid_targets_mean": 4618.1, "valid_targets_min": 1043 }, { "epoch": 2.6923076923076925, "grad_norm": 0.19229368174567585, "learning_rate": 2.0863961021236053e-05, "loss": 0.2141, "loss_nan_ranks": 0, "loss_rank_avg": 0.21324963867664337, "step": 805, "valid_targets_mean": 4455.3, "valid_targets_min": 534 }, { "epoch": 2.709030100334448, "grad_norm": 0.19664196403921202, "learning_rate": 2.0630549781337904e-05, "loss": 0.2197, "loss_nan_ranks": 0, "loss_rank_avg": 0.22369103133678436, "step": 810, "valid_targets_mean": 4562.8, "valid_targets_min": 892 }, { "epoch": 2.7257525083612038, "grad_norm": 0.1863005841070964, "learning_rate": 2.0397052539226362e-05, "loss": 0.2205, "loss_nan_ranks": 0, "loss_rank_avg": 0.21109290421009064, "step": 815, "valid_targets_mean": 4596.7, "valid_targets_min": 269 }, { "epoch": 2.74247491638796, "grad_norm": 0.6173372728089213, "learning_rate": 2.016350114215639e-05, "loss": 0.2494, "loss_nan_ranks": 0, "loss_rank_avg": 0.3844788372516632, "step": 820, "valid_targets_mean": 678.1, "valid_targets_min": 258 }, { "epoch": 2.759197324414716, "grad_norm": 0.2907761759630343, "learning_rate": 1.992992744476924e-05, "loss": 0.2839, "loss_nan_ranks": 0, "loss_rank_avg": 0.23014144599437714, "step": 825, "valid_targets_mean": 4751.9, "valid_targets_min": 469 }, { "epoch": 2.7759197324414715, "grad_norm": 0.7059514788472127, "learning_rate": 1.9696363304747786e-05, "loss": 0.305, "loss_nan_ranks": 0, "loss_rank_avg": 0.3986765146255493, "step": 830, "valid_targets_mean": 674.0, "valid_targets_min": 240 }, { "epoch": 2.7926421404682276, "grad_norm": 0.2585488252354123, "learning_rate": 1.9462840578471338e-05, "loss": 0.2944, "loss_nan_ranks": 0, "loss_rank_avg": 0.2241886407136917, "step": 835, "valid_targets_mean": 4826.7, "valid_targets_min": 475 }, { "epoch": 2.809364548494983, "grad_norm": 0.4214404035666233, "learning_rate": 1.922939111667068e-05, "loss": 0.2391, "loss_nan_ranks": 0, "loss_rank_avg": 0.2919781506061554, "step": 840, "valid_targets_mean": 1458.4, "valid_targets_min": 335 }, { "epoch": 2.8260869565217392, "grad_norm": 0.4070462279502931, "learning_rate": 1.899604676008388e-05, "loss": 0.3345, "loss_nan_ranks": 0, "loss_rank_avg": 0.24292106926441193, "step": 845, "valid_targets_mean": 4536.0, "valid_targets_min": 492 }, { "epoch": 2.842809364548495, "grad_norm": 0.2326857123378931, "learning_rate": 1.8762839335113454e-05, "loss": 0.2235, "loss_nan_ranks": 0, "loss_rank_avg": 0.21685612201690674, "step": 850, "valid_targets_mean": 4670.0, "valid_targets_min": 378 }, { "epoch": 2.859531772575251, "grad_norm": 0.3858936345614067, "learning_rate": 1.852980064948549e-05, "loss": 0.3423, "loss_nan_ranks": 0, "loss_rank_avg": 0.23001201450824738, "step": 855, "valid_targets_mean": 4312.6, "valid_targets_min": 358 }, { "epoch": 2.8762541806020065, "grad_norm": 0.2846110038843764, "learning_rate": 1.8296962487911293e-05, "loss": 0.2279, "loss_nan_ranks": 0, "loss_rank_avg": 0.23642951250076294, "step": 860, "valid_targets_mean": 3092.2, "valid_targets_min": 284 }, { "epoch": 2.8929765886287626, "grad_norm": 0.367258015187427, "learning_rate": 1.8064356607752217e-05, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.2541278898715973, "step": 865, "valid_targets_mean": 3393.6, "valid_targets_min": 403 }, { "epoch": 2.9096989966555182, "grad_norm": 0.2377725393070408, "learning_rate": 1.7832014734688182e-05, "loss": 0.2243, "loss_nan_ranks": 0, "loss_rank_avg": 0.21238528192043304, "step": 870, "valid_targets_mean": 4894.2, "valid_targets_min": 311 }, { "epoch": 2.9264214046822743, "grad_norm": 0.9018971437972336, "learning_rate": 1.7599968558390553e-05, "loss": 0.3773, "loss_nan_ranks": 0, "loss_rank_avg": 0.3804803788661957, "step": 875, "valid_targets_mean": 738.1, "valid_targets_min": 375 }, { "epoch": 2.94314381270903, "grad_norm": 0.1937813815953225, "learning_rate": 1.7368249728199884e-05, "loss": 0.2351, "loss_nan_ranks": 0, "loss_rank_avg": 0.22677625715732574, "step": 880, "valid_targets_mean": 4568.2, "valid_targets_min": 339 }, { "epoch": 2.959866220735786, "grad_norm": 0.6429163423349497, "learning_rate": 1.7136889848809194e-05, "loss": 0.3477, "loss_nan_ranks": 0, "loss_rank_avg": 0.4165579080581665, "step": 885, "valid_targets_mean": 750.7, "valid_targets_min": 382 }, { "epoch": 2.976588628762542, "grad_norm": 0.23979840190121002, "learning_rate": 1.6905920475953358e-05, "loss": 0.2589, "loss_nan_ranks": 0, "loss_rank_avg": 0.24398088455200195, "step": 890, "valid_targets_mean": 5069.3, "valid_targets_min": 416 }, { "epoch": 2.9933110367892977, "grad_norm": 0.7806320986114523, "learning_rate": 1.6675373112105087e-05, "loss": 0.2998, "loss_nan_ranks": 0, "loss_rank_avg": 0.4030502140522003, "step": 895, "valid_targets_mean": 673.1, "valid_targets_min": 232 }, { "epoch": 3.0100334448160537, "grad_norm": 0.3325966050698298, "learning_rate": 1.6445279202178287e-05, "loss": 0.3281, "loss_nan_ranks": 0, "loss_rank_avg": 0.3040490448474884, "step": 900, "valid_targets_mean": 4467.1, "valid_targets_min": 1380 }, { "epoch": 3.0267558528428093, "grad_norm": 0.25348447790070533, "learning_rate": 1.621567012923917e-05, "loss": 0.2989, "loss_nan_ranks": 0, "loss_rank_avg": 0.27941253781318665, "step": 905, "valid_targets_mean": 4703.9, "valid_targets_min": 1811 }, { "epoch": 3.0434782608695654, "grad_norm": 0.27167890992632737, "learning_rate": 1.5986577210225857e-05, "loss": 0.279, "loss_nan_ranks": 0, "loss_rank_avg": 0.2902013063430786, "step": 910, "valid_targets_mean": 5275.6, "valid_targets_min": 1617 }, { "epoch": 3.060200668896321, "grad_norm": 0.1910683683867069, "learning_rate": 1.575803169167699e-05, "loss": 0.2535, "loss_nan_ranks": 0, "loss_rank_avg": 0.2436305433511734, "step": 915, "valid_targets_mean": 5387.9, "valid_targets_min": 1714 }, { "epoch": 3.076923076923077, "grad_norm": 0.1874060436776931, "learning_rate": 1.553006474546992e-05, "loss": 0.2527, "loss_nan_ranks": 0, "loss_rank_avg": 0.23999392986297607, "step": 920, "valid_targets_mean": 4801.6, "valid_targets_min": 1973 }, { "epoch": 3.0936454849498327, "grad_norm": 0.18119039129245468, "learning_rate": 1.5302707464569132e-05, "loss": 0.2495, "loss_nan_ranks": 0, "loss_rank_avg": 0.24977512657642365, "step": 925, "valid_targets_mean": 5440.0, "valid_targets_min": 1742 }, { "epoch": 3.1103678929765888, "grad_norm": 0.17719444575363888, "learning_rate": 1.5075990858785377e-05, "loss": 0.2466, "loss_nan_ranks": 0, "loss_rank_avg": 0.2357928305864334, "step": 930, "valid_targets_mean": 5248.3, "valid_targets_min": 1992 }, { "epoch": 3.1270903010033444, "grad_norm": 0.21513060390100192, "learning_rate": 1.4849945850546153e-05, "loss": 0.2384, "loss_nan_ranks": 0, "loss_rank_avg": 0.23575645685195923, "step": 935, "valid_targets_mean": 5246.6, "valid_targets_min": 1877 }, { "epoch": 3.1438127090301005, "grad_norm": 0.18107305406498955, "learning_rate": 1.4624603270678171e-05, "loss": 0.2421, "loss_nan_ranks": 0, "loss_rank_avg": 0.22320790588855743, "step": 940, "valid_targets_mean": 5302.3, "valid_targets_min": 1350 }, { "epoch": 3.160535117056856, "grad_norm": 0.19063928947134984, "learning_rate": 1.4399993854202214e-05, "loss": 0.2382, "loss_nan_ranks": 0, "loss_rank_avg": 0.22890616953372955, "step": 945, "valid_targets_mean": 5620.1, "valid_targets_min": 1858 }, { "epoch": 3.177257525083612, "grad_norm": 0.17529233912179928, "learning_rate": 1.4176148236141143e-05, "loss": 0.2415, "loss_nan_ranks": 0, "loss_rank_avg": 0.22984355688095093, "step": 950, "valid_targets_mean": 5451.8, "valid_targets_min": 1093 }, { "epoch": 3.1939799331103678, "grad_norm": 0.15526905588957163, "learning_rate": 1.3953096947341492e-05, "loss": 0.1569, "loss_nan_ranks": 0, "loss_rank_avg": 0.163493350148201, "step": 955, "valid_targets_mean": 6425.3, "valid_targets_min": 2629 }, { "epoch": 3.210702341137124, "grad_norm": 0.23047094958309533, "learning_rate": 1.3730870410309311e-05, "loss": 0.1597, "loss_nan_ranks": 0, "loss_rank_avg": 0.15648625791072845, "step": 960, "valid_targets_mean": 6433.0, "valid_targets_min": 2166 }, { "epoch": 3.2274247491638794, "grad_norm": 0.15051441110478084, "learning_rate": 1.3509498935060746e-05, "loss": 0.1542, "loss_nan_ranks": 0, "loss_rank_avg": 0.14432001113891602, "step": 965, "valid_targets_mean": 6299.8, "valid_targets_min": 2765 }, { "epoch": 3.2441471571906355, "grad_norm": 0.18713782540742388, "learning_rate": 1.3289012714988006e-05, "loss": 0.1556, "loss_nan_ranks": 0, "loss_rank_avg": 0.1696312427520752, "step": 970, "valid_targets_mean": 6668.9, "valid_targets_min": 3131 }, { "epoch": 3.260869565217391, "grad_norm": 0.14193832407851184, "learning_rate": 1.3069441822741195e-05, "loss": 0.1528, "loss_nan_ranks": 0, "loss_rank_avg": 0.15616631507873535, "step": 975, "valid_targets_mean": 6579.5, "valid_targets_min": 1654 }, { "epoch": 3.277591973244147, "grad_norm": 0.17659222491324522, "learning_rate": 1.2850816206126623e-05, "loss": 0.2831, "loss_nan_ranks": 0, "loss_rank_avg": 0.2911715507507324, "step": 980, "valid_targets_mean": 6860.2, "valid_targets_min": 467 }, { "epoch": 3.294314381270903, "grad_norm": 0.17751955711425818, "learning_rate": 1.2633165684022184e-05, "loss": 0.3056, "loss_nan_ranks": 0, "loss_rank_avg": 0.30897554755210876, "step": 985, "valid_targets_mean": 6717.8, "valid_targets_min": 537 }, { "epoch": 3.311036789297659, "grad_norm": 0.15672473164502146, "learning_rate": 1.2416519942310256e-05, "loss": 0.2903, "loss_nan_ranks": 0, "loss_rank_avg": 0.280882328748703, "step": 990, "valid_targets_mean": 7239.4, "valid_targets_min": 651 }, { "epoch": 3.327759197324415, "grad_norm": 0.19003500142538396, "learning_rate": 1.2200908529828794e-05, "loss": 0.2456, "loss_nan_ranks": 0, "loss_rank_avg": 0.14710815250873566, "step": 995, "valid_targets_mean": 6309.6, "valid_targets_min": 912 }, { "epoch": 3.3444816053511706, "grad_norm": 0.18357730003119666, "learning_rate": 1.1986360854341068e-05, "loss": 0.1424, "loss_nan_ranks": 0, "loss_rank_avg": 0.13904519379138947, "step": 1000, "valid_targets_mean": 5761.4, "valid_targets_min": 1141 }, { "epoch": 3.361204013377926, "grad_norm": 0.16001904477148948, "learning_rate": 1.1772906178524691e-05, "loss": 0.138, "loss_nan_ranks": 0, "loss_rank_avg": 0.13291610777378082, "step": 1005, "valid_targets_mean": 6086.1, "valid_targets_min": 1020 }, { "epoch": 3.3779264214046822, "grad_norm": 0.16723753846924963, "learning_rate": 1.1560573615980393e-05, "loss": 0.1337, "loss_nan_ranks": 0, "loss_rank_avg": 0.13813412189483643, "step": 1010, "valid_targets_mean": 5800.1, "valid_targets_min": 959 }, { "epoch": 3.3946488294314383, "grad_norm": 0.1745217811675545, "learning_rate": 1.1349392127261169e-05, "loss": 0.1247, "loss_nan_ranks": 0, "loss_rank_avg": 0.13039818406105042, "step": 1015, "valid_targets_mean": 6074.7, "valid_targets_min": 939 }, { "epoch": 3.411371237458194, "grad_norm": 0.1655260029060803, "learning_rate": 1.113939051592225e-05, "loss": 0.1247, "loss_nan_ranks": 0, "loss_rank_avg": 0.1264718621969223, "step": 1020, "valid_targets_mean": 6158.8, "valid_targets_min": 983 }, { "epoch": 3.42809364548495, "grad_norm": 0.15510678838112874, "learning_rate": 1.0930597424592522e-05, "loss": 0.1225, "loss_nan_ranks": 0, "loss_rank_avg": 0.12112051248550415, "step": 1025, "valid_targets_mean": 6096.9, "valid_targets_min": 933 }, { "epoch": 3.4448160535117056, "grad_norm": 0.14545317901479146, "learning_rate": 1.0723041331067917e-05, "loss": 0.1266, "loss_nan_ranks": 0, "loss_rank_avg": 0.13073737919330597, "step": 1030, "valid_targets_mean": 6842.2, "valid_targets_min": 1071 }, { "epoch": 3.4615384615384617, "grad_norm": 0.14743435245575817, "learning_rate": 1.0516750544427236e-05, "loss": 0.1274, "loss_nan_ranks": 0, "loss_rank_avg": 0.12710754573345184, "step": 1035, "valid_targets_mean": 6249.6, "valid_targets_min": 1300 }, { "epoch": 3.4782608695652173, "grad_norm": 0.19638550592251586, "learning_rate": 1.0311753201171002e-05, "loss": 0.1297, "loss_nan_ranks": 0, "loss_rank_avg": 0.15527376532554626, "step": 1040, "valid_targets_mean": 5658.3, "valid_targets_min": 1037 }, { "epoch": 3.4949832775919734, "grad_norm": 0.2384441511668291, "learning_rate": 1.0108077261383843e-05, "loss": 0.263, "loss_nan_ranks": 0, "loss_rank_avg": 0.2483915090560913, "step": 1045, "valid_targets_mean": 4639.5, "valid_targets_min": 1875 }, { "epoch": 3.511705685618729, "grad_norm": 0.22952161164325965, "learning_rate": 9.905750504920988e-06, "loss": 0.2557, "loss_nan_ranks": 0, "loss_rank_avg": 0.25090211629867554, "step": 1050, "valid_targets_mean": 4660.8, "valid_targets_min": 1403 }, { "epoch": 3.528428093645485, "grad_norm": 0.19081898217049426, "learning_rate": 9.704800527619271e-06, "loss": 0.249, "loss_nan_ranks": 0, "loss_rank_avg": 0.23392079770565033, "step": 1055, "valid_targets_mean": 5102.3, "valid_targets_min": 2424 }, { "epoch": 3.5451505016722407, "grad_norm": 0.2150223947743572, "learning_rate": 9.505254737533288e-06, "loss": 0.2372, "loss_nan_ranks": 0, "loss_rank_avg": 0.23533904552459717, "step": 1060, "valid_targets_mean": 4816.6, "valid_targets_min": 2050 }, { "epoch": 3.5618729096989967, "grad_norm": 0.21559324253644188, "learning_rate": 9.30714035119712e-06, "loss": 0.2284, "loss_nan_ranks": 0, "loss_rank_avg": 0.23167192935943604, "step": 1065, "valid_targets_mean": 4696.9, "valid_targets_min": 1738 }, { "epoch": 3.5785953177257523, "grad_norm": 0.21476036313789315, "learning_rate": 9.110484389912218e-06, "loss": 0.2349, "loss_nan_ranks": 0, "loss_rank_avg": 0.24024538695812225, "step": 1070, "valid_targets_mean": 4515.7, "valid_targets_min": 1559 }, { "epoch": 3.5953177257525084, "grad_norm": 0.20079745576530617, "learning_rate": 8.915313676061925e-06, "loss": 0.2341, "loss_nan_ranks": 0, "loss_rank_avg": 0.2396174669265747, "step": 1075, "valid_targets_mean": 4811.0, "valid_targets_min": 1596 }, { "epoch": 3.6120401337792645, "grad_norm": 0.23949736554686882, "learning_rate": 8.721654829453072e-06, "loss": 0.2163, "loss_nan_ranks": 0, "loss_rank_avg": 0.20303094387054443, "step": 1080, "valid_targets_mean": 4378.2, "valid_targets_min": 912 }, { "epoch": 3.62876254180602, "grad_norm": 0.2162186432786319, "learning_rate": 8.529534263685268e-06, "loss": 0.2095, "loss_nan_ranks": 0, "loss_rank_avg": 0.20163659751415253, "step": 1085, "valid_targets_mean": 4515.4, "valid_targets_min": 2198 }, { "epoch": 3.6454849498327757, "grad_norm": 0.19436543294501413, "learning_rate": 8.338978182548234e-06, "loss": 0.2009, "loss_nan_ranks": 0, "loss_rank_avg": 0.1891588717699051, "step": 1090, "valid_targets_mean": 4146.3, "valid_targets_min": 1038 }, { "epoch": 3.6622073578595318, "grad_norm": 0.19164979346182845, "learning_rate": 8.150012576447872e-06, "loss": 0.1964, "loss_nan_ranks": 0, "loss_rank_avg": 0.20176713168621063, "step": 1095, "valid_targets_mean": 4482.4, "valid_targets_min": 1275 }, { "epoch": 3.678929765886288, "grad_norm": 0.19985143783581158, "learning_rate": 7.962663218861324e-06, "loss": 0.1832, "loss_nan_ranks": 0, "loss_rank_avg": 0.18389666080474854, "step": 1100, "valid_targets_mean": 4490.5, "valid_targets_min": 1533 }, { "epoch": 3.6956521739130435, "grad_norm": 0.20930072238561145, "learning_rate": 7.77695566282169e-06, "loss": 0.1867, "loss_nan_ranks": 0, "loss_rank_avg": 0.18639083206653595, "step": 1105, "valid_targets_mean": 4398.2, "valid_targets_min": 1895 }, { "epoch": 3.712374581939799, "grad_norm": 0.18519836850773455, "learning_rate": 7.5929152374327515e-06, "loss": 0.1945, "loss_nan_ranks": 0, "loss_rank_avg": 0.18989145755767822, "step": 1110, "valid_targets_mean": 4469.8, "valid_targets_min": 2241 }, { "epoch": 3.729096989966555, "grad_norm": 0.18493285556718495, "learning_rate": 7.41056704441433e-06, "loss": 0.2002, "loss_nan_ranks": 0, "loss_rank_avg": 0.19204336404800415, "step": 1115, "valid_targets_mean": 4966.5, "valid_targets_min": 309 }, { "epoch": 3.745819397993311, "grad_norm": 0.5966224380636336, "learning_rate": 7.229935954678544e-06, "loss": 0.2389, "loss_nan_ranks": 0, "loss_rank_avg": 0.29537415504455566, "step": 1120, "valid_targets_mean": 718.0, "valid_targets_min": 281 }, { "epoch": 3.762541806020067, "grad_norm": 0.21823572079242787, "learning_rate": 7.05104660493765e-06, "loss": 0.2263, "loss_nan_ranks": 0, "loss_rank_avg": 0.21302206814289093, "step": 1125, "valid_targets_mean": 4742.8, "valid_targets_min": 575 }, { "epoch": 3.779264214046823, "grad_norm": 0.5832962249106494, "learning_rate": 6.873923394343758e-06, "loss": 0.2721, "loss_nan_ranks": 0, "loss_rank_avg": 0.2774748206138611, "step": 1130, "valid_targets_mean": 747.7, "valid_targets_min": 223 }, { "epoch": 3.7959866220735785, "grad_norm": 0.2167157370011282, "learning_rate": 6.698590481160987e-06, "loss": 0.2364, "loss_nan_ranks": 0, "loss_rank_avg": 0.20832622051239014, "step": 1135, "valid_targets_mean": 4341.8, "valid_targets_min": 303 }, { "epoch": 3.8127090301003346, "grad_norm": 0.5623912186612033, "learning_rate": 6.525071779470429e-06, "loss": 0.2348, "loss_nan_ranks": 0, "loss_rank_avg": 0.31451621651649475, "step": 1140, "valid_targets_mean": 731.8, "valid_targets_min": 311 }, { "epoch": 3.82943143812709, "grad_norm": 0.1974116434223006, "learning_rate": 6.3533909559084915e-06, "loss": 0.2551, "loss_nan_ranks": 0, "loss_rank_avg": 0.21299366652965546, "step": 1145, "valid_targets_mean": 4850.5, "valid_targets_min": 368 }, { "epoch": 3.8461538461538463, "grad_norm": 0.3938591347572728, "learning_rate": 6.183571426438928e-06, "loss": 0.2184, "loss_nan_ranks": 0, "loss_rank_avg": 0.28042930364608765, "step": 1150, "valid_targets_mean": 1309.6, "valid_targets_min": 342 }, { "epoch": 3.862876254180602, "grad_norm": 0.203233024286556, "learning_rate": 6.015636353159073e-06, "loss": 0.2739, "loss_nan_ranks": 0, "loss_rank_avg": 0.2157667726278305, "step": 1155, "valid_targets_mean": 4642.4, "valid_targets_min": 266 }, { "epoch": 3.879598662207358, "grad_norm": 0.5964085554812023, "learning_rate": 5.8496086411407135e-06, "loss": 0.2296, "loss_nan_ranks": 0, "loss_rank_avg": 0.3337545692920685, "step": 1160, "valid_targets_mean": 681.9, "valid_targets_min": 173 }, { "epoch": 3.8963210702341136, "grad_norm": 0.2214120863538693, "learning_rate": 5.685510935305998e-06, "loss": 0.2728, "loss_nan_ranks": 0, "loss_rank_avg": 0.20867155492305756, "step": 1165, "valid_targets_mean": 4420.6, "valid_targets_min": 377 }, { "epoch": 3.9130434782608696, "grad_norm": 0.19187783009425913, "learning_rate": 5.523365617338872e-06, "loss": 0.204, "loss_nan_ranks": 0, "loss_rank_avg": 0.21160221099853516, "step": 1170, "valid_targets_mean": 5155.8, "valid_targets_min": 395 }, { "epoch": 3.9297658862876252, "grad_norm": 0.22333071467247392, "learning_rate": 5.3631948026323585e-06, "loss": 0.3132, "loss_nan_ranks": 0, "loss_rank_avg": 0.229045569896698, "step": 1175, "valid_targets_mean": 4811.5, "valid_targets_min": 345 }, { "epoch": 3.9464882943143813, "grad_norm": 0.15497067788450447, "learning_rate": 5.2050203372722e-06, "loss": 0.208, "loss_nan_ranks": 0, "loss_rank_avg": 0.19900214672088623, "step": 1180, "valid_targets_mean": 4957.0, "valid_targets_min": 409 }, { "epoch": 3.9632107023411374, "grad_norm": 0.42499289353318304, "learning_rate": 5.048863795057191e-06, "loss": 0.3015, "loss_nan_ranks": 0, "loss_rank_avg": 0.2734402120113373, "step": 1185, "valid_targets_mean": 1039.6, "valid_targets_min": 192 }, { "epoch": 3.979933110367893, "grad_norm": 0.16265281657263275, "learning_rate": 4.894746474556717e-06, "loss": 0.2152, "loss_nan_ranks": 0, "loss_rank_avg": 0.2060660570859909, "step": 1190, "valid_targets_mean": 4632.8, "valid_targets_min": 346 }, { "epoch": 3.9966555183946486, "grad_norm": 0.6053798931032222, "learning_rate": 4.742689396205766e-06, "loss": 0.2755, "loss_nan_ranks": 0, "loss_rank_avg": 0.3188337981700897, "step": 1195, "valid_targets_mean": 747.0, "valid_targets_min": 442 }, { "epoch": 4.013377926421405, "grad_norm": 0.27445958249018554, "learning_rate": 4.592713299437905e-06, "loss": 0.2734, "loss_nan_ranks": 0, "loss_rank_avg": 0.27683737874031067, "step": 1200, "valid_targets_mean": 5249.1, "valid_targets_min": 1589 }, { "epoch": 4.030100334448161, "grad_norm": 0.25687751261546804, "learning_rate": 4.444838639856568e-06, "loss": 0.2628, "loss_nan_ranks": 0, "loss_rank_avg": 0.2518869638442993, "step": 1205, "valid_targets_mean": 4295.5, "valid_targets_min": 1829 }, { "epoch": 4.046822742474917, "grad_norm": 0.20651475690375476, "learning_rate": 4.299085586445078e-06, "loss": 0.2475, "loss_nan_ranks": 0, "loss_rank_avg": 0.23218762874603271, "step": 1210, "valid_targets_mean": 5412.7, "valid_targets_min": 1726 }, { "epoch": 4.063545150501672, "grad_norm": 0.19554051256399013, "learning_rate": 4.1554740188157595e-06, "loss": 0.2346, "loss_nan_ranks": 0, "loss_rank_avg": 0.2517744302749634, "step": 1215, "valid_targets_mean": 5297.2, "valid_targets_min": 1415 }, { "epoch": 4.080267558528428, "grad_norm": 0.17978474793556265, "learning_rate": 4.014023524498492e-06, "loss": 0.2253, "loss_nan_ranks": 0, "loss_rank_avg": 0.2261568307876587, "step": 1220, "valid_targets_mean": 5272.5, "valid_targets_min": 1506 }, { "epoch": 4.096989966555184, "grad_norm": 0.18038505982255448, "learning_rate": 3.874753396269135e-06, "loss": 0.2278, "loss_nan_ranks": 0, "loss_rank_avg": 0.23343046009540558, "step": 1225, "valid_targets_mean": 5164.4, "valid_targets_min": 1749 }, { "epoch": 4.11371237458194, "grad_norm": 0.16023221073597105, "learning_rate": 3.7376826295181133e-06, "loss": 0.2186, "loss_nan_ranks": 0, "loss_rank_avg": 0.21051271259784698, "step": 1230, "valid_targets_mean": 5625.0, "valid_targets_min": 1336 }, { "epoch": 4.130434782608695, "grad_norm": 0.17583632120005055, "learning_rate": 3.602829919659623e-06, "loss": 0.2187, "loss_nan_ranks": 0, "loss_rank_avg": 0.2274329662322998, "step": 1235, "valid_targets_mean": 5280.3, "valid_targets_min": 1819 }, { "epoch": 4.147157190635451, "grad_norm": 0.1763063365449292, "learning_rate": 3.4702136595817002e-06, "loss": 0.2152, "loss_nan_ranks": 0, "loss_rank_avg": 0.21405138075351715, "step": 1240, "valid_targets_mean": 5215.2, "valid_targets_min": 2231 }, { "epoch": 4.1638795986622075, "grad_norm": 0.17730523624749578, "learning_rate": 3.3398519371375705e-06, "loss": 0.2147, "loss_nan_ranks": 0, "loss_rank_avg": 0.21326959133148193, "step": 1245, "valid_targets_mean": 5049.2, "valid_targets_min": 2114 }, { "epoch": 4.1806020066889635, "grad_norm": 0.19356277462319527, "learning_rate": 3.2117625326786127e-06, "loss": 0.2031, "loss_nan_ranks": 0, "loss_rank_avg": 0.14234782755374908, "step": 1250, "valid_targets_mean": 6386.6, "valid_targets_min": 1623 }, { "epoch": 4.197324414715719, "grad_norm": 0.16730300258691497, "learning_rate": 3.085962916629235e-06, "loss": 0.1387, "loss_nan_ranks": 0, "loss_rank_avg": 0.13740719854831696, "step": 1255, "valid_targets_mean": 6023.3, "valid_targets_min": 2729 }, { "epoch": 4.214046822742475, "grad_norm": 0.15827200537686784, "learning_rate": 2.962470247104057e-06, "loss": 0.1433, "loss_nan_ranks": 0, "loss_rank_avg": 0.13070766627788544, "step": 1260, "valid_targets_mean": 6772.9, "valid_targets_min": 2394 }, { "epoch": 4.230769230769231, "grad_norm": 0.1491184832085282, "learning_rate": 2.8413013675676703e-06, "loss": 0.1431, "loss_nan_ranks": 0, "loss_rank_avg": 0.14747533202171326, "step": 1265, "valid_targets_mean": 6463.4, "valid_targets_min": 2173 }, { "epoch": 4.247491638795987, "grad_norm": 0.15092183381928306, "learning_rate": 2.722472804537324e-06, "loss": 0.1349, "loss_nan_ranks": 0, "loss_rank_avg": 0.12587887048721313, "step": 1270, "valid_targets_mean": 6199.0, "valid_targets_min": 1047 }, { "epoch": 4.264214046822742, "grad_norm": 0.1608093727346184, "learning_rate": 2.6060007653288155e-06, "loss": 0.1469, "loss_nan_ranks": 0, "loss_rank_avg": 0.18034277856349945, "step": 1275, "valid_targets_mean": 6426.9, "valid_targets_min": 439 }, { "epoch": 4.280936454849498, "grad_norm": 0.18870342067871398, "learning_rate": 2.4919011358459443e-06, "loss": 0.2883, "loss_nan_ranks": 0, "loss_rank_avg": 0.28456005454063416, "step": 1280, "valid_targets_mean": 6586.4, "valid_targets_min": 646 }, { "epoch": 4.297658862876254, "grad_norm": 0.16650802588613298, "learning_rate": 2.380189478413799e-06, "loss": 0.2902, "loss_nan_ranks": 0, "loss_rank_avg": 0.2753714323043823, "step": 1285, "valid_targets_mean": 6664.2, "valid_targets_min": 284 }, { "epoch": 4.31438127090301, "grad_norm": 0.15911538469277994, "learning_rate": 2.2708810296561713e-06, "loss": 0.2805, "loss_nan_ranks": 0, "loss_rank_avg": 0.2978549301624298, "step": 1290, "valid_targets_mean": 6947.3, "valid_targets_min": 464 }, { "epoch": 4.331103678929766, "grad_norm": 0.17444646580607717, "learning_rate": 2.163990698417402e-06, "loss": 0.1979, "loss_nan_ranks": 0, "loss_rank_avg": 0.1320081502199173, "step": 1295, "valid_targets_mean": 6716.0, "valid_targets_min": 787 }, { "epoch": 4.3478260869565215, "grad_norm": 0.15418199902523114, "learning_rate": 2.0595330637289046e-06, "loss": 0.1301, "loss_nan_ranks": 0, "loss_rank_avg": 0.12595362961292267, "step": 1300, "valid_targets_mean": 6080.9, "valid_targets_min": 1010 }, { "epoch": 4.364548494983278, "grad_norm": 0.1445001546999268, "learning_rate": 1.9575223728207217e-06, "loss": 0.1275, "loss_nan_ranks": 0, "loss_rank_avg": 0.1298508644104004, "step": 1305, "valid_targets_mean": 6157.9, "valid_targets_min": 1028 }, { "epoch": 4.381270903010034, "grad_norm": 0.13859121653874001, "learning_rate": 1.857972539178301e-06, "loss": 0.1197, "loss_nan_ranks": 0, "loss_rank_avg": 0.11383823305368423, "step": 1310, "valid_targets_mean": 6154.5, "valid_targets_min": 888 }, { "epoch": 4.39799331103679, "grad_norm": 0.13210345726724612, "learning_rate": 1.7608971406448061e-06, "loss": 0.1119, "loss_nan_ranks": 0, "loss_rank_avg": 0.10649622231721878, "step": 1315, "valid_targets_mean": 6118.1, "valid_targets_min": 957 }, { "epoch": 4.414715719063545, "grad_norm": 0.13740873041385782, "learning_rate": 1.666309417569194e-06, "loss": 0.1136, "loss_nan_ranks": 0, "loss_rank_avg": 0.10804883390665054, "step": 1320, "valid_targets_mean": 5741.9, "valid_targets_min": 962 }, { "epoch": 4.431438127090301, "grad_norm": 0.1460663780198078, "learning_rate": 1.5742222710003252e-06, "loss": 0.1134, "loss_nan_ranks": 0, "loss_rank_avg": 0.12147744745016098, "step": 1325, "valid_targets_mean": 6000.1, "valid_targets_min": 1045 }, { "epoch": 4.448160535117057, "grad_norm": 0.1491078315390638, "learning_rate": 1.4846482609273839e-06, "loss": 0.1125, "loss_nan_ranks": 0, "loss_rank_avg": 0.1106313169002533, "step": 1330, "valid_targets_mean": 6530.3, "valid_targets_min": 1011 }, { "epoch": 4.464882943143813, "grad_norm": 0.13146477252411287, "learning_rate": 1.3975996045667616e-06, "loss": 0.1146, "loss_nan_ranks": 0, "loss_rank_avg": 0.10968706756830215, "step": 1335, "valid_targets_mean": 5982.4, "valid_targets_min": 1192 }, { "epoch": 4.481605351170568, "grad_norm": 0.29637387004163185, "learning_rate": 1.3130881746957447e-06, "loss": 0.1475, "loss_nan_ranks": 0, "loss_rank_avg": 0.2656233608722687, "step": 1340, "valid_targets_mean": 5084.1, "valid_targets_min": 1441 }, { "epoch": 4.498327759197324, "grad_norm": 0.2664430133539512, "learning_rate": 1.2311254980331477e-06, "loss": 0.2415, "loss_nan_ranks": 0, "loss_rank_avg": 0.2514803409576416, "step": 1345, "valid_targets_mean": 4601.8, "valid_targets_min": 1825 }, { "epoch": 4.51505016722408, "grad_norm": 0.2148348860800221, "learning_rate": 1.151722753667137e-06, "loss": 0.2381, "loss_nan_ranks": 0, "loss_rank_avg": 0.24929897487163544, "step": 1350, "valid_targets_mean": 4559.6, "valid_targets_min": 1817 }, { "epoch": 4.531772575250836, "grad_norm": 0.22288534309058536, "learning_rate": 1.0748907715305301e-06, "loss": 0.2284, "loss_nan_ranks": 0, "loss_rank_avg": 0.22896242141723633, "step": 1355, "valid_targets_mean": 4532.3, "valid_targets_min": 1508 }, { "epoch": 4.548494983277592, "grad_norm": 0.2274829356019196, "learning_rate": 1.0006400309236385e-06, "loss": 0.218, "loss_nan_ranks": 0, "loss_rank_avg": 0.21920983493328094, "step": 1360, "valid_targets_mean": 4192.1, "valid_targets_min": 1621 }, { "epoch": 4.565217391304348, "grad_norm": 0.20576644660868582, "learning_rate": 9.289806590849904e-07, "loss": 0.212, "loss_nan_ranks": 0, "loss_rank_avg": 0.21935880184173584, "step": 1365, "valid_targets_mean": 4696.6, "valid_targets_min": 1188 }, { "epoch": 4.581939799331104, "grad_norm": 0.19171424252028949, "learning_rate": 8.599224298100428e-07, "loss": 0.2147, "loss_nan_ranks": 0, "loss_rank_avg": 0.20938502252101898, "step": 1370, "valid_targets_mean": 4814.6, "valid_targets_min": 1866 }, { "epoch": 4.59866220735786, "grad_norm": 0.1887310644022492, "learning_rate": 7.934747621181049e-07, "loss": 0.2143, "loss_nan_ranks": 0, "loss_rank_avg": 0.2093948870897293, "step": 1375, "valid_targets_mean": 4647.9, "valid_targets_min": 1544 }, { "epoch": 4.615384615384615, "grad_norm": 0.2950282554295084, "learning_rate": 7.296467189676737e-07, "loss": 0.1993, "loss_nan_ranks": 0, "loss_rank_avg": 0.19741912186145782, "step": 1380, "valid_targets_mean": 4493.0, "valid_targets_min": 2453 }, { "epoch": 4.632107023411371, "grad_norm": 0.2318488586611998, "learning_rate": 6.684470060202986e-07, "loss": 0.1985, "loss_nan_ranks": 0, "loss_rank_avg": 0.19462911784648895, "step": 1385, "valid_targets_mean": 4325.5, "valid_targets_min": 1507 }, { "epoch": 4.648829431438127, "grad_norm": 0.1918353079745598, "learning_rate": 6.098839704532045e-07, "loss": 0.188, "loss_nan_ranks": 0, "loss_rank_avg": 0.18498313426971436, "step": 1390, "valid_targets_mean": 4599.4, "valid_targets_min": 1330 }, { "epoch": 4.665551839464883, "grad_norm": 0.20421997050212348, "learning_rate": 5.539655998207872e-07, "loss": 0.1812, "loss_nan_ranks": 0, "loss_rank_avg": 0.16699044406414032, "step": 1395, "valid_targets_mean": 4453.3, "valid_targets_min": 709 }, { "epoch": 4.682274247491639, "grad_norm": 0.19927447638970935, "learning_rate": 5.006995209651866e-07, "loss": 0.1733, "loss_nan_ranks": 0, "loss_rank_avg": 0.17403970658779144, "step": 1400, "valid_targets_mean": 4138.5, "valid_targets_min": 1383 }, { "epoch": 4.698996655518394, "grad_norm": 0.18813292577091748, "learning_rate": 4.500929989760372e-07, "loss": 0.1796, "loss_nan_ranks": 0, "loss_rank_avg": 0.19561976194381714, "step": 1405, "valid_targets_mean": 4641.5, "valid_targets_min": 1356 }, { "epoch": 4.7157190635451505, "grad_norm": 0.1945356363289933, "learning_rate": 4.0215293619956466e-07, "loss": 0.1811, "loss_nan_ranks": 0, "loss_rank_avg": 0.19034479558467865, "step": 1410, "valid_targets_mean": 4728.2, "valid_targets_min": 716 }, { "epoch": 4.7324414715719065, "grad_norm": 0.23787414563653314, "learning_rate": 3.568858712971568e-07, "loss": 0.1904, "loss_nan_ranks": 0, "loss_rank_avg": 0.19106751680374146, "step": 1415, "valid_targets_mean": 4865.8, "valid_targets_min": 536 }, { "epoch": 4.749163879598662, "grad_norm": 0.7015353232599782, "learning_rate": 3.1429797835353935e-07, "loss": 0.2298, "loss_nan_ranks": 0, "loss_rank_avg": 0.25036635994911194, "step": 1420, "valid_targets_mean": 700.6, "valid_targets_min": 406 }, { "epoch": 4.765886287625418, "grad_norm": 0.23301838604492123, "learning_rate": 2.7439506603468056e-07, "loss": 0.2004, "loss_nan_ranks": 0, "loss_rank_avg": 0.1926100105047226, "step": 1425, "valid_targets_mean": 4673.6, "valid_targets_min": 799 }, { "epoch": 4.782608695652174, "grad_norm": 0.5751763900806196, "learning_rate": 2.3718257679553603e-07, "loss": 0.2617, "loss_nan_ranks": 0, "loss_rank_avg": 0.2720150649547577, "step": 1430, "valid_targets_mean": 719.2, "valid_targets_min": 240 }, { "epoch": 4.79933110367893, "grad_norm": 0.18730383058307254, "learning_rate": 2.026655861377269e-07, "loss": 0.2042, "loss_nan_ranks": 0, "loss_rank_avg": 0.18452896177768707, "step": 1435, "valid_targets_mean": 5145.3, "valid_targets_min": 506 }, { "epoch": 4.816053511705686, "grad_norm": 0.5733176547263106, "learning_rate": 1.7084880191729601e-07, "loss": 0.2357, "loss_nan_ranks": 0, "loss_rank_avg": 0.27038872241973877, "step": 1440, "valid_targets_mean": 698.6, "valid_targets_min": 213 }, { "epoch": 4.832775919732441, "grad_norm": 0.19122491136437014, "learning_rate": 1.4173656370257693e-07, "loss": 0.2224, "loss_nan_ranks": 0, "loss_rank_avg": 0.20094060897827148, "step": 1445, "valid_targets_mean": 4992.0, "valid_targets_min": 322 }, { "epoch": 4.849498327759197, "grad_norm": 0.6374646702282563, "learning_rate": 1.1533284218231855e-07, "loss": 0.2334, "loss_nan_ranks": 0, "loss_rank_avg": 0.3297445774078369, "step": 1450, "valid_targets_mean": 670.7, "valid_targets_min": 259 }, { "epoch": 4.866220735785953, "grad_norm": 0.1946563231598942, "learning_rate": 9.164123862411612e-08, "loss": 0.2287, "loss_nan_ranks": 0, "loss_rank_avg": 0.21031109988689423, "step": 1455, "valid_targets_mean": 4702.2, "valid_targets_min": 221 }, { "epoch": 4.882943143812709, "grad_norm": 0.5877898109745995, "learning_rate": 7.0664984383213e-08, "loss": 0.2296, "loss_nan_ranks": 0, "loss_rank_avg": 0.27888360619544983, "step": 1460, "valid_targets_mean": 683.8, "valid_targets_min": 332 }, { "epoch": 4.8996655518394645, "grad_norm": 0.19765013589459693, "learning_rate": 5.240694046178219e-08, "loss": 0.2335, "loss_nan_ranks": 0, "loss_rank_avg": 0.19759269058704376, "step": 1465, "valid_targets_mean": 4679.5, "valid_targets_min": 320 }, { "epoch": 4.916387959866221, "grad_norm": 0.6156529589125915, "learning_rate": 3.6869597118698306e-08, "loss": 0.2239, "loss_nan_ranks": 0, "loss_rank_avg": 0.33030518889427185, "step": 1470, "valid_targets_mean": 672.6, "valid_targets_min": 235 }, { "epoch": 4.933110367892977, "grad_norm": 0.2203721452325052, "learning_rate": 2.4055073529887228e-08, "loss": 0.2625, "loss_nan_ranks": 0, "loss_rank_avg": 0.2159907966852188, "step": 1475, "valid_targets_mean": 4921.4, "valid_targets_min": 578 }, { "epoch": 4.949832775919733, "grad_norm": 0.17669342666025922, "learning_rate": 1.3965117499292746e-08, "loss": 0.194, "loss_nan_ranks": 0, "loss_rank_avg": 0.1800321489572525, "step": 1480, "valid_targets_mean": 4283.2, "valid_targets_min": 396 }, { "epoch": 4.966555183946488, "grad_norm": 0.21424544221902356, "learning_rate": 6.60110522047619e-09, "loss": 0.2822, "loss_nan_ranks": 0, "loss_rank_avg": 0.21270084381103516, "step": 1485, "valid_targets_mean": 4852.2, "valid_targets_min": 254 }, { "epoch": 4.983277591973244, "grad_norm": 0.18095176673344857, "learning_rate": 1.9640410889265606e-09, "loss": 0.2025, "loss_nan_ranks": 0, "loss_rank_avg": 0.18122506141662598, "step": 1490, "valid_targets_mean": 4463.2, "valid_targets_min": 388 }, { "epoch": 5.0, "grad_norm": 0.3562161320946657, "learning_rate": 5.455756506345111e-11, "loss": 0.2592, "loss_nan_ranks": 0, "loss_rank_avg": 0.21731798350811005, "step": 1495, "valid_targets_mean": 1465.8, "valid_targets_min": 325 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.21731798350811005, "step": 1495, "total_flos": 9850857283649536.0, "train_loss": 0.03974963869139502, "train_runtime": 9909.9567, "train_samples_per_second": 14.475, "train_steps_per_second": 0.151, "valid_targets_mean": 1465.8, "valid_targets_min": 325 } ], "logging_steps": 5, "max_steps": 1495, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9850857283649536.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }