{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015151515151515152, "grad_norm": 10.781819515475682, "learning_rate": 9.696969696969698e-07, "loss": 0.9617, "loss_nan_ranks": 0, "loss_rank_avg": 0.3307165205478668, "step": 5, "valid_targets_mean": 17257.9, "valid_targets_min": 6573 }, { "epoch": 0.030303030303030304, "grad_norm": 5.8514795468386165, "learning_rate": 2.181818181818182e-06, "loss": 0.9303, "loss_nan_ranks": 0, "loss_rank_avg": 0.3343563675880432, "step": 10, "valid_targets_mean": 17250.3, "valid_targets_min": 6149 }, { "epoch": 0.045454545454545456, "grad_norm": 2.4089715505491576, "learning_rate": 3.3939393939393946e-06, "loss": 0.8651, "loss_nan_ranks": 0, "loss_rank_avg": 0.30503979325294495, "step": 15, "valid_targets_mean": 16585.8, "valid_targets_min": 4802 }, { "epoch": 0.06060606060606061, "grad_norm": 1.805436533710044, "learning_rate": 4.606060606060606e-06, "loss": 0.8108, "loss_nan_ranks": 0, "loss_rank_avg": 0.29714435338974, "step": 20, "valid_targets_mean": 16405.9, "valid_targets_min": 4141 }, { "epoch": 0.07575757575757576, "grad_norm": 1.287151579235644, "learning_rate": 5.8181818181818185e-06, "loss": 0.7799, "loss_nan_ranks": 0, "loss_rank_avg": 0.23542305827140808, "step": 25, "valid_targets_mean": 14672.1, "valid_targets_min": 5753 }, { "epoch": 0.09090909090909091, "grad_norm": 0.8990922328705272, "learning_rate": 7.030303030303031e-06, "loss": 0.7383, "loss_nan_ranks": 0, "loss_rank_avg": 0.2207568883895874, "step": 30, "valid_targets_mean": 13728.5, "valid_targets_min": 2603 }, { "epoch": 0.10606060606060606, "grad_norm": 0.6293271791502106, "learning_rate": 8.242424242424243e-06, "loss": 0.6976, "loss_nan_ranks": 0, "loss_rank_avg": 0.2205752432346344, "step": 35, "valid_targets_mean": 14978.6, "valid_targets_min": 6184 }, { "epoch": 0.12121212121212122, "grad_norm": 0.4829386836834071, "learning_rate": 9.454545454545456e-06, "loss": 0.6631, "loss_nan_ranks": 0, "loss_rank_avg": 0.2082587331533432, "step": 40, "valid_targets_mean": 15860.1, "valid_targets_min": 4007 }, { "epoch": 0.13636363636363635, "grad_norm": 0.3907933259500624, "learning_rate": 1.0666666666666667e-05, "loss": 0.6355, "loss_nan_ranks": 0, "loss_rank_avg": 0.2100229263305664, "step": 45, "valid_targets_mean": 15454.4, "valid_targets_min": 3146 }, { "epoch": 0.15151515151515152, "grad_norm": 0.2966565614873845, "learning_rate": 1.187878787878788e-05, "loss": 0.6043, "loss_nan_ranks": 0, "loss_rank_avg": 0.2182207554578781, "step": 50, "valid_targets_mean": 16907.9, "valid_targets_min": 7771 }, { "epoch": 0.16666666666666666, "grad_norm": 0.30850551847505775, "learning_rate": 1.3090909090909092e-05, "loss": 0.5897, "loss_nan_ranks": 0, "loss_rank_avg": 0.20089182257652283, "step": 55, "valid_targets_mean": 14385.9, "valid_targets_min": 4756 }, { "epoch": 0.18181818181818182, "grad_norm": 0.24779819106568451, "learning_rate": 1.4303030303030305e-05, "loss": 0.5724, "loss_nan_ranks": 0, "loss_rank_avg": 0.1999066174030304, "step": 60, "valid_targets_mean": 16691.1, "valid_targets_min": 7386 }, { "epoch": 0.19696969696969696, "grad_norm": 0.22104440124868213, "learning_rate": 1.5515151515151516e-05, "loss": 0.5586, "loss_nan_ranks": 0, "loss_rank_avg": 0.1938043236732483, "step": 65, "valid_targets_mean": 15472.4, "valid_targets_min": 4913 }, { "epoch": 0.21212121212121213, "grad_norm": 0.24959933484773072, "learning_rate": 1.672727272727273e-05, "loss": 0.5451, "loss_nan_ranks": 0, "loss_rank_avg": 0.17949575185775757, "step": 70, "valid_targets_mean": 14789.4, "valid_targets_min": 3882 }, { "epoch": 0.22727272727272727, "grad_norm": 0.22005550174278024, "learning_rate": 1.7939393939393942e-05, "loss": 0.5295, "loss_nan_ranks": 0, "loss_rank_avg": 0.17324337363243103, "step": 75, "valid_targets_mean": 15401.0, "valid_targets_min": 3272 }, { "epoch": 0.24242424242424243, "grad_norm": 0.22802634959112467, "learning_rate": 1.9151515151515152e-05, "loss": 0.5232, "loss_nan_ranks": 0, "loss_rank_avg": 0.17860084772109985, "step": 80, "valid_targets_mean": 15247.9, "valid_targets_min": 3948 }, { "epoch": 0.25757575757575757, "grad_norm": 0.2624979989553396, "learning_rate": 2.0363636363636365e-05, "loss": 0.5194, "loss_nan_ranks": 0, "loss_rank_avg": 0.16321659088134766, "step": 85, "valid_targets_mean": 13104.3, "valid_targets_min": 6923 }, { "epoch": 0.2727272727272727, "grad_norm": 0.2436635264309524, "learning_rate": 2.1575757575757578e-05, "loss": 0.515, "loss_nan_ranks": 0, "loss_rank_avg": 0.1677224040031433, "step": 90, "valid_targets_mean": 14824.3, "valid_targets_min": 4583 }, { "epoch": 0.2878787878787879, "grad_norm": 0.29651493137861273, "learning_rate": 2.278787878787879e-05, "loss": 0.5038, "loss_nan_ranks": 0, "loss_rank_avg": 0.1670367568731308, "step": 95, "valid_targets_mean": 15513.7, "valid_targets_min": 1267 }, { "epoch": 0.30303030303030304, "grad_norm": 0.2831831041789719, "learning_rate": 2.4e-05, "loss": 0.5042, "loss_nan_ranks": 0, "loss_rank_avg": 0.1713901311159134, "step": 100, "valid_targets_mean": 14562.0, "valid_targets_min": 3932 }, { "epoch": 0.3181818181818182, "grad_norm": 0.23308416680278246, "learning_rate": 2.5212121212121214e-05, "loss": 0.5005, "loss_nan_ranks": 0, "loss_rank_avg": 0.17232999205589294, "step": 105, "valid_targets_mean": 15750.6, "valid_targets_min": 4312 }, { "epoch": 0.3333333333333333, "grad_norm": 0.2890264891487217, "learning_rate": 2.6424242424242427e-05, "loss": 0.4915, "loss_nan_ranks": 0, "loss_rank_avg": 0.1580529510974884, "step": 110, "valid_targets_mean": 14890.7, "valid_targets_min": 6327 }, { "epoch": 0.3484848484848485, "grad_norm": 0.2904637604741024, "learning_rate": 2.763636363636364e-05, "loss": 0.4901, "loss_nan_ranks": 0, "loss_rank_avg": 0.17326104640960693, "step": 115, "valid_targets_mean": 15371.7, "valid_targets_min": 5359 }, { "epoch": 0.36363636363636365, "grad_norm": 0.26161939834322806, "learning_rate": 2.884848484848485e-05, "loss": 0.4865, "loss_nan_ranks": 0, "loss_rank_avg": 0.17404524981975555, "step": 120, "valid_targets_mean": 16654.5, "valid_targets_min": 5580 }, { "epoch": 0.3787878787878788, "grad_norm": 0.33212371681558334, "learning_rate": 3.0060606060606062e-05, "loss": 0.4898, "loss_nan_ranks": 0, "loss_rank_avg": 0.1589001715183258, "step": 125, "valid_targets_mean": 14516.2, "valid_targets_min": 6215 }, { "epoch": 0.3939393939393939, "grad_norm": 0.35155860659846594, "learning_rate": 3.127272727272728e-05, "loss": 0.4813, "loss_nan_ranks": 0, "loss_rank_avg": 0.15316787362098694, "step": 130, "valid_targets_mean": 15876.2, "valid_targets_min": 4197 }, { "epoch": 0.4090909090909091, "grad_norm": 0.3186605453681469, "learning_rate": 3.2484848484848485e-05, "loss": 0.4827, "loss_nan_ranks": 0, "loss_rank_avg": 0.14787974953651428, "step": 135, "valid_targets_mean": 15059.2, "valid_targets_min": 5205 }, { "epoch": 0.42424242424242425, "grad_norm": 0.292885635887715, "learning_rate": 3.36969696969697e-05, "loss": 0.4809, "loss_nan_ranks": 0, "loss_rank_avg": 0.17198166251182556, "step": 140, "valid_targets_mean": 16177.4, "valid_targets_min": 4596 }, { "epoch": 0.4393939393939394, "grad_norm": 0.42141852592961904, "learning_rate": 3.490909090909091e-05, "loss": 0.4823, "loss_nan_ranks": 0, "loss_rank_avg": 0.15170565247535706, "step": 145, "valid_targets_mean": 15029.5, "valid_targets_min": 3573 }, { "epoch": 0.45454545454545453, "grad_norm": 0.30639003045513324, "learning_rate": 3.6121212121212124e-05, "loss": 0.4726, "loss_nan_ranks": 0, "loss_rank_avg": 0.1611180603504181, "step": 150, "valid_targets_mean": 15518.5, "valid_targets_min": 6323 }, { "epoch": 0.4696969696969697, "grad_norm": 0.3495360074509179, "learning_rate": 3.733333333333334e-05, "loss": 0.476, "loss_nan_ranks": 0, "loss_rank_avg": 0.18164387345314026, "step": 155, "valid_targets_mean": 16635.5, "valid_targets_min": 7029 }, { "epoch": 0.48484848484848486, "grad_norm": 0.3999563868846935, "learning_rate": 3.854545454545455e-05, "loss": 0.469, "loss_nan_ranks": 0, "loss_rank_avg": 0.155188649892807, "step": 160, "valid_targets_mean": 15272.1, "valid_targets_min": 3593 }, { "epoch": 0.5, "grad_norm": 0.3449656470737079, "learning_rate": 3.9757575757575757e-05, "loss": 0.4689, "loss_nan_ranks": 0, "loss_rank_avg": 0.15367814898490906, "step": 165, "valid_targets_mean": 14878.1, "valid_targets_min": 2953 }, { "epoch": 0.5151515151515151, "grad_norm": 0.3665757358488547, "learning_rate": 3.999928391557286e-05, "loss": 0.4709, "loss_nan_ranks": 0, "loss_rank_avg": 0.15468040108680725, "step": 170, "valid_targets_mean": 15413.5, "valid_targets_min": 5168 }, { "epoch": 0.5303030303030303, "grad_norm": 0.2788745773590794, "learning_rate": 3.999637491047052e-05, "loss": 0.4693, "loss_nan_ranks": 0, "loss_rank_avg": 0.13528694212436676, "step": 175, "valid_targets_mean": 13168.3, "valid_targets_min": 2921 }, { "epoch": 0.5454545454545454, "grad_norm": 0.4240308998832283, "learning_rate": 3.999122855464813e-05, "loss": 0.4647, "loss_nan_ranks": 0, "loss_rank_avg": 0.15115290880203247, "step": 180, "valid_targets_mean": 15130.8, "valid_targets_min": 6243 }, { "epoch": 0.5606060606060606, "grad_norm": 0.318176983186771, "learning_rate": 3.998384542392021e-05, "loss": 0.4688, "loss_nan_ranks": 0, "loss_rank_avg": 0.14690656960010529, "step": 185, "valid_targets_mean": 15081.4, "valid_targets_min": 2460 }, { "epoch": 0.5757575757575758, "grad_norm": 0.3531303282866239, "learning_rate": 3.9974226344369124e-05, "loss": 0.4676, "loss_nan_ranks": 0, "loss_rank_avg": 0.16395458579063416, "step": 190, "valid_targets_mean": 16651.0, "valid_targets_min": 2044 }, { "epoch": 0.5909090909090909, "grad_norm": 0.3185804484156167, "learning_rate": 3.996237239225268e-05, "loss": 0.4641, "loss_nan_ranks": 0, "loss_rank_avg": 0.14105798304080963, "step": 195, "valid_targets_mean": 15282.0, "valid_targets_min": 3829 }, { "epoch": 0.6060606060606061, "grad_norm": 0.3306774162967017, "learning_rate": 3.994828489388371e-05, "loss": 0.4606, "loss_nan_ranks": 0, "loss_rank_avg": 0.15033362805843353, "step": 200, "valid_targets_mean": 14899.4, "valid_targets_min": 4148 }, { "epoch": 0.6212121212121212, "grad_norm": 0.2666534138366462, "learning_rate": 3.993196542548162e-05, "loss": 0.4659, "loss_nan_ranks": 0, "loss_rank_avg": 0.1435295045375824, "step": 205, "valid_targets_mean": 13889.3, "valid_targets_min": 1537 }, { "epoch": 0.6363636363636364, "grad_norm": 0.2619401719498242, "learning_rate": 3.991341581299609e-05, "loss": 0.4614, "loss_nan_ranks": 0, "loss_rank_avg": 0.15735077857971191, "step": 210, "valid_targets_mean": 16223.8, "valid_targets_min": 7738 }, { "epoch": 0.6515151515151515, "grad_norm": 0.31758013245846667, "learning_rate": 3.9892638131902765e-05, "loss": 0.4546, "loss_nan_ranks": 0, "loss_rank_avg": 0.1687242090702057, "step": 215, "valid_targets_mean": 16545.2, "valid_targets_min": 5321 }, { "epoch": 0.6666666666666666, "grad_norm": 0.28828120577659794, "learning_rate": 3.9869634706971e-05, "loss": 0.4597, "loss_nan_ranks": 0, "loss_rank_avg": 0.15184861421585083, "step": 220, "valid_targets_mean": 15488.8, "valid_targets_min": 4785 }, { "epoch": 0.6818181818181818, "grad_norm": 0.32648424933952014, "learning_rate": 3.984440811200379e-05, "loss": 0.4574, "loss_nan_ranks": 0, "loss_rank_avg": 0.14916986227035522, "step": 225, "valid_targets_mean": 14360.5, "valid_targets_min": 3511 }, { "epoch": 0.696969696969697, "grad_norm": 0.3724763305936334, "learning_rate": 3.981696116954973e-05, "loss": 0.4533, "loss_nan_ranks": 0, "loss_rank_avg": 0.1421215534210205, "step": 230, "valid_targets_mean": 13722.8, "valid_targets_min": 5643 }, { "epoch": 0.7121212121212122, "grad_norm": 0.42652803865674005, "learning_rate": 3.978729695058729e-05, "loss": 0.4534, "loss_nan_ranks": 0, "loss_rank_avg": 0.14489291608333588, "step": 235, "valid_targets_mean": 14735.2, "valid_targets_min": 3745 }, { "epoch": 0.7272727272727273, "grad_norm": 0.4770959081747357, "learning_rate": 3.9755418774181146e-05, "loss": 0.456, "loss_nan_ranks": 0, "loss_rank_avg": 0.16189418733119965, "step": 240, "valid_targets_mean": 17361.2, "valid_targets_min": 4950 }, { "epoch": 0.7424242424242424, "grad_norm": 0.493013737568908, "learning_rate": 3.9721330207110835e-05, "loss": 0.4509, "loss_nan_ranks": 0, "loss_rank_avg": 0.15398374199867249, "step": 245, "valid_targets_mean": 15619.1, "valid_targets_min": 6005 }, { "epoch": 0.7575757575757576, "grad_norm": 0.3225495200220724, "learning_rate": 3.9685035063471675e-05, "loss": 0.4494, "loss_nan_ranks": 0, "loss_rank_avg": 0.15332111716270447, "step": 250, "valid_targets_mean": 15341.6, "valid_targets_min": 5202 }, { "epoch": 0.7727272727272727, "grad_norm": 0.34133731195657563, "learning_rate": 3.964653740424804e-05, "loss": 0.449, "loss_nan_ranks": 0, "loss_rank_avg": 0.1565016657114029, "step": 255, "valid_targets_mean": 16527.7, "valid_targets_min": 4140 }, { "epoch": 0.7878787878787878, "grad_norm": 0.4303037712575851, "learning_rate": 3.960584153685895e-05, "loss": 0.4535, "loss_nan_ranks": 0, "loss_rank_avg": 0.1628049910068512, "step": 260, "valid_targets_mean": 15461.1, "valid_targets_min": 3340 }, { "epoch": 0.803030303030303, "grad_norm": 0.3548169264545474, "learning_rate": 3.9562952014676116e-05, "loss": 0.4507, "loss_nan_ranks": 0, "loss_rank_avg": 0.16039399802684784, "step": 265, "valid_targets_mean": 16171.8, "valid_targets_min": 7126 }, { "epoch": 0.8181818181818182, "grad_norm": 0.25424846956268254, "learning_rate": 3.9517873636514525e-05, "loss": 0.4545, "loss_nan_ranks": 0, "loss_rank_avg": 0.14358675479888916, "step": 270, "valid_targets_mean": 14434.2, "valid_targets_min": 2628 }, { "epoch": 0.8333333333333334, "grad_norm": 0.2694812489208858, "learning_rate": 3.947061144609546e-05, "loss": 0.4421, "loss_nan_ranks": 0, "loss_rank_avg": 0.1384536325931549, "step": 275, "valid_targets_mean": 15667.6, "valid_targets_min": 5195 }, { "epoch": 0.8484848484848485, "grad_norm": 0.3012250215938447, "learning_rate": 3.942117073148221e-05, "loss": 0.4457, "loss_nan_ranks": 0, "loss_rank_avg": 0.15101709961891174, "step": 280, "valid_targets_mean": 13737.2, "valid_targets_min": 3769 }, { "epoch": 0.8636363636363636, "grad_norm": 0.33412324364329166, "learning_rate": 3.9369557024488345e-05, "loss": 0.4494, "loss_nan_ranks": 0, "loss_rank_avg": 0.13744959235191345, "step": 285, "valid_targets_mean": 14516.1, "valid_targets_min": 4159 }, { "epoch": 0.8787878787878788, "grad_norm": 0.28861130651445016, "learning_rate": 3.931577610005883e-05, "loss": 0.4482, "loss_nan_ranks": 0, "loss_rank_avg": 0.12138031423091888, "step": 290, "valid_targets_mean": 12381.9, "valid_targets_min": 3785 }, { "epoch": 0.8939393939393939, "grad_norm": 0.33189139451222754, "learning_rate": 3.925983397562385e-05, "loss": 0.4483, "loss_nan_ranks": 0, "loss_rank_avg": 0.14846676588058472, "step": 295, "valid_targets_mean": 15067.7, "valid_targets_min": 2642 }, { "epoch": 0.9090909090909091, "grad_norm": 0.26725549323789616, "learning_rate": 3.920173691042554e-05, "loss": 0.4486, "loss_nan_ranks": 0, "loss_rank_avg": 0.15860885381698608, "step": 300, "valid_targets_mean": 17651.8, "valid_targets_min": 6461 }, { "epoch": 0.9242424242424242, "grad_norm": 0.28840546967609465, "learning_rate": 3.914149140481766e-05, "loss": 0.4445, "loss_nan_ranks": 0, "loss_rank_avg": 0.1289260983467102, "step": 305, "valid_targets_mean": 14103.6, "valid_targets_min": 4383 }, { "epoch": 0.9393939393939394, "grad_norm": 0.27544900609307754, "learning_rate": 3.9079104199538256e-05, "loss": 0.4468, "loss_nan_ranks": 0, "loss_rank_avg": 0.12725478410720825, "step": 310, "valid_targets_mean": 13964.7, "valid_targets_min": 5266 }, { "epoch": 0.9545454545454546, "grad_norm": 0.3210299225048718, "learning_rate": 3.901458227495549e-05, "loss": 0.4455, "loss_nan_ranks": 0, "loss_rank_avg": 0.14999495446681976, "step": 315, "valid_targets_mean": 14422.9, "valid_targets_min": 4184 }, { "epoch": 0.9696969696969697, "grad_norm": 0.37741796391171567, "learning_rate": 3.8947932850286585e-05, "loss": 0.4453, "loss_nan_ranks": 0, "loss_rank_avg": 0.1468706727027893, "step": 320, "valid_targets_mean": 15338.0, "valid_targets_min": 4021 }, { "epoch": 0.9848484848484849, "grad_norm": 0.34149777743054666, "learning_rate": 3.887916338279014e-05, "loss": 0.44, "loss_nan_ranks": 0, "loss_rank_avg": 0.13318268954753876, "step": 325, "valid_targets_mean": 14689.9, "valid_targets_min": 7070 }, { "epoch": 1.0, "grad_norm": 0.2870293910765331, "learning_rate": 3.8808281566931675e-05, "loss": 0.4371, "loss_nan_ranks": 0, "loss_rank_avg": 0.15207594633102417, "step": 330, "valid_targets_mean": 15349.0, "valid_targets_min": 4947 }, { "epoch": 1.0151515151515151, "grad_norm": 0.36309118319901884, "learning_rate": 3.873529533352277e-05, "loss": 0.4349, "loss_nan_ranks": 0, "loss_rank_avg": 0.1315964013338089, "step": 335, "valid_targets_mean": 14656.5, "valid_targets_min": 2950 }, { "epoch": 1.0303030303030303, "grad_norm": 0.276061831173288, "learning_rate": 3.8660212848833705e-05, "loss": 0.4208, "loss_nan_ranks": 0, "loss_rank_avg": 0.1446404904127121, "step": 340, "valid_targets_mean": 15676.8, "valid_targets_min": 1267 }, { "epoch": 1.0454545454545454, "grad_norm": 0.26011173133082927, "learning_rate": 3.858304251367972e-05, "loss": 0.4322, "loss_nan_ranks": 0, "loss_rank_avg": 0.1397808939218521, "step": 345, "valid_targets_mean": 15134.7, "valid_targets_min": 4839 }, { "epoch": 1.0606060606060606, "grad_norm": 0.402489225579918, "learning_rate": 3.850379296248107e-05, "loss": 0.4323, "loss_nan_ranks": 0, "loss_rank_avg": 0.15310615301132202, "step": 350, "valid_targets_mean": 15064.1, "valid_targets_min": 5758 }, { "epoch": 1.0757575757575757, "grad_norm": 0.4129599441197316, "learning_rate": 3.8422473062297e-05, "loss": 0.4289, "loss_nan_ranks": 0, "loss_rank_avg": 0.13137058913707733, "step": 355, "valid_targets_mean": 14610.6, "valid_targets_min": 4768 }, { "epoch": 1.0909090909090908, "grad_norm": 0.3280334521493676, "learning_rate": 3.8339091911833545e-05, "loss": 0.4337, "loss_nan_ranks": 0, "loss_rank_avg": 0.15061558783054352, "step": 360, "valid_targets_mean": 14947.8, "valid_targets_min": 4483 }, { "epoch": 1.106060606060606, "grad_norm": 0.2996280506022408, "learning_rate": 3.825365884042553e-05, "loss": 0.4375, "loss_nan_ranks": 0, "loss_rank_avg": 0.15207991003990173, "step": 365, "valid_targets_mean": 15574.6, "valid_targets_min": 3271 }, { "epoch": 1.121212121212121, "grad_norm": 0.33383024855953686, "learning_rate": 3.8166183406992745e-05, "loss": 0.4302, "loss_nan_ranks": 0, "loss_rank_avg": 0.13507221639156342, "step": 370, "valid_targets_mean": 13192.1, "valid_targets_min": 2792 }, { "epoch": 1.1363636363636362, "grad_norm": 0.31909323691142444, "learning_rate": 3.807667539897041e-05, "loss": 0.4305, "loss_nan_ranks": 0, "loss_rank_avg": 0.13816970586776733, "step": 375, "valid_targets_mean": 15480.5, "valid_targets_min": 3988 }, { "epoch": 1.1515151515151516, "grad_norm": 0.31075543582482734, "learning_rate": 3.798514483121408e-05, "loss": 0.4309, "loss_nan_ranks": 0, "loss_rank_avg": 0.13919055461883545, "step": 380, "valid_targets_mean": 14907.4, "valid_targets_min": 6132 }, { "epoch": 1.1666666666666667, "grad_norm": 0.2983371224518664, "learning_rate": 3.789160194487908e-05, "loss": 0.4273, "loss_nan_ranks": 0, "loss_rank_avg": 0.13649028539657593, "step": 385, "valid_targets_mean": 15354.1, "valid_targets_min": 6891 }, { "epoch": 1.1818181818181819, "grad_norm": 0.3905433335206102, "learning_rate": 3.7796057206274686e-05, "loss": 0.4272, "loss_nan_ranks": 0, "loss_rank_avg": 0.14891022443771362, "step": 390, "valid_targets_mean": 16094.3, "valid_targets_min": 5444 }, { "epoch": 1.196969696969697, "grad_norm": 0.33769374363353943, "learning_rate": 3.769852130569304e-05, "loss": 0.4341, "loss_nan_ranks": 0, "loss_rank_avg": 0.15502581000328064, "step": 395, "valid_targets_mean": 15651.5, "valid_targets_min": 6372 }, { "epoch": 1.2121212121212122, "grad_norm": 0.29012360041686464, "learning_rate": 3.7599005156213066e-05, "loss": 0.4299, "loss_nan_ranks": 0, "loss_rank_avg": 0.13702401518821716, "step": 400, "valid_targets_mean": 14399.8, "valid_targets_min": 3372 }, { "epoch": 1.2272727272727273, "grad_norm": 0.31947365816457735, "learning_rate": 3.74975198924794e-05, "loss": 0.4297, "loss_nan_ranks": 0, "loss_rank_avg": 0.1433682143688202, "step": 405, "valid_targets_mean": 13850.6, "valid_targets_min": 3607 }, { "epoch": 1.2424242424242424, "grad_norm": 0.31718143050221165, "learning_rate": 3.739407686945658e-05, "loss": 0.4277, "loss_nan_ranks": 0, "loss_rank_avg": 0.14280961453914642, "step": 410, "valid_targets_mean": 16541.7, "valid_targets_min": 4460 }, { "epoch": 1.2575757575757576, "grad_norm": 0.45231696742504074, "learning_rate": 3.728868766115854e-05, "loss": 0.4296, "loss_nan_ranks": 0, "loss_rank_avg": 0.13743780553340912, "step": 415, "valid_targets_mean": 15450.7, "valid_targets_min": 3921 }, { "epoch": 1.2727272727272727, "grad_norm": 0.28084769935009196, "learning_rate": 3.718136405935365e-05, "loss": 0.4278, "loss_nan_ranks": 0, "loss_rank_avg": 0.1414492428302765, "step": 420, "valid_targets_mean": 16063.2, "valid_targets_min": 3552 }, { "epoch": 1.2878787878787878, "grad_norm": 0.3631765153651125, "learning_rate": 3.707211807224534e-05, "loss": 0.4276, "loss_nan_ranks": 0, "loss_rank_avg": 0.13481499254703522, "step": 425, "valid_targets_mean": 15713.6, "valid_targets_min": 5051 }, { "epoch": 1.303030303030303, "grad_norm": 0.3023685657164912, "learning_rate": 3.696096192312852e-05, "loss": 0.4309, "loss_nan_ranks": 0, "loss_rank_avg": 0.14772221446037292, "step": 430, "valid_targets_mean": 16079.5, "valid_targets_min": 6001 }, { "epoch": 1.3181818181818181, "grad_norm": 0.3557818222724191, "learning_rate": 3.684790804902199e-05, "loss": 0.4266, "loss_nan_ranks": 0, "loss_rank_avg": 0.14126922190189362, "step": 435, "valid_targets_mean": 15307.2, "valid_targets_min": 4925 }, { "epoch": 1.3333333333333333, "grad_norm": 0.28051924385178323, "learning_rate": 3.673296909927682e-05, "loss": 0.4253, "loss_nan_ranks": 0, "loss_rank_avg": 0.1278437376022339, "step": 440, "valid_targets_mean": 14616.2, "valid_targets_min": 4863 }, { "epoch": 1.3484848484848486, "grad_norm": 0.24904301125597772, "learning_rate": 3.661615793416109e-05, "loss": 0.4241, "loss_nan_ranks": 0, "loss_rank_avg": 0.1396869719028473, "step": 445, "valid_targets_mean": 15681.2, "valid_targets_min": 4148 }, { "epoch": 1.3636363636363638, "grad_norm": 0.23949545569806505, "learning_rate": 3.649748762342098e-05, "loss": 0.429, "loss_nan_ranks": 0, "loss_rank_avg": 0.14017486572265625, "step": 450, "valid_targets_mean": 15908.3, "valid_targets_min": 4265 }, { "epoch": 1.378787878787879, "grad_norm": 0.3421703032120446, "learning_rate": 3.637697144481839e-05, "loss": 0.4246, "loss_nan_ranks": 0, "loss_rank_avg": 0.14308439195156097, "step": 455, "valid_targets_mean": 15564.7, "valid_targets_min": 5007 }, { "epoch": 1.393939393939394, "grad_norm": 0.35779594775223517, "learning_rate": 3.625462288264536e-05, "loss": 0.4174, "loss_nan_ranks": 0, "loss_rank_avg": 0.14782720804214478, "step": 460, "valid_targets_mean": 15151.2, "valid_targets_min": 3472 }, { "epoch": 1.4090909090909092, "grad_norm": 0.3544625075767238, "learning_rate": 3.613045562621533e-05, "loss": 0.4223, "loss_nan_ranks": 0, "loss_rank_avg": 0.14591552317142487, "step": 465, "valid_targets_mean": 15464.0, "valid_targets_min": 5397 }, { "epoch": 1.4242424242424243, "grad_norm": 0.2739256716466335, "learning_rate": 3.600448356833146e-05, "loss": 0.4242, "loss_nan_ranks": 0, "loss_rank_avg": 0.1398126780986786, "step": 470, "valid_targets_mean": 15358.1, "valid_targets_min": 4249 }, { "epoch": 1.4393939393939394, "grad_norm": 0.2670796299577025, "learning_rate": 3.587672080373219e-05, "loss": 0.4253, "loss_nan_ranks": 0, "loss_rank_avg": 0.13911236822605133, "step": 475, "valid_targets_mean": 14657.8, "valid_targets_min": 4850 }, { "epoch": 1.4545454545454546, "grad_norm": 0.2716034921366766, "learning_rate": 3.574718162751426e-05, "loss": 0.4245, "loss_nan_ranks": 0, "loss_rank_avg": 0.1379561722278595, "step": 480, "valid_targets_mean": 16109.5, "valid_targets_min": 3613 }, { "epoch": 1.4696969696969697, "grad_norm": 0.23873812035866543, "learning_rate": 3.561588053353319e-05, "loss": 0.4248, "loss_nan_ranks": 0, "loss_rank_avg": 0.13202598690986633, "step": 485, "valid_targets_mean": 13867.1, "valid_targets_min": 1931 }, { "epoch": 1.4848484848484849, "grad_norm": 0.23167780418432207, "learning_rate": 3.5482832212781655e-05, "loss": 0.4208, "loss_nan_ranks": 0, "loss_rank_avg": 0.14886680245399475, "step": 490, "valid_targets_mean": 15954.8, "valid_targets_min": 3850 }, { "epoch": 1.5, "grad_norm": 0.27255259888582417, "learning_rate": 3.53480515517457e-05, "loss": 0.4254, "loss_nan_ranks": 0, "loss_rank_avg": 0.15491536259651184, "step": 495, "valid_targets_mean": 15829.9, "valid_targets_min": 3933 }, { "epoch": 1.5151515151515151, "grad_norm": 0.25403824366371247, "learning_rate": 3.5211553630739166e-05, "loss": 0.4262, "loss_nan_ranks": 0, "loss_rank_avg": 0.13875356316566467, "step": 500, "valid_targets_mean": 14701.8, "valid_targets_min": 5768 }, { "epoch": 1.5303030303030303, "grad_norm": 0.26870819873349766, "learning_rate": 3.5073353722216334e-05, "loss": 0.4184, "loss_nan_ranks": 0, "loss_rank_avg": 0.13507430255413055, "step": 505, "valid_targets_mean": 14002.1, "valid_targets_min": 3568 }, { "epoch": 1.5454545454545454, "grad_norm": 0.3584406752763479, "learning_rate": 3.4933467289063156e-05, "loss": 0.4205, "loss_nan_ranks": 0, "loss_rank_avg": 0.13867157697677612, "step": 510, "valid_targets_mean": 15053.2, "valid_targets_min": 3150 }, { "epoch": 1.5606060606060606, "grad_norm": 0.26154597010466046, "learning_rate": 3.4791909982867175e-05, "loss": 0.4188, "loss_nan_ranks": 0, "loss_rank_avg": 0.12970539927482605, "step": 515, "valid_targets_mean": 14832.3, "valid_targets_min": 5373 }, { "epoch": 1.5757575757575757, "grad_norm": 0.3362188714691207, "learning_rate": 3.464869764216622e-05, "loss": 0.4238, "loss_nan_ranks": 0, "loss_rank_avg": 0.12605808675289154, "step": 520, "valid_targets_mean": 14519.6, "valid_targets_min": 4981 }, { "epoch": 1.5909090909090908, "grad_norm": 0.28016874775885214, "learning_rate": 3.450384629067635e-05, "loss": 0.4249, "loss_nan_ranks": 0, "loss_rank_avg": 0.1421700119972229, "step": 525, "valid_targets_mean": 14849.1, "valid_targets_min": 5271 }, { "epoch": 1.606060606060606, "grad_norm": 0.31364330936128626, "learning_rate": 3.435737213549896e-05, "loss": 0.4188, "loss_nan_ranks": 0, "loss_rank_avg": 0.1353946477174759, "step": 530, "valid_targets_mean": 15212.0, "valid_targets_min": 3960 }, { "epoch": 1.621212121212121, "grad_norm": 0.3130590718949519, "learning_rate": 3.420929156530738e-05, "loss": 0.4191, "loss_nan_ranks": 0, "loss_rank_avg": 0.12870505452156067, "step": 535, "valid_targets_mean": 13804.9, "valid_targets_min": 4409 }, { "epoch": 1.6363636363636362, "grad_norm": 0.32648439826942116, "learning_rate": 3.405962114851324e-05, "loss": 0.4188, "loss_nan_ranks": 0, "loss_rank_avg": 0.13703852891921997, "step": 540, "valid_targets_mean": 14489.3, "valid_targets_min": 6422 }, { "epoch": 1.6515151515151514, "grad_norm": 0.23558741635634878, "learning_rate": 3.390837763141261e-05, "loss": 0.425, "loss_nan_ranks": 0, "loss_rank_avg": 0.1324833631515503, "step": 545, "valid_targets_mean": 15401.4, "valid_targets_min": 4438 }, { "epoch": 1.6666666666666665, "grad_norm": 0.2562753721750844, "learning_rate": 3.3755577936312344e-05, "loss": 0.4184, "loss_nan_ranks": 0, "loss_rank_avg": 0.14327028393745422, "step": 550, "valid_targets_mean": 15883.0, "valid_targets_min": 3181 }, { "epoch": 1.6818181818181817, "grad_norm": 0.2573839797506366, "learning_rate": 3.360123915963662e-05, "loss": 0.4202, "loss_nan_ranks": 0, "loss_rank_avg": 0.14354410767555237, "step": 555, "valid_targets_mean": 15241.0, "valid_targets_min": 4218 }, { "epoch": 1.696969696969697, "grad_norm": 0.2393022935992319, "learning_rate": 3.3445378570014125e-05, "loss": 0.4188, "loss_nan_ranks": 0, "loss_rank_avg": 0.14465731382369995, "step": 560, "valid_targets_mean": 16679.2, "valid_targets_min": 6613 }, { "epoch": 1.7121212121212122, "grad_norm": 0.3130578183647946, "learning_rate": 3.328801360634585e-05, "loss": 0.4231, "loss_nan_ranks": 0, "loss_rank_avg": 0.131614089012146, "step": 565, "valid_targets_mean": 15283.2, "valid_targets_min": 5069 }, { "epoch": 1.7272727272727273, "grad_norm": 0.22969999891245066, "learning_rate": 3.312916187585392e-05, "loss": 0.424, "loss_nan_ranks": 0, "loss_rank_avg": 0.14087988436222076, "step": 570, "valid_targets_mean": 15229.3, "valid_targets_min": 4248 }, { "epoch": 1.7424242424242424, "grad_norm": 0.25242347409882654, "learning_rate": 3.296884115211157e-05, "loss": 0.4202, "loss_nan_ranks": 0, "loss_rank_avg": 0.1289135217666626, "step": 575, "valid_targets_mean": 13637.0, "valid_targets_min": 1585 }, { "epoch": 1.7575757575757576, "grad_norm": 0.27395829853778514, "learning_rate": 3.280706937305445e-05, "loss": 0.4233, "loss_nan_ranks": 0, "loss_rank_avg": 0.13507923483848572, "step": 580, "valid_targets_mean": 14127.3, "valid_targets_min": 3212 }, { "epoch": 1.7727272727272727, "grad_norm": 0.25592535738698435, "learning_rate": 3.2643864638973645e-05, "loss": 0.4175, "loss_nan_ranks": 0, "loss_rank_avg": 0.13018617033958435, "step": 585, "valid_targets_mean": 14483.5, "valid_targets_min": 3185 }, { "epoch": 1.7878787878787878, "grad_norm": 0.24574039674480253, "learning_rate": 3.2479245210490434e-05, "loss": 0.4133, "loss_nan_ranks": 0, "loss_rank_avg": 0.14168305695056915, "step": 590, "valid_targets_mean": 15052.1, "valid_targets_min": 3733 }, { "epoch": 1.803030303030303, "grad_norm": 0.257946888158228, "learning_rate": 3.2313229506513167e-05, "loss": 0.4096, "loss_nan_ranks": 0, "loss_rank_avg": 0.11595556139945984, "step": 595, "valid_targets_mean": 13657.6, "valid_targets_min": 4073 }, { "epoch": 1.8181818181818183, "grad_norm": 0.2423108123895089, "learning_rate": 3.2145836102176424e-05, "loss": 0.4217, "loss_nan_ranks": 0, "loss_rank_avg": 0.13753411173820496, "step": 600, "valid_targets_mean": 14381.9, "valid_targets_min": 4035 }, { "epoch": 1.8333333333333335, "grad_norm": 0.3673066567758278, "learning_rate": 3.197708372676265e-05, "loss": 0.4177, "loss_nan_ranks": 0, "loss_rank_avg": 0.14095187187194824, "step": 605, "valid_targets_mean": 15813.5, "valid_targets_min": 7248 }, { "epoch": 1.8484848484848486, "grad_norm": 0.3410767287068366, "learning_rate": 3.1806991261606604e-05, "loss": 0.4236, "loss_nan_ranks": 0, "loss_rank_avg": 0.13289013504981995, "step": 610, "valid_targets_mean": 15133.6, "valid_targets_min": 5425 }, { "epoch": 1.8636363636363638, "grad_norm": 0.3928398693107199, "learning_rate": 3.163557773798276e-05, "loss": 0.4151, "loss_nan_ranks": 0, "loss_rank_avg": 0.13538768887519836, "step": 615, "valid_targets_mean": 15416.2, "valid_targets_min": 4358 }, { "epoch": 1.878787878787879, "grad_norm": 0.30018552288843936, "learning_rate": 3.146286233497593e-05, "loss": 0.4151, "loss_nan_ranks": 0, "loss_rank_avg": 0.12879617512226105, "step": 620, "valid_targets_mean": 14728.0, "valid_targets_min": 4779 }, { "epoch": 1.893939393939394, "grad_norm": 0.27871218248182866, "learning_rate": 3.128886437733539e-05, "loss": 0.4176, "loss_nan_ranks": 0, "loss_rank_avg": 0.14107659459114075, "step": 625, "valid_targets_mean": 16196.8, "valid_targets_min": 5645 }, { "epoch": 1.9090909090909092, "grad_norm": 0.31342858815473673, "learning_rate": 3.111360333331263e-05, "loss": 0.4189, "loss_nan_ranks": 0, "loss_rank_avg": 0.1214226558804512, "step": 630, "valid_targets_mean": 13641.1, "valid_targets_min": 3487 }, { "epoch": 1.9242424242424243, "grad_norm": 0.28759225199732685, "learning_rate": 3.093709881248312e-05, "loss": 0.4137, "loss_nan_ranks": 0, "loss_rank_avg": 0.12949924170970917, "step": 635, "valid_targets_mean": 15259.6, "valid_targets_min": 4815 }, { "epoch": 1.9393939393939394, "grad_norm": 0.4720940997340626, "learning_rate": 3.075937056355225e-05, "loss": 0.4187, "loss_nan_ranks": 0, "loss_rank_avg": 0.1339273750782013, "step": 640, "valid_targets_mean": 14075.0, "valid_targets_min": 4063 }, { "epoch": 1.9545454545454546, "grad_norm": 0.2820424534133633, "learning_rate": 3.0580438472145665e-05, "loss": 0.419, "loss_nan_ranks": 0, "loss_rank_avg": 0.13889871537685394, "step": 645, "valid_targets_mean": 13273.1, "valid_targets_min": 5062 }, { "epoch": 1.9696969696969697, "grad_norm": 0.2494556245622125, "learning_rate": 3.0400322558584308e-05, "loss": 0.4143, "loss_nan_ranks": 0, "loss_rank_avg": 0.13408786058425903, "step": 650, "valid_targets_mean": 14133.0, "valid_targets_min": 6186 }, { "epoch": 1.9848484848484849, "grad_norm": 0.2637423161671155, "learning_rate": 3.0219042975644415e-05, "loss": 0.4134, "loss_nan_ranks": 0, "loss_rank_avg": 0.144476979970932, "step": 655, "valid_targets_mean": 15395.8, "valid_targets_min": 4386 }, { "epoch": 2.0, "grad_norm": 0.2301269495494442, "learning_rate": 3.0036620006302624e-05, "loss": 0.4168, "loss_nan_ranks": 0, "loss_rank_avg": 0.1381838023662567, "step": 660, "valid_targets_mean": 15590.2, "valid_targets_min": 3399 }, { "epoch": 2.015151515151515, "grad_norm": 0.28406665058518976, "learning_rate": 2.9853074061466602e-05, "loss": 0.4032, "loss_nan_ranks": 0, "loss_rank_avg": 0.13899505138397217, "step": 665, "valid_targets_mean": 15553.7, "valid_targets_min": 6431 }, { "epoch": 2.0303030303030303, "grad_norm": 0.21285203894708732, "learning_rate": 2.9668425677691278e-05, "loss": 0.4022, "loss_nan_ranks": 0, "loss_rank_avg": 0.12502285838127136, "step": 670, "valid_targets_mean": 15203.8, "valid_targets_min": 5171 }, { "epoch": 2.0454545454545454, "grad_norm": 0.2445604222211269, "learning_rate": 2.948269551488108e-05, "loss": 0.403, "loss_nan_ranks": 0, "loss_rank_avg": 0.13765007257461548, "step": 675, "valid_targets_mean": 14323.1, "valid_targets_min": 3205 }, { "epoch": 2.0606060606060606, "grad_norm": 0.23550700899136703, "learning_rate": 2.929590435397832e-05, "loss": 0.4098, "loss_nan_ranks": 0, "loss_rank_avg": 0.1422361582517624, "step": 680, "valid_targets_mean": 16510.7, "valid_targets_min": 3186 }, { "epoch": 2.0757575757575757, "grad_norm": 0.26247099670181223, "learning_rate": 2.9108073094638066e-05, "loss": 0.399, "loss_nan_ranks": 0, "loss_rank_avg": 0.12394432723522186, "step": 685, "valid_targets_mean": 15157.7, "valid_targets_min": 4880 }, { "epoch": 2.090909090909091, "grad_norm": 0.27020259267918656, "learning_rate": 2.8919222752889727e-05, "loss": 0.4038, "loss_nan_ranks": 0, "loss_rank_avg": 0.12068557739257812, "step": 690, "valid_targets_mean": 14247.4, "valid_targets_min": 4780 }, { "epoch": 2.106060606060606, "grad_norm": 0.20481401345814657, "learning_rate": 2.8729374458785647e-05, "loss": 0.4027, "loss_nan_ranks": 0, "loss_rank_avg": 0.11181310564279556, "step": 695, "valid_targets_mean": 13585.7, "valid_targets_min": 6632 }, { "epoch": 2.121212121212121, "grad_norm": 0.24273965833842234, "learning_rate": 2.8538549454036838e-05, "loss": 0.4043, "loss_nan_ranks": 0, "loss_rank_avg": 0.1391579806804657, "step": 700, "valid_targets_mean": 15396.5, "valid_targets_min": 2746 }, { "epoch": 2.1363636363636362, "grad_norm": 0.23541344015418184, "learning_rate": 2.834676908963636e-05, "loss": 0.3985, "loss_nan_ranks": 0, "loss_rank_avg": 0.13219976425170898, "step": 705, "valid_targets_mean": 17154.9, "valid_targets_min": 6336 }, { "epoch": 2.1515151515151514, "grad_norm": 0.26258542741948365, "learning_rate": 2.815405482347037e-05, "loss": 0.4034, "loss_nan_ranks": 0, "loss_rank_avg": 0.13979566097259521, "step": 710, "valid_targets_mean": 15050.4, "valid_targets_min": 3770 }, { "epoch": 2.1666666666666665, "grad_norm": 0.2957549576693815, "learning_rate": 2.796042821791725e-05, "loss": 0.408, "loss_nan_ranks": 0, "loss_rank_avg": 0.140570729970932, "step": 715, "valid_targets_mean": 16650.1, "valid_targets_min": 7019 }, { "epoch": 2.1818181818181817, "grad_norm": 0.2674390870851971, "learning_rate": 2.776591093743505e-05, "loss": 0.4032, "loss_nan_ranks": 0, "loss_rank_avg": 0.13701064884662628, "step": 720, "valid_targets_mean": 15406.2, "valid_targets_min": 5127 }, { "epoch": 2.196969696969697, "grad_norm": 0.2751077668087683, "learning_rate": 2.7570524746137485e-05, "loss": 0.4031, "loss_nan_ranks": 0, "loss_rank_avg": 0.145437553524971, "step": 725, "valid_targets_mean": 17099.9, "valid_targets_min": 6081 }, { "epoch": 2.212121212121212, "grad_norm": 0.2390588517173356, "learning_rate": 2.7374291505358818e-05, "loss": 0.4014, "loss_nan_ranks": 0, "loss_rank_avg": 0.13979849219322205, "step": 730, "valid_targets_mean": 15471.2, "valid_targets_min": 3270 }, { "epoch": 2.227272727272727, "grad_norm": 0.2582220951331803, "learning_rate": 2.7177233171207817e-05, "loss": 0.3995, "loss_nan_ranks": 0, "loss_rank_avg": 0.14698559045791626, "step": 735, "valid_targets_mean": 15787.5, "valid_targets_min": 2702 }, { "epoch": 2.242424242424242, "grad_norm": 0.23340513449605393, "learning_rate": 2.6979371792111147e-05, "loss": 0.3992, "loss_nan_ranks": 0, "loss_rank_avg": 0.13736939430236816, "step": 740, "valid_targets_mean": 15320.6, "valid_targets_min": 4022 }, { "epoch": 2.257575757575758, "grad_norm": 0.2536860517731456, "learning_rate": 2.678072950634641e-05, "loss": 0.4036, "loss_nan_ranks": 0, "loss_rank_avg": 0.1464948058128357, "step": 745, "valid_targets_mean": 16852.1, "valid_targets_min": 6696 }, { "epoch": 2.2727272727272725, "grad_norm": 0.2638340109418724, "learning_rate": 2.6581328539565184e-05, "loss": 0.3975, "loss_nan_ranks": 0, "loss_rank_avg": 0.14072707295417786, "step": 750, "valid_targets_mean": 16809.4, "valid_targets_min": 7765 }, { "epoch": 2.287878787878788, "grad_norm": 0.24108039938243508, "learning_rate": 2.638119120230616e-05, "loss": 0.4014, "loss_nan_ranks": 0, "loss_rank_avg": 0.1345876157283783, "step": 755, "valid_targets_mean": 15781.1, "valid_targets_min": 2401 }, { "epoch": 2.303030303030303, "grad_norm": 0.25499518648680075, "learning_rate": 2.618033988749895e-05, "loss": 0.4014, "loss_nan_ranks": 0, "loss_rank_avg": 0.12714101374149323, "step": 760, "valid_targets_mean": 14556.9, "valid_targets_min": 3789 }, { "epoch": 2.3181818181818183, "grad_norm": 0.2412901463663965, "learning_rate": 2.5978797067958542e-05, "loss": 0.4036, "loss_nan_ranks": 0, "loss_rank_avg": 0.13545730710029602, "step": 765, "valid_targets_mean": 16024.8, "valid_targets_min": 7272 }, { "epoch": 2.3333333333333335, "grad_norm": 0.23198272890585742, "learning_rate": 2.5776585293870877e-05, "loss": 0.407, "loss_nan_ranks": 0, "loss_rank_avg": 0.1482217013835907, "step": 770, "valid_targets_mean": 17007.0, "valid_targets_min": 6087 }, { "epoch": 2.3484848484848486, "grad_norm": 0.2227473278480235, "learning_rate": 2.557372719026976e-05, "loss": 0.4031, "loss_nan_ranks": 0, "loss_rank_avg": 0.1505884975194931, "step": 775, "valid_targets_mean": 16449.6, "valid_targets_min": 6102 }, { "epoch": 2.3636363636363638, "grad_norm": 0.2387955795302762, "learning_rate": 2.537024545450539e-05, "loss": 0.4047, "loss_nan_ranks": 0, "loss_rank_avg": 0.13995446264743805, "step": 780, "valid_targets_mean": 14589.6, "valid_targets_min": 3986 }, { "epoch": 2.378787878787879, "grad_norm": 0.22960591186951138, "learning_rate": 2.5166162853704825e-05, "loss": 0.4016, "loss_nan_ranks": 0, "loss_rank_avg": 0.132080078125, "step": 785, "valid_targets_mean": 14911.7, "valid_targets_min": 3358 }, { "epoch": 2.393939393939394, "grad_norm": 0.2317325527335211, "learning_rate": 2.496150222222458e-05, "loss": 0.3987, "loss_nan_ranks": 0, "loss_rank_avg": 0.11762173473834991, "step": 790, "valid_targets_mean": 14146.1, "valid_targets_min": 4815 }, { "epoch": 2.409090909090909, "grad_norm": 0.23950789716559473, "learning_rate": 2.475628645909576e-05, "loss": 0.3967, "loss_nan_ranks": 0, "loss_rank_avg": 0.14730790257453918, "step": 795, "valid_targets_mean": 16316.9, "valid_targets_min": 4600 }, { "epoch": 2.4242424242424243, "grad_norm": 0.2194727783836063, "learning_rate": 2.4550538525461963e-05, "loss": 0.4029, "loss_nan_ranks": 0, "loss_rank_avg": 0.15803992748260498, "step": 800, "valid_targets_mean": 16688.9, "valid_targets_min": 6363 }, { "epoch": 2.4393939393939394, "grad_norm": 0.20726302913342787, "learning_rate": 2.434428144201016e-05, "loss": 0.3994, "loss_nan_ranks": 0, "loss_rank_avg": 0.14212462306022644, "step": 805, "valid_targets_mean": 16110.2, "valid_targets_min": 4638 }, { "epoch": 2.4545454545454546, "grad_norm": 0.23512860005080613, "learning_rate": 2.4137538286394976e-05, "loss": 0.4021, "loss_nan_ranks": 0, "loss_rank_avg": 0.14163710176944733, "step": 810, "valid_targets_mean": 17403.3, "valid_targets_min": 3852 }, { "epoch": 2.4696969696969697, "grad_norm": 0.2631327448975476, "learning_rate": 2.3930332190656604e-05, "loss": 0.4019, "loss_nan_ranks": 0, "loss_rank_avg": 0.12924142181873322, "step": 815, "valid_targets_mean": 13943.7, "valid_targets_min": 4675 }, { "epoch": 2.484848484848485, "grad_norm": 0.2794719573674816, "learning_rate": 2.3722686338632602e-05, "loss": 0.4023, "loss_nan_ranks": 0, "loss_rank_avg": 0.1343647837638855, "step": 820, "valid_targets_mean": 14427.6, "valid_targets_min": 3926 }, { "epoch": 2.5, "grad_norm": 0.25305791183693377, "learning_rate": 2.3514623963363886e-05, "loss": 0.3971, "loss_nan_ranks": 0, "loss_rank_avg": 0.12937594950199127, "step": 825, "valid_targets_mean": 15417.3, "valid_targets_min": 3750 }, { "epoch": 2.515151515151515, "grad_norm": 0.22286330070889673, "learning_rate": 2.330616834449525e-05, "loss": 0.4024, "loss_nan_ranks": 0, "loss_rank_avg": 0.14154498279094696, "step": 830, "valid_targets_mean": 15068.0, "valid_targets_min": 5629 }, { "epoch": 2.5303030303030303, "grad_norm": 0.27527212660583167, "learning_rate": 2.309734280567065e-05, "loss": 0.397, "loss_nan_ranks": 0, "loss_rank_avg": 0.1428302824497223, "step": 835, "valid_targets_mean": 15865.2, "valid_targets_min": 3471 }, { "epoch": 2.5454545454545454, "grad_norm": 0.2013095273828789, "learning_rate": 2.28881707119236e-05, "loss": 0.4034, "loss_nan_ranks": 0, "loss_rank_avg": 0.1361142098903656, "step": 840, "valid_targets_mean": 15358.7, "valid_targets_min": 5102 }, { "epoch": 2.5606060606060606, "grad_norm": 0.20518416839823062, "learning_rate": 2.267867546706287e-05, "loss": 0.3978, "loss_nan_ranks": 0, "loss_rank_avg": 0.12422050535678864, "step": 845, "valid_targets_mean": 14425.1, "valid_targets_min": 6548 }, { "epoch": 2.5757575757575757, "grad_norm": 0.23015921617387777, "learning_rate": 2.2468880511053896e-05, "loss": 0.3996, "loss_nan_ranks": 0, "loss_rank_avg": 0.13271577656269073, "step": 850, "valid_targets_mean": 14928.2, "valid_targets_min": 3566 }, { "epoch": 2.590909090909091, "grad_norm": 0.22763155127997378, "learning_rate": 2.2258809317396163e-05, "loss": 0.4005, "loss_nan_ranks": 0, "loss_rank_avg": 0.13653181493282318, "step": 855, "valid_targets_mean": 15972.4, "valid_targets_min": 6524 }, { "epoch": 2.606060606060606, "grad_norm": 0.20780381709855494, "learning_rate": 2.2048485390496757e-05, "loss": 0.3993, "loss_nan_ranks": 0, "loss_rank_avg": 0.1356944590806961, "step": 860, "valid_targets_mean": 16200.0, "valid_targets_min": 4921 }, { "epoch": 2.621212121212121, "grad_norm": 0.21150841424868858, "learning_rate": 2.1837932263040553e-05, "loss": 0.4028, "loss_nan_ranks": 0, "loss_rank_avg": 0.13849398493766785, "step": 865, "valid_targets_mean": 14841.7, "valid_targets_min": 5522 }, { "epoch": 2.6363636363636362, "grad_norm": 0.19123760683599647, "learning_rate": 2.1627173493357167e-05, "loss": 0.3992, "loss_nan_ranks": 0, "loss_rank_avg": 0.1281318962574005, "step": 870, "valid_targets_mean": 14613.7, "valid_targets_min": 5498 }, { "epoch": 2.6515151515151514, "grad_norm": 0.20328337012713912, "learning_rate": 2.1416232662785084e-05, "loss": 0.4002, "loss_nan_ranks": 0, "loss_rank_avg": 0.13393211364746094, "step": 875, "valid_targets_mean": 15199.2, "valid_targets_min": 4813 }, { "epoch": 2.6666666666666665, "grad_norm": 0.2106198865750161, "learning_rate": 2.1205133373033173e-05, "loss": 0.3987, "loss_nan_ranks": 0, "loss_rank_avg": 0.12791499495506287, "step": 880, "valid_targets_mean": 14866.7, "valid_targets_min": 3781 }, { "epoch": 2.6818181818181817, "grad_norm": 0.20369389176189004, "learning_rate": 2.0993899243539953e-05, "loss": 0.3989, "loss_nan_ranks": 0, "loss_rank_avg": 0.13717877864837646, "step": 885, "valid_targets_mean": 14771.3, "valid_targets_min": 5034 }, { "epoch": 2.6969696969696972, "grad_norm": 0.20309062401939268, "learning_rate": 2.0782553908830887e-05, "loss": 0.3975, "loss_nan_ranks": 0, "loss_rank_avg": 0.1538437306880951, "step": 890, "valid_targets_mean": 16498.7, "valid_targets_min": 3952 }, { "epoch": 2.712121212121212, "grad_norm": 0.20491160735105923, "learning_rate": 2.0571121015873924e-05, "loss": 0.3995, "loss_nan_ranks": 0, "loss_rank_avg": 0.12048079073429108, "step": 895, "valid_targets_mean": 14294.9, "valid_targets_min": 4430 }, { "epoch": 2.7272727272727275, "grad_norm": 0.19593978433356782, "learning_rate": 2.0359624221433728e-05, "loss": 0.3978, "loss_nan_ranks": 0, "loss_rank_avg": 0.1178448349237442, "step": 900, "valid_targets_mean": 12869.5, "valid_targets_min": 5145 }, { "epoch": 2.742424242424242, "grad_norm": 0.29488921317193234, "learning_rate": 2.014808718942476e-05, "loss": 0.3994, "loss_nan_ranks": 0, "loss_rank_avg": 0.12907974421977997, "step": 905, "valid_targets_mean": 14723.8, "valid_targets_min": 6297 }, { "epoch": 2.757575757575758, "grad_norm": 0.2459744310035717, "learning_rate": 1.9936533588263557e-05, "loss": 0.4003, "loss_nan_ranks": 0, "loss_rank_avg": 0.13112419843673706, "step": 910, "valid_targets_mean": 14698.9, "valid_targets_min": 3593 }, { "epoch": 2.7727272727272725, "grad_norm": 0.19945761974335, "learning_rate": 1.9724987088220565e-05, "loss": 0.4004, "loss_nan_ranks": 0, "loss_rank_avg": 0.12379374355077744, "step": 915, "valid_targets_mean": 14036.3, "valid_targets_min": 5990 }, { "epoch": 2.787878787878788, "grad_norm": 0.20489817601379762, "learning_rate": 1.951347135877169e-05, "loss": 0.3986, "loss_nan_ranks": 0, "loss_rank_avg": 0.13167142868041992, "step": 920, "valid_targets_mean": 15465.9, "valid_targets_min": 5550 }, { "epoch": 2.8030303030303028, "grad_norm": 0.20180242794079498, "learning_rate": 1.930201006594999e-05, "loss": 0.3983, "loss_nan_ranks": 0, "loss_rank_avg": 0.12798179686069489, "step": 925, "valid_targets_mean": 14284.5, "valid_targets_min": 4595 }, { "epoch": 2.8181818181818183, "grad_norm": 0.21789914003171218, "learning_rate": 1.9090626869697714e-05, "loss": 0.3976, "loss_nan_ranks": 0, "loss_rank_avg": 0.1261674016714096, "step": 930, "valid_targets_mean": 14832.6, "valid_targets_min": 3916 }, { "epoch": 2.8333333333333335, "grad_norm": 0.20986970672170555, "learning_rate": 1.8879345421219063e-05, "loss": 0.395, "loss_nan_ranks": 0, "loss_rank_avg": 0.13479703664779663, "step": 935, "valid_targets_mean": 15754.2, "valid_targets_min": 2824 }, { "epoch": 2.8484848484848486, "grad_norm": 0.2232787287586755, "learning_rate": 1.8668189360333923e-05, "loss": 0.3995, "loss_nan_ranks": 0, "loss_rank_avg": 0.14347530901432037, "step": 940, "valid_targets_mean": 15561.6, "valid_targets_min": 3982 }, { "epoch": 2.8636363636363638, "grad_norm": 0.22167749659610708, "learning_rate": 1.845718231283281e-05, "loss": 0.4025, "loss_nan_ranks": 0, "loss_rank_avg": 0.12210417538881302, "step": 945, "valid_targets_mean": 13407.8, "valid_targets_min": 4874 }, { "epoch": 2.878787878787879, "grad_norm": 0.20761225343562079, "learning_rate": 1.8246347887833457e-05, "loss": 0.3966, "loss_nan_ranks": 0, "loss_rank_avg": 0.13165885210037231, "step": 950, "valid_targets_mean": 14655.1, "valid_targets_min": 3867 }, { "epoch": 2.893939393939394, "grad_norm": 0.20391281242171244, "learning_rate": 1.8035709675139258e-05, "loss": 0.3966, "loss_nan_ranks": 0, "loss_rank_avg": 0.14313405752182007, "step": 955, "valid_targets_mean": 15892.5, "valid_targets_min": 3390 }, { "epoch": 2.909090909090909, "grad_norm": 0.20034367259133895, "learning_rate": 1.7825291242599837e-05, "loss": 0.4008, "loss_nan_ranks": 0, "loss_rank_avg": 0.14369603991508484, "step": 960, "valid_targets_mean": 17404.2, "valid_targets_min": 8916 }, { "epoch": 2.9242424242424243, "grad_norm": 0.18368085390931338, "learning_rate": 1.7615116133474084e-05, "loss": 0.4013, "loss_nan_ranks": 0, "loss_rank_avg": 0.13607482612133026, "step": 965, "valid_targets_mean": 15710.0, "valid_targets_min": 6646 }, { "epoch": 2.9393939393939394, "grad_norm": 0.19418731665454458, "learning_rate": 1.7405207863795966e-05, "loss": 0.397, "loss_nan_ranks": 0, "loss_rank_avg": 0.1205495223402977, "step": 970, "valid_targets_mean": 13890.4, "valid_targets_min": 4121 }, { "epoch": 2.9545454545454546, "grad_norm": 0.18400200321758117, "learning_rate": 1.719558991974339e-05, "loss": 0.3986, "loss_nan_ranks": 0, "loss_rank_avg": 0.14622147381305695, "step": 975, "valid_targets_mean": 16142.8, "valid_targets_min": 5173 }, { "epoch": 2.9696969696969697, "grad_norm": 0.18605958040871526, "learning_rate": 1.698628575501034e-05, "loss": 0.3955, "loss_nan_ranks": 0, "loss_rank_avg": 0.15678343176841736, "step": 980, "valid_targets_mean": 17844.7, "valid_targets_min": 5582 }, { "epoch": 2.984848484848485, "grad_norm": 0.18691576935828805, "learning_rate": 1.6777318788182723e-05, "loss": 0.4034, "loss_nan_ranks": 0, "loss_rank_avg": 0.1485750377178192, "step": 985, "valid_targets_mean": 16792.2, "valid_targets_min": 5099 }, { "epoch": 3.0, "grad_norm": 0.20524703713347764, "learning_rate": 1.6568712400118102e-05, "loss": 0.392, "loss_nan_ranks": 0, "loss_rank_avg": 0.12552806735038757, "step": 990, "valid_targets_mean": 14872.3, "valid_targets_min": 2839 }, { "epoch": 3.015151515151515, "grad_norm": 0.20201499679456203, "learning_rate": 1.636048993132969e-05, "loss": 0.386, "loss_nan_ranks": 0, "loss_rank_avg": 0.12483127415180206, "step": 995, "valid_targets_mean": 14800.0, "valid_targets_min": 6232 }, { "epoch": 3.0303030303030303, "grad_norm": 0.2139699492207323, "learning_rate": 1.615267467937479e-05, "loss": 0.3863, "loss_nan_ranks": 0, "loss_rank_avg": 0.13610896468162537, "step": 1000, "valid_targets_mean": 15750.8, "valid_targets_min": 2792 }, { "epoch": 3.0454545454545454, "grad_norm": 0.18244176766072634, "learning_rate": 1.59452898962481e-05, "loss": 0.3856, "loss_nan_ranks": 0, "loss_rank_avg": 0.14060482382774353, "step": 1005, "valid_targets_mean": 16487.1, "valid_targets_min": 5947 }, { "epoch": 3.0606060606060606, "grad_norm": 0.23920001444657255, "learning_rate": 1.573835878578013e-05, "loss": 0.3942, "loss_nan_ranks": 0, "loss_rank_avg": 0.12809117138385773, "step": 1010, "valid_targets_mean": 15580.5, "valid_targets_min": 3920 }, { "epoch": 3.0757575757575757, "grad_norm": 0.239805468147421, "learning_rate": 1.5531904501040917e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.13520100712776184, "step": 1015, "valid_targets_mean": 15625.3, "valid_targets_min": 3340 }, { "epoch": 3.090909090909091, "grad_norm": 0.19295973970998992, "learning_rate": 1.5325950141749522e-05, "loss": 0.3839, "loss_nan_ranks": 0, "loss_rank_avg": 0.14272280037403107, "step": 1020, "valid_targets_mean": 16487.4, "valid_targets_min": 7824 }, { "epoch": 3.106060606060606, "grad_norm": 0.20762988883475636, "learning_rate": 1.5120518751689438e-05, "loss": 0.3886, "loss_nan_ranks": 0, "loss_rank_avg": 0.12633001804351807, "step": 1025, "valid_targets_mean": 15774.8, "valid_targets_min": 3584 }, { "epoch": 3.121212121212121, "grad_norm": 0.21762044442781273, "learning_rate": 1.4915633316130267e-05, "loss": 0.3815, "loss_nan_ranks": 0, "loss_rank_avg": 0.13853636384010315, "step": 1030, "valid_targets_mean": 16518.2, "valid_targets_min": 5029 }, { "epoch": 3.1363636363636362, "grad_norm": 0.22228023340753078, "learning_rate": 1.4711316759255963e-05, "loss": 0.3843, "loss_nan_ranks": 0, "loss_rank_avg": 0.1272241175174713, "step": 1035, "valid_targets_mean": 13752.9, "valid_targets_min": 3877 }, { "epoch": 3.1515151515151514, "grad_norm": 0.18759116521143898, "learning_rate": 1.450759194159987e-05, "loss": 0.3878, "loss_nan_ranks": 0, "loss_rank_avg": 0.1292319893836975, "step": 1040, "valid_targets_mean": 15550.9, "valid_targets_min": 3552 }, { "epoch": 3.1666666666666665, "grad_norm": 0.22202339372765256, "learning_rate": 1.4304481657486955e-05, "loss": 0.3874, "loss_nan_ranks": 0, "loss_rank_avg": 0.13702820241451263, "step": 1045, "valid_targets_mean": 16725.6, "valid_targets_min": 7579 }, { "epoch": 3.1818181818181817, "grad_norm": 0.20389491383532993, "learning_rate": 1.4102008632483344e-05, "loss": 0.383, "loss_nan_ranks": 0, "loss_rank_avg": 0.12035155296325684, "step": 1050, "valid_targets_mean": 13422.4, "valid_targets_min": 3566 }, { "epoch": 3.196969696969697, "grad_norm": 0.1994171138487457, "learning_rate": 1.3900195520853628e-05, "loss": 0.3835, "loss_nan_ranks": 0, "loss_rank_avg": 0.1312486231327057, "step": 1055, "valid_targets_mean": 14952.1, "valid_targets_min": 4675 }, { "epoch": 3.212121212121212, "grad_norm": 0.1797074361183551, "learning_rate": 1.3699064903026149e-05, "loss": 0.3847, "loss_nan_ranks": 0, "loss_rank_avg": 0.12870073318481445, "step": 1060, "valid_targets_mean": 15102.5, "valid_targets_min": 5087 }, { "epoch": 3.227272727272727, "grad_norm": 0.18502274249189762, "learning_rate": 1.34986392830665e-05, "loss": 0.3854, "loss_nan_ranks": 0, "loss_rank_avg": 0.1210193783044815, "step": 1065, "valid_targets_mean": 13492.0, "valid_targets_min": 4595 }, { "epoch": 3.242424242424242, "grad_norm": 0.1838392123173521, "learning_rate": 1.3298941086159598e-05, "loss": 0.3861, "loss_nan_ranks": 0, "loss_rank_avg": 0.12977665662765503, "step": 1070, "valid_targets_mean": 14951.2, "valid_targets_min": 3850 }, { "epoch": 3.257575757575758, "grad_norm": 0.1860250488205784, "learning_rate": 1.3099992656100592e-05, "loss": 0.381, "loss_nan_ranks": 0, "loss_rank_avg": 0.1121118813753128, "step": 1075, "valid_targets_mean": 13794.9, "valid_targets_min": 4445 }, { "epoch": 3.2727272727272725, "grad_norm": 0.1906435044849517, "learning_rate": 1.2901816252794848e-05, "loss": 0.3837, "loss_nan_ranks": 0, "loss_rank_avg": 0.12230115383863449, "step": 1080, "valid_targets_mean": 14962.3, "valid_targets_min": 6351 }, { "epoch": 3.287878787878788, "grad_norm": 0.19806332929237902, "learning_rate": 1.2704434049767356e-05, "loss": 0.387, "loss_nan_ranks": 0, "loss_rank_avg": 0.1422664374113083, "step": 1085, "valid_targets_mean": 17608.7, "valid_targets_min": 6032 }, { "epoch": 3.303030303030303, "grad_norm": 0.1835996076247742, "learning_rate": 1.250786813168176e-05, "loss": 0.3853, "loss_nan_ranks": 0, "loss_rank_avg": 0.13839831948280334, "step": 1090, "valid_targets_mean": 15443.4, "valid_targets_min": 5234 }, { "epoch": 3.3181818181818183, "grad_norm": 0.21384278054170996, "learning_rate": 1.2312140491869369e-05, "loss": 0.3835, "loss_nan_ranks": 0, "loss_rank_avg": 0.1129048764705658, "step": 1095, "valid_targets_mean": 13407.2, "valid_targets_min": 2746 }, { "epoch": 3.3333333333333335, "grad_norm": 0.1818098211773201, "learning_rate": 1.2117273029868362e-05, "loss": 0.3855, "loss_nan_ranks": 0, "loss_rank_avg": 0.12763696908950806, "step": 1100, "valid_targets_mean": 14486.6, "valid_targets_min": 6386 }, { "epoch": 3.3484848484848486, "grad_norm": 0.20710370150037746, "learning_rate": 1.1923287548973508e-05, "loss": 0.3845, "loss_nan_ranks": 0, "loss_rank_avg": 0.11949425935745239, "step": 1105, "valid_targets_mean": 15547.2, "valid_targets_min": 6096 }, { "epoch": 3.3636363636363638, "grad_norm": 0.2216599163029605, "learning_rate": 1.1730205753796631e-05, "loss": 0.388, "loss_nan_ranks": 0, "loss_rank_avg": 0.13013041019439697, "step": 1110, "valid_targets_mean": 16238.2, "valid_targets_min": 2701 }, { "epoch": 3.378787878787879, "grad_norm": 0.16276094809715302, "learning_rate": 1.1538049247838128e-05, "loss": 0.3865, "loss_nan_ranks": 0, "loss_rank_avg": 0.1291944533586502, "step": 1115, "valid_targets_mean": 16817.2, "valid_targets_min": 5225 }, { "epoch": 3.393939393939394, "grad_norm": 0.1826167977343321, "learning_rate": 1.134683953106983e-05, "loss": 0.3855, "loss_nan_ranks": 0, "loss_rank_avg": 0.11070194095373154, "step": 1120, "valid_targets_mean": 13068.9, "valid_targets_min": 3355 }, { "epoch": 3.409090909090909, "grad_norm": 0.18012880538402126, "learning_rate": 1.115659799752938e-05, "loss": 0.3824, "loss_nan_ranks": 0, "loss_rank_avg": 0.13216200470924377, "step": 1125, "valid_targets_mean": 15118.6, "valid_targets_min": 6631 }, { "epoch": 3.4242424242424243, "grad_norm": 0.18366888272284204, "learning_rate": 1.096734593292649e-05, "loss": 0.3817, "loss_nan_ranks": 0, "loss_rank_avg": 0.132125124335289, "step": 1130, "valid_targets_mean": 16672.8, "valid_targets_min": 6299 }, { "epoch": 3.4393939393939394, "grad_norm": 0.20226724300959517, "learning_rate": 1.077910451226138e-05, "loss": 0.388, "loss_nan_ranks": 0, "loss_rank_avg": 0.13653744757175446, "step": 1135, "valid_targets_mean": 15401.3, "valid_targets_min": 2991 }, { "epoch": 3.4545454545454546, "grad_norm": 0.2069362208879119, "learning_rate": 1.0591894797455526e-05, "loss": 0.3895, "loss_nan_ranks": 0, "loss_rank_avg": 0.12484444677829742, "step": 1140, "valid_targets_mean": 14162.0, "valid_targets_min": 5335 }, { "epoch": 3.4696969696969697, "grad_norm": 0.1830668026563319, "learning_rate": 1.0405737734995083e-05, "loss": 0.3889, "loss_nan_ranks": 0, "loss_rank_avg": 0.12676018476486206, "step": 1145, "valid_targets_mean": 14280.0, "valid_targets_min": 3472 }, { "epoch": 3.484848484848485, "grad_norm": 0.19111910951479347, "learning_rate": 1.0220654153587225e-05, "loss": 0.3868, "loss_nan_ranks": 0, "loss_rank_avg": 0.12387235462665558, "step": 1150, "valid_targets_mean": 14188.9, "valid_targets_min": 3640 }, { "epoch": 3.5, "grad_norm": 0.18368088740310942, "learning_rate": 1.00366647618297e-05, "loss": 0.3827, "loss_nan_ranks": 0, "loss_rank_avg": 0.10632310807704926, "step": 1155, "valid_targets_mean": 13244.9, "valid_targets_min": 4697 }, { "epoch": 3.515151515151515, "grad_norm": 0.17788062044386901, "learning_rate": 9.853790145893742e-06, "loss": 0.39, "loss_nan_ranks": 0, "loss_rank_avg": 0.12414000928401947, "step": 1160, "valid_targets_mean": 14982.0, "valid_targets_min": 3896 }, { "epoch": 3.5303030303030303, "grad_norm": 0.1784902396662646, "learning_rate": 9.672050767220765e-06, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.1390392780303955, "step": 1165, "valid_targets_mean": 16485.2, "valid_targets_min": 6081 }, { "epoch": 3.5454545454545454, "grad_norm": 0.1946533441284041, "learning_rate": 9.491466960232955e-06, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.12741807103157043, "step": 1170, "valid_targets_mean": 15234.1, "valid_targets_min": 1570 }, { "epoch": 3.5606060606060606, "grad_norm": 0.1927912363049172, "learning_rate": 9.312058930058114e-06, "loss": 0.3908, "loss_nan_ranks": 0, "loss_rank_avg": 0.13081388175487518, "step": 1175, "valid_targets_mean": 15956.8, "valid_targets_min": 3704 }, { "epoch": 3.5757575757575757, "grad_norm": 0.1784659315169825, "learning_rate": 9.133846750268945e-06, "loss": 0.3863, "loss_nan_ranks": 0, "loss_rank_avg": 0.12337636202573776, "step": 1180, "valid_targets_mean": 14114.9, "valid_targets_min": 4155 }, { "epoch": 3.590909090909091, "grad_norm": 0.17640977231862776, "learning_rate": 8.956850360637046e-06, "loss": 0.3845, "loss_nan_ranks": 0, "loss_rank_avg": 0.14645136892795563, "step": 1185, "valid_targets_mean": 16319.7, "valid_targets_min": 4803 }, { "epoch": 3.606060606060606, "grad_norm": 0.16321215876127312, "learning_rate": 8.78108956490194e-06, "loss": 0.3872, "loss_nan_ranks": 0, "loss_rank_avg": 0.1280919909477234, "step": 1190, "valid_targets_mean": 15500.8, "valid_targets_min": 4314 }, { "epoch": 3.621212121212121, "grad_norm": 0.1680205179173797, "learning_rate": 8.606584028555225e-06, "loss": 0.384, "loss_nan_ranks": 0, "loss_rank_avg": 0.12240514159202576, "step": 1195, "valid_targets_mean": 14351.5, "valid_targets_min": 3453 }, { "epoch": 3.6363636363636362, "grad_norm": 0.19907785765048092, "learning_rate": 8.43335327664027e-06, "loss": 0.3913, "loss_nan_ranks": 0, "loss_rank_avg": 0.11995619535446167, "step": 1200, "valid_targets_mean": 15079.6, "valid_targets_min": 4701 }, { "epoch": 3.6515151515151514, "grad_norm": 0.21371967500820704, "learning_rate": 8.261416691567601e-06, "loss": 0.3753, "loss_nan_ranks": 0, "loss_rank_avg": 0.13895614445209503, "step": 1205, "valid_targets_mean": 16545.2, "valid_targets_min": 5321 }, { "epoch": 3.6666666666666665, "grad_norm": 0.19506572014903165, "learning_rate": 8.090793510946242e-06, "loss": 0.3803, "loss_nan_ranks": 0, "loss_rank_avg": 0.12509584426879883, "step": 1210, "valid_targets_mean": 15488.8, "valid_targets_min": 4785 }, { "epoch": 3.6818181818181817, "grad_norm": 0.19064347792276345, "learning_rate": 7.921502825431258e-06, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.12347602099180222, "step": 1215, "valid_targets_mean": 14360.5, "valid_targets_min": 3511 }, { "epoch": 3.6969696969696972, "grad_norm": 0.182287532948687, "learning_rate": 7.753563576587753e-06, "loss": 0.3766, "loss_nan_ranks": 0, "loss_rank_avg": 0.11818146705627441, "step": 1220, "valid_targets_mean": 13722.8, "valid_targets_min": 5643 }, { "epoch": 3.712121212121212, "grad_norm": 0.17895536092055953, "learning_rate": 7.5869945547715275e-06, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.12111832946538925, "step": 1225, "valid_targets_mean": 14735.2, "valid_targets_min": 3745 }, { "epoch": 3.7272727272727275, "grad_norm": 0.16815427567438399, "learning_rate": 7.421814397026674e-06, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.1355920135974884, "step": 1230, "valid_targets_mean": 17361.2, "valid_targets_min": 4950 }, { "epoch": 3.742424242424242, "grad_norm": 0.1771976885465761, "learning_rate": 7.258041585000317e-06, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.1293790638446808, "step": 1235, "valid_targets_mean": 15619.1, "valid_targets_min": 6005 }, { "epoch": 3.757575757575758, "grad_norm": 0.17415525773658588, "learning_rate": 7.095694442874743e-06, "loss": 0.3756, "loss_nan_ranks": 0, "loss_rank_avg": 0.1277199685573578, "step": 1240, "valid_targets_mean": 15341.6, "valid_targets_min": 5202 }, { "epoch": 3.7727272727272725, "grad_norm": 0.15865554989600855, "learning_rate": 6.934791135317147e-06, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.1316549926996231, "step": 1245, "valid_targets_mean": 16527.7, "valid_targets_min": 4140 }, { "epoch": 3.787878787878788, "grad_norm": 0.16688937849086424, "learning_rate": 6.775349665447222e-06, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.13643357157707214, "step": 1250, "valid_targets_mean": 15461.1, "valid_targets_min": 3340 }, { "epoch": 3.8030303030303028, "grad_norm": 0.16416148418612428, "learning_rate": 6.617387872822842e-06, "loss": 0.3785, "loss_nan_ranks": 0, "loss_rank_avg": 0.13461175560951233, "step": 1255, "valid_targets_mean": 16171.8, "valid_targets_min": 7126 }, { "epoch": 3.8181818181818183, "grad_norm": 0.16313507588756762, "learning_rate": 6.460923431444015e-06, "loss": 0.3816, "loss_nan_ranks": 0, "loss_rank_avg": 0.12053517997264862, "step": 1260, "valid_targets_mean": 14434.2, "valid_targets_min": 2628 }, { "epoch": 3.8333333333333335, "grad_norm": 0.16325464900607975, "learning_rate": 6.305973847775406e-06, "loss": 0.3717, "loss_nan_ranks": 0, "loss_rank_avg": 0.1159006804227829, "step": 1265, "valid_targets_mean": 15667.6, "valid_targets_min": 5195 }, { "epoch": 3.8484848484848486, "grad_norm": 0.18254604746396977, "learning_rate": 6.152556458787546e-06, "loss": 0.3753, "loss_nan_ranks": 0, "loss_rank_avg": 0.12738105654716492, "step": 1270, "valid_targets_mean": 13737.2, "valid_targets_min": 3769 }, { "epoch": 3.8636363636363638, "grad_norm": 0.16908110201722565, "learning_rate": 6.000688430017048e-06, "loss": 0.3785, "loss_nan_ranks": 0, "loss_rank_avg": 0.11613453924655914, "step": 1275, "valid_targets_mean": 14516.1, "valid_targets_min": 4159 }, { "epoch": 3.878787878787879, "grad_norm": 0.20048809597109035, "learning_rate": 5.850386753645998e-06, "loss": 0.3774, "loss_nan_ranks": 0, "loss_rank_avg": 0.10215884447097778, "step": 1280, "valid_targets_mean": 12381.9, "valid_targets_min": 3785 }, { "epoch": 3.893939393939394, "grad_norm": 0.16139526738714188, "learning_rate": 5.701668246600731e-06, "loss": 0.3781, "loss_nan_ranks": 0, "loss_rank_avg": 0.1251106560230255, "step": 1285, "valid_targets_mean": 15067.7, "valid_targets_min": 2642 }, { "epoch": 3.909090909090909, "grad_norm": 0.157151377694226, "learning_rate": 5.554549548670227e-06, "loss": 0.3786, "loss_nan_ranks": 0, "loss_rank_avg": 0.13455966114997864, "step": 1290, "valid_targets_mean": 17651.8, "valid_targets_min": 6461 }, { "epoch": 3.9242424242424243, "grad_norm": 0.15702018588442043, "learning_rate": 5.409047120644307e-06, "loss": 0.3759, "loss_nan_ranks": 0, "loss_rank_avg": 0.10969670116901398, "step": 1295, "valid_targets_mean": 14103.6, "valid_targets_min": 4383 }, { "epoch": 3.9393939393939394, "grad_norm": 0.18062443113496127, "learning_rate": 5.265177242471899e-06, "loss": 0.3782, "loss_nan_ranks": 0, "loss_rank_avg": 0.10825863480567932, "step": 1300, "valid_targets_mean": 13964.7, "valid_targets_min": 5266 }, { "epoch": 3.9545454545454546, "grad_norm": 0.1726265649492319, "learning_rate": 5.122956011439486e-06, "loss": 0.3782, "loss_nan_ranks": 0, "loss_rank_avg": 0.1273096799850464, "step": 1305, "valid_targets_mean": 14422.9, "valid_targets_min": 4184 }, { "epoch": 3.9696969696969697, "grad_norm": 0.1638570708647358, "learning_rate": 4.982399340370017e-06, "loss": 0.3788, "loss_nan_ranks": 0, "loss_rank_avg": 0.12474849075078964, "step": 1310, "valid_targets_mean": 15338.0, "valid_targets_min": 4021 }, { "epoch": 3.984848484848485, "grad_norm": 0.15821455878356205, "learning_rate": 4.843522955842464e-06, "loss": 0.3745, "loss_nan_ranks": 0, "loss_rank_avg": 0.11333755403757095, "step": 1315, "valid_targets_mean": 14689.9, "valid_targets_min": 7070 }, { "epoch": 4.0, "grad_norm": 0.18144836630716482, "learning_rate": 4.706342396432213e-06, "loss": 0.3722, "loss_nan_ranks": 0, "loss_rank_avg": 0.12877650558948517, "step": 1320, "valid_targets_mean": 15349.0, "valid_targets_min": 4947 }, { "epoch": 4.015151515151516, "grad_norm": 0.1665315993678895, "learning_rate": 4.570873010972477e-06, "loss": 0.3799, "loss_nan_ranks": 0, "loss_rank_avg": 0.11058478057384491, "step": 1325, "valid_targets_mean": 13188.7, "valid_targets_min": 4378 }, { "epoch": 4.03030303030303, "grad_norm": 0.16355510134396425, "learning_rate": 4.43712995683695e-06, "loss": 0.3776, "loss_nan_ranks": 0, "loss_rank_avg": 0.13307559490203857, "step": 1330, "valid_targets_mean": 14841.8, "valid_targets_min": 2461 }, { "epoch": 4.045454545454546, "grad_norm": 0.16568197826559009, "learning_rate": 4.305128198243888e-06, "loss": 0.3816, "loss_nan_ranks": 0, "loss_rank_avg": 0.13005071878433228, "step": 1335, "valid_targets_mean": 16517.5, "valid_targets_min": 5013 }, { "epoch": 4.0606060606060606, "grad_norm": 0.1515725227529857, "learning_rate": 4.174882504581794e-06, "loss": 0.3758, "loss_nan_ranks": 0, "loss_rank_avg": 0.12472750246524811, "step": 1340, "valid_targets_mean": 16017.4, "valid_targets_min": 5848 }, { "epoch": 4.075757575757576, "grad_norm": 0.1560043296087169, "learning_rate": 4.046407448756895e-06, "loss": 0.3721, "loss_nan_ranks": 0, "loss_rank_avg": 0.1273220330476761, "step": 1345, "valid_targets_mean": 15190.7, "valid_targets_min": 4258 }, { "epoch": 4.090909090909091, "grad_norm": 0.16171551990487193, "learning_rate": 3.91971740556262e-06, "loss": 0.3765, "loss_nan_ranks": 0, "loss_rank_avg": 0.11427200585603714, "step": 1350, "valid_targets_mean": 13788.2, "valid_targets_min": 2628 }, { "epoch": 4.106060606060606, "grad_norm": 0.1621886915892531, "learning_rate": 3.7948265500712313e-06, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.1313973218202591, "step": 1355, "valid_targets_mean": 15764.5, "valid_targets_min": 6736 }, { "epoch": 4.121212121212121, "grad_norm": 0.15216405342287734, "learning_rate": 3.6717488560478096e-06, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.13022267818450928, "step": 1360, "valid_targets_mean": 16802.9, "valid_targets_min": 6235 }, { "epoch": 4.136363636363637, "grad_norm": 0.16451681783578678, "learning_rate": 3.5504980943867538e-06, "loss": 0.3751, "loss_nan_ranks": 0, "loss_rank_avg": 0.13295704126358032, "step": 1365, "valid_targets_mean": 16706.9, "valid_targets_min": 7398 }, { "epoch": 4.151515151515151, "grad_norm": 0.15362560588681468, "learning_rate": 3.4310878315710074e-06, "loss": 0.3785, "loss_nan_ranks": 0, "loss_rank_avg": 0.1346280872821808, "step": 1370, "valid_targets_mean": 17013.3, "valid_targets_min": 4415 }, { "epoch": 4.166666666666667, "grad_norm": 0.1637770046947873, "learning_rate": 3.3135314281540954e-06, "loss": 0.3743, "loss_nan_ranks": 0, "loss_rank_avg": 0.13173696398735046, "step": 1375, "valid_targets_mean": 15431.9, "valid_targets_min": 5145 }, { "epoch": 4.181818181818182, "grad_norm": 0.15262780010059376, "learning_rate": 3.1978420372652776e-06, "loss": 0.375, "loss_nan_ranks": 0, "loss_rank_avg": 0.13413605093955994, "step": 1380, "valid_targets_mean": 15826.1, "valid_targets_min": 6007 }, { "epoch": 4.196969696969697, "grad_norm": 0.15750588369933005, "learning_rate": 3.084032603137852e-06, "loss": 0.3805, "loss_nan_ranks": 0, "loss_rank_avg": 0.1388297826051712, "step": 1385, "valid_targets_mean": 16882.4, "valid_targets_min": 2141 }, { "epoch": 4.212121212121212, "grad_norm": 0.15951360984080548, "learning_rate": 2.9721158596608622e-06, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.12477941066026688, "step": 1390, "valid_targets_mean": 14877.5, "valid_targets_min": 4983 }, { "epoch": 4.2272727272727275, "grad_norm": 0.15989308204374056, "learning_rate": 2.8621043289543314e-06, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.12433609366416931, "step": 1395, "valid_targets_mean": 14682.3, "valid_targets_min": 4653 }, { "epoch": 4.242424242424242, "grad_norm": 0.15862256305188707, "learning_rate": 2.754010319968181e-06, "loss": 0.3794, "loss_nan_ranks": 0, "loss_rank_avg": 0.1250750571489334, "step": 1400, "valid_targets_mean": 15762.9, "valid_targets_min": 5095 }, { "epoch": 4.257575757575758, "grad_norm": 0.144033968194764, "learning_rate": 2.647845927105015e-06, "loss": 0.378, "loss_nan_ranks": 0, "loss_rank_avg": 0.13507190346717834, "step": 1405, "valid_targets_mean": 17072.8, "valid_targets_min": 3713 }, { "epoch": 4.2727272727272725, "grad_norm": 0.15293749452569375, "learning_rate": 2.543623028866915e-06, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.12047475576400757, "step": 1410, "valid_targets_mean": 14718.1, "valid_targets_min": 3986 }, { "epoch": 4.287878787878788, "grad_norm": 0.1519293936485469, "learning_rate": 2.4413532865263533e-06, "loss": 0.3816, "loss_nan_ranks": 0, "loss_rank_avg": 0.13895002007484436, "step": 1415, "valid_targets_mean": 15430.9, "valid_targets_min": 7704 }, { "epoch": 4.303030303030303, "grad_norm": 0.15934597894119953, "learning_rate": 2.3410481428214602e-06, "loss": 0.3744, "loss_nan_ranks": 0, "loss_rank_avg": 0.11791396141052246, "step": 1420, "valid_targets_mean": 15282.0, "valid_targets_min": 5252 }, { "epoch": 4.318181818181818, "grad_norm": 0.1565540137275141, "learning_rate": 2.242718820675718e-06, "loss": 0.3774, "loss_nan_ranks": 0, "loss_rank_avg": 0.13728006184101105, "step": 1425, "valid_targets_mean": 16404.4, "valid_targets_min": 5168 }, { "epoch": 4.333333333333333, "grad_norm": 0.15300286050496542, "learning_rate": 2.1463763219422495e-06, "loss": 0.3766, "loss_nan_ranks": 0, "loss_rank_avg": 0.13043278455734253, "step": 1430, "valid_targets_mean": 15364.1, "valid_targets_min": 4904 }, { "epoch": 4.348484848484849, "grad_norm": 0.15256071826231843, "learning_rate": 2.0520314261728357e-06, "loss": 0.3761, "loss_nan_ranks": 0, "loss_rank_avg": 0.11972136795520782, "step": 1435, "valid_targets_mean": 13876.5, "valid_targets_min": 4299 }, { "epoch": 4.363636363636363, "grad_norm": 0.1498901275439139, "learning_rate": 1.9596946894118306e-06, "loss": 0.3738, "loss_nan_ranks": 0, "loss_rank_avg": 0.12425568699836731, "step": 1440, "valid_targets_mean": 15735.1, "valid_targets_min": 6018 }, { "epoch": 4.378787878787879, "grad_norm": 0.15525357240670695, "learning_rate": 1.8693764430150696e-06, "loss": 0.3743, "loss_nan_ranks": 0, "loss_rank_avg": 0.11837613582611084, "step": 1445, "valid_targets_mean": 14592.8, "valid_targets_min": 3275 }, { "epoch": 4.393939393939394, "grad_norm": 0.14241103294840823, "learning_rate": 1.7810867924938978e-06, "loss": 0.3752, "loss_nan_ranks": 0, "loss_rank_avg": 0.11680356413125992, "step": 1450, "valid_targets_mean": 15398.9, "valid_targets_min": 6343 }, { "epoch": 4.409090909090909, "grad_norm": 0.1421325675854043, "learning_rate": 1.6948356163845048e-06, "loss": 0.3808, "loss_nan_ranks": 0, "loss_rank_avg": 0.1380445659160614, "step": 1455, "valid_targets_mean": 17377.7, "valid_targets_min": 6781 }, { "epoch": 4.424242424242424, "grad_norm": 0.15004001547168774, "learning_rate": 1.610632565142627e-06, "loss": 0.3819, "loss_nan_ranks": 0, "loss_rank_avg": 0.12191175669431686, "step": 1460, "valid_targets_mean": 14812.8, "valid_targets_min": 5848 }, { "epoch": 4.4393939393939394, "grad_norm": 0.1444461782333066, "learning_rate": 1.5284870600637813e-06, "loss": 0.3773, "loss_nan_ranks": 0, "loss_rank_avg": 0.12871713936328888, "step": 1465, "valid_targets_mean": 15949.8, "valid_targets_min": 7108 }, { "epoch": 4.454545454545454, "grad_norm": 0.1489245036934525, "learning_rate": 1.4484082922291376e-06, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.12368868291378021, "step": 1470, "valid_targets_mean": 16389.0, "valid_targets_min": 5677 }, { "epoch": 4.46969696969697, "grad_norm": 0.1472974113093515, "learning_rate": 1.3704052214771513e-06, "loss": 0.3758, "loss_nan_ranks": 0, "loss_rank_avg": 0.11158767342567444, "step": 1475, "valid_targets_mean": 13701.0, "valid_targets_min": 4705 }, { "epoch": 4.484848484848484, "grad_norm": 0.15064209648897592, "learning_rate": 1.2944865754010682e-06, "loss": 0.3732, "loss_nan_ranks": 0, "loss_rank_avg": 0.11993849277496338, "step": 1480, "valid_targets_mean": 14086.5, "valid_targets_min": 5697 }, { "epoch": 4.5, "grad_norm": 0.1439829400641997, "learning_rate": 1.2206608483724013e-06, "loss": 0.3755, "loss_nan_ranks": 0, "loss_rank_avg": 0.1392856389284134, "step": 1485, "valid_targets_mean": 16722.5, "valid_targets_min": 4973 }, { "epoch": 4.515151515151516, "grad_norm": 0.14654997539211856, "learning_rate": 1.1489363005905241e-06, "loss": 0.3746, "loss_nan_ranks": 0, "loss_rank_avg": 0.1073300838470459, "step": 1490, "valid_targets_mean": 13875.4, "valid_targets_min": 3778 }, { "epoch": 4.53030303030303, "grad_norm": 0.1450634174975392, "learning_rate": 1.0793209571584562e-06, "loss": 0.3847, "loss_nan_ranks": 0, "loss_rank_avg": 0.13987794518470764, "step": 1495, "valid_targets_mean": 16870.2, "valid_targets_min": 5447 }, { "epoch": 4.545454545454545, "grad_norm": 0.1466804723012743, "learning_rate": 1.0118226071849424e-06, "loss": 0.3761, "loss_nan_ranks": 0, "loss_rank_avg": 0.12252548336982727, "step": 1500, "valid_targets_mean": 14106.8, "valid_targets_min": 4578 }, { "epoch": 4.5606060606060606, "grad_norm": 0.16197123748968928, "learning_rate": 9.464488029129581e-07, "loss": 0.3792, "loss_nan_ranks": 0, "loss_rank_avg": 0.14925982058048248, "step": 1505, "valid_targets_mean": 18311.2, "valid_targets_min": 8146 }, { "epoch": 4.575757575757576, "grad_norm": 0.14327511237462578, "learning_rate": 8.832068588746945e-07, "loss": 0.3725, "loss_nan_ranks": 0, "loss_rank_avg": 0.1239233985543251, "step": 1510, "valid_targets_mean": 16076.7, "valid_targets_min": 3317 }, { "epoch": 4.590909090909091, "grad_norm": 0.14823288821280842, "learning_rate": 8.221038510731704e-07, "loss": 0.3746, "loss_nan_ranks": 0, "loss_rank_avg": 0.1282537877559662, "step": 1515, "valid_targets_mean": 14536.7, "valid_targets_min": 3646 }, { "epoch": 4.606060606060606, "grad_norm": 0.14105179064235956, "learning_rate": 7.631466161904821e-07, "loss": 0.3749, "loss_nan_ranks": 0, "loss_rank_avg": 0.12889966368675232, "step": 1520, "valid_targets_mean": 16155.5, "valid_targets_min": 6424 }, { "epoch": 4.621212121212121, "grad_norm": 0.14818970968513234, "learning_rate": 7.063417508228876e-07, "loss": 0.3768, "loss_nan_ranks": 0, "loss_rank_avg": 0.1282745897769928, "step": 1525, "valid_targets_mean": 15349.0, "valid_targets_min": 6302 }, { "epoch": 4.636363636363637, "grad_norm": 0.14449126106516016, "learning_rate": 6.516956107427241e-07, "loss": 0.3734, "loss_nan_ranks": 0, "loss_rank_avg": 0.12247820198535919, "step": 1530, "valid_targets_mean": 15549.9, "valid_targets_min": 4505 }, { "epoch": 4.651515151515151, "grad_norm": 0.15005279315436212, "learning_rate": 5.992143101872638e-07, "loss": 0.3839, "loss_nan_ranks": 0, "loss_rank_avg": 0.11950863152742386, "step": 1535, "valid_targets_mean": 13700.6, "valid_targets_min": 3590 }, { "epoch": 4.666666666666667, "grad_norm": 0.14661165136036405, "learning_rate": 5.489037211746184e-07, "loss": 0.3804, "loss_nan_ranks": 0, "loss_rank_avg": 0.11654527485370636, "step": 1540, "valid_targets_mean": 14126.2, "valid_targets_min": 5009 }, { "epoch": 4.681818181818182, "grad_norm": 0.14432957174327468, "learning_rate": 5.007694728467228e-07, "loss": 0.377, "loss_nan_ranks": 0, "loss_rank_avg": 0.12720376253128052, "step": 1545, "valid_targets_mean": 14868.3, "valid_targets_min": 4415 }, { "epoch": 4.696969696969697, "grad_norm": 0.13809526163891875, "learning_rate": 4.548169508395028e-07, "loss": 0.3775, "loss_nan_ranks": 0, "loss_rank_avg": 0.12525103986263275, "step": 1550, "valid_targets_mean": 16000.2, "valid_targets_min": 4921 }, { "epoch": 4.712121212121212, "grad_norm": 0.14070611024208945, "learning_rate": 4.1105129668029595e-07, "loss": 0.3809, "loss_nan_ranks": 0, "loss_rank_avg": 0.1342814564704895, "step": 1555, "valid_targets_mean": 17020.2, "valid_targets_min": 7375 }, { "epoch": 4.7272727272727275, "grad_norm": 0.1472076939859629, "learning_rate": 3.6947740721257066e-07, "loss": 0.3781, "loss_nan_ranks": 0, "loss_rank_avg": 0.12940089404582977, "step": 1560, "valid_targets_mean": 15519.5, "valid_targets_min": 4962 }, { "epoch": 4.742424242424242, "grad_norm": 0.1370972068518527, "learning_rate": 3.3009993404802486e-07, "loss": 0.3787, "loss_nan_ranks": 0, "loss_rank_avg": 0.12228408455848694, "step": 1565, "valid_targets_mean": 14750.1, "valid_targets_min": 3171 }, { "epoch": 4.757575757575758, "grad_norm": 0.13755006199041775, "learning_rate": 2.929232830461404e-07, "loss": 0.376, "loss_nan_ranks": 0, "loss_rank_avg": 0.1286831796169281, "step": 1570, "valid_targets_mean": 15723.0, "valid_targets_min": 5089 }, { "epoch": 4.7727272727272725, "grad_norm": 0.14560076346202475, "learning_rate": 2.579516138212101e-07, "loss": 0.3793, "loss_nan_ranks": 0, "loss_rank_avg": 0.13214346766471863, "step": 1575, "valid_targets_mean": 15091.0, "valid_targets_min": 2044 }, { "epoch": 4.787878787878788, "grad_norm": 0.14612474449230148, "learning_rate": 2.2518883927692857e-07, "loss": 0.3773, "loss_nan_ranks": 0, "loss_rank_avg": 0.1365845948457718, "step": 1580, "valid_targets_mean": 15611.6, "valid_targets_min": 4544 }, { "epoch": 4.803030303030303, "grad_norm": 0.14192509401084216, "learning_rate": 1.9463862516859277e-07, "loss": 0.3766, "loss_nan_ranks": 0, "loss_rank_avg": 0.11990797519683838, "step": 1585, "valid_targets_mean": 14435.0, "valid_targets_min": 2745 }, { "epoch": 4.818181818181818, "grad_norm": 0.14022031514520483, "learning_rate": 1.6630438969294615e-07, "loss": 0.3785, "loss_nan_ranks": 0, "loss_rank_avg": 0.14073169231414795, "step": 1590, "valid_targets_mean": 18670.8, "valid_targets_min": 6489 }, { "epoch": 4.833333333333333, "grad_norm": 0.14572269457660778, "learning_rate": 1.4018930310571553e-07, "loss": 0.3781, "loss_nan_ranks": 0, "loss_rank_avg": 0.1340230405330658, "step": 1595, "valid_targets_mean": 15346.7, "valid_targets_min": 7399 }, { "epoch": 4.848484848484849, "grad_norm": 0.14668101679572776, "learning_rate": 1.1629628736690824e-07, "loss": 0.3727, "loss_nan_ranks": 0, "loss_rank_avg": 0.11541043221950531, "step": 1600, "valid_targets_mean": 13274.7, "valid_targets_min": 3833 }, { "epoch": 4.863636363636363, "grad_norm": 0.14319080604843668, "learning_rate": 9.46280158138757e-08, "loss": 0.3751, "loss_nan_ranks": 0, "loss_rank_avg": 0.13603129982948303, "step": 1605, "valid_targets_mean": 15658.5, "valid_targets_min": 7066 }, { "epoch": 4.878787878787879, "grad_norm": 0.14301657158586537, "learning_rate": 7.518691286220625e-08, "loss": 0.3834, "loss_nan_ranks": 0, "loss_rank_avg": 0.13067832589149475, "step": 1610, "valid_targets_mean": 14509.7, "valid_targets_min": 3622 }, { "epoch": 4.893939393939394, "grad_norm": 0.14024618123182978, "learning_rate": 5.797515373445084e-08, "loss": 0.375, "loss_nan_ranks": 0, "loss_rank_avg": 0.11804800480604172, "step": 1615, "valid_targets_mean": 14769.3, "valid_targets_min": 3072 }, { "epoch": 4.909090909090909, "grad_norm": 0.1569062465599826, "learning_rate": 4.299466421675113e-08, "loss": 0.374, "loss_nan_ranks": 0, "loss_rank_avg": 0.12300266325473785, "step": 1620, "valid_targets_mean": 14378.3, "valid_targets_min": 1594 }, { "epoch": 4.924242424242424, "grad_norm": 0.14063309925412415, "learning_rate": 3.0247120443362976e-08, "loss": 0.3778, "loss_nan_ranks": 0, "loss_rank_avg": 0.12379644811153412, "step": 1625, "valid_targets_mean": 16364.5, "valid_targets_min": 5420 }, { "epoch": 4.9393939393939394, "grad_norm": 0.13865249610748842, "learning_rate": 1.973394870912193e-08, "loss": 0.3823, "loss_nan_ranks": 0, "loss_rank_avg": 0.11910425871610641, "step": 1630, "valid_targets_mean": 14783.9, "valid_targets_min": 4717 }, { "epoch": 4.954545454545455, "grad_norm": 0.16786783713364176, "learning_rate": 1.145632530985541e-08, "loss": 0.3814, "loss_nan_ranks": 0, "loss_rank_avg": 0.12280435860157013, "step": 1635, "valid_targets_mean": 15160.9, "valid_targets_min": 2570 }, { "epoch": 4.96969696969697, "grad_norm": 0.1446243387874453, "learning_rate": 5.415176410765721e-09, "loss": 0.3775, "loss_nan_ranks": 0, "loss_rank_avg": 0.13177569210529327, "step": 1640, "valid_targets_mean": 14764.5, "valid_targets_min": 4690 }, { "epoch": 4.984848484848484, "grad_norm": 0.13388250594773463, "learning_rate": 1.611177942812958e-09, "loss": 0.3779, "loss_nan_ranks": 0, "loss_rank_avg": 0.11846122145652771, "step": 1645, "valid_targets_mean": 15252.2, "valid_targets_min": 6161 }, { "epoch": 5.0, "grad_norm": 0.1337804023582898, "learning_rate": 4.475552707772224e-11, "loss": 0.3752, "loss_nan_ranks": 0, "loss_rank_avg": 0.12205812335014343, "step": 1650, "valid_targets_mean": 15604.5, "valid_targets_min": 4811 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.12205812335014343, "step": 1650, "total_flos": 9.560237722870743e+18, "train_loss": 0.10292447277993867, "train_runtime": 13793.2861, "train_samples_per_second": 11.477, "train_steps_per_second": 0.12, "valid_targets_mean": 15604.5, "valid_targets_min": 4811 } ], "logging_steps": 5, "max_steps": 1650, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.560237722870743e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }