{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015182186234817813, "grad_norm": 4.577882285169533, "learning_rate": 9.696969696969698e-07, "loss": 0.3821, "loss_nan_ranks": 0, "loss_rank_avg": 0.14125539362430573, "step": 5, "valid_targets_mean": 6349.3, "valid_targets_min": 1205 }, { "epoch": 0.030364372469635626, "grad_norm": 2.36333443068163, "learning_rate": 2.181818181818182e-06, "loss": 0.3419, "loss_nan_ranks": 0, "loss_rank_avg": 0.10619394481182098, "step": 10, "valid_targets_mean": 6624.3, "valid_targets_min": 1531 }, { "epoch": 0.04554655870445344, "grad_norm": 1.4113487235292677, "learning_rate": 3.3939393939393946e-06, "loss": 0.3243, "loss_nan_ranks": 0, "loss_rank_avg": 0.10431598126888275, "step": 15, "valid_targets_mean": 5438.1, "valid_targets_min": 1068 }, { "epoch": 0.06072874493927125, "grad_norm": 0.62553520757171, "learning_rate": 4.606060606060606e-06, "loss": 0.303, "loss_nan_ranks": 0, "loss_rank_avg": 0.11968786269426346, "step": 20, "valid_targets_mean": 5190.2, "valid_targets_min": 1193 }, { "epoch": 0.07591093117408906, "grad_norm": 0.40619377975716625, "learning_rate": 5.8181818181818185e-06, "loss": 0.2779, "loss_nan_ranks": 0, "loss_rank_avg": 0.0885775089263916, "step": 25, "valid_targets_mean": 6902.4, "valid_targets_min": 903 }, { "epoch": 0.09109311740890688, "grad_norm": 0.3671651963956234, "learning_rate": 7.030303030303031e-06, "loss": 0.2558, "loss_nan_ranks": 0, "loss_rank_avg": 0.08641765266656876, "step": 30, "valid_targets_mean": 5683.3, "valid_targets_min": 1196 }, { "epoch": 0.1062753036437247, "grad_norm": 0.24719995342571235, "learning_rate": 8.242424242424243e-06, "loss": 0.2341, "loss_nan_ranks": 0, "loss_rank_avg": 0.08379081636667252, "step": 35, "valid_targets_mean": 5579.6, "valid_targets_min": 1261 }, { "epoch": 0.1214574898785425, "grad_norm": 0.18778985066745008, "learning_rate": 9.454545454545456e-06, "loss": 0.2168, "loss_nan_ranks": 0, "loss_rank_avg": 0.05891978368163109, "step": 40, "valid_targets_mean": 5071.2, "valid_targets_min": 1382 }, { "epoch": 0.13663967611336034, "grad_norm": 0.18215489837899956, "learning_rate": 1.0666666666666667e-05, "loss": 0.1962, "loss_nan_ranks": 0, "loss_rank_avg": 0.06543523073196411, "step": 45, "valid_targets_mean": 5722.6, "valid_targets_min": 467 }, { "epoch": 0.15182186234817813, "grad_norm": 0.1994040406642919, "learning_rate": 1.187878787878788e-05, "loss": 0.1975, "loss_nan_ranks": 0, "loss_rank_avg": 0.06948289275169373, "step": 50, "valid_targets_mean": 4391.1, "valid_targets_min": 1226 }, { "epoch": 0.16700404858299595, "grad_norm": 0.1794623411727323, "learning_rate": 1.3090909090909092e-05, "loss": 0.1867, "loss_nan_ranks": 0, "loss_rank_avg": 0.0551609992980957, "step": 55, "valid_targets_mean": 5057.4, "valid_targets_min": 1219 }, { "epoch": 0.18218623481781376, "grad_norm": 0.15110982315290433, "learning_rate": 1.4303030303030305e-05, "loss": 0.1829, "loss_nan_ranks": 0, "loss_rank_avg": 0.06352324038743973, "step": 60, "valid_targets_mean": 5477.7, "valid_targets_min": 1130 }, { "epoch": 0.19736842105263158, "grad_norm": 0.17887737225464861, "learning_rate": 1.5515151515151516e-05, "loss": 0.1818, "loss_nan_ranks": 0, "loss_rank_avg": 0.061294250190258026, "step": 65, "valid_targets_mean": 4873.1, "valid_targets_min": 2187 }, { "epoch": 0.2125506072874494, "grad_norm": 0.14662492214397718, "learning_rate": 1.672727272727273e-05, "loss": 0.1732, "loss_nan_ranks": 0, "loss_rank_avg": 0.049816764891147614, "step": 70, "valid_targets_mean": 5407.7, "valid_targets_min": 1155 }, { "epoch": 0.22773279352226722, "grad_norm": 0.15555536697911404, "learning_rate": 1.7939393939393942e-05, "loss": 0.1752, "loss_nan_ranks": 0, "loss_rank_avg": 0.05831480771303177, "step": 75, "valid_targets_mean": 5577.9, "valid_targets_min": 1172 }, { "epoch": 0.242914979757085, "grad_norm": 0.15003625008902433, "learning_rate": 1.9151515151515152e-05, "loss": 0.169, "loss_nan_ranks": 0, "loss_rank_avg": 0.052947502583265305, "step": 80, "valid_targets_mean": 6273.1, "valid_targets_min": 1978 }, { "epoch": 0.25809716599190285, "grad_norm": 0.1484191753674967, "learning_rate": 2.0363636363636365e-05, "loss": 0.1665, "loss_nan_ranks": 0, "loss_rank_avg": 0.06921609491109848, "step": 85, "valid_targets_mean": 6116.3, "valid_targets_min": 1519 }, { "epoch": 0.2732793522267207, "grad_norm": 0.16725415591253295, "learning_rate": 2.1575757575757578e-05, "loss": 0.1606, "loss_nan_ranks": 0, "loss_rank_avg": 0.05471062660217285, "step": 90, "valid_targets_mean": 4370.1, "valid_targets_min": 1040 }, { "epoch": 0.28846153846153844, "grad_norm": 0.15463499334053316, "learning_rate": 2.278787878787879e-05, "loss": 0.1582, "loss_nan_ranks": 0, "loss_rank_avg": 0.05240866169333458, "step": 95, "valid_targets_mean": 5131.7, "valid_targets_min": 982 }, { "epoch": 0.30364372469635625, "grad_norm": 0.14301548261073, "learning_rate": 2.4e-05, "loss": 0.1557, "loss_nan_ranks": 0, "loss_rank_avg": 0.04320158809423447, "step": 100, "valid_targets_mean": 5694.0, "valid_targets_min": 1121 }, { "epoch": 0.3188259109311741, "grad_norm": 0.16164511595169545, "learning_rate": 2.5212121212121214e-05, "loss": 0.1562, "loss_nan_ranks": 0, "loss_rank_avg": 0.04968992620706558, "step": 105, "valid_targets_mean": 5940.3, "valid_targets_min": 1771 }, { "epoch": 0.3340080971659919, "grad_norm": 0.25196399601323866, "learning_rate": 2.6424242424242427e-05, "loss": 0.1488, "loss_nan_ranks": 0, "loss_rank_avg": 0.04176684468984604, "step": 110, "valid_targets_mean": 6489.9, "valid_targets_min": 1543 }, { "epoch": 0.3491902834008097, "grad_norm": 0.14923683707765953, "learning_rate": 2.763636363636364e-05, "loss": 0.145, "loss_nan_ranks": 0, "loss_rank_avg": 0.04738590121269226, "step": 115, "valid_targets_mean": 6781.3, "valid_targets_min": 476 }, { "epoch": 0.3643724696356275, "grad_norm": 0.15818654819578504, "learning_rate": 2.884848484848485e-05, "loss": 0.1508, "loss_nan_ranks": 0, "loss_rank_avg": 0.04790515452623367, "step": 120, "valid_targets_mean": 5334.8, "valid_targets_min": 372 }, { "epoch": 0.37955465587044535, "grad_norm": 0.19234264461233105, "learning_rate": 3.0060606060606062e-05, "loss": 0.152, "loss_nan_ranks": 0, "loss_rank_avg": 0.05456281453371048, "step": 125, "valid_targets_mean": 5340.9, "valid_targets_min": 540 }, { "epoch": 0.39473684210526316, "grad_norm": 0.16265824731534112, "learning_rate": 3.127272727272728e-05, "loss": 0.143, "loss_nan_ranks": 0, "loss_rank_avg": 0.04653193801641464, "step": 130, "valid_targets_mean": 5583.2, "valid_targets_min": 516 }, { "epoch": 0.409919028340081, "grad_norm": 0.15571751458809774, "learning_rate": 3.2484848484848485e-05, "loss": 0.1476, "loss_nan_ranks": 0, "loss_rank_avg": 0.04823809117078781, "step": 135, "valid_targets_mean": 5145.2, "valid_targets_min": 1274 }, { "epoch": 0.4251012145748988, "grad_norm": 0.16542547088134105, "learning_rate": 3.36969696969697e-05, "loss": 0.1407, "loss_nan_ranks": 0, "loss_rank_avg": 0.050901204347610474, "step": 140, "valid_targets_mean": 5734.6, "valid_targets_min": 1040 }, { "epoch": 0.4402834008097166, "grad_norm": 0.1543727010347394, "learning_rate": 3.490909090909091e-05, "loss": 0.1345, "loss_nan_ranks": 0, "loss_rank_avg": 0.042660776525735855, "step": 145, "valid_targets_mean": 6155.5, "valid_targets_min": 1282 }, { "epoch": 0.45546558704453444, "grad_norm": 0.16917853442828049, "learning_rate": 3.6121212121212124e-05, "loss": 0.1378, "loss_nan_ranks": 0, "loss_rank_avg": 0.03548862040042877, "step": 150, "valid_targets_mean": 5270.2, "valid_targets_min": 1186 }, { "epoch": 0.4706477732793522, "grad_norm": 0.16323829901889883, "learning_rate": 3.733333333333334e-05, "loss": 0.1337, "loss_nan_ranks": 0, "loss_rank_avg": 0.0435483492910862, "step": 155, "valid_targets_mean": 5555.4, "valid_targets_min": 1181 }, { "epoch": 0.48582995951417, "grad_norm": 0.1692106384315938, "learning_rate": 3.854545454545455e-05, "loss": 0.1324, "loss_nan_ranks": 0, "loss_rank_avg": 0.04682815819978714, "step": 160, "valid_targets_mean": 5212.5, "valid_targets_min": 1057 }, { "epoch": 0.5010121457489879, "grad_norm": 0.3268896604465774, "learning_rate": 3.9757575757575757e-05, "loss": 0.1295, "loss_nan_ranks": 0, "loss_rank_avg": 0.03665677458047867, "step": 165, "valid_targets_mean": 5034.9, "valid_targets_min": 1057 }, { "epoch": 0.5161943319838057, "grad_norm": 0.1782081910166358, "learning_rate": 3.999928391557286e-05, "loss": 0.1334, "loss_nan_ranks": 0, "loss_rank_avg": 0.0440300777554512, "step": 170, "valid_targets_mean": 6430.0, "valid_targets_min": 1643 }, { "epoch": 0.5313765182186235, "grad_norm": 0.14555082752297552, "learning_rate": 3.999637491047052e-05, "loss": 0.1246, "loss_nan_ranks": 0, "loss_rank_avg": 0.03578227758407593, "step": 175, "valid_targets_mean": 5345.4, "valid_targets_min": 1567 }, { "epoch": 0.5465587044534413, "grad_norm": 0.18362203345864717, "learning_rate": 3.999122855464813e-05, "loss": 0.127, "loss_nan_ranks": 0, "loss_rank_avg": 0.03790612518787384, "step": 180, "valid_targets_mean": 4368.3, "valid_targets_min": 984 }, { "epoch": 0.5617408906882592, "grad_norm": 0.23418747992901073, "learning_rate": 3.998384542392021e-05, "loss": 0.1256, "loss_nan_ranks": 0, "loss_rank_avg": 0.04290394484996796, "step": 185, "valid_targets_mean": 5182.0, "valid_targets_min": 820 }, { "epoch": 0.5769230769230769, "grad_norm": 0.20572571033751894, "learning_rate": 3.9974226344369124e-05, "loss": 0.1329, "loss_nan_ranks": 0, "loss_rank_avg": 0.04855868220329285, "step": 190, "valid_targets_mean": 5835.6, "valid_targets_min": 1171 }, { "epoch": 0.5921052631578947, "grad_norm": 0.18498650361194152, "learning_rate": 3.996237239225268e-05, "loss": 0.1138, "loss_nan_ranks": 0, "loss_rank_avg": 0.0366818904876709, "step": 195, "valid_targets_mean": 5377.1, "valid_targets_min": 854 }, { "epoch": 0.6072874493927125, "grad_norm": 0.20044512387262414, "learning_rate": 3.994828489388371e-05, "loss": 0.1151, "loss_nan_ranks": 0, "loss_rank_avg": 0.043819550424814224, "step": 200, "valid_targets_mean": 6249.6, "valid_targets_min": 1321 }, { "epoch": 0.6224696356275303, "grad_norm": 0.1784828455297008, "learning_rate": 3.993196542548162e-05, "loss": 0.1101, "loss_nan_ranks": 0, "loss_rank_avg": 0.04029553383588791, "step": 205, "valid_targets_mean": 6224.9, "valid_targets_min": 467 }, { "epoch": 0.6376518218623481, "grad_norm": 0.20703849729117568, "learning_rate": 3.991341581299609e-05, "loss": 0.1196, "loss_nan_ranks": 0, "loss_rank_avg": 0.0329214408993721, "step": 210, "valid_targets_mean": 4804.6, "valid_targets_min": 1043 }, { "epoch": 0.652834008097166, "grad_norm": 0.18364337011407494, "learning_rate": 3.9892638131902765e-05, "loss": 0.1176, "loss_nan_ranks": 0, "loss_rank_avg": 0.04511767625808716, "step": 215, "valid_targets_mean": 6160.4, "valid_targets_min": 1384 }, { "epoch": 0.6680161943319838, "grad_norm": 0.1558024964622049, "learning_rate": 3.9869634706971e-05, "loss": 0.1198, "loss_nan_ranks": 0, "loss_rank_avg": 0.03782845661044121, "step": 220, "valid_targets_mean": 5498.9, "valid_targets_min": 1544 }, { "epoch": 0.6831983805668016, "grad_norm": 0.20914383472969159, "learning_rate": 3.984440811200379e-05, "loss": 0.1078, "loss_nan_ranks": 0, "loss_rank_avg": 0.03220248222351074, "step": 225, "valid_targets_mean": 5320.8, "valid_targets_min": 1325 }, { "epoch": 0.6983805668016194, "grad_norm": 0.19517043152035096, "learning_rate": 3.981696116954973e-05, "loss": 0.1062, "loss_nan_ranks": 0, "loss_rank_avg": 0.04534193128347397, "step": 230, "valid_targets_mean": 7207.3, "valid_targets_min": 1196 }, { "epoch": 0.7135627530364372, "grad_norm": 0.17619073031217014, "learning_rate": 3.978729695058729e-05, "loss": 0.1046, "loss_nan_ranks": 0, "loss_rank_avg": 0.031153306365013123, "step": 235, "valid_targets_mean": 6348.5, "valid_targets_min": 1173 }, { "epoch": 0.728744939271255, "grad_norm": 0.18271054183558552, "learning_rate": 3.9755418774181146e-05, "loss": 0.1025, "loss_nan_ranks": 0, "loss_rank_avg": 0.028445910662412643, "step": 240, "valid_targets_mean": 4876.5, "valid_targets_min": 412 }, { "epoch": 0.7439271255060729, "grad_norm": 0.16880517618762886, "learning_rate": 3.9721330207110835e-05, "loss": 0.1055, "loss_nan_ranks": 0, "loss_rank_avg": 0.03436177596449852, "step": 245, "valid_targets_mean": 6114.1, "valid_targets_min": 1860 }, { "epoch": 0.7591093117408907, "grad_norm": 0.19132484996627824, "learning_rate": 3.9685035063471675e-05, "loss": 0.1029, "loss_nan_ranks": 0, "loss_rank_avg": 0.03607245534658432, "step": 250, "valid_targets_mean": 6417.8, "valid_targets_min": 701 }, { "epoch": 0.7742914979757085, "grad_norm": 0.15970529463142694, "learning_rate": 3.964653740424804e-05, "loss": 0.0989, "loss_nan_ranks": 0, "loss_rank_avg": 0.03516208380460739, "step": 255, "valid_targets_mean": 5659.3, "valid_targets_min": 325 }, { "epoch": 0.7894736842105263, "grad_norm": 0.18483145865781372, "learning_rate": 3.960584153685895e-05, "loss": 0.0966, "loss_nan_ranks": 0, "loss_rank_avg": 0.02324419468641281, "step": 260, "valid_targets_mean": 4546.3, "valid_targets_min": 393 }, { "epoch": 0.8046558704453441, "grad_norm": 0.17613276390833632, "learning_rate": 3.9562952014676116e-05, "loss": 0.0959, "loss_nan_ranks": 0, "loss_rank_avg": 0.03462664783000946, "step": 265, "valid_targets_mean": 7255.5, "valid_targets_min": 1810 }, { "epoch": 0.819838056680162, "grad_norm": 0.42619852991668516, "learning_rate": 3.9517873636514525e-05, "loss": 0.0898, "loss_nan_ranks": 0, "loss_rank_avg": 0.027356699109077454, "step": 270, "valid_targets_mean": 5930.0, "valid_targets_min": 1600 }, { "epoch": 0.8350202429149798, "grad_norm": 0.21179870429303485, "learning_rate": 3.947061144609546e-05, "loss": 0.091, "loss_nan_ranks": 0, "loss_rank_avg": 0.0356098935008049, "step": 275, "valid_targets_mean": 6128.2, "valid_targets_min": 826 }, { "epoch": 0.8502024291497976, "grad_norm": 0.20519933925513575, "learning_rate": 3.942117073148221e-05, "loss": 0.0864, "loss_nan_ranks": 0, "loss_rank_avg": 0.027698498219251633, "step": 280, "valid_targets_mean": 5906.8, "valid_targets_min": 1226 }, { "epoch": 0.8653846153846154, "grad_norm": 0.20548545562980808, "learning_rate": 3.9369557024488345e-05, "loss": 0.0845, "loss_nan_ranks": 0, "loss_rank_avg": 0.02698471024632454, "step": 285, "valid_targets_mean": 6887.4, "valid_targets_min": 340 }, { "epoch": 0.8805668016194332, "grad_norm": 0.18811028923789463, "learning_rate": 3.931577610005883e-05, "loss": 0.0868, "loss_nan_ranks": 0, "loss_rank_avg": 0.03699128329753876, "step": 290, "valid_targets_mean": 5884.6, "valid_targets_min": 1231 }, { "epoch": 0.895748987854251, "grad_norm": 0.17799496962672587, "learning_rate": 3.925983397562385e-05, "loss": 0.0853, "loss_nan_ranks": 0, "loss_rank_avg": 0.026425950229167938, "step": 295, "valid_targets_mean": 5361.0, "valid_targets_min": 395 }, { "epoch": 0.9109311740890689, "grad_norm": 0.18919585189082683, "learning_rate": 3.920173691042554e-05, "loss": 0.0879, "loss_nan_ranks": 0, "loss_rank_avg": 0.03504861146211624, "step": 300, "valid_targets_mean": 6373.2, "valid_targets_min": 1155 }, { "epoch": 0.9261133603238867, "grad_norm": 0.14742943229085356, "learning_rate": 3.914149140481766e-05, "loss": 0.0764, "loss_nan_ranks": 0, "loss_rank_avg": 0.02615347132086754, "step": 305, "valid_targets_mean": 8020.1, "valid_targets_min": 1064 }, { "epoch": 0.9412955465587044, "grad_norm": 0.17127932498342496, "learning_rate": 3.9079104199538256e-05, "loss": 0.0792, "loss_nan_ranks": 0, "loss_rank_avg": 0.028958020731806755, "step": 310, "valid_targets_mean": 6390.3, "valid_targets_min": 1186 }, { "epoch": 0.9564777327935222, "grad_norm": 0.1611100779233749, "learning_rate": 3.901458227495549e-05, "loss": 0.0775, "loss_nan_ranks": 0, "loss_rank_avg": 0.018862418830394745, "step": 315, "valid_targets_mean": 7854.0, "valid_targets_min": 1626 }, { "epoch": 0.97165991902834, "grad_norm": 0.1840495847276238, "learning_rate": 3.8947932850286585e-05, "loss": 0.0754, "loss_nan_ranks": 0, "loss_rank_avg": 0.02737005054950714, "step": 320, "valid_targets_mean": 5870.0, "valid_targets_min": 1117 }, { "epoch": 0.9868421052631579, "grad_norm": 0.1622918713412052, "learning_rate": 3.887916338279014e-05, "loss": 0.0771, "loss_nan_ranks": 0, "loss_rank_avg": 0.02394448220729828, "step": 325, "valid_targets_mean": 6428.9, "valid_targets_min": 1226 }, { "epoch": 1.0, "grad_norm": 0.32449365450924933, "learning_rate": 3.8808281566931675e-05, "loss": 0.0774, "loss_nan_ranks": 0, "loss_rank_avg": 0.0782683789730072, "step": 330, "valid_targets_mean": 4910.3, "valid_targets_min": 796 }, { "epoch": 1.0151821862348178, "grad_norm": 0.20746893200474992, "learning_rate": 3.873529533352277e-05, "loss": 0.0718, "loss_nan_ranks": 0, "loss_rank_avg": 0.02715224027633667, "step": 335, "valid_targets_mean": 6245.9, "valid_targets_min": 977 }, { "epoch": 1.0303643724696356, "grad_norm": 0.1943595223782352, "learning_rate": 3.8660212848833705e-05, "loss": 0.0586, "loss_nan_ranks": 0, "loss_rank_avg": 0.01882760412991047, "step": 340, "valid_targets_mean": 5001.6, "valid_targets_min": 425 }, { "epoch": 1.0455465587044535, "grad_norm": 0.1742400328237816, "learning_rate": 3.858304251367972e-05, "loss": 0.0606, "loss_nan_ranks": 0, "loss_rank_avg": 0.021374892443418503, "step": 345, "valid_targets_mean": 7840.0, "valid_targets_min": 633 }, { "epoch": 1.0607287449392713, "grad_norm": 0.17556822665242897, "learning_rate": 3.850379296248107e-05, "loss": 0.0631, "loss_nan_ranks": 0, "loss_rank_avg": 0.019903969019651413, "step": 350, "valid_targets_mean": 5103.5, "valid_targets_min": 395 }, { "epoch": 1.075910931174089, "grad_norm": 0.16742225924074075, "learning_rate": 3.8422473062297e-05, "loss": 0.0563, "loss_nan_ranks": 0, "loss_rank_avg": 0.014354637823998928, "step": 355, "valid_targets_mean": 6132.1, "valid_targets_min": 1181 }, { "epoch": 1.091093117408907, "grad_norm": 0.20021656050383596, "learning_rate": 3.8339091911833545e-05, "loss": 0.0616, "loss_nan_ranks": 0, "loss_rank_avg": 0.01479431428015232, "step": 360, "valid_targets_mean": 5134.5, "valid_targets_min": 437 }, { "epoch": 1.1062753036437247, "grad_norm": 0.19054606128340812, "learning_rate": 3.825365884042553e-05, "loss": 0.0553, "loss_nan_ranks": 0, "loss_rank_avg": 0.017302554100751877, "step": 365, "valid_targets_mean": 5901.2, "valid_targets_min": 961 }, { "epoch": 1.1214574898785425, "grad_norm": 0.17630366336918724, "learning_rate": 3.8166183406992745e-05, "loss": 0.0509, "loss_nan_ranks": 0, "loss_rank_avg": 0.017461497336626053, "step": 370, "valid_targets_mean": 5865.8, "valid_targets_min": 1740 }, { "epoch": 1.1366396761133604, "grad_norm": 0.17393703573797326, "learning_rate": 3.807667539897041e-05, "loss": 0.0556, "loss_nan_ranks": 0, "loss_rank_avg": 0.016034454107284546, "step": 375, "valid_targets_mean": 5455.8, "valid_targets_min": 697 }, { "epoch": 1.1518218623481782, "grad_norm": 0.18171916946177608, "learning_rate": 3.798514483121408e-05, "loss": 0.0542, "loss_nan_ranks": 0, "loss_rank_avg": 0.017630070447921753, "step": 380, "valid_targets_mean": 4784.2, "valid_targets_min": 1020 }, { "epoch": 1.167004048582996, "grad_norm": 0.17545029371192425, "learning_rate": 3.789160194487908e-05, "loss": 0.0512, "loss_nan_ranks": 0, "loss_rank_avg": 0.015380222350358963, "step": 385, "valid_targets_mean": 6277.5, "valid_targets_min": 382 }, { "epoch": 1.1821862348178138, "grad_norm": 0.20217196034333956, "learning_rate": 3.7796057206274686e-05, "loss": 0.0508, "loss_nan_ranks": 0, "loss_rank_avg": 0.021449625492095947, "step": 390, "valid_targets_mean": 6695.3, "valid_targets_min": 1158 }, { "epoch": 1.1973684210526316, "grad_norm": 0.18513715753260984, "learning_rate": 3.769852130569304e-05, "loss": 0.0517, "loss_nan_ranks": 0, "loss_rank_avg": 0.020637229084968567, "step": 395, "valid_targets_mean": 5614.1, "valid_targets_min": 1155 }, { "epoch": 1.2125506072874495, "grad_norm": 0.17569296856429573, "learning_rate": 3.7599005156213066e-05, "loss": 0.0455, "loss_nan_ranks": 0, "loss_rank_avg": 0.013816449791193008, "step": 400, "valid_targets_mean": 5578.5, "valid_targets_min": 446 }, { "epoch": 1.2277327935222673, "grad_norm": 0.1819271647947525, "learning_rate": 3.74975198924794e-05, "loss": 0.0483, "loss_nan_ranks": 0, "loss_rank_avg": 0.013525603339076042, "step": 405, "valid_targets_mean": 6242.0, "valid_targets_min": 502 }, { "epoch": 1.242914979757085, "grad_norm": 0.1797887261745102, "learning_rate": 3.739407686945658e-05, "loss": 0.051, "loss_nan_ranks": 0, "loss_rank_avg": 0.011500559747219086, "step": 410, "valid_targets_mean": 4694.7, "valid_targets_min": 1226 }, { "epoch": 1.258097165991903, "grad_norm": 0.1741276215859531, "learning_rate": 3.728868766115854e-05, "loss": 0.046, "loss_nan_ranks": 0, "loss_rank_avg": 0.020068110898137093, "step": 415, "valid_targets_mean": 6145.7, "valid_targets_min": 1516 }, { "epoch": 1.2732793522267207, "grad_norm": 0.18084874852713817, "learning_rate": 3.718136405935365e-05, "loss": 0.0486, "loss_nan_ranks": 0, "loss_rank_avg": 0.01688222959637642, "step": 420, "valid_targets_mean": 5231.5, "valid_targets_min": 1161 }, { "epoch": 1.2884615384615383, "grad_norm": 0.16889751016736837, "learning_rate": 3.707211807224534e-05, "loss": 0.0438, "loss_nan_ranks": 0, "loss_rank_avg": 0.014528230763971806, "step": 425, "valid_targets_mean": 6355.8, "valid_targets_min": 1046 }, { "epoch": 1.3036437246963564, "grad_norm": 0.1903393788443133, "learning_rate": 3.696096192312852e-05, "loss": 0.0454, "loss_nan_ranks": 0, "loss_rank_avg": 0.018855996429920197, "step": 430, "valid_targets_mean": 6559.8, "valid_targets_min": 832 }, { "epoch": 1.318825910931174, "grad_norm": 0.19972898046944965, "learning_rate": 3.684790804902199e-05, "loss": 0.0442, "loss_nan_ranks": 0, "loss_rank_avg": 0.01779274269938469, "step": 435, "valid_targets_mean": 5856.2, "valid_targets_min": 1771 }, { "epoch": 1.334008097165992, "grad_norm": 0.17018811021407995, "learning_rate": 3.673296909927682e-05, "loss": 0.0466, "loss_nan_ranks": 0, "loss_rank_avg": 0.021237604320049286, "step": 440, "valid_targets_mean": 5553.2, "valid_targets_min": 1358 }, { "epoch": 1.3491902834008096, "grad_norm": 0.1951555980157815, "learning_rate": 3.661615793416109e-05, "loss": 0.0485, "loss_nan_ranks": 0, "loss_rank_avg": 0.01593620888888836, "step": 445, "valid_targets_mean": 5629.2, "valid_targets_min": 942 }, { "epoch": 1.3643724696356276, "grad_norm": 0.180373652890474, "learning_rate": 3.649748762342098e-05, "loss": 0.0419, "loss_nan_ranks": 0, "loss_rank_avg": 0.01810559071600437, "step": 450, "valid_targets_mean": 5972.8, "valid_targets_min": 1388 }, { "epoch": 1.3795546558704452, "grad_norm": 0.16479184696255464, "learning_rate": 3.637697144481839e-05, "loss": 0.0424, "loss_nan_ranks": 0, "loss_rank_avg": 0.008586437441408634, "step": 455, "valid_targets_mean": 5653.8, "valid_targets_min": 1347 }, { "epoch": 1.3947368421052633, "grad_norm": 0.16853080157313605, "learning_rate": 3.625462288264536e-05, "loss": 0.0418, "loss_nan_ranks": 0, "loss_rank_avg": 0.01979016326367855, "step": 460, "valid_targets_mean": 7305.0, "valid_targets_min": 1092 }, { "epoch": 1.4099190283400809, "grad_norm": 0.1714769531014035, "learning_rate": 3.613045562621533e-05, "loss": 0.052, "loss_nan_ranks": 0, "loss_rank_avg": 0.02096070721745491, "step": 465, "valid_targets_mean": 6730.9, "valid_targets_min": 1059 }, { "epoch": 1.425101214574899, "grad_norm": 0.17480947019323384, "learning_rate": 3.600448356833146e-05, "loss": 0.0421, "loss_nan_ranks": 0, "loss_rank_avg": 0.013837619684636593, "step": 470, "valid_targets_mean": 7274.6, "valid_targets_min": 1288 }, { "epoch": 1.4402834008097165, "grad_norm": 0.1588859981479368, "learning_rate": 3.587672080373219e-05, "loss": 0.0433, "loss_nan_ranks": 0, "loss_rank_avg": 0.009698700159788132, "step": 475, "valid_targets_mean": 5479.3, "valid_targets_min": 1114 }, { "epoch": 1.4554655870445345, "grad_norm": 0.161639453514387, "learning_rate": 3.574718162751426e-05, "loss": 0.0375, "loss_nan_ranks": 0, "loss_rank_avg": 0.010496283881366253, "step": 480, "valid_targets_mean": 5544.4, "valid_targets_min": 916 }, { "epoch": 1.4706477732793521, "grad_norm": 0.15503382075391162, "learning_rate": 3.561588053353319e-05, "loss": 0.0379, "loss_nan_ranks": 0, "loss_rank_avg": 0.009971126914024353, "step": 485, "valid_targets_mean": 7162.0, "valid_targets_min": 399 }, { "epoch": 1.48582995951417, "grad_norm": 0.1725961466937425, "learning_rate": 3.5482832212781655e-05, "loss": 0.0402, "loss_nan_ranks": 0, "loss_rank_avg": 0.009653078392148018, "step": 490, "valid_targets_mean": 6713.2, "valid_targets_min": 516 }, { "epoch": 1.5010121457489878, "grad_norm": 0.19680739325496185, "learning_rate": 3.53480515517457e-05, "loss": 0.0379, "loss_nan_ranks": 0, "loss_rank_avg": 0.0158684104681015, "step": 495, "valid_targets_mean": 6155.9, "valid_targets_min": 888 }, { "epoch": 1.5161943319838058, "grad_norm": 0.17098248453785886, "learning_rate": 3.5211553630739166e-05, "loss": 0.0358, "loss_nan_ranks": 0, "loss_rank_avg": 0.012387385591864586, "step": 500, "valid_targets_mean": 6459.1, "valid_targets_min": 1212 }, { "epoch": 1.5313765182186234, "grad_norm": 0.17212144323378897, "learning_rate": 3.5073353722216334e-05, "loss": 0.0367, "loss_nan_ranks": 0, "loss_rank_avg": 0.015653623268008232, "step": 505, "valid_targets_mean": 5349.3, "valid_targets_min": 1249 }, { "epoch": 1.5465587044534415, "grad_norm": 0.15613275126727846, "learning_rate": 3.4933467289063156e-05, "loss": 0.0334, "loss_nan_ranks": 0, "loss_rank_avg": 0.010056696832180023, "step": 510, "valid_targets_mean": 5716.3, "valid_targets_min": 1089 }, { "epoch": 1.561740890688259, "grad_norm": 0.17516245854055706, "learning_rate": 3.4791909982867175e-05, "loss": 0.0339, "loss_nan_ranks": 0, "loss_rank_avg": 0.01214912161231041, "step": 515, "valid_targets_mean": 5831.1, "valid_targets_min": 561 }, { "epoch": 1.5769230769230769, "grad_norm": 0.19105559378665027, "learning_rate": 3.464869764216622e-05, "loss": 0.0327, "loss_nan_ranks": 0, "loss_rank_avg": 0.012154040858149529, "step": 520, "valid_targets_mean": 5496.7, "valid_targets_min": 1195 }, { "epoch": 1.5921052631578947, "grad_norm": 0.17250800846906586, "learning_rate": 3.450384629067635e-05, "loss": 0.031, "loss_nan_ranks": 0, "loss_rank_avg": 0.008524593897163868, "step": 525, "valid_targets_mean": 5553.9, "valid_targets_min": 605 }, { "epoch": 1.6072874493927125, "grad_norm": 0.1717299949148825, "learning_rate": 3.435737213549896e-05, "loss": 0.0415, "loss_nan_ranks": 0, "loss_rank_avg": 0.031826041638851166, "step": 530, "valid_targets_mean": 5484.1, "valid_targets_min": 577 }, { "epoch": 1.6224696356275303, "grad_norm": 0.179799500085348, "learning_rate": 3.420929156530738e-05, "loss": 0.0319, "loss_nan_ranks": 0, "loss_rank_avg": 0.009917274117469788, "step": 535, "valid_targets_mean": 7064.2, "valid_targets_min": 923 }, { "epoch": 1.6376518218623481, "grad_norm": 0.15871774599737085, "learning_rate": 3.405962114851324e-05, "loss": 0.035, "loss_nan_ranks": 0, "loss_rank_avg": 0.010038254782557487, "step": 540, "valid_targets_mean": 7041.0, "valid_targets_min": 436 }, { "epoch": 1.652834008097166, "grad_norm": 0.1794875978731234, "learning_rate": 3.390837763141261e-05, "loss": 0.0342, "loss_nan_ranks": 0, "loss_rank_avg": 0.00696304626762867, "step": 545, "valid_targets_mean": 6311.3, "valid_targets_min": 1669 }, { "epoch": 1.6680161943319838, "grad_norm": 0.1550391073555312, "learning_rate": 3.3755577936312344e-05, "loss": 0.0346, "loss_nan_ranks": 0, "loss_rank_avg": 0.00631769048050046, "step": 550, "valid_targets_mean": 4612.6, "valid_targets_min": 448 }, { "epoch": 1.6831983805668016, "grad_norm": 0.16850242447990393, "learning_rate": 3.360123915963662e-05, "loss": 0.035, "loss_nan_ranks": 0, "loss_rank_avg": 0.00742865027859807, "step": 555, "valid_targets_mean": 6497.4, "valid_targets_min": 1414 }, { "epoch": 1.6983805668016194, "grad_norm": 0.16905370140192488, "learning_rate": 3.3445378570014125e-05, "loss": 0.0314, "loss_nan_ranks": 0, "loss_rank_avg": 0.011094050481915474, "step": 560, "valid_targets_mean": 5412.4, "valid_targets_min": 1495 }, { "epoch": 1.7135627530364372, "grad_norm": 0.15956039481269715, "learning_rate": 3.328801360634585e-05, "loss": 0.0334, "loss_nan_ranks": 0, "loss_rank_avg": 0.005547087173908949, "step": 565, "valid_targets_mean": 4378.1, "valid_targets_min": 1302 }, { "epoch": 1.728744939271255, "grad_norm": 0.16589513640263132, "learning_rate": 3.312916187585392e-05, "loss": 0.0328, "loss_nan_ranks": 0, "loss_rank_avg": 0.010751340538263321, "step": 570, "valid_targets_mean": 5506.6, "valid_targets_min": 1233 }, { "epoch": 1.7439271255060729, "grad_norm": 0.1744055632321872, "learning_rate": 3.296884115211157e-05, "loss": 0.0299, "loss_nan_ranks": 0, "loss_rank_avg": 0.006707729306071997, "step": 575, "valid_targets_mean": 5298.9, "valid_targets_min": 1420 }, { "epoch": 1.7591093117408907, "grad_norm": 0.17696528943812867, "learning_rate": 3.280706937305445e-05, "loss": 0.0265, "loss_nan_ranks": 0, "loss_rank_avg": 0.009326794184744358, "step": 580, "valid_targets_mean": 5770.2, "valid_targets_min": 1414 }, { "epoch": 1.7742914979757085, "grad_norm": 0.15958457014375885, "learning_rate": 3.2643864638973645e-05, "loss": 0.0371, "loss_nan_ranks": 0, "loss_rank_avg": 0.0119598638266325, "step": 585, "valid_targets_mean": 6624.9, "valid_targets_min": 1202 }, { "epoch": 1.7894736842105263, "grad_norm": 0.1769988360539718, "learning_rate": 3.2479245210490434e-05, "loss": 0.0281, "loss_nan_ranks": 0, "loss_rank_avg": 0.011356104165315628, "step": 590, "valid_targets_mean": 6446.4, "valid_targets_min": 1407 }, { "epoch": 1.8046558704453441, "grad_norm": 0.16194592137927077, "learning_rate": 3.2313229506513167e-05, "loss": 0.0285, "loss_nan_ranks": 0, "loss_rank_avg": 0.00683578522875905, "step": 595, "valid_targets_mean": 5801.3, "valid_targets_min": 1246 }, { "epoch": 1.819838056680162, "grad_norm": 0.17822073447265535, "learning_rate": 3.2145836102176424e-05, "loss": 0.0279, "loss_nan_ranks": 0, "loss_rank_avg": 0.007592367008328438, "step": 600, "valid_targets_mean": 6117.9, "valid_targets_min": 653 }, { "epoch": 1.8350202429149798, "grad_norm": 0.17577937924822495, "learning_rate": 3.197708372676265e-05, "loss": 0.0307, "loss_nan_ranks": 0, "loss_rank_avg": 0.013171358034014702, "step": 605, "valid_targets_mean": 6056.2, "valid_targets_min": 1345 }, { "epoch": 1.8502024291497976, "grad_norm": 0.189351919140906, "learning_rate": 3.1806991261606604e-05, "loss": 0.0291, "loss_nan_ranks": 0, "loss_rank_avg": 0.008558647707104683, "step": 610, "valid_targets_mean": 6072.7, "valid_targets_min": 395 }, { "epoch": 1.8653846153846154, "grad_norm": 0.1765130899494387, "learning_rate": 3.163557773798276e-05, "loss": 0.0304, "loss_nan_ranks": 0, "loss_rank_avg": 0.01081151608377695, "step": 615, "valid_targets_mean": 6111.1, "valid_targets_min": 1064 }, { "epoch": 1.8805668016194332, "grad_norm": 0.1887454294616824, "learning_rate": 3.146286233497593e-05, "loss": 0.0276, "loss_nan_ranks": 0, "loss_rank_avg": 0.009745375253260136, "step": 620, "valid_targets_mean": 4363.2, "valid_targets_min": 387 }, { "epoch": 1.895748987854251, "grad_norm": 0.17589978066244452, "learning_rate": 3.128886437733539e-05, "loss": 0.0249, "loss_nan_ranks": 0, "loss_rank_avg": 0.007151344791054726, "step": 625, "valid_targets_mean": 5897.3, "valid_targets_min": 1996 }, { "epoch": 1.9109311740890689, "grad_norm": 0.16909428003654398, "learning_rate": 3.111360333331263e-05, "loss": 0.0237, "loss_nan_ranks": 0, "loss_rank_avg": 0.0048427945002913475, "step": 630, "valid_targets_mean": 5007.2, "valid_targets_min": 1116 }, { "epoch": 1.9261133603238867, "grad_norm": 0.16402341969524917, "learning_rate": 3.093709881248312e-05, "loss": 0.0261, "loss_nan_ranks": 0, "loss_rank_avg": 0.010756228119134903, "step": 635, "valid_targets_mean": 6274.2, "valid_targets_min": 340 }, { "epoch": 1.9412955465587043, "grad_norm": 0.16227058756454754, "learning_rate": 3.075937056355225e-05, "loss": 0.0236, "loss_nan_ranks": 0, "loss_rank_avg": 0.011032762937247753, "step": 640, "valid_targets_mean": 5846.4, "valid_targets_min": 355 }, { "epoch": 1.9564777327935223, "grad_norm": 0.17153068653715034, "learning_rate": 3.0580438472145665e-05, "loss": 0.0217, "loss_nan_ranks": 0, "loss_rank_avg": 0.006100079044699669, "step": 645, "valid_targets_mean": 5265.3, "valid_targets_min": 692 }, { "epoch": 1.97165991902834, "grad_norm": 0.15694835838400872, "learning_rate": 3.0400322558584308e-05, "loss": 0.0254, "loss_nan_ranks": 0, "loss_rank_avg": 0.004396181087940931, "step": 650, "valid_targets_mean": 5833.3, "valid_targets_min": 921 }, { "epoch": 1.986842105263158, "grad_norm": 0.15980764582016754, "learning_rate": 3.0219042975644415e-05, "loss": 0.0226, "loss_nan_ranks": 0, "loss_rank_avg": 0.006331868004053831, "step": 655, "valid_targets_mean": 6124.8, "valid_targets_min": 884 }, { "epoch": 2.0, "grad_norm": 0.27431045014629224, "learning_rate": 3.0036620006302624e-05, "loss": 0.0216, "loss_nan_ranks": 0, "loss_rank_avg": 0.02224479243159294, "step": 660, "valid_targets_mean": 5601.6, "valid_targets_min": 1076 }, { "epoch": 2.0151821862348176, "grad_norm": 0.18008079720274842, "learning_rate": 2.9853074061466602e-05, "loss": 0.0148, "loss_nan_ranks": 0, "loss_rank_avg": 0.003647482953965664, "step": 665, "valid_targets_mean": 4510.8, "valid_targets_min": 372 }, { "epoch": 2.0303643724696356, "grad_norm": 0.16950595950561143, "learning_rate": 2.9668425677691278e-05, "loss": 0.0186, "loss_nan_ranks": 0, "loss_rank_avg": 0.007231104653328657, "step": 670, "valid_targets_mean": 6048.3, "valid_targets_min": 1468 }, { "epoch": 2.0455465587044532, "grad_norm": 0.16358525483049902, "learning_rate": 2.948269551488108e-05, "loss": 0.0183, "loss_nan_ranks": 0, "loss_rank_avg": 0.004514344036579132, "step": 675, "valid_targets_mean": 5153.2, "valid_targets_min": 1176 }, { "epoch": 2.0607287449392713, "grad_norm": 0.1593999835366038, "learning_rate": 2.929590435397832e-05, "loss": 0.0128, "loss_nan_ranks": 0, "loss_rank_avg": 0.003227936802431941, "step": 680, "valid_targets_mean": 4672.3, "valid_targets_min": 1158 }, { "epoch": 2.075910931174089, "grad_norm": 0.1380695703151424, "learning_rate": 2.9108073094638066e-05, "loss": 0.0124, "loss_nan_ranks": 0, "loss_rank_avg": 0.007050475105643272, "step": 685, "valid_targets_mean": 6611.9, "valid_targets_min": 416 }, { "epoch": 2.091093117408907, "grad_norm": 0.15120020386940933, "learning_rate": 2.8919222752889727e-05, "loss": 0.0155, "loss_nan_ranks": 0, "loss_rank_avg": 0.00420386390760541, "step": 690, "valid_targets_mean": 5157.1, "valid_targets_min": 1092 }, { "epoch": 2.1062753036437245, "grad_norm": 0.142832386456704, "learning_rate": 2.8729374458785647e-05, "loss": 0.0212, "loss_nan_ranks": 0, "loss_rank_avg": 0.0038718217983841896, "step": 695, "valid_targets_mean": 6239.8, "valid_targets_min": 947 }, { "epoch": 2.1214574898785425, "grad_norm": 0.1377475247478908, "learning_rate": 2.8538549454036838e-05, "loss": 0.0146, "loss_nan_ranks": 0, "loss_rank_avg": 0.005288434214890003, "step": 700, "valid_targets_mean": 6355.2, "valid_targets_min": 1231 }, { "epoch": 2.13663967611336, "grad_norm": 0.15104543128314743, "learning_rate": 2.834676908963636e-05, "loss": 0.0131, "loss_nan_ranks": 0, "loss_rank_avg": 0.0031184745021164417, "step": 705, "valid_targets_mean": 4491.0, "valid_targets_min": 1525 }, { "epoch": 2.151821862348178, "grad_norm": 0.14038989161016746, "learning_rate": 2.815405482347037e-05, "loss": 0.0131, "loss_nan_ranks": 0, "loss_rank_avg": 0.008421801030635834, "step": 710, "valid_targets_mean": 6889.0, "valid_targets_min": 2031 }, { "epoch": 2.167004048582996, "grad_norm": 0.15798640698682528, "learning_rate": 2.796042821791725e-05, "loss": 0.0131, "loss_nan_ranks": 0, "loss_rank_avg": 0.005701111629605293, "step": 715, "valid_targets_mean": 8192.0, "valid_targets_min": 1439 }, { "epoch": 2.182186234817814, "grad_norm": 0.13009340565031757, "learning_rate": 2.776591093743505e-05, "loss": 0.0192, "loss_nan_ranks": 0, "loss_rank_avg": 0.002326598856598139, "step": 720, "valid_targets_mean": 6049.4, "valid_targets_min": 412 }, { "epoch": 2.1973684210526314, "grad_norm": 0.14208658643553557, "learning_rate": 2.7570524746137485e-05, "loss": 0.0114, "loss_nan_ranks": 0, "loss_rank_avg": 0.0028066979721188545, "step": 725, "valid_targets_mean": 4216.1, "valid_targets_min": 983 }, { "epoch": 2.2125506072874495, "grad_norm": 0.14003401479048846, "learning_rate": 2.7374291505358818e-05, "loss": 0.017, "loss_nan_ranks": 0, "loss_rank_avg": 0.0040283650159835815, "step": 730, "valid_targets_mean": 6767.3, "valid_targets_min": 1798 }, { "epoch": 2.227732793522267, "grad_norm": 0.1404844207738646, "learning_rate": 2.7177233171207817e-05, "loss": 0.0129, "loss_nan_ranks": 0, "loss_rank_avg": 0.004482706543058157, "step": 735, "valid_targets_mean": 5788.1, "valid_targets_min": 1172 }, { "epoch": 2.242914979757085, "grad_norm": 0.14543072160206105, "learning_rate": 2.6979371792111147e-05, "loss": 0.013, "loss_nan_ranks": 0, "loss_rank_avg": 0.0030938778072595596, "step": 740, "valid_targets_mean": 5783.0, "valid_targets_min": 998 }, { "epoch": 2.2580971659919027, "grad_norm": 0.15031997200997996, "learning_rate": 2.678072950634641e-05, "loss": 0.0155, "loss_nan_ranks": 0, "loss_rank_avg": 0.004818389657884836, "step": 745, "valid_targets_mean": 5858.6, "valid_targets_min": 653 }, { "epoch": 2.2732793522267207, "grad_norm": 0.1474155672088557, "learning_rate": 2.6581328539565184e-05, "loss": 0.0105, "loss_nan_ranks": 0, "loss_rank_avg": 0.002948472509160638, "step": 750, "valid_targets_mean": 5001.7, "valid_targets_min": 410 }, { "epoch": 2.2884615384615383, "grad_norm": 0.15651905545877035, "learning_rate": 2.638119120230616e-05, "loss": 0.0134, "loss_nan_ranks": 0, "loss_rank_avg": 0.003472162876278162, "step": 755, "valid_targets_mean": 4897.8, "valid_targets_min": 487 }, { "epoch": 2.3036437246963564, "grad_norm": 0.1489685695433526, "learning_rate": 2.618033988749895e-05, "loss": 0.013, "loss_nan_ranks": 0, "loss_rank_avg": 0.005241721868515015, "step": 760, "valid_targets_mean": 5184.1, "valid_targets_min": 1225 }, { "epoch": 2.318825910931174, "grad_norm": 0.14394874153264994, "learning_rate": 2.5978797067958542e-05, "loss": 0.0112, "loss_nan_ranks": 0, "loss_rank_avg": 0.005601848475635052, "step": 765, "valid_targets_mean": 7270.8, "valid_targets_min": 1516 }, { "epoch": 2.334008097165992, "grad_norm": 0.14641494561745882, "learning_rate": 2.5776585293870877e-05, "loss": 0.0107, "loss_nan_ranks": 0, "loss_rank_avg": 0.003249663859605789, "step": 770, "valid_targets_mean": 6307.8, "valid_targets_min": 492 }, { "epoch": 2.3491902834008096, "grad_norm": 0.13606809371049533, "learning_rate": 2.557372719026976e-05, "loss": 0.0127, "loss_nan_ranks": 0, "loss_rank_avg": 0.001904345117509365, "step": 775, "valid_targets_mean": 5415.7, "valid_targets_min": 1660 }, { "epoch": 2.3643724696356276, "grad_norm": 0.14688794912709033, "learning_rate": 2.537024545450539e-05, "loss": 0.0133, "loss_nan_ranks": 0, "loss_rank_avg": 0.006051709875464439, "step": 780, "valid_targets_mean": 6052.3, "valid_targets_min": 1193 }, { "epoch": 2.3795546558704452, "grad_norm": 0.1410564249211947, "learning_rate": 2.5166162853704825e-05, "loss": 0.0133, "loss_nan_ranks": 0, "loss_rank_avg": 0.007399308495223522, "step": 785, "valid_targets_mean": 6521.0, "valid_targets_min": 1629 }, { "epoch": 2.3947368421052633, "grad_norm": 0.13759684286966475, "learning_rate": 2.496150222222458e-05, "loss": 0.0119, "loss_nan_ranks": 0, "loss_rank_avg": 0.0028168156277388334, "step": 790, "valid_targets_mean": 5283.4, "valid_targets_min": 1152 }, { "epoch": 2.409919028340081, "grad_norm": 0.146148305747386, "learning_rate": 2.475628645909576e-05, "loss": 0.0141, "loss_nan_ranks": 0, "loss_rank_avg": 0.00658248458057642, "step": 795, "valid_targets_mean": 6796.8, "valid_targets_min": 1208 }, { "epoch": 2.425101214574899, "grad_norm": 0.1276669258915863, "learning_rate": 2.4550538525461963e-05, "loss": 0.0123, "loss_nan_ranks": 0, "loss_rank_avg": 0.005004022270441055, "step": 800, "valid_targets_mean": 5959.2, "valid_targets_min": 1689 }, { "epoch": 2.4402834008097165, "grad_norm": 0.15639817230583197, "learning_rate": 2.434428144201016e-05, "loss": 0.012, "loss_nan_ranks": 0, "loss_rank_avg": 0.0051465933211147785, "step": 805, "valid_targets_mean": 5432.3, "valid_targets_min": 1064 }, { "epoch": 2.4554655870445345, "grad_norm": 0.12723540480056106, "learning_rate": 2.4137538286394976e-05, "loss": 0.0118, "loss_nan_ranks": 0, "loss_rank_avg": 0.0057717300951480865, "step": 810, "valid_targets_mean": 8181.6, "valid_targets_min": 1615 }, { "epoch": 2.470647773279352, "grad_norm": 0.11404355239410903, "learning_rate": 2.3930332190656604e-05, "loss": 0.0129, "loss_nan_ranks": 0, "loss_rank_avg": 0.012553590349853039, "step": 815, "valid_targets_mean": 8294.0, "valid_targets_min": 2134 }, { "epoch": 2.48582995951417, "grad_norm": 0.12081352714937799, "learning_rate": 2.3722686338632602e-05, "loss": 0.012, "loss_nan_ranks": 0, "loss_rank_avg": 0.0045616645365953445, "step": 820, "valid_targets_mean": 6857.9, "valid_targets_min": 672 }, { "epoch": 2.501012145748988, "grad_norm": 0.12829629760213093, "learning_rate": 2.3514623963363886e-05, "loss": 0.0138, "loss_nan_ranks": 0, "loss_rank_avg": 0.0032292886171489954, "step": 825, "valid_targets_mean": 6975.8, "valid_targets_min": 1095 }, { "epoch": 2.516194331983806, "grad_norm": 0.13744925270446964, "learning_rate": 2.330616834449525e-05, "loss": 0.0123, "loss_nan_ranks": 0, "loss_rank_avg": 0.0020485175773501396, "step": 830, "valid_targets_mean": 5187.1, "valid_targets_min": 916 }, { "epoch": 2.5313765182186234, "grad_norm": 0.12416127230021508, "learning_rate": 2.309734280567065e-05, "loss": 0.0114, "loss_nan_ranks": 0, "loss_rank_avg": 0.0024628550745546818, "step": 835, "valid_targets_mean": 5624.4, "valid_targets_min": 412 }, { "epoch": 2.5465587044534415, "grad_norm": 0.12170092359902011, "learning_rate": 2.28881707119236e-05, "loss": 0.0099, "loss_nan_ranks": 0, "loss_rank_avg": 0.0037577205803245306, "step": 840, "valid_targets_mean": 6274.0, "valid_targets_min": 846 }, { "epoch": 2.561740890688259, "grad_norm": 0.1263438950688655, "learning_rate": 2.267867546706287e-05, "loss": 0.0114, "loss_nan_ranks": 0, "loss_rank_avg": 0.004231428727507591, "step": 845, "valid_targets_mean": 6037.4, "valid_targets_min": 1107 }, { "epoch": 2.5769230769230766, "grad_norm": 0.1245576799383021, "learning_rate": 2.2468880511053896e-05, "loss": 0.0087, "loss_nan_ranks": 0, "loss_rank_avg": 0.0017101940466091037, "step": 850, "valid_targets_mean": 4867.1, "valid_targets_min": 1523 }, { "epoch": 2.5921052631578947, "grad_norm": 0.12519745938970292, "learning_rate": 2.2258809317396163e-05, "loss": 0.0115, "loss_nan_ranks": 0, "loss_rank_avg": 0.005295773968100548, "step": 855, "valid_targets_mean": 7008.8, "valid_targets_min": 416 }, { "epoch": 2.6072874493927127, "grad_norm": 0.14296631760190703, "learning_rate": 2.2048485390496757e-05, "loss": 0.0126, "loss_nan_ranks": 0, "loss_rank_avg": 0.004541776143014431, "step": 860, "valid_targets_mean": 5919.8, "valid_targets_min": 410 }, { "epoch": 2.6224696356275303, "grad_norm": 0.13592520238173653, "learning_rate": 2.1837932263040553e-05, "loss": 0.009, "loss_nan_ranks": 0, "loss_rank_avg": 0.0023296151775866747, "step": 865, "valid_targets_mean": 4947.3, "valid_targets_min": 1150 }, { "epoch": 2.637651821862348, "grad_norm": 0.1183802153348962, "learning_rate": 2.1627173493357167e-05, "loss": 0.0089, "loss_nan_ranks": 0, "loss_rank_avg": 0.001233824877999723, "step": 870, "valid_targets_mean": 7681.5, "valid_targets_min": 1849 }, { "epoch": 2.652834008097166, "grad_norm": 0.146011801149195, "learning_rate": 2.1416232662785084e-05, "loss": 0.0093, "loss_nan_ranks": 0, "loss_rank_avg": 0.002828218974173069, "step": 875, "valid_targets_mean": 5370.2, "valid_targets_min": 1319 }, { "epoch": 2.668016194331984, "grad_norm": 0.12616997162436008, "learning_rate": 2.1205133373033173e-05, "loss": 0.0087, "loss_nan_ranks": 0, "loss_rank_avg": 0.0021544168703258038, "step": 880, "valid_targets_mean": 5350.7, "valid_targets_min": 1400 }, { "epoch": 2.6831983805668016, "grad_norm": 0.12828277280789904, "learning_rate": 2.0993899243539953e-05, "loss": 0.0095, "loss_nan_ranks": 0, "loss_rank_avg": 0.0030619329772889614, "step": 885, "valid_targets_mean": 5140.3, "valid_targets_min": 1205 }, { "epoch": 2.698380566801619, "grad_norm": 0.12448517529183224, "learning_rate": 2.0782553908830887e-05, "loss": 0.0101, "loss_nan_ranks": 0, "loss_rank_avg": 0.00264772679656744, "step": 890, "valid_targets_mean": 7220.8, "valid_targets_min": 726 }, { "epoch": 2.7135627530364372, "grad_norm": 0.13018465983618735, "learning_rate": 2.0571121015873924e-05, "loss": 0.0096, "loss_nan_ranks": 0, "loss_rank_avg": 0.001603921758942306, "step": 895, "valid_targets_mean": 5471.2, "valid_targets_min": 1121 }, { "epoch": 2.7287449392712553, "grad_norm": 0.12403998240327094, "learning_rate": 2.0359624221433728e-05, "loss": 0.0087, "loss_nan_ranks": 0, "loss_rank_avg": 0.004858669824898243, "step": 900, "valid_targets_mean": 7486.9, "valid_targets_min": 1486 }, { "epoch": 2.743927125506073, "grad_norm": 0.12878060438832242, "learning_rate": 2.014808718942476e-05, "loss": 0.0097, "loss_nan_ranks": 0, "loss_rank_avg": 0.0026276602875441313, "step": 905, "valid_targets_mean": 5077.5, "valid_targets_min": 1271 }, { "epoch": 2.7591093117408905, "grad_norm": 0.1318362046529001, "learning_rate": 1.9936533588263557e-05, "loss": 0.0076, "loss_nan_ranks": 0, "loss_rank_avg": 0.0019232281483709812, "step": 910, "valid_targets_mean": 4782.8, "valid_targets_min": 1046 }, { "epoch": 2.7742914979757085, "grad_norm": 0.13400217889661628, "learning_rate": 1.9724987088220565e-05, "loss": 0.0088, "loss_nan_ranks": 0, "loss_rank_avg": 0.0034655483905225992, "step": 915, "valid_targets_mean": 5774.3, "valid_targets_min": 1068 }, { "epoch": 2.7894736842105265, "grad_norm": 0.14052177532469134, "learning_rate": 1.951347135877169e-05, "loss": 0.0134, "loss_nan_ranks": 0, "loss_rank_avg": 0.0036318274214863777, "step": 920, "valid_targets_mean": 5540.0, "valid_targets_min": 771 }, { "epoch": 2.804655870445344, "grad_norm": 0.12406832197918945, "learning_rate": 1.930201006594999e-05, "loss": 0.008, "loss_nan_ranks": 0, "loss_rank_avg": 0.00445646233856678, "step": 925, "valid_targets_mean": 4831.8, "valid_targets_min": 446 }, { "epoch": 2.8198380566801617, "grad_norm": 0.12308808187111273, "learning_rate": 1.9090626869697714e-05, "loss": 0.0086, "loss_nan_ranks": 0, "loss_rank_avg": 0.002711558947339654, "step": 930, "valid_targets_mean": 5135.3, "valid_targets_min": 1158 }, { "epoch": 2.83502024291498, "grad_norm": 0.12076313343088467, "learning_rate": 1.8879345421219063e-05, "loss": 0.0068, "loss_nan_ranks": 0, "loss_rank_avg": 0.002742330078035593, "step": 935, "valid_targets_mean": 5565.3, "valid_targets_min": 497 }, { "epoch": 2.850202429149798, "grad_norm": 0.11363956368138457, "learning_rate": 1.8668189360333923e-05, "loss": 0.0082, "loss_nan_ranks": 0, "loss_rank_avg": 0.0011241225292906165, "step": 940, "valid_targets_mean": 4582.2, "valid_targets_min": 502 }, { "epoch": 2.8653846153846154, "grad_norm": 0.1408683722620157, "learning_rate": 1.845718231283281e-05, "loss": 0.0116, "loss_nan_ranks": 0, "loss_rank_avg": 0.0036686803214251995, "step": 945, "valid_targets_mean": 5548.2, "valid_targets_min": 1495 }, { "epoch": 2.880566801619433, "grad_norm": 0.11406554429413167, "learning_rate": 1.8246347887833457e-05, "loss": 0.0061, "loss_nan_ranks": 0, "loss_rank_avg": 0.0016177756479009986, "step": 950, "valid_targets_mean": 5573.5, "valid_targets_min": 1325 }, { "epoch": 2.895748987854251, "grad_norm": 0.10960387099674025, "learning_rate": 1.8035709675139258e-05, "loss": 0.0071, "loss_nan_ranks": 0, "loss_rank_avg": 0.0030507217161357403, "step": 955, "valid_targets_mean": 5650.0, "valid_targets_min": 1500 }, { "epoch": 2.910931174089069, "grad_norm": 0.1313062947338427, "learning_rate": 1.7825291242599837e-05, "loss": 0.0102, "loss_nan_ranks": 0, "loss_rank_avg": 0.003934426233172417, "step": 960, "valid_targets_mean": 6043.7, "valid_targets_min": 1059 }, { "epoch": 2.9261133603238867, "grad_norm": 0.11005279182882197, "learning_rate": 1.7615116133474084e-05, "loss": 0.0078, "loss_nan_ranks": 0, "loss_rank_avg": 0.0014721793122589588, "step": 965, "valid_targets_mean": 6052.4, "valid_targets_min": 909 }, { "epoch": 2.9412955465587043, "grad_norm": 0.11020025200867281, "learning_rate": 1.7405207863795966e-05, "loss": 0.0058, "loss_nan_ranks": 0, "loss_rank_avg": 0.0014757635071873665, "step": 970, "valid_targets_mean": 4909.2, "valid_targets_min": 1062 }, { "epoch": 2.9564777327935223, "grad_norm": 0.10805582468251514, "learning_rate": 1.719558991974339e-05, "loss": 0.0099, "loss_nan_ranks": 0, "loss_rank_avg": 0.002586258575320244, "step": 975, "valid_targets_mean": 6932.2, "valid_targets_min": 1186 }, { "epoch": 2.97165991902834, "grad_norm": 0.1265774000941838, "learning_rate": 1.698628575501034e-05, "loss": 0.008, "loss_nan_ranks": 0, "loss_rank_avg": 0.0020268792286515236, "step": 980, "valid_targets_mean": 5083.2, "valid_targets_min": 961 }, { "epoch": 2.986842105263158, "grad_norm": 0.1111504130944611, "learning_rate": 1.6777318788182723e-05, "loss": 0.0067, "loss_nan_ranks": 0, "loss_rank_avg": 0.006772821769118309, "step": 985, "valid_targets_mean": 6128.4, "valid_targets_min": 1046 }, { "epoch": 3.0, "grad_norm": 0.167104684966192, "learning_rate": 1.6568712400118102e-05, "loss": 0.0058, "loss_nan_ranks": 0, "loss_rank_avg": 0.002753224689513445, "step": 990, "valid_targets_mean": 5531.0, "valid_targets_min": 733 }, { "epoch": 3.0151821862348176, "grad_norm": 0.09553905678445854, "learning_rate": 1.636048993132969e-05, "loss": 0.0073, "loss_nan_ranks": 0, "loss_rank_avg": 0.000724322278983891, "step": 995, "valid_targets_mean": 5769.0, "valid_targets_min": 2204 }, { "epoch": 3.0303643724696356, "grad_norm": 0.10240447291611517, "learning_rate": 1.615267467937479e-05, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.0008562590228393674, "step": 1000, "valid_targets_mean": 6404.2, "valid_targets_min": 906 }, { "epoch": 3.0455465587044532, "grad_norm": 0.09468266846983169, "learning_rate": 1.59452898962481e-05, "loss": 0.0062, "loss_nan_ranks": 0, "loss_rank_avg": 0.001599764684215188, "step": 1005, "valid_targets_mean": 5183.8, "valid_targets_min": 1185 }, { "epoch": 3.0607287449392713, "grad_norm": 0.09739535591833388, "learning_rate": 1.573835878578013e-05, "loss": 0.0033, "loss_nan_ranks": 0, "loss_rank_avg": 0.000953693815972656, "step": 1010, "valid_targets_mean": 6161.1, "valid_targets_min": 961 }, { "epoch": 3.075910931174089, "grad_norm": 0.11180915656594645, "learning_rate": 1.5531904501040917e-05, "loss": 0.0037, "loss_nan_ranks": 0, "loss_rank_avg": 0.0008014809573069215, "step": 1015, "valid_targets_mean": 5151.8, "valid_targets_min": 993 }, { "epoch": 3.091093117408907, "grad_norm": 0.09727687969118004, "learning_rate": 1.5325950141749522e-05, "loss": 0.0042, "loss_nan_ranks": 0, "loss_rank_avg": 0.001928561832755804, "step": 1020, "valid_targets_mean": 5867.9, "valid_targets_min": 1222 }, { "epoch": 3.1062753036437245, "grad_norm": 0.09494041364817436, "learning_rate": 1.5120518751689438e-05, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.0030567715875804424, "step": 1025, "valid_targets_mean": 5811.8, "valid_targets_min": 1531 }, { "epoch": 3.1214574898785425, "grad_norm": 0.11261942531123377, "learning_rate": 1.4915633316130267e-05, "loss": 0.0056, "loss_nan_ranks": 0, "loss_rank_avg": 0.011703863739967346, "step": 1030, "valid_targets_mean": 6078.9, "valid_targets_min": 961 }, { "epoch": 3.13663967611336, "grad_norm": 0.1005072175870218, "learning_rate": 1.4711316759255963e-05, "loss": 0.0036, "loss_nan_ranks": 0, "loss_rank_avg": 0.0007407825905829668, "step": 1035, "valid_targets_mean": 5989.4, "valid_targets_min": 372 }, { "epoch": 3.151821862348178, "grad_norm": 0.0897070631791768, "learning_rate": 1.450759194159987e-05, "loss": 0.0038, "loss_nan_ranks": 0, "loss_rank_avg": 0.001739290077239275, "step": 1040, "valid_targets_mean": 5473.2, "valid_targets_min": 527 }, { "epoch": 3.167004048582996, "grad_norm": 0.09128564039769516, "learning_rate": 1.4304481657486955e-05, "loss": 0.0036, "loss_nan_ranks": 0, "loss_rank_avg": 0.0015778269153088331, "step": 1045, "valid_targets_mean": 5053.5, "valid_targets_min": 916 }, { "epoch": 3.182186234817814, "grad_norm": 0.10717444490017926, "learning_rate": 1.4102008632483344e-05, "loss": 0.0035, "loss_nan_ranks": 0, "loss_rank_avg": 0.0007246138993650675, "step": 1050, "valid_targets_mean": 4916.7, "valid_targets_min": 692 }, { "epoch": 3.1973684210526314, "grad_norm": 0.10375123766311543, "learning_rate": 1.3900195520853628e-05, "loss": 0.0035, "loss_nan_ranks": 0, "loss_rank_avg": 0.0013751237420365214, "step": 1055, "valid_targets_mean": 6373.6, "valid_targets_min": 1948 }, { "epoch": 3.2125506072874495, "grad_norm": 0.09149527268094115, "learning_rate": 1.3699064903026149e-05, "loss": 0.0069, "loss_nan_ranks": 0, "loss_rank_avg": 0.0019966168329119682, "step": 1060, "valid_targets_mean": 6287.4, "valid_targets_min": 1089 }, { "epoch": 3.227732793522267, "grad_norm": 0.11863359183645682, "learning_rate": 1.34986392830665e-05, "loss": 0.0071, "loss_nan_ranks": 0, "loss_rank_avg": 0.0016580638475716114, "step": 1065, "valid_targets_mean": 5563.2, "valid_targets_min": 854 }, { "epoch": 3.242914979757085, "grad_norm": 0.0855112212625228, "learning_rate": 1.3298941086159598e-05, "loss": 0.0069, "loss_nan_ranks": 0, "loss_rank_avg": 0.0004933560267090797, "step": 1070, "valid_targets_mean": 6606.3, "valid_targets_min": 1186 }, { "epoch": 3.2580971659919027, "grad_norm": 0.09570561492253427, "learning_rate": 1.3099992656100592e-05, "loss": 0.0032, "loss_nan_ranks": 0, "loss_rank_avg": 0.00117517972830683, "step": 1075, "valid_targets_mean": 5255.0, "valid_targets_min": 1325 }, { "epoch": 3.2732793522267207, "grad_norm": 0.0909562430234719, "learning_rate": 1.2901816252794848e-05, "loss": 0.0037, "loss_nan_ranks": 0, "loss_rank_avg": 0.0012113149277865887, "step": 1080, "valid_targets_mean": 5675.1, "valid_targets_min": 1231 }, { "epoch": 3.2884615384615383, "grad_norm": 0.08189530486902238, "learning_rate": 1.2704434049767356e-05, "loss": 0.0035, "loss_nan_ranks": 0, "loss_rank_avg": 0.0010620395187288523, "step": 1085, "valid_targets_mean": 5676.3, "valid_targets_min": 1152 }, { "epoch": 3.3036437246963564, "grad_norm": 0.08948614953890663, "learning_rate": 1.250786813168176e-05, "loss": 0.0029, "loss_nan_ranks": 0, "loss_rank_avg": 0.001292562810704112, "step": 1090, "valid_targets_mean": 6668.9, "valid_targets_min": 410 }, { "epoch": 3.318825910931174, "grad_norm": 0.09299479244515868, "learning_rate": 1.2312140491869369e-05, "loss": 0.0035, "loss_nan_ranks": 0, "loss_rank_avg": 0.0010404742788523436, "step": 1095, "valid_targets_mean": 5615.0, "valid_targets_min": 497 }, { "epoch": 3.334008097165992, "grad_norm": 0.08869720551088744, "learning_rate": 1.2117273029868362e-05, "loss": 0.004, "loss_nan_ranks": 0, "loss_rank_avg": 0.00047623467980884016, "step": 1100, "valid_targets_mean": 6315.7, "valid_targets_min": 1252 }, { "epoch": 3.3491902834008096, "grad_norm": 0.08966722709716235, "learning_rate": 1.1923287548973508e-05, "loss": 0.0031, "loss_nan_ranks": 0, "loss_rank_avg": 0.0015258332714438438, "step": 1105, "valid_targets_mean": 4745.8, "valid_targets_min": 1366 }, { "epoch": 3.3643724696356276, "grad_norm": 0.0914871818454804, "learning_rate": 1.1730205753796631e-05, "loss": 0.0042, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005572251975536346, "step": 1110, "valid_targets_mean": 5411.1, "valid_targets_min": 1057 }, { "epoch": 3.3795546558704452, "grad_norm": 0.0991268632009275, "learning_rate": 1.1538049247838128e-05, "loss": 0.0034, "loss_nan_ranks": 0, "loss_rank_avg": 0.0009232150041498244, "step": 1115, "valid_targets_mean": 7529.5, "valid_targets_min": 1068 }, { "epoch": 3.3947368421052633, "grad_norm": 0.06942077265753631, "learning_rate": 1.134683953106983e-05, "loss": 0.0026, "loss_nan_ranks": 0, "loss_rank_avg": 0.00040978423203341663, "step": 1120, "valid_targets_mean": 5348.5, "valid_targets_min": 1222 }, { "epoch": 3.409919028340081, "grad_norm": 0.08938090828584724, "learning_rate": 1.115659799752938e-05, "loss": 0.0034, "loss_nan_ranks": 0, "loss_rank_avg": 0.0008477874216623604, "step": 1125, "valid_targets_mean": 4953.0, "valid_targets_min": 669 }, { "epoch": 3.425101214574899, "grad_norm": 0.09026804310673464, "learning_rate": 1.096734593292649e-05, "loss": 0.0028, "loss_nan_ranks": 0, "loss_rank_avg": 0.0010314269457012415, "step": 1130, "valid_targets_mean": 5280.3, "valid_targets_min": 1506 }, { "epoch": 3.4402834008097165, "grad_norm": 0.06885287233281213, "learning_rate": 1.077910451226138e-05, "loss": 0.0028, "loss_nan_ranks": 0, "loss_rank_avg": 0.0003990654367953539, "step": 1135, "valid_targets_mean": 4768.4, "valid_targets_min": 1602 }, { "epoch": 3.4554655870445345, "grad_norm": 0.07923523998517162, "learning_rate": 1.0591894797455526e-05, "loss": 0.003, "loss_nan_ranks": 0, "loss_rank_avg": 0.0008414952899329364, "step": 1140, "valid_targets_mean": 5667.3, "valid_targets_min": 557 }, { "epoch": 3.470647773279352, "grad_norm": 0.07839620139640652, "learning_rate": 1.0405737734995083e-05, "loss": 0.0059, "loss_nan_ranks": 0, "loss_rank_avg": 0.0009308420703746378, "step": 1145, "valid_targets_mean": 5440.8, "valid_targets_min": 983 }, { "epoch": 3.48582995951417, "grad_norm": 0.08143106991037652, "learning_rate": 1.0220654153587225e-05, "loss": 0.0029, "loss_nan_ranks": 0, "loss_rank_avg": 0.000549286836758256, "step": 1150, "valid_targets_mean": 5549.2, "valid_targets_min": 916 }, { "epoch": 3.501012145748988, "grad_norm": 0.09148397869628853, "learning_rate": 1.00366647618297e-05, "loss": 0.0031, "loss_nan_ranks": 0, "loss_rank_avg": 0.000851424178108573, "step": 1155, "valid_targets_mean": 5381.9, "valid_targets_min": 1428 }, { "epoch": 3.516194331983806, "grad_norm": 0.08536407748387596, "learning_rate": 9.853790145893742e-06, "loss": 0.003, "loss_nan_ranks": 0, "loss_rank_avg": 0.0007849158719182014, "step": 1160, "valid_targets_mean": 5692.0, "valid_targets_min": 1534 }, { "epoch": 3.5313765182186234, "grad_norm": 0.07500738769923868, "learning_rate": 9.672050767220765e-06, "loss": 0.0038, "loss_nan_ranks": 0, "loss_rank_avg": 0.0018731937743723392, "step": 1165, "valid_targets_mean": 4630.3, "valid_targets_min": 1650 }, { "epoch": 3.5465587044534415, "grad_norm": 0.08635271921250572, "learning_rate": 9.491466960232955e-06, "loss": 0.0027, "loss_nan_ranks": 0, "loss_rank_avg": 0.0018844190053641796, "step": 1170, "valid_targets_mean": 6699.6, "valid_targets_min": 1281 }, { "epoch": 3.561740890688259, "grad_norm": 0.07132259294794761, "learning_rate": 9.312058930058114e-06, "loss": 0.0029, "loss_nan_ranks": 0, "loss_rank_avg": 0.00045610909000970423, "step": 1175, "valid_targets_mean": 5466.4, "valid_targets_min": 597 }, { "epoch": 3.5769230769230766, "grad_norm": 0.08076636933899621, "learning_rate": 9.133846750268945e-06, "loss": 0.0029, "loss_nan_ranks": 0, "loss_rank_avg": 0.0009316107607446611, "step": 1180, "valid_targets_mean": 7522.7, "valid_targets_min": 1517 }, { "epoch": 3.5921052631578947, "grad_norm": 0.07356467744946356, "learning_rate": 8.956850360637046e-06, "loss": 0.008, "loss_nan_ranks": 0, "loss_rank_avg": 0.0007976359920576215, "step": 1185, "valid_targets_mean": 7516.5, "valid_targets_min": 2623 }, { "epoch": 3.6072874493927127, "grad_norm": 0.07756733565954317, "learning_rate": 8.78108956490194e-06, "loss": 0.003, "loss_nan_ranks": 0, "loss_rank_avg": 0.0013237085659056902, "step": 1190, "valid_targets_mean": 6886.8, "valid_targets_min": 2053 }, { "epoch": 3.6224696356275303, "grad_norm": 0.07105607166251748, "learning_rate": 8.606584028555225e-06, "loss": 0.0029, "loss_nan_ranks": 0, "loss_rank_avg": 0.0004069001879543066, "step": 1195, "valid_targets_mean": 4807.0, "valid_targets_min": 1153 }, { "epoch": 3.637651821862348, "grad_norm": 0.07399494788941444, "learning_rate": 8.43335327664027e-06, "loss": 0.003, "loss_nan_ranks": 0, "loss_rank_avg": 0.0014096340164542198, "step": 1200, "valid_targets_mean": 6198.2, "valid_targets_min": 2352 }, { "epoch": 3.652834008097166, "grad_norm": 0.08708798278496013, "learning_rate": 8.261416691567601e-06, "loss": 0.0036, "loss_nan_ranks": 0, "loss_rank_avg": 0.00120199890807271, "step": 1205, "valid_targets_mean": 6160.4, "valid_targets_min": 1384 }, { "epoch": 3.668016194331984, "grad_norm": 0.06850782245649316, "learning_rate": 8.090793510946242e-06, "loss": 0.007, "loss_nan_ranks": 0, "loss_rank_avg": 0.0011346151586622, "step": 1210, "valid_targets_mean": 5498.9, "valid_targets_min": 1544 }, { "epoch": 3.6831983805668016, "grad_norm": 0.0665721537533129, "learning_rate": 7.921502825431258e-06, "loss": 0.0022, "loss_nan_ranks": 0, "loss_rank_avg": 0.0009298848453909159, "step": 1215, "valid_targets_mean": 5320.8, "valid_targets_min": 1325 }, { "epoch": 3.698380566801619, "grad_norm": 0.05817280875168348, "learning_rate": 7.753563576587753e-06, "loss": 0.0019, "loss_nan_ranks": 0, "loss_rank_avg": 0.00031473039416596293, "step": 1220, "valid_targets_mean": 7207.3, "valid_targets_min": 1196 }, { "epoch": 3.7135627530364372, "grad_norm": 0.07585282212242273, "learning_rate": 7.5869945547715275e-06, "loss": 0.0035, "loss_nan_ranks": 0, "loss_rank_avg": 0.0001625682198209688, "step": 1225, "valid_targets_mean": 6348.5, "valid_targets_min": 1173 }, { "epoch": 3.7287449392712553, "grad_norm": 0.05928061653856549, "learning_rate": 7.421814397026674e-06, "loss": 0.0055, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002983218291774392, "step": 1230, "valid_targets_mean": 4876.5, "valid_targets_min": 412 }, { "epoch": 3.743927125506073, "grad_norm": 0.05167166612364566, "learning_rate": 7.258041585000317e-06, "loss": 0.0021, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002519974368624389, "step": 1235, "valid_targets_mean": 6114.1, "valid_targets_min": 1860 }, { "epoch": 3.7591093117408905, "grad_norm": 0.06755208306247126, "learning_rate": 7.095694442874743e-06, "loss": 0.0023, "loss_nan_ranks": 0, "loss_rank_avg": 0.0007116460474207997, "step": 1240, "valid_targets_mean": 6417.8, "valid_targets_min": 701 }, { "epoch": 3.7742914979757085, "grad_norm": 0.051297695584593836, "learning_rate": 6.934791135317147e-06, "loss": 0.0018, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005321088247001171, "step": 1245, "valid_targets_mean": 5659.3, "valid_targets_min": 325 }, { "epoch": 3.7894736842105265, "grad_norm": 0.05752662490981521, "learning_rate": 6.775349665447222e-06, "loss": 0.0019, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005416910862550139, "step": 1250, "valid_targets_mean": 4546.3, "valid_targets_min": 393 }, { "epoch": 3.804655870445344, "grad_norm": 0.052946443641855044, "learning_rate": 6.617387872822842e-06, "loss": 0.0015, "loss_nan_ranks": 0, "loss_rank_avg": 0.0010024468647316098, "step": 1255, "valid_targets_mean": 7255.5, "valid_targets_min": 1810 }, { "epoch": 3.8198380566801617, "grad_norm": 0.05880925202734049, "learning_rate": 6.460923431444015e-06, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.0004680416896007955, "step": 1260, "valid_targets_mean": 5930.0, "valid_targets_min": 1600 }, { "epoch": 3.83502024291498, "grad_norm": 0.06372750335866821, "learning_rate": 6.305973847775406e-06, "loss": 0.0017, "loss_nan_ranks": 0, "loss_rank_avg": 0.0013748719356954098, "step": 1265, "valid_targets_mean": 6128.2, "valid_targets_min": 826 }, { "epoch": 3.850202429149798, "grad_norm": 0.05722799203054713, "learning_rate": 6.152556458787546e-06, "loss": 0.0016, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002156760892830789, "step": 1270, "valid_targets_mean": 5906.8, "valid_targets_min": 1226 }, { "epoch": 3.8653846153846154, "grad_norm": 0.07081813428284034, "learning_rate": 6.000688430017048e-06, "loss": 0.0012, "loss_nan_ranks": 0, "loss_rank_avg": 0.0004200451367069036, "step": 1275, "valid_targets_mean": 6887.4, "valid_targets_min": 340 }, { "epoch": 3.880566801619433, "grad_norm": 0.06866565362661654, "learning_rate": 5.850386753645998e-06, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.00035961525281891227, "step": 1280, "valid_targets_mean": 5884.6, "valid_targets_min": 1231 }, { "epoch": 3.895748987854251, "grad_norm": 0.07473837087777652, "learning_rate": 5.701668246600731e-06, "loss": 0.0037, "loss_nan_ranks": 0, "loss_rank_avg": 0.0006638249033130705, "step": 1285, "valid_targets_mean": 5361.0, "valid_targets_min": 395 }, { "epoch": 3.910931174089069, "grad_norm": 0.06332394817627471, "learning_rate": 5.554549548670227e-06, "loss": 0.0018, "loss_nan_ranks": 0, "loss_rank_avg": 0.00031463263439945877, "step": 1290, "valid_targets_mean": 6373.2, "valid_targets_min": 1155 }, { "epoch": 3.9261133603238867, "grad_norm": 0.04379006866304081, "learning_rate": 5.409047120644307e-06, "loss": 0.0009, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002834107435774058, "step": 1295, "valid_targets_mean": 8020.1, "valid_targets_min": 1064 }, { "epoch": 3.9412955465587043, "grad_norm": 0.0587907679513708, "learning_rate": 5.265177242471899e-06, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.000306104077026248, "step": 1300, "valid_targets_mean": 6390.3, "valid_targets_min": 1186 }, { "epoch": 3.9564777327935223, "grad_norm": 0.051201748061711794, "learning_rate": 5.122956011439486e-06, "loss": 0.0015, "loss_nan_ranks": 0, "loss_rank_avg": 0.0006984270294196904, "step": 1305, "valid_targets_mean": 7854.0, "valid_targets_min": 1626 }, { "epoch": 3.97165991902834, "grad_norm": 0.04389042358717779, "learning_rate": 4.982399340370017e-06, "loss": 0.0016, "loss_nan_ranks": 0, "loss_rank_avg": 0.000228608405450359, "step": 1310, "valid_targets_mean": 5870.0, "valid_targets_min": 1117 }, { "epoch": 3.986842105263158, "grad_norm": 0.04144493373117889, "learning_rate": 4.843522955842464e-06, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.00036165837082080543, "step": 1315, "valid_targets_mean": 6428.9, "valid_targets_min": 1226 }, { "epoch": 4.0, "grad_norm": 0.10432443407266767, "learning_rate": 4.706342396432213e-06, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0010806015925481915, "step": 1320, "valid_targets_mean": 4910.3, "valid_targets_min": 796 }, { "epoch": 4.015182186234818, "grad_norm": 0.04575394395571745, "learning_rate": 4.570873010972477e-06, "loss": 0.0016, "loss_nan_ranks": 0, "loss_rank_avg": 0.00019171061285305768, "step": 1325, "valid_targets_mean": 4514.5, "valid_targets_min": 942 }, { "epoch": 4.030364372469635, "grad_norm": 0.06136036341219001, "learning_rate": 4.43712995683695e-06, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.00025325745809823275, "step": 1330, "valid_targets_mean": 5904.1, "valid_targets_min": 1319 }, { "epoch": 4.045546558704453, "grad_norm": 0.04678980368302935, "learning_rate": 4.305128198243888e-06, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.00023867773415986449, "step": 1335, "valid_targets_mean": 5641.8, "valid_targets_min": 1051 }, { "epoch": 4.060728744939271, "grad_norm": 0.044273575964736044, "learning_rate": 4.174882504581794e-06, "loss": 0.0019, "loss_nan_ranks": 0, "loss_rank_avg": 0.00012092696852050722, "step": 1340, "valid_targets_mean": 5501.6, "valid_targets_min": 983 }, { "epoch": 4.075910931174089, "grad_norm": 0.06616327486605052, "learning_rate": 4.046407448756895e-06, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005744840018451214, "step": 1345, "valid_targets_mean": 6480.2, "valid_targets_min": 1409 }, { "epoch": 4.0910931174089065, "grad_norm": 0.03504675811498584, "learning_rate": 3.91971740556262e-06, "loss": 0.0045, "loss_nan_ranks": 0, "loss_rank_avg": 0.00013153860345482826, "step": 1350, "valid_targets_mean": 6125.2, "valid_targets_min": 1260 }, { "epoch": 4.1062753036437245, "grad_norm": 0.05094012282416404, "learning_rate": 3.7948265500712313e-06, "loss": 0.0042, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002957929973490536, "step": 1355, "valid_targets_mean": 4896.0, "valid_targets_min": 561 }, { "epoch": 4.1214574898785425, "grad_norm": 0.03832020075332006, "learning_rate": 3.6717488560478096e-06, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.00017256051069125533, "step": 1360, "valid_targets_mean": 5400.4, "valid_targets_min": 697 }, { "epoch": 4.136639676113361, "grad_norm": 0.06499952485602367, "learning_rate": 3.5504980943867538e-06, "loss": 0.0053, "loss_nan_ranks": 0, "loss_rank_avg": 0.00017852283781394362, "step": 1365, "valid_targets_mean": 6389.8, "valid_targets_min": 697 }, { "epoch": 4.151821862348178, "grad_norm": 0.06164804945094931, "learning_rate": 3.4310878315710074e-06, "loss": 0.0025, "loss_nan_ranks": 0, "loss_rank_avg": 0.0013586217537522316, "step": 1370, "valid_targets_mean": 7214.0, "valid_targets_min": 906 }, { "epoch": 4.167004048582996, "grad_norm": 0.04024799044304015, "learning_rate": 3.3135314281540954e-06, "loss": 0.0018, "loss_nan_ranks": 0, "loss_rank_avg": 0.0003238618082832545, "step": 1375, "valid_targets_mean": 4662.5, "valid_targets_min": 653 }, { "epoch": 4.182186234817814, "grad_norm": 0.04299991800079604, "learning_rate": 3.1978420372652776e-06, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.00016643814160488546, "step": 1380, "valid_targets_mean": 5508.5, "valid_targets_min": 1759 }, { "epoch": 4.197368421052632, "grad_norm": 0.03981366931525965, "learning_rate": 3.084032603137852e-06, "loss": 0.0012, "loss_nan_ranks": 0, "loss_rank_avg": 0.00024849892361089587, "step": 1385, "valid_targets_mean": 5699.5, "valid_targets_min": 909 }, { "epoch": 4.212550607287449, "grad_norm": 0.06374131446855894, "learning_rate": 2.9721158596608622e-06, "loss": 0.0016, "loss_nan_ranks": 0, "loss_rank_avg": 0.0016528195701539516, "step": 1390, "valid_targets_mean": 5386.7, "valid_targets_min": 1449 }, { "epoch": 4.227732793522267, "grad_norm": 0.0488344552225502, "learning_rate": 2.8621043289543314e-06, "loss": 0.0015, "loss_nan_ranks": 0, "loss_rank_avg": 0.0003213430754840374, "step": 1395, "valid_targets_mean": 5162.6, "valid_targets_min": 884 }, { "epoch": 4.242914979757085, "grad_norm": 0.040266253339609565, "learning_rate": 2.754010319968181e-06, "loss": 0.0044, "loss_nan_ranks": 0, "loss_rank_avg": 0.00016541770310141146, "step": 1400, "valid_targets_mean": 7292.2, "valid_targets_min": 1282 }, { "epoch": 4.258097165991903, "grad_norm": 0.05618171654900691, "learning_rate": 2.647845927105015e-06, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.000637515913695097, "step": 1405, "valid_targets_mean": 5535.6, "valid_targets_min": 410 }, { "epoch": 4.27327935222672, "grad_norm": 0.044241199347689504, "learning_rate": 2.543623028866915e-06, "loss": 0.0008, "loss_nan_ranks": 0, "loss_rank_avg": 0.0001080352594726719, "step": 1410, "valid_targets_mean": 5776.3, "valid_targets_min": 1448 }, { "epoch": 4.288461538461538, "grad_norm": 0.04240919318151957, "learning_rate": 2.4413532865263533e-06, "loss": 0.0048, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002839623484760523, "step": 1415, "valid_targets_mean": 5609.8, "valid_targets_min": 978 }, { "epoch": 4.303643724696356, "grad_norm": 0.04717996482372255, "learning_rate": 2.3410481428214602e-06, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0003408491029404104, "step": 1420, "valid_targets_mean": 4040.8, "valid_targets_min": 340 }, { "epoch": 4.318825910931174, "grad_norm": 0.06239339252341931, "learning_rate": 2.242718820675718e-06, "loss": 0.0029, "loss_nan_ranks": 0, "loss_rank_avg": 0.0001997358922380954, "step": 1425, "valid_targets_mean": 6168.2, "valid_targets_min": 1162 }, { "epoch": 4.334008097165992, "grad_norm": 0.0625616384807205, "learning_rate": 2.1463763219422495e-06, "loss": 0.004, "loss_nan_ranks": 0, "loss_rank_avg": 0.0009303807746618986, "step": 1430, "valid_targets_mean": 5197.7, "valid_targets_min": 1391 }, { "epoch": 4.34919028340081, "grad_norm": 0.049057182007342326, "learning_rate": 2.0520314261728357e-06, "loss": 0.0008, "loss_nan_ranks": 0, "loss_rank_avg": 0.00032955483766272664, "step": 1435, "valid_targets_mean": 5227.9, "valid_targets_min": 1292 }, { "epoch": 4.364372469635628, "grad_norm": 0.02755211450191434, "learning_rate": 1.9596946894118306e-06, "loss": 0.0006, "loss_nan_ranks": 0, "loss_rank_avg": 0.00012536742724478245, "step": 1440, "valid_targets_mean": 6774.8, "valid_targets_min": 399 }, { "epoch": 4.379554655870446, "grad_norm": 0.059198175261942494, "learning_rate": 1.8693764430150696e-06, "loss": 0.0009, "loss_nan_ranks": 0, "loss_rank_avg": 0.0006651548319496214, "step": 1445, "valid_targets_mean": 5754.2, "valid_targets_min": 1175 }, { "epoch": 4.394736842105263, "grad_norm": 0.06672168346536564, "learning_rate": 1.7810867924938978e-06, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005800529033876956, "step": 1450, "valid_targets_mean": 6373.2, "valid_targets_min": 1172 }, { "epoch": 4.409919028340081, "grad_norm": 0.04103210163550645, "learning_rate": 1.6948356163845048e-06, "loss": 0.0007, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002091139613185078, "step": 1455, "valid_targets_mean": 4446.1, "valid_targets_min": 1155 }, { "epoch": 4.425101214574899, "grad_norm": 0.0531210573922736, "learning_rate": 1.610632565142627e-06, "loss": 0.0015, "loss_nan_ranks": 0, "loss_rank_avg": 0.00031264807330444455, "step": 1460, "valid_targets_mean": 5350.0, "valid_targets_min": 1583 }, { "epoch": 4.440283400809717, "grad_norm": 0.06214094975132685, "learning_rate": 1.5284870600637813e-06, "loss": 0.0016, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005728084361180663, "step": 1465, "valid_targets_mean": 6931.2, "valid_targets_min": 882 }, { "epoch": 4.455465587044534, "grad_norm": 0.025470297264450825, "learning_rate": 1.4484082922291376e-06, "loss": 0.0007, "loss_nan_ranks": 0, "loss_rank_avg": 0.0001540776138426736, "step": 1470, "valid_targets_mean": 6458.8, "valid_targets_min": 446 }, { "epoch": 4.470647773279352, "grad_norm": 0.0371089157160395, "learning_rate": 1.3704052214771513e-06, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005277476739138365, "step": 1475, "valid_targets_mean": 5589.3, "valid_targets_min": 1255 }, { "epoch": 4.48582995951417, "grad_norm": 0.03597473308278048, "learning_rate": 1.2944865754010682e-06, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.00027125386986881495, "step": 1480, "valid_targets_mean": 5186.9, "valid_targets_min": 1046 }, { "epoch": 4.501012145748988, "grad_norm": 0.05603090651957245, "learning_rate": 1.2206608483724013e-06, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.0006555815925821662, "step": 1485, "valid_targets_mean": 4437.2, "valid_targets_min": 1281 }, { "epoch": 4.516194331983805, "grad_norm": 0.040660615586738724, "learning_rate": 1.1489363005905241e-06, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005926231970079243, "step": 1490, "valid_targets_mean": 5373.7, "valid_targets_min": 1246 }, { "epoch": 4.531376518218623, "grad_norm": 0.039681408267201096, "learning_rate": 1.0793209571584562e-06, "loss": 0.0016, "loss_nan_ranks": 0, "loss_rank_avg": 0.00044325290946289897, "step": 1495, "valid_targets_mean": 5612.2, "valid_targets_min": 975 }, { "epoch": 4.5465587044534415, "grad_norm": 0.03482398576019173, "learning_rate": 1.0118226071849424e-06, "loss": 0.0015, "loss_nan_ranks": 0, "loss_rank_avg": 0.00012415298260748386, "step": 1500, "valid_targets_mean": 5565.6, "valid_targets_min": 820 }, { "epoch": 4.5617408906882595, "grad_norm": 0.03861786834216097, "learning_rate": 9.464488029129581e-07, "loss": 0.0038, "loss_nan_ranks": 0, "loss_rank_avg": 0.00042347534326836467, "step": 1505, "valid_targets_mean": 6770.4, "valid_targets_min": 2383 }, { "epoch": 4.576923076923077, "grad_norm": 0.03782526929710735, "learning_rate": 8.832068588746945e-07, "loss": 0.0053, "loss_nan_ranks": 0, "loss_rank_avg": 0.00042459441465325654, "step": 1510, "valid_targets_mean": 6201.0, "valid_targets_min": 748 }, { "epoch": 4.592105263157895, "grad_norm": 0.06581264347607255, "learning_rate": 8.221038510731704e-07, "loss": 0.0026, "loss_nan_ranks": 0, "loss_rank_avg": 0.009063578210771084, "step": 1515, "valid_targets_mean": 6460.5, "valid_targets_min": 1058 }, { "epoch": 4.607287449392713, "grad_norm": 0.05187430297042141, "learning_rate": 7.631466161904821e-07, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 6.647851114394143e-05, "step": 1520, "valid_targets_mean": 5950.4, "valid_targets_min": 446 }, { "epoch": 4.62246963562753, "grad_norm": 0.03418084442890125, "learning_rate": 7.063417508228876e-07, "loss": 0.0012, "loss_nan_ranks": 0, "loss_rank_avg": 0.0001711228978820145, "step": 1525, "valid_targets_mean": 5680.2, "valid_targets_min": 976 }, { "epoch": 4.637651821862348, "grad_norm": 0.045181614299724095, "learning_rate": 6.516956107427241e-07, "loss": 0.0007, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002168192877434194, "step": 1530, "valid_targets_mean": 3741.5, "valid_targets_min": 563 }, { "epoch": 4.652834008097166, "grad_norm": 0.028822520990575768, "learning_rate": 5.992143101872638e-07, "loss": 0.0009, "loss_nan_ranks": 0, "loss_rank_avg": 6.904886686243117e-05, "step": 1535, "valid_targets_mean": 5398.7, "valid_targets_min": 978 }, { "epoch": 4.668016194331984, "grad_norm": 0.04528903775134061, "learning_rate": 5.489037211746184e-07, "loss": 0.002, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002343966334592551, "step": 1540, "valid_targets_mean": 6025.8, "valid_targets_min": 1408 }, { "epoch": 4.683198380566802, "grad_norm": 0.017318450062876578, "learning_rate": 5.007694728467228e-07, "loss": 0.0007, "loss_nan_ranks": 0, "loss_rank_avg": 9.243115346180275e-05, "step": 1545, "valid_targets_mean": 5785.7, "valid_targets_min": 1152 }, { "epoch": 4.698380566801619, "grad_norm": 0.045176485557852156, "learning_rate": 4.548169508395028e-07, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.00011724950309144333, "step": 1550, "valid_targets_mean": 5474.6, "valid_targets_min": 2293 }, { "epoch": 4.713562753036437, "grad_norm": 0.04717179450293906, "learning_rate": 4.1105129668029595e-07, "loss": 0.0007, "loss_nan_ranks": 0, "loss_rank_avg": 0.00032075931085273623, "step": 1555, "valid_targets_mean": 5440.1, "valid_targets_min": 1977 }, { "epoch": 4.728744939271255, "grad_norm": 0.02883542239202345, "learning_rate": 3.6947740721257066e-07, "loss": 0.0036, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002757580659817904, "step": 1560, "valid_targets_mean": 5387.8, "valid_targets_min": 1569 }, { "epoch": 4.743927125506072, "grad_norm": 0.04649446397617834, "learning_rate": 3.3009993404802486e-07, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.00040277530206367373, "step": 1565, "valid_targets_mean": 5777.2, "valid_targets_min": 1465 }, { "epoch": 4.7591093117408905, "grad_norm": 0.05003669025654074, "learning_rate": 2.929232830461404e-07, "loss": 0.0025, "loss_nan_ranks": 0, "loss_rank_avg": 0.00040337140671908855, "step": 1570, "valid_targets_mean": 5920.8, "valid_targets_min": 1195 }, { "epoch": 4.7742914979757085, "grad_norm": 0.026085597840677918, "learning_rate": 2.579516138212101e-07, "loss": 0.0009, "loss_nan_ranks": 0, "loss_rank_avg": 9.011440852191299e-05, "step": 1575, "valid_targets_mean": 4939.5, "valid_targets_min": 697 }, { "epoch": 4.7894736842105265, "grad_norm": 0.03964386050033589, "learning_rate": 2.2518883927692857e-07, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005810962175019085, "step": 1580, "valid_targets_mean": 6490.4, "valid_targets_min": 1347 }, { "epoch": 4.804655870445345, "grad_norm": 0.034600096719414955, "learning_rate": 1.9463862516859277e-07, "loss": 0.0007, "loss_nan_ranks": 0, "loss_rank_avg": 0.0001201618870254606, "step": 1585, "valid_targets_mean": 7714.5, "valid_targets_min": 2128 }, { "epoch": 4.819838056680162, "grad_norm": 0.04077060684331417, "learning_rate": 1.6630438969294615e-07, "loss": 0.0017, "loss_nan_ranks": 0, "loss_rank_avg": 0.000434387126006186, "step": 1590, "valid_targets_mean": 5693.0, "valid_targets_min": 773 }, { "epoch": 4.83502024291498, "grad_norm": 0.03559592574542344, "learning_rate": 1.4018930310571553e-07, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002756250905804336, "step": 1595, "valid_targets_mean": 5118.2, "valid_targets_min": 1126 }, { "epoch": 4.850202429149798, "grad_norm": 0.04102607474420591, "learning_rate": 1.1629628736690824e-07, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.002798682078719139, "step": 1600, "valid_targets_mean": 5115.5, "valid_targets_min": 669 }, { "epoch": 4.865384615384615, "grad_norm": 0.03665735826038814, "learning_rate": 9.46280158138757e-08, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0012289484729990363, "step": 1605, "valid_targets_mean": 5086.9, "valid_targets_min": 1019 }, { "epoch": 4.880566801619433, "grad_norm": 0.035401557000787016, "learning_rate": 7.518691286220625e-08, "loss": 0.0009, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005569718778133392, "step": 1610, "valid_targets_mean": 6123.9, "valid_targets_min": 1519 }, { "epoch": 4.895748987854251, "grad_norm": 0.022697139243698643, "learning_rate": 5.797515373445084e-08, "loss": 0.0008, "loss_nan_ranks": 0, "loss_rank_avg": 0.00019126076949760318, "step": 1615, "valid_targets_mean": 5520.2, "valid_targets_min": 1126 }, { "epoch": 4.910931174089069, "grad_norm": 0.05200869813147198, "learning_rate": 4.299466421675113e-08, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.00018992288096342236, "step": 1620, "valid_targets_mean": 6700.1, "valid_targets_min": 1817 }, { "epoch": 4.926113360323887, "grad_norm": 0.03273174844805149, "learning_rate": 3.0247120443362976e-08, "loss": 0.0044, "loss_nan_ranks": 0, "loss_rank_avg": 0.0004165939171798527, "step": 1625, "valid_targets_mean": 5461.3, "valid_targets_min": 1151 }, { "epoch": 4.941295546558704, "grad_norm": 0.043664082408331556, "learning_rate": 1.973394870912193e-08, "loss": 0.0009, "loss_nan_ranks": 0, "loss_rank_avg": 0.00014396056940313429, "step": 1630, "valid_targets_mean": 5573.7, "valid_targets_min": 497 }, { "epoch": 4.956477732793522, "grad_norm": 0.04647220242878979, "learning_rate": 1.145632530985541e-08, "loss": 0.0021, "loss_nan_ranks": 0, "loss_rank_avg": 0.001477197976782918, "step": 1635, "valid_targets_mean": 6098.6, "valid_targets_min": 1186 }, { "epoch": 4.97165991902834, "grad_norm": 0.03741551615684759, "learning_rate": 5.415176410765721e-09, "loss": 0.0019, "loss_nan_ranks": 0, "loss_rank_avg": 0.000190427279449068, "step": 1640, "valid_targets_mean": 7667.1, "valid_targets_min": 1201 }, { "epoch": 4.9868421052631575, "grad_norm": 0.034908554086182576, "learning_rate": 1.611177942812958e-09, "loss": 0.0006, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005074947839602828, "step": 1645, "valid_targets_mean": 4861.1, "valid_targets_min": 1478 }, { "epoch": 5.0, "grad_norm": 0.06119276217350039, "learning_rate": 4.475552707772224e-11, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.0014345531817525625, "step": 1650, "valid_targets_mean": 5408.7, "valid_targets_min": 410 }, { "epoch": 5.0, "step": 1650, "total_flos": 4.6024941377264026e+18, "train_loss": 0.0, "train_runtime": 1.9709, "train_samples_per_second": 80167.399, "train_steps_per_second": 837.191 } ], "logging_steps": 5, "max_steps": 1650, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.6024941377264026e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }