{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015182186234817813, "grad_norm": 9.767456630329582, "learning_rate": 9.696969696969698e-07, "loss": 0.9495, "loss_nan_ranks": 0, "loss_rank_avg": 0.30272722244262695, "step": 5, "valid_targets_mean": 10378.4, "valid_targets_min": 1358 }, { "epoch": 0.030364372469635626, "grad_norm": 6.271278770967426, "learning_rate": 2.181818181818182e-06, "loss": 0.9318, "loss_nan_ranks": 0, "loss_rank_avg": 0.31232553720474243, "step": 10, "valid_targets_mean": 10926.1, "valid_targets_min": 1793 }, { "epoch": 0.04554655870445344, "grad_norm": 2.8647197149421197, "learning_rate": 3.3939393939393946e-06, "loss": 0.8663, "loss_nan_ranks": 0, "loss_rank_avg": 0.30162012577056885, "step": 15, "valid_targets_mean": 11718.9, "valid_targets_min": 2968 }, { "epoch": 0.06072874493927125, "grad_norm": 1.5728241227206552, "learning_rate": 4.606060606060606e-06, "loss": 0.8034, "loss_nan_ranks": 0, "loss_rank_avg": 0.23369714617729187, "step": 20, "valid_targets_mean": 9150.8, "valid_targets_min": 2866 }, { "epoch": 0.07591093117408906, "grad_norm": 1.298780515091113, "learning_rate": 5.8181818181818185e-06, "loss": 0.7551, "loss_nan_ranks": 0, "loss_rank_avg": 0.25340479612350464, "step": 25, "valid_targets_mean": 9598.5, "valid_targets_min": 1022 }, { "epoch": 0.09109311740890688, "grad_norm": 0.753479537539852, "learning_rate": 7.030303030303031e-06, "loss": 0.7108, "loss_nan_ranks": 0, "loss_rank_avg": 0.24694345891475677, "step": 30, "valid_targets_mean": 11339.5, "valid_targets_min": 2455 }, { "epoch": 0.1062753036437247, "grad_norm": 0.5456863914213468, "learning_rate": 8.242424242424243e-06, "loss": 0.6935, "loss_nan_ranks": 0, "loss_rank_avg": 0.23709480464458466, "step": 35, "valid_targets_mean": 10767.9, "valid_targets_min": 2190 }, { "epoch": 0.1214574898785425, "grad_norm": 0.47461380782907614, "learning_rate": 9.454545454545456e-06, "loss": 0.6711, "loss_nan_ranks": 0, "loss_rank_avg": 0.22755438089370728, "step": 40, "valid_targets_mean": 11884.0, "valid_targets_min": 1925 }, { "epoch": 0.13663967611336034, "grad_norm": 0.4106448655482926, "learning_rate": 1.0666666666666667e-05, "loss": 0.6459, "loss_nan_ranks": 0, "loss_rank_avg": 0.21281057596206665, "step": 45, "valid_targets_mean": 9801.6, "valid_targets_min": 1654 }, { "epoch": 0.15182186234817813, "grad_norm": 0.35120072155847837, "learning_rate": 1.187878787878788e-05, "loss": 0.6144, "loss_nan_ranks": 0, "loss_rank_avg": 0.21171334385871887, "step": 50, "valid_targets_mean": 11687.2, "valid_targets_min": 2243 }, { "epoch": 0.16700404858299595, "grad_norm": 0.2896997146592483, "learning_rate": 1.3090909090909092e-05, "loss": 0.5917, "loss_nan_ranks": 0, "loss_rank_avg": 0.18080435693264008, "step": 55, "valid_targets_mean": 10539.2, "valid_targets_min": 1901 }, { "epoch": 0.18218623481781376, "grad_norm": 0.23549511279092397, "learning_rate": 1.4303030303030305e-05, "loss": 0.5778, "loss_nan_ranks": 0, "loss_rank_avg": 0.18052560091018677, "step": 60, "valid_targets_mean": 10891.6, "valid_targets_min": 3467 }, { "epoch": 0.19736842105263158, "grad_norm": 0.22554924567011164, "learning_rate": 1.5515151515151516e-05, "loss": 0.5724, "loss_nan_ranks": 0, "loss_rank_avg": 0.20011213421821594, "step": 65, "valid_targets_mean": 12352.1, "valid_targets_min": 2287 }, { "epoch": 0.2125506072874494, "grad_norm": 0.23069171787024742, "learning_rate": 1.672727272727273e-05, "loss": 0.5582, "loss_nan_ranks": 0, "loss_rank_avg": 0.17292290925979614, "step": 70, "valid_targets_mean": 8332.5, "valid_targets_min": 2508 }, { "epoch": 0.22773279352226722, "grad_norm": 0.24300716935495, "learning_rate": 1.7939393939393942e-05, "loss": 0.5431, "loss_nan_ranks": 0, "loss_rank_avg": 0.18352295458316803, "step": 75, "valid_targets_mean": 10469.7, "valid_targets_min": 3220 }, { "epoch": 0.242914979757085, "grad_norm": 0.2539389077011328, "learning_rate": 1.9151515151515152e-05, "loss": 0.5426, "loss_nan_ranks": 0, "loss_rank_avg": 0.17171943187713623, "step": 80, "valid_targets_mean": 11575.8, "valid_targets_min": 3367 }, { "epoch": 0.25809716599190285, "grad_norm": 0.26124706332109904, "learning_rate": 2.0363636363636365e-05, "loss": 0.5298, "loss_nan_ranks": 0, "loss_rank_avg": 0.1690063625574112, "step": 85, "valid_targets_mean": 9496.4, "valid_targets_min": 1415 }, { "epoch": 0.2732793522267207, "grad_norm": 0.2365981496426835, "learning_rate": 2.1575757575757578e-05, "loss": 0.5298, "loss_nan_ranks": 0, "loss_rank_avg": 0.1702321618795395, "step": 90, "valid_targets_mean": 11065.8, "valid_targets_min": 766 }, { "epoch": 0.28846153846153844, "grad_norm": 0.26451757214209165, "learning_rate": 2.278787878787879e-05, "loss": 0.5302, "loss_nan_ranks": 0, "loss_rank_avg": 0.16847875714302063, "step": 95, "valid_targets_mean": 9680.0, "valid_targets_min": 290 }, { "epoch": 0.30364372469635625, "grad_norm": 0.2741299353525031, "learning_rate": 2.4e-05, "loss": 0.521, "loss_nan_ranks": 0, "loss_rank_avg": 0.1615229696035385, "step": 100, "valid_targets_mean": 9314.8, "valid_targets_min": 975 }, { "epoch": 0.3188259109311741, "grad_norm": 0.2992218104861107, "learning_rate": 2.5212121212121214e-05, "loss": 0.5119, "loss_nan_ranks": 0, "loss_rank_avg": 0.17057791352272034, "step": 105, "valid_targets_mean": 10264.6, "valid_targets_min": 777 }, { "epoch": 0.3340080971659919, "grad_norm": 0.32299047345991855, "learning_rate": 2.6424242424242427e-05, "loss": 0.5086, "loss_nan_ranks": 0, "loss_rank_avg": 0.19047002494335175, "step": 110, "valid_targets_mean": 10825.8, "valid_targets_min": 4362 }, { "epoch": 0.3491902834008097, "grad_norm": 0.29045987481699725, "learning_rate": 2.763636363636364e-05, "loss": 0.5206, "loss_nan_ranks": 0, "loss_rank_avg": 0.17592790722846985, "step": 115, "valid_targets_mean": 10548.6, "valid_targets_min": 2197 }, { "epoch": 0.3643724696356275, "grad_norm": 0.26637442919498494, "learning_rate": 2.884848484848485e-05, "loss": 0.5004, "loss_nan_ranks": 0, "loss_rank_avg": 0.13782358169555664, "step": 120, "valid_targets_mean": 9174.8, "valid_targets_min": 1757 }, { "epoch": 0.37955465587044535, "grad_norm": 0.27717254269356884, "learning_rate": 3.0060606060606062e-05, "loss": 0.5053, "loss_nan_ranks": 0, "loss_rank_avg": 0.17009928822517395, "step": 125, "valid_targets_mean": 10493.4, "valid_targets_min": 2896 }, { "epoch": 0.39473684210526316, "grad_norm": 0.26050759497187553, "learning_rate": 3.127272727272728e-05, "loss": 0.5047, "loss_nan_ranks": 0, "loss_rank_avg": 0.1754511296749115, "step": 130, "valid_targets_mean": 11562.1, "valid_targets_min": 2774 }, { "epoch": 0.409919028340081, "grad_norm": 0.2765139416546145, "learning_rate": 3.2484848484848485e-05, "loss": 0.4994, "loss_nan_ranks": 0, "loss_rank_avg": 0.15204985439777374, "step": 135, "valid_targets_mean": 9299.8, "valid_targets_min": 2076 }, { "epoch": 0.4251012145748988, "grad_norm": 0.27349224278807954, "learning_rate": 3.36969696969697e-05, "loss": 0.4968, "loss_nan_ranks": 0, "loss_rank_avg": 0.16537371277809143, "step": 140, "valid_targets_mean": 10670.5, "valid_targets_min": 343 }, { "epoch": 0.4402834008097166, "grad_norm": 0.2891276908365685, "learning_rate": 3.490909090909091e-05, "loss": 0.4941, "loss_nan_ranks": 0, "loss_rank_avg": 0.17477017641067505, "step": 145, "valid_targets_mean": 11143.9, "valid_targets_min": 1940 }, { "epoch": 0.45546558704453444, "grad_norm": 0.2879865399652702, "learning_rate": 3.6121212121212124e-05, "loss": 0.4921, "loss_nan_ranks": 0, "loss_rank_avg": 0.176496684551239, "step": 150, "valid_targets_mean": 10958.7, "valid_targets_min": 3124 }, { "epoch": 0.4706477732793522, "grad_norm": 0.287556416985574, "learning_rate": 3.733333333333334e-05, "loss": 0.4877, "loss_nan_ranks": 0, "loss_rank_avg": 0.1479555368423462, "step": 155, "valid_targets_mean": 9051.1, "valid_targets_min": 1508 }, { "epoch": 0.48582995951417, "grad_norm": 0.2864787839954415, "learning_rate": 3.854545454545455e-05, "loss": 0.4942, "loss_nan_ranks": 0, "loss_rank_avg": 0.1827171891927719, "step": 160, "valid_targets_mean": 10364.4, "valid_targets_min": 164 }, { "epoch": 0.5010121457489879, "grad_norm": 0.25566057907575657, "learning_rate": 3.9757575757575757e-05, "loss": 0.4833, "loss_nan_ranks": 0, "loss_rank_avg": 0.16354942321777344, "step": 165, "valid_targets_mean": 11585.2, "valid_targets_min": 3505 }, { "epoch": 0.5161943319838057, "grad_norm": 0.3454913577704521, "learning_rate": 3.999928391557286e-05, "loss": 0.4816, "loss_nan_ranks": 0, "loss_rank_avg": 0.12809130549430847, "step": 170, "valid_targets_mean": 8255.8, "valid_targets_min": 996 }, { "epoch": 0.5313765182186235, "grad_norm": 0.25232423038040575, "learning_rate": 3.999637491047052e-05, "loss": 0.4898, "loss_nan_ranks": 0, "loss_rank_avg": 0.14153242111206055, "step": 175, "valid_targets_mean": 10202.8, "valid_targets_min": 3853 }, { "epoch": 0.5465587044534413, "grad_norm": 0.38999173724416125, "learning_rate": 3.999122855464813e-05, "loss": 0.4903, "loss_nan_ranks": 0, "loss_rank_avg": 0.19011259078979492, "step": 180, "valid_targets_mean": 12055.8, "valid_targets_min": 1761 }, { "epoch": 0.5617408906882592, "grad_norm": 0.3358629796944954, "learning_rate": 3.998384542392021e-05, "loss": 0.4868, "loss_nan_ranks": 0, "loss_rank_avg": 0.16506445407867432, "step": 185, "valid_targets_mean": 10798.4, "valid_targets_min": 1793 }, { "epoch": 0.5769230769230769, "grad_norm": 0.2600759867395087, "learning_rate": 3.9974226344369124e-05, "loss": 0.4869, "loss_nan_ranks": 0, "loss_rank_avg": 0.15860313177108765, "step": 190, "valid_targets_mean": 9988.2, "valid_targets_min": 1149 }, { "epoch": 0.5921052631578947, "grad_norm": 0.260677411587491, "learning_rate": 3.996237239225268e-05, "loss": 0.4863, "loss_nan_ranks": 0, "loss_rank_avg": 0.16243574023246765, "step": 195, "valid_targets_mean": 10358.2, "valid_targets_min": 3235 }, { "epoch": 0.6072874493927125, "grad_norm": 0.2645245975670984, "learning_rate": 3.994828489388371e-05, "loss": 0.4822, "loss_nan_ranks": 0, "loss_rank_avg": 0.1531163454055786, "step": 200, "valid_targets_mean": 10201.0, "valid_targets_min": 1772 }, { "epoch": 0.6224696356275303, "grad_norm": 0.25227333524594137, "learning_rate": 3.993196542548162e-05, "loss": 0.4818, "loss_nan_ranks": 0, "loss_rank_avg": 0.14635561406612396, "step": 205, "valid_targets_mean": 9432.2, "valid_targets_min": 1884 }, { "epoch": 0.6376518218623481, "grad_norm": 0.28903743175057034, "learning_rate": 3.991341581299609e-05, "loss": 0.4835, "loss_nan_ranks": 0, "loss_rank_avg": 0.17242132127285004, "step": 210, "valid_targets_mean": 10711.5, "valid_targets_min": 1318 }, { "epoch": 0.652834008097166, "grad_norm": 0.2638604812189626, "learning_rate": 3.9892638131902765e-05, "loss": 0.4796, "loss_nan_ranks": 0, "loss_rank_avg": 0.16491654515266418, "step": 215, "valid_targets_mean": 11328.2, "valid_targets_min": 1957 }, { "epoch": 0.6680161943319838, "grad_norm": 0.27154894774630783, "learning_rate": 3.9869634706971e-05, "loss": 0.4751, "loss_nan_ranks": 0, "loss_rank_avg": 0.14194658398628235, "step": 220, "valid_targets_mean": 9687.9, "valid_targets_min": 3468 }, { "epoch": 0.6831983805668016, "grad_norm": 0.25557034456032535, "learning_rate": 3.984440811200379e-05, "loss": 0.4661, "loss_nan_ranks": 0, "loss_rank_avg": 0.1502843201160431, "step": 225, "valid_targets_mean": 9392.2, "valid_targets_min": 2945 }, { "epoch": 0.6983805668016194, "grad_norm": 0.31261870349322307, "learning_rate": 3.981696116954973e-05, "loss": 0.4849, "loss_nan_ranks": 0, "loss_rank_avg": 0.1625722348690033, "step": 230, "valid_targets_mean": 11870.0, "valid_targets_min": 3248 }, { "epoch": 0.7135627530364372, "grad_norm": 0.2785550873091526, "learning_rate": 3.978729695058729e-05, "loss": 0.4818, "loss_nan_ranks": 0, "loss_rank_avg": 0.17710517346858978, "step": 235, "valid_targets_mean": 10057.5, "valid_targets_min": 755 }, { "epoch": 0.728744939271255, "grad_norm": 0.3037308974545316, "learning_rate": 3.9755418774181146e-05, "loss": 0.4724, "loss_nan_ranks": 0, "loss_rank_avg": 0.1543278694152832, "step": 240, "valid_targets_mean": 10055.2, "valid_targets_min": 1140 }, { "epoch": 0.7439271255060729, "grad_norm": 0.2814092333238354, "learning_rate": 3.9721330207110835e-05, "loss": 0.4683, "loss_nan_ranks": 0, "loss_rank_avg": 0.13317811489105225, "step": 245, "valid_targets_mean": 8725.9, "valid_targets_min": 1518 }, { "epoch": 0.7591093117408907, "grad_norm": 0.2937104349412505, "learning_rate": 3.9685035063471675e-05, "loss": 0.4747, "loss_nan_ranks": 0, "loss_rank_avg": 0.17623022198677063, "step": 250, "valid_targets_mean": 11871.8, "valid_targets_min": 1440 }, { "epoch": 0.7742914979757085, "grad_norm": 0.30478426754913396, "learning_rate": 3.964653740424804e-05, "loss": 0.4715, "loss_nan_ranks": 0, "loss_rank_avg": 0.16203227639198303, "step": 255, "valid_targets_mean": 9864.8, "valid_targets_min": 1364 }, { "epoch": 0.7894736842105263, "grad_norm": 0.31658164073789935, "learning_rate": 3.960584153685895e-05, "loss": 0.4636, "loss_nan_ranks": 0, "loss_rank_avg": 0.1387915015220642, "step": 260, "valid_targets_mean": 10663.3, "valid_targets_min": 1778 }, { "epoch": 0.8046558704453441, "grad_norm": 0.38421252432540776, "learning_rate": 3.9562952014676116e-05, "loss": 0.4716, "loss_nan_ranks": 0, "loss_rank_avg": 0.13185453414916992, "step": 265, "valid_targets_mean": 10560.4, "valid_targets_min": 3376 }, { "epoch": 0.819838056680162, "grad_norm": 0.31563470556487117, "learning_rate": 3.9517873636514525e-05, "loss": 0.464, "loss_nan_ranks": 0, "loss_rank_avg": 0.1441793292760849, "step": 270, "valid_targets_mean": 10806.7, "valid_targets_min": 2834 }, { "epoch": 0.8350202429149798, "grad_norm": 0.24166282014582607, "learning_rate": 3.947061144609546e-05, "loss": 0.4638, "loss_nan_ranks": 0, "loss_rank_avg": 0.1455232948064804, "step": 275, "valid_targets_mean": 9412.5, "valid_targets_min": 1636 }, { "epoch": 0.8502024291497976, "grad_norm": 0.28019156605196904, "learning_rate": 3.942117073148221e-05, "loss": 0.4735, "loss_nan_ranks": 0, "loss_rank_avg": 0.17822107672691345, "step": 280, "valid_targets_mean": 11960.5, "valid_targets_min": 3625 }, { "epoch": 0.8653846153846154, "grad_norm": 0.29544332084190916, "learning_rate": 3.9369557024488345e-05, "loss": 0.466, "loss_nan_ranks": 0, "loss_rank_avg": 0.13360615074634552, "step": 285, "valid_targets_mean": 8446.1, "valid_targets_min": 2648 }, { "epoch": 0.8805668016194332, "grad_norm": 0.2521821962950855, "learning_rate": 3.931577610005883e-05, "loss": 0.4677, "loss_nan_ranks": 0, "loss_rank_avg": 0.1583632528781891, "step": 290, "valid_targets_mean": 10544.0, "valid_targets_min": 2552 }, { "epoch": 0.895748987854251, "grad_norm": 0.2649498200492295, "learning_rate": 3.925983397562385e-05, "loss": 0.465, "loss_nan_ranks": 0, "loss_rank_avg": 0.15108221769332886, "step": 295, "valid_targets_mean": 9534.8, "valid_targets_min": 322 }, { "epoch": 0.9109311740890689, "grad_norm": 0.2705724371949053, "learning_rate": 3.920173691042554e-05, "loss": 0.4652, "loss_nan_ranks": 0, "loss_rank_avg": 0.1575058102607727, "step": 300, "valid_targets_mean": 9826.8, "valid_targets_min": 2599 }, { "epoch": 0.9261133603238867, "grad_norm": 0.25599146225296465, "learning_rate": 3.914149140481766e-05, "loss": 0.4665, "loss_nan_ranks": 0, "loss_rank_avg": 0.17281994223594666, "step": 305, "valid_targets_mean": 11182.3, "valid_targets_min": 1557 }, { "epoch": 0.9412955465587044, "grad_norm": 0.2622184905952597, "learning_rate": 3.9079104199538256e-05, "loss": 0.4737, "loss_nan_ranks": 0, "loss_rank_avg": 0.1571318507194519, "step": 310, "valid_targets_mean": 9966.4, "valid_targets_min": 2376 }, { "epoch": 0.9564777327935222, "grad_norm": 0.24759698838011684, "learning_rate": 3.901458227495549e-05, "loss": 0.4538, "loss_nan_ranks": 0, "loss_rank_avg": 0.15467405319213867, "step": 315, "valid_targets_mean": 10853.2, "valid_targets_min": 4115 }, { "epoch": 0.97165991902834, "grad_norm": 0.23434699678802617, "learning_rate": 3.8947932850286585e-05, "loss": 0.4666, "loss_nan_ranks": 0, "loss_rank_avg": 0.15081250667572021, "step": 320, "valid_targets_mean": 10789.0, "valid_targets_min": 1678 }, { "epoch": 0.9868421052631579, "grad_norm": 0.27532576364760486, "learning_rate": 3.887916338279014e-05, "loss": 0.4708, "loss_nan_ranks": 0, "loss_rank_avg": 0.18342334032058716, "step": 325, "valid_targets_mean": 10936.7, "valid_targets_min": 1649 }, { "epoch": 1.0, "grad_norm": 0.35899681080322277, "learning_rate": 3.8808281566931675e-05, "loss": 0.4468, "loss_nan_ranks": 0, "loss_rank_avg": 0.40738117694854736, "step": 330, "valid_targets_mean": 10492.2, "valid_targets_min": 2847 }, { "epoch": 1.0151821862348178, "grad_norm": 0.28458334885625075, "learning_rate": 3.873529533352277e-05, "loss": 0.4469, "loss_nan_ranks": 0, "loss_rank_avg": 0.13354119658470154, "step": 335, "valid_targets_mean": 9683.1, "valid_targets_min": 2160 }, { "epoch": 1.0303643724696356, "grad_norm": 0.354345323388759, "learning_rate": 3.8660212848833705e-05, "loss": 0.4414, "loss_nan_ranks": 0, "loss_rank_avg": 0.14219801127910614, "step": 340, "valid_targets_mean": 9913.6, "valid_targets_min": 2191 }, { "epoch": 1.0455465587044535, "grad_norm": 0.3350719189242995, "learning_rate": 3.858304251367972e-05, "loss": 0.4407, "loss_nan_ranks": 0, "loss_rank_avg": 0.15460778772830963, "step": 345, "valid_targets_mean": 10729.1, "valid_targets_min": 1941 }, { "epoch": 1.0607287449392713, "grad_norm": 0.2729944048209113, "learning_rate": 3.850379296248107e-05, "loss": 0.4479, "loss_nan_ranks": 0, "loss_rank_avg": 0.1340738832950592, "step": 350, "valid_targets_mean": 9968.5, "valid_targets_min": 2422 }, { "epoch": 1.075910931174089, "grad_norm": 0.22978531889990117, "learning_rate": 3.8422473062297e-05, "loss": 0.4402, "loss_nan_ranks": 0, "loss_rank_avg": 0.17449653148651123, "step": 355, "valid_targets_mean": 12469.6, "valid_targets_min": 755 }, { "epoch": 1.091093117408907, "grad_norm": 0.23887951948467376, "learning_rate": 3.8339091911833545e-05, "loss": 0.4404, "loss_nan_ranks": 0, "loss_rank_avg": 0.16491574048995972, "step": 360, "valid_targets_mean": 11062.2, "valid_targets_min": 3580 }, { "epoch": 1.1062753036437247, "grad_norm": 0.26815554514697304, "learning_rate": 3.825365884042553e-05, "loss": 0.4461, "loss_nan_ranks": 0, "loss_rank_avg": 0.12322846055030823, "step": 365, "valid_targets_mean": 8607.6, "valid_targets_min": 2790 }, { "epoch": 1.1214574898785425, "grad_norm": 0.25346781569245475, "learning_rate": 3.8166183406992745e-05, "loss": 0.4519, "loss_nan_ranks": 0, "loss_rank_avg": 0.15480399131774902, "step": 370, "valid_targets_mean": 10520.0, "valid_targets_min": 1761 }, { "epoch": 1.1366396761133604, "grad_norm": 0.26272434617647855, "learning_rate": 3.807667539897041e-05, "loss": 0.4349, "loss_nan_ranks": 0, "loss_rank_avg": 0.1547757238149643, "step": 375, "valid_targets_mean": 11900.1, "valid_targets_min": 2634 }, { "epoch": 1.1518218623481782, "grad_norm": 0.29964984986988785, "learning_rate": 3.798514483121408e-05, "loss": 0.4396, "loss_nan_ranks": 0, "loss_rank_avg": 0.13471867144107819, "step": 380, "valid_targets_mean": 9117.8, "valid_targets_min": 1867 }, { "epoch": 1.167004048582996, "grad_norm": 0.2533460035751316, "learning_rate": 3.789160194487908e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.1802617311477661, "step": 385, "valid_targets_mean": 12489.5, "valid_targets_min": 2196 }, { "epoch": 1.1821862348178138, "grad_norm": 0.28288452724215873, "learning_rate": 3.7796057206274686e-05, "loss": 0.4438, "loss_nan_ranks": 0, "loss_rank_avg": 0.1617857664823532, "step": 390, "valid_targets_mean": 11982.3, "valid_targets_min": 2783 }, { "epoch": 1.1973684210526316, "grad_norm": 0.2526195634079488, "learning_rate": 3.769852130569304e-05, "loss": 0.4429, "loss_nan_ranks": 0, "loss_rank_avg": 0.13690868020057678, "step": 395, "valid_targets_mean": 9777.0, "valid_targets_min": 2093 }, { "epoch": 1.2125506072874495, "grad_norm": 0.22129078972673513, "learning_rate": 3.7599005156213066e-05, "loss": 0.4366, "loss_nan_ranks": 0, "loss_rank_avg": 0.14769168198108673, "step": 400, "valid_targets_mean": 11107.2, "valid_targets_min": 3098 }, { "epoch": 1.2277327935222673, "grad_norm": 0.25167748549310603, "learning_rate": 3.74975198924794e-05, "loss": 0.4389, "loss_nan_ranks": 0, "loss_rank_avg": 0.13846036791801453, "step": 405, "valid_targets_mean": 9990.3, "valid_targets_min": 1426 }, { "epoch": 1.242914979757085, "grad_norm": 0.242028968841572, "learning_rate": 3.739407686945658e-05, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.12689189612865448, "step": 410, "valid_targets_mean": 9368.6, "valid_targets_min": 2146 }, { "epoch": 1.258097165991903, "grad_norm": 0.2475327665205262, "learning_rate": 3.728868766115854e-05, "loss": 0.4437, "loss_nan_ranks": 0, "loss_rank_avg": 0.15651461482048035, "step": 415, "valid_targets_mean": 9742.8, "valid_targets_min": 1110 }, { "epoch": 1.2732793522267207, "grad_norm": 0.24920519930764157, "learning_rate": 3.718136405935365e-05, "loss": 0.4432, "loss_nan_ranks": 0, "loss_rank_avg": 0.13602977991104126, "step": 420, "valid_targets_mean": 10792.3, "valid_targets_min": 3021 }, { "epoch": 1.2884615384615383, "grad_norm": 0.2722218179265993, "learning_rate": 3.707211807224534e-05, "loss": 0.4336, "loss_nan_ranks": 0, "loss_rank_avg": 0.16734597086906433, "step": 425, "valid_targets_mean": 11353.7, "valid_targets_min": 1823 }, { "epoch": 1.3036437246963564, "grad_norm": 0.24861353772242864, "learning_rate": 3.696096192312852e-05, "loss": 0.4311, "loss_nan_ranks": 0, "loss_rank_avg": 0.14648747444152832, "step": 430, "valid_targets_mean": 10162.9, "valid_targets_min": 1861 }, { "epoch": 1.318825910931174, "grad_norm": 0.24353293989235789, "learning_rate": 3.684790804902199e-05, "loss": 0.4349, "loss_nan_ranks": 0, "loss_rank_avg": 0.13794153928756714, "step": 435, "valid_targets_mean": 9891.2, "valid_targets_min": 1204 }, { "epoch": 1.334008097165992, "grad_norm": 0.2538455688242792, "learning_rate": 3.673296909927682e-05, "loss": 0.4463, "loss_nan_ranks": 0, "loss_rank_avg": 0.13822206854820251, "step": 440, "valid_targets_mean": 9649.3, "valid_targets_min": 1728 }, { "epoch": 1.3491902834008096, "grad_norm": 0.2195458402548249, "learning_rate": 3.661615793416109e-05, "loss": 0.4329, "loss_nan_ranks": 0, "loss_rank_avg": 0.1351986676454544, "step": 445, "valid_targets_mean": 9482.1, "valid_targets_min": 1607 }, { "epoch": 1.3643724696356276, "grad_norm": 0.262149947406993, "learning_rate": 3.649748762342098e-05, "loss": 0.4393, "loss_nan_ranks": 0, "loss_rank_avg": 0.15256932377815247, "step": 450, "valid_targets_mean": 10223.6, "valid_targets_min": 1663 }, { "epoch": 1.3795546558704452, "grad_norm": 0.24170742573441606, "learning_rate": 3.637697144481839e-05, "loss": 0.4348, "loss_nan_ranks": 0, "loss_rank_avg": 0.14698657393455505, "step": 455, "valid_targets_mean": 10525.9, "valid_targets_min": 2458 }, { "epoch": 1.3947368421052633, "grad_norm": 0.2831147102052386, "learning_rate": 3.625462288264536e-05, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.1650058627128601, "step": 460, "valid_targets_mean": 11955.9, "valid_targets_min": 2357 }, { "epoch": 1.4099190283400809, "grad_norm": 0.25507823085926157, "learning_rate": 3.613045562621533e-05, "loss": 0.4426, "loss_nan_ranks": 0, "loss_rank_avg": 0.16729950904846191, "step": 465, "valid_targets_mean": 11234.8, "valid_targets_min": 1488 }, { "epoch": 1.425101214574899, "grad_norm": 0.21517023605472518, "learning_rate": 3.600448356833146e-05, "loss": 0.4435, "loss_nan_ranks": 0, "loss_rank_avg": 0.14533230662345886, "step": 470, "valid_targets_mean": 10856.3, "valid_targets_min": 1958 }, { "epoch": 1.4402834008097165, "grad_norm": 0.27274183444021904, "learning_rate": 3.587672080373219e-05, "loss": 0.4351, "loss_nan_ranks": 0, "loss_rank_avg": 0.15404292941093445, "step": 475, "valid_targets_mean": 11907.4, "valid_targets_min": 2037 }, { "epoch": 1.4554655870445345, "grad_norm": 0.27757044107139334, "learning_rate": 3.574718162751426e-05, "loss": 0.4362, "loss_nan_ranks": 0, "loss_rank_avg": 0.1617705523967743, "step": 480, "valid_targets_mean": 10082.9, "valid_targets_min": 1439 }, { "epoch": 1.4706477732793521, "grad_norm": 0.22467367774993238, "learning_rate": 3.561588053353319e-05, "loss": 0.4448, "loss_nan_ranks": 0, "loss_rank_avg": 0.13222670555114746, "step": 485, "valid_targets_mean": 9991.5, "valid_targets_min": 1576 }, { "epoch": 1.48582995951417, "grad_norm": 0.23355252705926008, "learning_rate": 3.5482832212781655e-05, "loss": 0.4333, "loss_nan_ranks": 0, "loss_rank_avg": 0.18576061725616455, "step": 490, "valid_targets_mean": 12468.6, "valid_targets_min": 1985 }, { "epoch": 1.5010121457489878, "grad_norm": 0.2181912791479774, "learning_rate": 3.53480515517457e-05, "loss": 0.4354, "loss_nan_ranks": 0, "loss_rank_avg": 0.1390918791294098, "step": 495, "valid_targets_mean": 9409.0, "valid_targets_min": 1772 }, { "epoch": 1.5161943319838058, "grad_norm": 0.2604310615555627, "learning_rate": 3.5211553630739166e-05, "loss": 0.4423, "loss_nan_ranks": 0, "loss_rank_avg": 0.12898939847946167, "step": 500, "valid_targets_mean": 9808.9, "valid_targets_min": 2193 }, { "epoch": 1.5313765182186234, "grad_norm": 0.26061930166812924, "learning_rate": 3.5073353722216334e-05, "loss": 0.4407, "loss_nan_ranks": 0, "loss_rank_avg": 0.15980827808380127, "step": 505, "valid_targets_mean": 11388.4, "valid_targets_min": 1640 }, { "epoch": 1.5465587044534415, "grad_norm": 0.25366664801826005, "learning_rate": 3.4933467289063156e-05, "loss": 0.4259, "loss_nan_ranks": 0, "loss_rank_avg": 0.13890908658504486, "step": 510, "valid_targets_mean": 9539.9, "valid_targets_min": 2659 }, { "epoch": 1.561740890688259, "grad_norm": 0.22395372976390027, "learning_rate": 3.4791909982867175e-05, "loss": 0.4339, "loss_nan_ranks": 0, "loss_rank_avg": 0.1427781581878662, "step": 515, "valid_targets_mean": 10788.7, "valid_targets_min": 2272 }, { "epoch": 1.5769230769230769, "grad_norm": 0.2509901501713803, "learning_rate": 3.464869764216622e-05, "loss": 0.4362, "loss_nan_ranks": 0, "loss_rank_avg": 0.1664656698703766, "step": 520, "valid_targets_mean": 10282.4, "valid_targets_min": 1719 }, { "epoch": 1.5921052631578947, "grad_norm": 0.23050758734822147, "learning_rate": 3.450384629067635e-05, "loss": 0.4386, "loss_nan_ranks": 0, "loss_rank_avg": 0.14430655539035797, "step": 525, "valid_targets_mean": 11192.8, "valid_targets_min": 1942 }, { "epoch": 1.6072874493927125, "grad_norm": 0.24432443613636357, "learning_rate": 3.435737213549896e-05, "loss": 0.425, "loss_nan_ranks": 0, "loss_rank_avg": 0.15063516795635223, "step": 530, "valid_targets_mean": 11381.7, "valid_targets_min": 1608 }, { "epoch": 1.6224696356275303, "grad_norm": 0.22577785062585196, "learning_rate": 3.420929156530738e-05, "loss": 0.4337, "loss_nan_ranks": 0, "loss_rank_avg": 0.1626434326171875, "step": 535, "valid_targets_mean": 11110.8, "valid_targets_min": 2254 }, { "epoch": 1.6376518218623481, "grad_norm": 0.21783409731034523, "learning_rate": 3.405962114851324e-05, "loss": 0.4372, "loss_nan_ranks": 0, "loss_rank_avg": 0.1312275528907776, "step": 540, "valid_targets_mean": 9964.3, "valid_targets_min": 891 }, { "epoch": 1.652834008097166, "grad_norm": 0.22363734173869207, "learning_rate": 3.390837763141261e-05, "loss": 0.4295, "loss_nan_ranks": 0, "loss_rank_avg": 0.14635321497917175, "step": 545, "valid_targets_mean": 10282.8, "valid_targets_min": 442 }, { "epoch": 1.6680161943319838, "grad_norm": 0.28509501532395254, "learning_rate": 3.3755577936312344e-05, "loss": 0.4235, "loss_nan_ranks": 0, "loss_rank_avg": 0.11927291005849838, "step": 550, "valid_targets_mean": 8001.4, "valid_targets_min": 1991 }, { "epoch": 1.6831983805668016, "grad_norm": 0.273518896683822, "learning_rate": 3.360123915963662e-05, "loss": 0.4325, "loss_nan_ranks": 0, "loss_rank_avg": 0.14016571640968323, "step": 555, "valid_targets_mean": 10077.3, "valid_targets_min": 2843 }, { "epoch": 1.6983805668016194, "grad_norm": 0.21366230789574794, "learning_rate": 3.3445378570014125e-05, "loss": 0.4364, "loss_nan_ranks": 0, "loss_rank_avg": 0.11847387999296188, "step": 560, "valid_targets_mean": 8844.1, "valid_targets_min": 1250 }, { "epoch": 1.7135627530364372, "grad_norm": 0.22680113952919098, "learning_rate": 3.328801360634585e-05, "loss": 0.4396, "loss_nan_ranks": 0, "loss_rank_avg": 0.1664324402809143, "step": 565, "valid_targets_mean": 12107.6, "valid_targets_min": 2496 }, { "epoch": 1.728744939271255, "grad_norm": 0.22907607154865234, "learning_rate": 3.312916187585392e-05, "loss": 0.428, "loss_nan_ranks": 0, "loss_rank_avg": 0.1518704891204834, "step": 570, "valid_targets_mean": 11503.6, "valid_targets_min": 2129 }, { "epoch": 1.7439271255060729, "grad_norm": 0.22554888976194104, "learning_rate": 3.296884115211157e-05, "loss": 0.4224, "loss_nan_ranks": 0, "loss_rank_avg": 0.1115855723619461, "step": 575, "valid_targets_mean": 9672.9, "valid_targets_min": 2263 }, { "epoch": 1.7591093117408907, "grad_norm": 0.21649372224245605, "learning_rate": 3.280706937305445e-05, "loss": 0.437, "loss_nan_ranks": 0, "loss_rank_avg": 0.16382913291454315, "step": 580, "valid_targets_mean": 11463.7, "valid_targets_min": 2365 }, { "epoch": 1.7742914979757085, "grad_norm": 0.2536358792335515, "learning_rate": 3.2643864638973645e-05, "loss": 0.4336, "loss_nan_ranks": 0, "loss_rank_avg": 0.15374597907066345, "step": 585, "valid_targets_mean": 10600.5, "valid_targets_min": 2442 }, { "epoch": 1.7894736842105263, "grad_norm": 0.2447232506114259, "learning_rate": 3.2479245210490434e-05, "loss": 0.4363, "loss_nan_ranks": 0, "loss_rank_avg": 0.14378559589385986, "step": 590, "valid_targets_mean": 9931.5, "valid_targets_min": 2331 }, { "epoch": 1.8046558704453441, "grad_norm": 0.22625023350242304, "learning_rate": 3.2313229506513167e-05, "loss": 0.4251, "loss_nan_ranks": 0, "loss_rank_avg": 0.14341717958450317, "step": 595, "valid_targets_mean": 11203.3, "valid_targets_min": 3362 }, { "epoch": 1.819838056680162, "grad_norm": 0.22747844454240196, "learning_rate": 3.2145836102176424e-05, "loss": 0.4349, "loss_nan_ranks": 0, "loss_rank_avg": 0.12817899882793427, "step": 600, "valid_targets_mean": 9786.9, "valid_targets_min": 2551 }, { "epoch": 1.8350202429149798, "grad_norm": 0.21461340841905202, "learning_rate": 3.197708372676265e-05, "loss": 0.4328, "loss_nan_ranks": 0, "loss_rank_avg": 0.1507049798965454, "step": 605, "valid_targets_mean": 11408.8, "valid_targets_min": 3494 }, { "epoch": 1.8502024291497976, "grad_norm": 0.23227807829347027, "learning_rate": 3.1806991261606604e-05, "loss": 0.4275, "loss_nan_ranks": 0, "loss_rank_avg": 0.1595226526260376, "step": 610, "valid_targets_mean": 10987.9, "valid_targets_min": 477 }, { "epoch": 1.8653846153846154, "grad_norm": 0.22934685685191428, "learning_rate": 3.163557773798276e-05, "loss": 0.441, "loss_nan_ranks": 0, "loss_rank_avg": 0.18910819292068481, "step": 615, "valid_targets_mean": 12061.2, "valid_targets_min": 2666 }, { "epoch": 1.8805668016194332, "grad_norm": 0.24021212052305302, "learning_rate": 3.146286233497593e-05, "loss": 0.4309, "loss_nan_ranks": 0, "loss_rank_avg": 0.16946552693843842, "step": 620, "valid_targets_mean": 11908.8, "valid_targets_min": 1784 }, { "epoch": 1.895748987854251, "grad_norm": 0.23243414958739897, "learning_rate": 3.128886437733539e-05, "loss": 0.4274, "loss_nan_ranks": 0, "loss_rank_avg": 0.1634024977684021, "step": 625, "valid_targets_mean": 11875.7, "valid_targets_min": 2266 }, { "epoch": 1.9109311740890689, "grad_norm": 0.2321897218435078, "learning_rate": 3.111360333331263e-05, "loss": 0.4358, "loss_nan_ranks": 0, "loss_rank_avg": 0.12072163820266724, "step": 630, "valid_targets_mean": 9380.6, "valid_targets_min": 2435 }, { "epoch": 1.9261133603238867, "grad_norm": 0.2343398914268616, "learning_rate": 3.093709881248312e-05, "loss": 0.4244, "loss_nan_ranks": 0, "loss_rank_avg": 0.14065805077552795, "step": 635, "valid_targets_mean": 10873.3, "valid_targets_min": 2493 }, { "epoch": 1.9412955465587043, "grad_norm": 0.21403036899310532, "learning_rate": 3.075937056355225e-05, "loss": 0.4398, "loss_nan_ranks": 0, "loss_rank_avg": 0.16944406926631927, "step": 640, "valid_targets_mean": 12638.7, "valid_targets_min": 766 }, { "epoch": 1.9564777327935223, "grad_norm": 0.2475176696543535, "learning_rate": 3.0580438472145665e-05, "loss": 0.4231, "loss_nan_ranks": 0, "loss_rank_avg": 0.1551147699356079, "step": 645, "valid_targets_mean": 10001.7, "valid_targets_min": 1887 }, { "epoch": 1.97165991902834, "grad_norm": 0.22067069810740458, "learning_rate": 3.0400322558584308e-05, "loss": 0.4306, "loss_nan_ranks": 0, "loss_rank_avg": 0.14765214920043945, "step": 650, "valid_targets_mean": 10529.8, "valid_targets_min": 1695 }, { "epoch": 1.986842105263158, "grad_norm": 0.2200440889393565, "learning_rate": 3.0219042975644415e-05, "loss": 0.4224, "loss_nan_ranks": 0, "loss_rank_avg": 0.14973564445972443, "step": 655, "valid_targets_mean": 9583.8, "valid_targets_min": 3079 }, { "epoch": 2.0, "grad_norm": 0.2978629103491527, "learning_rate": 3.0036620006302624e-05, "loss": 0.4244, "loss_nan_ranks": 0, "loss_rank_avg": 0.4037216901779175, "step": 660, "valid_targets_mean": 9887.7, "valid_targets_min": 2034 }, { "epoch": 2.0151821862348176, "grad_norm": 0.24355665535718823, "learning_rate": 2.9853074061466602e-05, "loss": 0.3965, "loss_nan_ranks": 0, "loss_rank_avg": 0.11437727510929108, "step": 665, "valid_targets_mean": 9561.8, "valid_targets_min": 2799 }, { "epoch": 2.0303643724696356, "grad_norm": 0.24869442243804363, "learning_rate": 2.9668425677691278e-05, "loss": 0.4074, "loss_nan_ranks": 0, "loss_rank_avg": 0.15065011382102966, "step": 670, "valid_targets_mean": 10834.3, "valid_targets_min": 1423 }, { "epoch": 2.0455465587044532, "grad_norm": 0.2297659593459137, "learning_rate": 2.948269551488108e-05, "loss": 0.4011, "loss_nan_ranks": 0, "loss_rank_avg": 0.14018970727920532, "step": 675, "valid_targets_mean": 11208.8, "valid_targets_min": 2197 }, { "epoch": 2.0607287449392713, "grad_norm": 0.21760065911439302, "learning_rate": 2.929590435397832e-05, "loss": 0.4157, "loss_nan_ranks": 0, "loss_rank_avg": 0.15401902794837952, "step": 680, "valid_targets_mean": 12252.5, "valid_targets_min": 2775 }, { "epoch": 2.075910931174089, "grad_norm": 0.21929998454079455, "learning_rate": 2.9108073094638066e-05, "loss": 0.4061, "loss_nan_ranks": 0, "loss_rank_avg": 0.1402328610420227, "step": 685, "valid_targets_mean": 10520.2, "valid_targets_min": 810 }, { "epoch": 2.091093117408907, "grad_norm": 0.25762854084256154, "learning_rate": 2.8919222752889727e-05, "loss": 0.4042, "loss_nan_ranks": 0, "loss_rank_avg": 0.12665215134620667, "step": 690, "valid_targets_mean": 8784.8, "valid_targets_min": 2482 }, { "epoch": 2.1062753036437245, "grad_norm": 0.20883110039175287, "learning_rate": 2.8729374458785647e-05, "loss": 0.4033, "loss_nan_ranks": 0, "loss_rank_avg": 0.11460208892822266, "step": 695, "valid_targets_mean": 9300.2, "valid_targets_min": 1087 }, { "epoch": 2.1214574898785425, "grad_norm": 0.2280468030852318, "learning_rate": 2.8538549454036838e-05, "loss": 0.3992, "loss_nan_ranks": 0, "loss_rank_avg": 0.12859494984149933, "step": 700, "valid_targets_mean": 9836.1, "valid_targets_min": 3216 }, { "epoch": 2.13663967611336, "grad_norm": 0.2305434920238047, "learning_rate": 2.834676908963636e-05, "loss": 0.4062, "loss_nan_ranks": 0, "loss_rank_avg": 0.12350933253765106, "step": 705, "valid_targets_mean": 9741.5, "valid_targets_min": 2195 }, { "epoch": 2.151821862348178, "grad_norm": 0.2377162666149193, "learning_rate": 2.815405482347037e-05, "loss": 0.4015, "loss_nan_ranks": 0, "loss_rank_avg": 0.1677446961402893, "step": 710, "valid_targets_mean": 12769.0, "valid_targets_min": 2859 }, { "epoch": 2.167004048582996, "grad_norm": 0.21204075312597667, "learning_rate": 2.796042821791725e-05, "loss": 0.403, "loss_nan_ranks": 0, "loss_rank_avg": 0.1556544154882431, "step": 715, "valid_targets_mean": 12234.6, "valid_targets_min": 2507 }, { "epoch": 2.182186234817814, "grad_norm": 0.2039415062906729, "learning_rate": 2.776591093743505e-05, "loss": 0.4106, "loss_nan_ranks": 0, "loss_rank_avg": 0.11131976544857025, "step": 720, "valid_targets_mean": 9257.6, "valid_targets_min": 1640 }, { "epoch": 2.1973684210526314, "grad_norm": 0.22685508299446067, "learning_rate": 2.7570524746137485e-05, "loss": 0.4014, "loss_nan_ranks": 0, "loss_rank_avg": 0.11084121465682983, "step": 725, "valid_targets_mean": 8457.0, "valid_targets_min": 1852 }, { "epoch": 2.2125506072874495, "grad_norm": 0.20939970979319947, "learning_rate": 2.7374291505358818e-05, "loss": 0.4035, "loss_nan_ranks": 0, "loss_rank_avg": 0.13435867428779602, "step": 730, "valid_targets_mean": 10587.6, "valid_targets_min": 3988 }, { "epoch": 2.227732793522267, "grad_norm": 0.21631166810829094, "learning_rate": 2.7177233171207817e-05, "loss": 0.4058, "loss_nan_ranks": 0, "loss_rank_avg": 0.145869642496109, "step": 735, "valid_targets_mean": 12106.4, "valid_targets_min": 2588 }, { "epoch": 2.242914979757085, "grad_norm": 0.22219638160833832, "learning_rate": 2.6979371792111147e-05, "loss": 0.41, "loss_nan_ranks": 0, "loss_rank_avg": 0.12326790392398834, "step": 740, "valid_targets_mean": 9268.1, "valid_targets_min": 2090 }, { "epoch": 2.2580971659919027, "grad_norm": 0.21644687932298712, "learning_rate": 2.678072950634641e-05, "loss": 0.409, "loss_nan_ranks": 0, "loss_rank_avg": 0.14557188749313354, "step": 745, "valid_targets_mean": 11407.6, "valid_targets_min": 1829 }, { "epoch": 2.2732793522267207, "grad_norm": 0.2086747965666265, "learning_rate": 2.6581328539565184e-05, "loss": 0.4125, "loss_nan_ranks": 0, "loss_rank_avg": 0.1296234130859375, "step": 750, "valid_targets_mean": 11610.8, "valid_targets_min": 1192 }, { "epoch": 2.2884615384615383, "grad_norm": 0.21997306254416282, "learning_rate": 2.638119120230616e-05, "loss": 0.4007, "loss_nan_ranks": 0, "loss_rank_avg": 0.11541731655597687, "step": 755, "valid_targets_mean": 9031.7, "valid_targets_min": 1477 }, { "epoch": 2.3036437246963564, "grad_norm": 0.22670698334583528, "learning_rate": 2.618033988749895e-05, "loss": 0.409, "loss_nan_ranks": 0, "loss_rank_avg": 0.1477312594652176, "step": 760, "valid_targets_mean": 11624.3, "valid_targets_min": 1991 }, { "epoch": 2.318825910931174, "grad_norm": 0.226842879488616, "learning_rate": 2.5978797067958542e-05, "loss": 0.402, "loss_nan_ranks": 0, "loss_rank_avg": 0.16331815719604492, "step": 765, "valid_targets_mean": 12411.1, "valid_targets_min": 2223 }, { "epoch": 2.334008097165992, "grad_norm": 0.24365624193190316, "learning_rate": 2.5776585293870877e-05, "loss": 0.4029, "loss_nan_ranks": 0, "loss_rank_avg": 0.1397906094789505, "step": 770, "valid_targets_mean": 10511.0, "valid_targets_min": 3395 }, { "epoch": 2.3491902834008096, "grad_norm": 0.22210597485491806, "learning_rate": 2.557372719026976e-05, "loss": 0.4003, "loss_nan_ranks": 0, "loss_rank_avg": 0.14930933713912964, "step": 775, "valid_targets_mean": 11476.6, "valid_targets_min": 2831 }, { "epoch": 2.3643724696356276, "grad_norm": 0.24020132746247502, "learning_rate": 2.537024545450539e-05, "loss": 0.3997, "loss_nan_ranks": 0, "loss_rank_avg": 0.1250806450843811, "step": 780, "valid_targets_mean": 10802.0, "valid_targets_min": 1656 }, { "epoch": 2.3795546558704452, "grad_norm": 0.21970939450799473, "learning_rate": 2.5166162853704825e-05, "loss": 0.412, "loss_nan_ranks": 0, "loss_rank_avg": 0.1378406286239624, "step": 785, "valid_targets_mean": 9006.6, "valid_targets_min": 1612 }, { "epoch": 2.3947368421052633, "grad_norm": 0.20791735340812462, "learning_rate": 2.496150222222458e-05, "loss": 0.4062, "loss_nan_ranks": 0, "loss_rank_avg": 0.14031660556793213, "step": 790, "valid_targets_mean": 11368.5, "valid_targets_min": 2404 }, { "epoch": 2.409919028340081, "grad_norm": 0.26153369844841917, "learning_rate": 2.475628645909576e-05, "loss": 0.4036, "loss_nan_ranks": 0, "loss_rank_avg": 0.13041003048419952, "step": 795, "valid_targets_mean": 10017.9, "valid_targets_min": 2411 }, { "epoch": 2.425101214574899, "grad_norm": 0.22027316195260252, "learning_rate": 2.4550538525461963e-05, "loss": 0.4058, "loss_nan_ranks": 0, "loss_rank_avg": 0.13168390095233917, "step": 800, "valid_targets_mean": 10116.5, "valid_targets_min": 2301 }, { "epoch": 2.4402834008097165, "grad_norm": 0.20735932325707584, "learning_rate": 2.434428144201016e-05, "loss": 0.4031, "loss_nan_ranks": 0, "loss_rank_avg": 0.141376793384552, "step": 805, "valid_targets_mean": 11278.5, "valid_targets_min": 2941 }, { "epoch": 2.4554655870445345, "grad_norm": 0.20965832979327945, "learning_rate": 2.4137538286394976e-05, "loss": 0.397, "loss_nan_ranks": 0, "loss_rank_avg": 0.1296098828315735, "step": 810, "valid_targets_mean": 9996.4, "valid_targets_min": 2669 }, { "epoch": 2.470647773279352, "grad_norm": 0.20323563119959276, "learning_rate": 2.3930332190656604e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.12276121973991394, "step": 815, "valid_targets_mean": 10276.2, "valid_targets_min": 3924 }, { "epoch": 2.48582995951417, "grad_norm": 0.2222712599782443, "learning_rate": 2.3722686338632602e-05, "loss": 0.4048, "loss_nan_ranks": 0, "loss_rank_avg": 0.15194165706634521, "step": 820, "valid_targets_mean": 11763.2, "valid_targets_min": 1754 }, { "epoch": 2.501012145748988, "grad_norm": 0.21676101726038569, "learning_rate": 2.3514623963363886e-05, "loss": 0.399, "loss_nan_ranks": 0, "loss_rank_avg": 0.1177615374326706, "step": 825, "valid_targets_mean": 9788.6, "valid_targets_min": 1647 }, { "epoch": 2.516194331983806, "grad_norm": 0.24747798535074458, "learning_rate": 2.330616834449525e-05, "loss": 0.3976, "loss_nan_ranks": 0, "loss_rank_avg": 0.10470348596572876, "step": 830, "valid_targets_mean": 8228.6, "valid_targets_min": 1640 }, { "epoch": 2.5313765182186234, "grad_norm": 0.23642757448228505, "learning_rate": 2.309734280567065e-05, "loss": 0.4008, "loss_nan_ranks": 0, "loss_rank_avg": 0.1282092034816742, "step": 835, "valid_targets_mean": 9554.1, "valid_targets_min": 1823 }, { "epoch": 2.5465587044534415, "grad_norm": 0.21463605718507556, "learning_rate": 2.28881707119236e-05, "loss": 0.4069, "loss_nan_ranks": 0, "loss_rank_avg": 0.14472943544387817, "step": 840, "valid_targets_mean": 11038.6, "valid_targets_min": 1316 }, { "epoch": 2.561740890688259, "grad_norm": 0.19830704786987963, "learning_rate": 2.267867546706287e-05, "loss": 0.3993, "loss_nan_ranks": 0, "loss_rank_avg": 0.1337677240371704, "step": 845, "valid_targets_mean": 11115.8, "valid_targets_min": 1658 }, { "epoch": 2.5769230769230766, "grad_norm": 0.22111503225469503, "learning_rate": 2.2468880511053896e-05, "loss": 0.4073, "loss_nan_ranks": 0, "loss_rank_avg": 0.13332180678844452, "step": 850, "valid_targets_mean": 9738.6, "valid_targets_min": 3298 }, { "epoch": 2.5921052631578947, "grad_norm": 0.22257999514713242, "learning_rate": 2.2258809317396163e-05, "loss": 0.4052, "loss_nan_ranks": 0, "loss_rank_avg": 0.1417657732963562, "step": 855, "valid_targets_mean": 10401.3, "valid_targets_min": 406 }, { "epoch": 2.6072874493927127, "grad_norm": 0.21253273139588202, "learning_rate": 2.2048485390496757e-05, "loss": 0.3977, "loss_nan_ranks": 0, "loss_rank_avg": 0.09866519272327423, "step": 860, "valid_targets_mean": 7923.8, "valid_targets_min": 2214 }, { "epoch": 2.6224696356275303, "grad_norm": 0.22870814783465315, "learning_rate": 2.1837932263040553e-05, "loss": 0.4027, "loss_nan_ranks": 0, "loss_rank_avg": 0.1660136878490448, "step": 865, "valid_targets_mean": 11078.2, "valid_targets_min": 2212 }, { "epoch": 2.637651821862348, "grad_norm": 0.29931964038640724, "learning_rate": 2.1627173493357167e-05, "loss": 0.4004, "loss_nan_ranks": 0, "loss_rank_avg": 0.14233574271202087, "step": 870, "valid_targets_mean": 10852.8, "valid_targets_min": 2910 }, { "epoch": 2.652834008097166, "grad_norm": 0.25534440527100677, "learning_rate": 2.1416232662785084e-05, "loss": 0.4037, "loss_nan_ranks": 0, "loss_rank_avg": 0.13631418347358704, "step": 875, "valid_targets_mean": 10067.3, "valid_targets_min": 1426 }, { "epoch": 2.668016194331984, "grad_norm": 0.2371733144530005, "learning_rate": 2.1205133373033173e-05, "loss": 0.404, "loss_nan_ranks": 0, "loss_rank_avg": 0.1404227614402771, "step": 880, "valid_targets_mean": 10728.2, "valid_targets_min": 1271 }, { "epoch": 2.6831983805668016, "grad_norm": 0.2078109950480519, "learning_rate": 2.0993899243539953e-05, "loss": 0.3994, "loss_nan_ranks": 0, "loss_rank_avg": 0.1199427992105484, "step": 885, "valid_targets_mean": 8727.6, "valid_targets_min": 1812 }, { "epoch": 2.698380566801619, "grad_norm": 0.2159301658980046, "learning_rate": 2.0782553908830887e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.13723723590373993, "step": 890, "valid_targets_mean": 11083.8, "valid_targets_min": 2927 }, { "epoch": 2.7135627530364372, "grad_norm": 0.19636767747538167, "learning_rate": 2.0571121015873924e-05, "loss": 0.4057, "loss_nan_ranks": 0, "loss_rank_avg": 0.16593371331691742, "step": 895, "valid_targets_mean": 13503.1, "valid_targets_min": 1671 }, { "epoch": 2.7287449392712553, "grad_norm": 0.22327456821224023, "learning_rate": 2.0359624221433728e-05, "loss": 0.4058, "loss_nan_ranks": 0, "loss_rank_avg": 0.127730131149292, "step": 900, "valid_targets_mean": 10031.6, "valid_targets_min": 2126 }, { "epoch": 2.743927125506073, "grad_norm": 0.20041291146103954, "learning_rate": 2.014808718942476e-05, "loss": 0.3999, "loss_nan_ranks": 0, "loss_rank_avg": 0.13702960312366486, "step": 905, "valid_targets_mean": 11859.9, "valid_targets_min": 1344 }, { "epoch": 2.7591093117408905, "grad_norm": 0.1959510117062367, "learning_rate": 1.9936533588263557e-05, "loss": 0.399, "loss_nan_ranks": 0, "loss_rank_avg": 0.13599321246147156, "step": 910, "valid_targets_mean": 11546.5, "valid_targets_min": 721 }, { "epoch": 2.7742914979757085, "grad_norm": 0.2150020133924093, "learning_rate": 1.9724987088220565e-05, "loss": 0.4121, "loss_nan_ranks": 0, "loss_rank_avg": 0.14389005303382874, "step": 915, "valid_targets_mean": 11402.6, "valid_targets_min": 2827 }, { "epoch": 2.7894736842105265, "grad_norm": 0.21215898602744604, "learning_rate": 1.951347135877169e-05, "loss": 0.4062, "loss_nan_ranks": 0, "loss_rank_avg": 0.1301729381084442, "step": 920, "valid_targets_mean": 9943.8, "valid_targets_min": 2395 }, { "epoch": 2.804655870445344, "grad_norm": 0.20967773612549848, "learning_rate": 1.930201006594999e-05, "loss": 0.3988, "loss_nan_ranks": 0, "loss_rank_avg": 0.1343080699443817, "step": 925, "valid_targets_mean": 12364.9, "valid_targets_min": 3036 }, { "epoch": 2.8198380566801617, "grad_norm": 0.21630796564169155, "learning_rate": 1.9090626869697714e-05, "loss": 0.3978, "loss_nan_ranks": 0, "loss_rank_avg": 0.12192901223897934, "step": 930, "valid_targets_mean": 9816.8, "valid_targets_min": 2402 }, { "epoch": 2.83502024291498, "grad_norm": 0.20100577630192085, "learning_rate": 1.8879345421219063e-05, "loss": 0.4065, "loss_nan_ranks": 0, "loss_rank_avg": 0.1359075903892517, "step": 935, "valid_targets_mean": 10343.0, "valid_targets_min": 442 }, { "epoch": 2.850202429149798, "grad_norm": 0.23046010924694144, "learning_rate": 1.8668189360333923e-05, "loss": 0.397, "loss_nan_ranks": 0, "loss_rank_avg": 0.11910334974527359, "step": 940, "valid_targets_mean": 9519.0, "valid_targets_min": 978 }, { "epoch": 2.8653846153846154, "grad_norm": 0.20448111009525943, "learning_rate": 1.845718231283281e-05, "loss": 0.4007, "loss_nan_ranks": 0, "loss_rank_avg": 0.11319918185472488, "step": 945, "valid_targets_mean": 8395.6, "valid_targets_min": 1575 }, { "epoch": 2.880566801619433, "grad_norm": 0.20645165771896762, "learning_rate": 1.8246347887833457e-05, "loss": 0.402, "loss_nan_ranks": 0, "loss_rank_avg": 0.11795958131551743, "step": 950, "valid_targets_mean": 9538.2, "valid_targets_min": 343 }, { "epoch": 2.895748987854251, "grad_norm": 0.21282459320317798, "learning_rate": 1.8035709675139258e-05, "loss": 0.4056, "loss_nan_ranks": 0, "loss_rank_avg": 0.11727368831634521, "step": 955, "valid_targets_mean": 8698.4, "valid_targets_min": 1446 }, { "epoch": 2.910931174089069, "grad_norm": 0.19781964939751115, "learning_rate": 1.7825291242599837e-05, "loss": 0.3959, "loss_nan_ranks": 0, "loss_rank_avg": 0.11419452726840973, "step": 960, "valid_targets_mean": 9466.3, "valid_targets_min": 2153 }, { "epoch": 2.9261133603238867, "grad_norm": 0.21002201019480785, "learning_rate": 1.7615116133474084e-05, "loss": 0.4064, "loss_nan_ranks": 0, "loss_rank_avg": 0.13672292232513428, "step": 965, "valid_targets_mean": 10546.8, "valid_targets_min": 1985 }, { "epoch": 2.9412955465587043, "grad_norm": 0.21275931479304183, "learning_rate": 1.7405207863795966e-05, "loss": 0.3967, "loss_nan_ranks": 0, "loss_rank_avg": 0.13758745789527893, "step": 970, "valid_targets_mean": 9654.3, "valid_targets_min": 1915 }, { "epoch": 2.9564777327935223, "grad_norm": 0.20100360416696722, "learning_rate": 1.719558991974339e-05, "loss": 0.3973, "loss_nan_ranks": 0, "loss_rank_avg": 0.1079438179731369, "step": 975, "valid_targets_mean": 9043.8, "valid_targets_min": 2879 }, { "epoch": 2.97165991902834, "grad_norm": 0.2269975836389704, "learning_rate": 1.698628575501034e-05, "loss": 0.3999, "loss_nan_ranks": 0, "loss_rank_avg": 0.14483851194381714, "step": 980, "valid_targets_mean": 11436.7, "valid_targets_min": 1211 }, { "epoch": 2.986842105263158, "grad_norm": 0.1879476526932179, "learning_rate": 1.6777318788182723e-05, "loss": 0.4035, "loss_nan_ranks": 0, "loss_rank_avg": 0.14617758989334106, "step": 985, "valid_targets_mean": 11645.2, "valid_targets_min": 3089 }, { "epoch": 3.0, "grad_norm": 0.3010199328168687, "learning_rate": 1.6568712400118102e-05, "loss": 0.3969, "loss_nan_ranks": 0, "loss_rank_avg": 0.39078396558761597, "step": 990, "valid_targets_mean": 10663.5, "valid_targets_min": 2457 }, { "epoch": 3.0151821862348176, "grad_norm": 0.21781751875463537, "learning_rate": 1.636048993132969e-05, "loss": 0.3834, "loss_nan_ranks": 0, "loss_rank_avg": 0.13729703426361084, "step": 995, "valid_targets_mean": 10773.5, "valid_targets_min": 1279 }, { "epoch": 3.0303643724696356, "grad_norm": 0.2230417656760043, "learning_rate": 1.615267467937479e-05, "loss": 0.3792, "loss_nan_ranks": 0, "loss_rank_avg": 0.13346058130264282, "step": 1000, "valid_targets_mean": 11306.1, "valid_targets_min": 2234 }, { "epoch": 3.0455465587044532, "grad_norm": 0.21733111151874662, "learning_rate": 1.59452898962481e-05, "loss": 0.3901, "loss_nan_ranks": 0, "loss_rank_avg": 0.13238683342933655, "step": 1005, "valid_targets_mean": 11750.0, "valid_targets_min": 3215 }, { "epoch": 3.0607287449392713, "grad_norm": 0.22625370843565767, "learning_rate": 1.573835878578013e-05, "loss": 0.3762, "loss_nan_ranks": 0, "loss_rank_avg": 0.1422734260559082, "step": 1010, "valid_targets_mean": 11209.9, "valid_targets_min": 2079 }, { "epoch": 3.075910931174089, "grad_norm": 0.19666515336266407, "learning_rate": 1.5531904501040917e-05, "loss": 0.3807, "loss_nan_ranks": 0, "loss_rank_avg": 0.1307479739189148, "step": 1015, "valid_targets_mean": 11612.6, "valid_targets_min": 2971 }, { "epoch": 3.091093117408907, "grad_norm": 0.1928886855734126, "learning_rate": 1.5325950141749522e-05, "loss": 0.3826, "loss_nan_ranks": 0, "loss_rank_avg": 0.13941217958927155, "step": 1020, "valid_targets_mean": 11068.5, "valid_targets_min": 909 }, { "epoch": 3.1062753036437245, "grad_norm": 0.21072607913357874, "learning_rate": 1.5120518751689438e-05, "loss": 0.3788, "loss_nan_ranks": 0, "loss_rank_avg": 0.106941819190979, "step": 1025, "valid_targets_mean": 9145.7, "valid_targets_min": 2303 }, { "epoch": 3.1214574898785425, "grad_norm": 0.20354552500648707, "learning_rate": 1.4915633316130267e-05, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.12127313017845154, "step": 1030, "valid_targets_mean": 9670.7, "valid_targets_min": 2686 }, { "epoch": 3.13663967611336, "grad_norm": 0.1914779822177274, "learning_rate": 1.4711316759255963e-05, "loss": 0.3878, "loss_nan_ranks": 0, "loss_rank_avg": 0.11697717756032944, "step": 1035, "valid_targets_mean": 9475.3, "valid_targets_min": 1230 }, { "epoch": 3.151821862348178, "grad_norm": 0.20902997896333064, "learning_rate": 1.450759194159987e-05, "loss": 0.381, "loss_nan_ranks": 0, "loss_rank_avg": 0.12032449245452881, "step": 1040, "valid_targets_mean": 9425.8, "valid_targets_min": 2479 }, { "epoch": 3.167004048582996, "grad_norm": 0.19696063514807588, "learning_rate": 1.4304481657486955e-05, "loss": 0.3778, "loss_nan_ranks": 0, "loss_rank_avg": 0.08829522132873535, "step": 1045, "valid_targets_mean": 8185.7, "valid_targets_min": 2185 }, { "epoch": 3.182186234817814, "grad_norm": 0.20177029184014197, "learning_rate": 1.4102008632483344e-05, "loss": 0.3873, "loss_nan_ranks": 0, "loss_rank_avg": 0.11741188913583755, "step": 1050, "valid_targets_mean": 8940.3, "valid_targets_min": 2097 }, { "epoch": 3.1973684210526314, "grad_norm": 0.2136067023802031, "learning_rate": 1.3900195520853628e-05, "loss": 0.3795, "loss_nan_ranks": 0, "loss_rank_avg": 0.13736692070960999, "step": 1055, "valid_targets_mean": 12065.6, "valid_targets_min": 2194 }, { "epoch": 3.2125506072874495, "grad_norm": 0.2012264827781041, "learning_rate": 1.3699064903026149e-05, "loss": 0.3758, "loss_nan_ranks": 0, "loss_rank_avg": 0.13378620147705078, "step": 1060, "valid_targets_mean": 11753.4, "valid_targets_min": 4457 }, { "epoch": 3.227732793522267, "grad_norm": 0.19533737596737558, "learning_rate": 1.34986392830665e-05, "loss": 0.3834, "loss_nan_ranks": 0, "loss_rank_avg": 0.12444774061441422, "step": 1065, "valid_targets_mean": 10883.3, "valid_targets_min": 1829 }, { "epoch": 3.242914979757085, "grad_norm": 0.1913483548977371, "learning_rate": 1.3298941086159598e-05, "loss": 0.3797, "loss_nan_ranks": 0, "loss_rank_avg": 0.1327345073223114, "step": 1070, "valid_targets_mean": 11969.5, "valid_targets_min": 2300 }, { "epoch": 3.2580971659919027, "grad_norm": 0.20277132333096498, "learning_rate": 1.3099992656100592e-05, "loss": 0.3867, "loss_nan_ranks": 0, "loss_rank_avg": 0.10380689799785614, "step": 1075, "valid_targets_mean": 8426.2, "valid_targets_min": 1565 }, { "epoch": 3.2732793522267207, "grad_norm": 0.19529197941773585, "learning_rate": 1.2901816252794848e-05, "loss": 0.3804, "loss_nan_ranks": 0, "loss_rank_avg": 0.12796002626419067, "step": 1080, "valid_targets_mean": 10604.7, "valid_targets_min": 1972 }, { "epoch": 3.2884615384615383, "grad_norm": 0.1842338834508977, "learning_rate": 1.2704434049767356e-05, "loss": 0.3928, "loss_nan_ranks": 0, "loss_rank_avg": 0.15373364090919495, "step": 1085, "valid_targets_mean": 13930.4, "valid_targets_min": 2820 }, { "epoch": 3.3036437246963564, "grad_norm": 0.1963156313504413, "learning_rate": 1.250786813168176e-05, "loss": 0.3814, "loss_nan_ranks": 0, "loss_rank_avg": 0.12112146615982056, "step": 1090, "valid_targets_mean": 9762.8, "valid_targets_min": 245 }, { "epoch": 3.318825910931174, "grad_norm": 0.20045272092400518, "learning_rate": 1.2312140491869369e-05, "loss": 0.3785, "loss_nan_ranks": 0, "loss_rank_avg": 0.11439470946788788, "step": 1095, "valid_targets_mean": 9937.9, "valid_targets_min": 2642 }, { "epoch": 3.334008097165992, "grad_norm": 0.21007599323553236, "learning_rate": 1.2117273029868362e-05, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.10896288603544235, "step": 1100, "valid_targets_mean": 9575.8, "valid_targets_min": 2482 }, { "epoch": 3.3491902834008096, "grad_norm": 0.19927323498968952, "learning_rate": 1.1923287548973508e-05, "loss": 0.3818, "loss_nan_ranks": 0, "loss_rank_avg": 0.12662863731384277, "step": 1105, "valid_targets_mean": 11785.4, "valid_targets_min": 1696 }, { "epoch": 3.3643724696356276, "grad_norm": 0.19519616335018541, "learning_rate": 1.1730205753796631e-05, "loss": 0.3759, "loss_nan_ranks": 0, "loss_rank_avg": 0.12134730815887451, "step": 1110, "valid_targets_mean": 10729.1, "valid_targets_min": 3001 }, { "epoch": 3.3795546558704452, "grad_norm": 0.21028694968176903, "learning_rate": 1.1538049247838128e-05, "loss": 0.3871, "loss_nan_ranks": 0, "loss_rank_avg": 0.13904663920402527, "step": 1115, "valid_targets_mean": 10362.8, "valid_targets_min": 1519 }, { "epoch": 3.3947368421052633, "grad_norm": 0.19912225165901493, "learning_rate": 1.134683953106983e-05, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.12339392304420471, "step": 1120, "valid_targets_mean": 9278.3, "valid_targets_min": 2476 }, { "epoch": 3.409919028340081, "grad_norm": 0.19327825454771747, "learning_rate": 1.115659799752938e-05, "loss": 0.3733, "loss_nan_ranks": 0, "loss_rank_avg": 0.1173870712518692, "step": 1125, "valid_targets_mean": 10774.1, "valid_targets_min": 413 }, { "epoch": 3.425101214574899, "grad_norm": 0.1844225077898535, "learning_rate": 1.096734593292649e-05, "loss": 0.3823, "loss_nan_ranks": 0, "loss_rank_avg": 0.1067117229104042, "step": 1130, "valid_targets_mean": 9642.9, "valid_targets_min": 2019 }, { "epoch": 3.4402834008097165, "grad_norm": 0.2066798200776645, "learning_rate": 1.077910451226138e-05, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.11906486749649048, "step": 1135, "valid_targets_mean": 10392.2, "valid_targets_min": 2625 }, { "epoch": 3.4554655870445345, "grad_norm": 0.20827790359398754, "learning_rate": 1.0591894797455526e-05, "loss": 0.3822, "loss_nan_ranks": 0, "loss_rank_avg": 0.11905896663665771, "step": 1140, "valid_targets_mean": 10585.9, "valid_targets_min": 1563 }, { "epoch": 3.470647773279352, "grad_norm": 0.20902598407121353, "learning_rate": 1.0405737734995083e-05, "loss": 0.3762, "loss_nan_ranks": 0, "loss_rank_avg": 0.13168013095855713, "step": 1145, "valid_targets_mean": 11115.3, "valid_targets_min": 2500 }, { "epoch": 3.48582995951417, "grad_norm": 0.17996554685296223, "learning_rate": 1.0220654153587225e-05, "loss": 0.3864, "loss_nan_ranks": 0, "loss_rank_avg": 0.11831013113260269, "step": 1150, "valid_targets_mean": 9677.0, "valid_targets_min": 1782 }, { "epoch": 3.501012145748988, "grad_norm": 0.18994778826070668, "learning_rate": 1.00366647618297e-05, "loss": 0.3749, "loss_nan_ranks": 0, "loss_rank_avg": 0.10179728269577026, "step": 1155, "valid_targets_mean": 8558.6, "valid_targets_min": 1210 }, { "epoch": 3.516194331983806, "grad_norm": 0.19546690886244628, "learning_rate": 9.853790145893742e-06, "loss": 0.3789, "loss_nan_ranks": 0, "loss_rank_avg": 0.14318439364433289, "step": 1160, "valid_targets_mean": 11218.9, "valid_targets_min": 3131 }, { "epoch": 3.5313765182186234, "grad_norm": 0.18549801901119017, "learning_rate": 9.672050767220765e-06, "loss": 0.3783, "loss_nan_ranks": 0, "loss_rank_avg": 0.12476805597543716, "step": 1165, "valid_targets_mean": 10252.2, "valid_targets_min": 2786 }, { "epoch": 3.5465587044534415, "grad_norm": 0.18732516786072945, "learning_rate": 9.491466960232955e-06, "loss": 0.3723, "loss_nan_ranks": 0, "loss_rank_avg": 0.11273340880870819, "step": 1170, "valid_targets_mean": 8616.5, "valid_targets_min": 1453 }, { "epoch": 3.561740890688259, "grad_norm": 0.20149643791941338, "learning_rate": 9.312058930058114e-06, "loss": 0.3785, "loss_nan_ranks": 0, "loss_rank_avg": 0.11582259833812714, "step": 1175, "valid_targets_mean": 9118.4, "valid_targets_min": 787 }, { "epoch": 3.5769230769230766, "grad_norm": 0.21085059562942834, "learning_rate": 9.133846750268945e-06, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.13565123081207275, "step": 1180, "valid_targets_mean": 10716.3, "valid_targets_min": 1933 }, { "epoch": 3.5921052631578947, "grad_norm": 0.19440857465551512, "learning_rate": 8.956850360637046e-06, "loss": 0.3806, "loss_nan_ranks": 0, "loss_rank_avg": 0.11549557745456696, "step": 1185, "valid_targets_mean": 9663.7, "valid_targets_min": 2336 }, { "epoch": 3.6072874493927127, "grad_norm": 0.18580297908502816, "learning_rate": 8.78108956490194e-06, "loss": 0.3822, "loss_nan_ranks": 0, "loss_rank_avg": 0.1174996942281723, "step": 1190, "valid_targets_mean": 9356.2, "valid_targets_min": 1694 }, { "epoch": 3.6224696356275303, "grad_norm": 0.19481766458314584, "learning_rate": 8.606584028555225e-06, "loss": 0.3769, "loss_nan_ranks": 0, "loss_rank_avg": 0.13105148077011108, "step": 1195, "valid_targets_mean": 10477.2, "valid_targets_min": 2939 }, { "epoch": 3.637651821862348, "grad_norm": 0.18881391921278226, "learning_rate": 8.43335327664027e-06, "loss": 0.3765, "loss_nan_ranks": 0, "loss_rank_avg": 0.15695153176784515, "step": 1200, "valid_targets_mean": 13012.8, "valid_targets_min": 417 }, { "epoch": 3.652834008097166, "grad_norm": 0.1994823937746155, "learning_rate": 8.261416691567601e-06, "loss": 0.3691, "loss_nan_ranks": 0, "loss_rank_avg": 0.12895889580249786, "step": 1205, "valid_targets_mean": 11328.2, "valid_targets_min": 1957 }, { "epoch": 3.668016194331984, "grad_norm": 0.19695915654379229, "learning_rate": 8.090793510946242e-06, "loss": 0.3647, "loss_nan_ranks": 0, "loss_rank_avg": 0.10856752097606659, "step": 1210, "valid_targets_mean": 9687.9, "valid_targets_min": 3468 }, { "epoch": 3.6831983805668016, "grad_norm": 0.20405732090532516, "learning_rate": 7.921502825431258e-06, "loss": 0.3583, "loss_nan_ranks": 0, "loss_rank_avg": 0.11551082879304886, "step": 1215, "valid_targets_mean": 9392.2, "valid_targets_min": 2945 }, { "epoch": 3.698380566801619, "grad_norm": 0.19864256408199005, "learning_rate": 7.753563576587753e-06, "loss": 0.3747, "loss_nan_ranks": 0, "loss_rank_avg": 0.12581707537174225, "step": 1220, "valid_targets_mean": 11870.0, "valid_targets_min": 3248 }, { "epoch": 3.7135627530364372, "grad_norm": 0.2153470946121616, "learning_rate": 7.5869945547715275e-06, "loss": 0.3732, "loss_nan_ranks": 0, "loss_rank_avg": 0.1357821673154831, "step": 1225, "valid_targets_mean": 10057.5, "valid_targets_min": 755 }, { "epoch": 3.7287449392712553, "grad_norm": 0.19143348370922542, "learning_rate": 7.421814397026674e-06, "loss": 0.3683, "loss_nan_ranks": 0, "loss_rank_avg": 0.12181690335273743, "step": 1230, "valid_targets_mean": 10055.2, "valid_targets_min": 1140 }, { "epoch": 3.743927125506073, "grad_norm": 0.18437116959705785, "learning_rate": 7.258041585000317e-06, "loss": 0.3608, "loss_nan_ranks": 0, "loss_rank_avg": 0.10211285203695297, "step": 1235, "valid_targets_mean": 8725.9, "valid_targets_min": 1518 }, { "epoch": 3.7591093117408905, "grad_norm": 0.18527155499014733, "learning_rate": 7.095694442874743e-06, "loss": 0.3689, "loss_nan_ranks": 0, "loss_rank_avg": 0.13864530622959137, "step": 1240, "valid_targets_mean": 11871.8, "valid_targets_min": 1440 }, { "epoch": 3.7742914979757085, "grad_norm": 0.20256166841034448, "learning_rate": 6.934791135317147e-06, "loss": 0.3684, "loss_nan_ranks": 0, "loss_rank_avg": 0.12536349892616272, "step": 1245, "valid_targets_mean": 9864.8, "valid_targets_min": 1364 }, { "epoch": 3.7894736842105265, "grad_norm": 0.1885365721274064, "learning_rate": 6.775349665447222e-06, "loss": 0.3612, "loss_nan_ranks": 0, "loss_rank_avg": 0.10805296897888184, "step": 1250, "valid_targets_mean": 10663.3, "valid_targets_min": 1778 }, { "epoch": 3.804655870445344, "grad_norm": 0.19117386431021458, "learning_rate": 6.617387872822842e-06, "loss": 0.3707, "loss_nan_ranks": 0, "loss_rank_avg": 0.10408435761928558, "step": 1255, "valid_targets_mean": 10560.4, "valid_targets_min": 3376 }, { "epoch": 3.8198380566801617, "grad_norm": 0.1887911686679957, "learning_rate": 6.460923431444015e-06, "loss": 0.3634, "loss_nan_ranks": 0, "loss_rank_avg": 0.11224344372749329, "step": 1260, "valid_targets_mean": 10806.7, "valid_targets_min": 2834 }, { "epoch": 3.83502024291498, "grad_norm": 0.2511629707679848, "learning_rate": 6.305973847775406e-06, "loss": 0.3611, "loss_nan_ranks": 0, "loss_rank_avg": 0.11038510501384735, "step": 1265, "valid_targets_mean": 9412.5, "valid_targets_min": 1636 }, { "epoch": 3.850202429149798, "grad_norm": 0.19524260594282525, "learning_rate": 6.152556458787546e-06, "loss": 0.3691, "loss_nan_ranks": 0, "loss_rank_avg": 0.13783195614814758, "step": 1270, "valid_targets_mean": 11960.5, "valid_targets_min": 3625 }, { "epoch": 3.8653846153846154, "grad_norm": 0.20830638946837632, "learning_rate": 6.000688430017048e-06, "loss": 0.3656, "loss_nan_ranks": 0, "loss_rank_avg": 0.1059221401810646, "step": 1275, "valid_targets_mean": 8446.1, "valid_targets_min": 2648 }, { "epoch": 3.880566801619433, "grad_norm": 0.18358339557954717, "learning_rate": 5.850386753645998e-06, "loss": 0.367, "loss_nan_ranks": 0, "loss_rank_avg": 0.12444412708282471, "step": 1280, "valid_targets_mean": 10544.0, "valid_targets_min": 2552 }, { "epoch": 3.895748987854251, "grad_norm": 0.1913134429816862, "learning_rate": 5.701668246600731e-06, "loss": 0.365, "loss_nan_ranks": 0, "loss_rank_avg": 0.11741238832473755, "step": 1285, "valid_targets_mean": 9534.8, "valid_targets_min": 322 }, { "epoch": 3.910931174089069, "grad_norm": 0.19737214526506686, "learning_rate": 5.554549548670227e-06, "loss": 0.3644, "loss_nan_ranks": 0, "loss_rank_avg": 0.12340462952852249, "step": 1290, "valid_targets_mean": 9826.8, "valid_targets_min": 2599 }, { "epoch": 3.9261133603238867, "grad_norm": 0.19322680874339637, "learning_rate": 5.409047120644307e-06, "loss": 0.3686, "loss_nan_ranks": 0, "loss_rank_avg": 0.13688233494758606, "step": 1295, "valid_targets_mean": 11182.3, "valid_targets_min": 1557 }, { "epoch": 3.9412955465587043, "grad_norm": 0.19837836767468806, "learning_rate": 5.265177242471899e-06, "loss": 0.3722, "loss_nan_ranks": 0, "loss_rank_avg": 0.12162747979164124, "step": 1300, "valid_targets_mean": 9966.4, "valid_targets_min": 2376 }, { "epoch": 3.9564777327935223, "grad_norm": 0.1980287432099399, "learning_rate": 5.122956011439486e-06, "loss": 0.3574, "loss_nan_ranks": 0, "loss_rank_avg": 0.12239546328783035, "step": 1305, "valid_targets_mean": 10853.2, "valid_targets_min": 4115 }, { "epoch": 3.97165991902834, "grad_norm": 0.7212715678474377, "learning_rate": 4.982399340370017e-06, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.1188947781920433, "step": 1310, "valid_targets_mean": 10789.0, "valid_targets_min": 1678 }, { "epoch": 3.986842105263158, "grad_norm": 0.1958031682699217, "learning_rate": 4.843522955842464e-06, "loss": 0.3719, "loss_nan_ranks": 0, "loss_rank_avg": 0.14273865520954132, "step": 1315, "valid_targets_mean": 10936.7, "valid_targets_min": 1649 }, { "epoch": 4.0, "grad_norm": 0.31774916038719164, "learning_rate": 4.706342396432213e-06, "loss": 0.3496, "loss_nan_ranks": 0, "loss_rank_avg": 0.29835373163223267, "step": 1320, "valid_targets_mean": 10492.2, "valid_targets_min": 2847 }, { "epoch": 4.015182186234818, "grad_norm": 0.1843924113718929, "learning_rate": 4.570873010972477e-06, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.10512104630470276, "step": 1325, "valid_targets_mean": 10884.8, "valid_targets_min": 2671 }, { "epoch": 4.030364372469635, "grad_norm": 0.18018854644817908, "learning_rate": 4.43712995683695e-06, "loss": 0.3681, "loss_nan_ranks": 0, "loss_rank_avg": 0.09973619133234024, "step": 1330, "valid_targets_mean": 8272.1, "valid_targets_min": 2067 }, { "epoch": 4.045546558704453, "grad_norm": 0.17571816112831184, "learning_rate": 4.305128198243888e-06, "loss": 0.3689, "loss_nan_ranks": 0, "loss_rank_avg": 0.12737268209457397, "step": 1335, "valid_targets_mean": 11352.1, "valid_targets_min": 1795 }, { "epoch": 4.060728744939271, "grad_norm": 0.18330154031717863, "learning_rate": 4.174882504581794e-06, "loss": 0.3625, "loss_nan_ranks": 0, "loss_rank_avg": 0.10680342465639114, "step": 1340, "valid_targets_mean": 8873.7, "valid_targets_min": 2326 }, { "epoch": 4.075910931174089, "grad_norm": 0.18429043131384004, "learning_rate": 4.046407448756895e-06, "loss": 0.3685, "loss_nan_ranks": 0, "loss_rank_avg": 0.11359091103076935, "step": 1345, "valid_targets_mean": 9797.7, "valid_targets_min": 1810 }, { "epoch": 4.0910931174089065, "grad_norm": 0.18897272974624404, "learning_rate": 3.91971740556262e-06, "loss": 0.3668, "loss_nan_ranks": 0, "loss_rank_avg": 0.10602220892906189, "step": 1350, "valid_targets_mean": 9621.8, "valid_targets_min": 2154 }, { "epoch": 4.1062753036437245, "grad_norm": 0.1928917574346044, "learning_rate": 3.7948265500712313e-06, "loss": 0.3624, "loss_nan_ranks": 0, "loss_rank_avg": 0.12940697371959686, "step": 1355, "valid_targets_mean": 10877.2, "valid_targets_min": 2615 }, { "epoch": 4.1214574898785425, "grad_norm": 0.18005268626554144, "learning_rate": 3.6717488560478096e-06, "loss": 0.3664, "loss_nan_ranks": 0, "loss_rank_avg": 0.12552385032176971, "step": 1360, "valid_targets_mean": 10077.4, "valid_targets_min": 2879 }, { "epoch": 4.136639676113361, "grad_norm": 0.1788632112469236, "learning_rate": 3.5504980943867538e-06, "loss": 0.3746, "loss_nan_ranks": 0, "loss_rank_avg": 0.09735994040966034, "step": 1365, "valid_targets_mean": 9807.6, "valid_targets_min": 684 }, { "epoch": 4.151821862348178, "grad_norm": 0.21241167381424772, "learning_rate": 3.4310878315710074e-06, "loss": 0.3689, "loss_nan_ranks": 0, "loss_rank_avg": 0.11332625150680542, "step": 1370, "valid_targets_mean": 9672.8, "valid_targets_min": 2404 }, { "epoch": 4.167004048582996, "grad_norm": 0.19299307837990345, "learning_rate": 3.3135314281540954e-06, "loss": 0.3724, "loss_nan_ranks": 0, "loss_rank_avg": 0.13851384818553925, "step": 1375, "valid_targets_mean": 11021.4, "valid_targets_min": 2349 }, { "epoch": 4.182186234817814, "grad_norm": 0.17435774249387354, "learning_rate": 3.1978420372652776e-06, "loss": 0.3653, "loss_nan_ranks": 0, "loss_rank_avg": 0.1209459900856018, "step": 1380, "valid_targets_mean": 11227.0, "valid_targets_min": 1843 }, { "epoch": 4.197368421052632, "grad_norm": 0.17793401384716034, "learning_rate": 3.084032603137852e-06, "loss": 0.3728, "loss_nan_ranks": 0, "loss_rank_avg": 0.12819677591323853, "step": 1385, "valid_targets_mean": 10938.0, "valid_targets_min": 2115 }, { "epoch": 4.212550607287449, "grad_norm": 0.1847368183158193, "learning_rate": 2.9721158596608622e-06, "loss": 0.3681, "loss_nan_ranks": 0, "loss_rank_avg": 0.12626275420188904, "step": 1390, "valid_targets_mean": 10809.4, "valid_targets_min": 1640 }, { "epoch": 4.227732793522267, "grad_norm": 0.18692877804023675, "learning_rate": 2.8621043289543314e-06, "loss": 0.374, "loss_nan_ranks": 0, "loss_rank_avg": 0.12628719210624695, "step": 1395, "valid_targets_mean": 11386.6, "valid_targets_min": 818 }, { "epoch": 4.242914979757085, "grad_norm": 0.18025836952356916, "learning_rate": 2.754010319968181e-06, "loss": 0.3722, "loss_nan_ranks": 0, "loss_rank_avg": 0.12013980001211166, "step": 1400, "valid_targets_mean": 10630.8, "valid_targets_min": 2219 }, { "epoch": 4.258097165991903, "grad_norm": 0.1877232221667285, "learning_rate": 2.647845927105015e-06, "loss": 0.3717, "loss_nan_ranks": 0, "loss_rank_avg": 0.1244920939207077, "step": 1405, "valid_targets_mean": 9607.3, "valid_targets_min": 1754 }, { "epoch": 4.27327935222672, "grad_norm": 0.18249632818477718, "learning_rate": 2.543623028866915e-06, "loss": 0.361, "loss_nan_ranks": 0, "loss_rank_avg": 0.13994036614894867, "step": 1410, "valid_targets_mean": 11417.1, "valid_targets_min": 2408 }, { "epoch": 4.288461538461538, "grad_norm": 0.18120025432339984, "learning_rate": 2.4413532865263533e-06, "loss": 0.3751, "loss_nan_ranks": 0, "loss_rank_avg": 0.11695703864097595, "step": 1415, "valid_targets_mean": 9500.3, "valid_targets_min": 1626 }, { "epoch": 4.303643724696356, "grad_norm": 0.18038827988155628, "learning_rate": 2.3410481428214602e-06, "loss": 0.3653, "loss_nan_ranks": 0, "loss_rank_avg": 0.1068282350897789, "step": 1420, "valid_targets_mean": 9340.8, "valid_targets_min": 1542 }, { "epoch": 4.318825910931174, "grad_norm": 0.19901656665636938, "learning_rate": 2.242718820675718e-06, "loss": 0.3696, "loss_nan_ranks": 0, "loss_rank_avg": 0.11531911790370941, "step": 1425, "valid_targets_mean": 10091.9, "valid_targets_min": 1991 }, { "epoch": 4.334008097165992, "grad_norm": 0.17656725900990108, "learning_rate": 2.1463763219422495e-06, "loss": 0.3654, "loss_nan_ranks": 0, "loss_rank_avg": 0.10796954482793808, "step": 1430, "valid_targets_mean": 9848.0, "valid_targets_min": 2599 }, { "epoch": 4.34919028340081, "grad_norm": 0.17833357555078458, "learning_rate": 2.0520314261728357e-06, "loss": 0.3643, "loss_nan_ranks": 0, "loss_rank_avg": 0.10144203156232834, "step": 1435, "valid_targets_mean": 8776.6, "valid_targets_min": 2066 }, { "epoch": 4.364372469635628, "grad_norm": 0.18607221555908898, "learning_rate": 1.9596946894118306e-06, "loss": 0.3585, "loss_nan_ranks": 0, "loss_rank_avg": 0.11383245885372162, "step": 1440, "valid_targets_mean": 9263.6, "valid_targets_min": 1440 }, { "epoch": 4.379554655870446, "grad_norm": 0.18577021193639703, "learning_rate": 1.8693764430150696e-06, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.13473168015480042, "step": 1445, "valid_targets_mean": 11097.1, "valid_targets_min": 2380 }, { "epoch": 4.394736842105263, "grad_norm": 0.1709129995577021, "learning_rate": 1.7810867924938978e-06, "loss": 0.3661, "loss_nan_ranks": 0, "loss_rank_avg": 0.12228719145059586, "step": 1450, "valid_targets_mean": 9294.8, "valid_targets_min": 477 }, { "epoch": 4.409919028340081, "grad_norm": 0.1810787994190016, "learning_rate": 1.6948356163845048e-06, "loss": 0.3677, "loss_nan_ranks": 0, "loss_rank_avg": 0.14793120324611664, "step": 1455, "valid_targets_mean": 11832.1, "valid_targets_min": 2338 }, { "epoch": 4.425101214574899, "grad_norm": 0.18892851687010215, "learning_rate": 1.610632565142627e-06, "loss": 0.3691, "loss_nan_ranks": 0, "loss_rank_avg": 0.12090542912483215, "step": 1460, "valid_targets_mean": 10259.0, "valid_targets_min": 3516 }, { "epoch": 4.440283400809717, "grad_norm": 0.17894691933426599, "learning_rate": 1.5284870600637813e-06, "loss": 0.3648, "loss_nan_ranks": 0, "loss_rank_avg": 0.11216691136360168, "step": 1465, "valid_targets_mean": 9150.0, "valid_targets_min": 2502 }, { "epoch": 4.455465587044534, "grad_norm": 0.18757418087454025, "learning_rate": 1.4484082922291376e-06, "loss": 0.3624, "loss_nan_ranks": 0, "loss_rank_avg": 0.1409149169921875, "step": 1470, "valid_targets_mean": 12013.4, "valid_targets_min": 1381 }, { "epoch": 4.470647773279352, "grad_norm": 0.1954402311039142, "learning_rate": 1.3704052214771513e-06, "loss": 0.3682, "loss_nan_ranks": 0, "loss_rank_avg": 0.12278928607702255, "step": 1475, "valid_targets_mean": 9280.6, "valid_targets_min": 2312 }, { "epoch": 4.48582995951417, "grad_norm": 0.1727940865183207, "learning_rate": 1.2944865754010682e-06, "loss": 0.3662, "loss_nan_ranks": 0, "loss_rank_avg": 0.14360244572162628, "step": 1480, "valid_targets_mean": 12639.5, "valid_targets_min": 1846 }, { "epoch": 4.501012145748988, "grad_norm": 0.19104104864167198, "learning_rate": 1.2206608483724013e-06, "loss": 0.3665, "loss_nan_ranks": 0, "loss_rank_avg": 0.11449052393436432, "step": 1485, "valid_targets_mean": 9093.4, "valid_targets_min": 1659 }, { "epoch": 4.516194331983805, "grad_norm": 0.1867664116957937, "learning_rate": 1.1489363005905241e-06, "loss": 0.3598, "loss_nan_ranks": 0, "loss_rank_avg": 0.12641845643520355, "step": 1490, "valid_targets_mean": 9890.8, "valid_targets_min": 2715 }, { "epoch": 4.531376518218623, "grad_norm": 0.18476830111294615, "learning_rate": 1.0793209571584562e-06, "loss": 0.3649, "loss_nan_ranks": 0, "loss_rank_avg": 0.12984566390514374, "step": 1495, "valid_targets_mean": 11103.8, "valid_targets_min": 1663 }, { "epoch": 4.5465587044534415, "grad_norm": 0.18066626607720007, "learning_rate": 1.0118226071849424e-06, "loss": 0.3616, "loss_nan_ranks": 0, "loss_rank_avg": 0.10867150127887726, "step": 1500, "valid_targets_mean": 9276.9, "valid_targets_min": 1694 }, { "epoch": 4.5617408906882595, "grad_norm": 0.16173971571301107, "learning_rate": 9.464488029129581e-07, "loss": 0.3729, "loss_nan_ranks": 0, "loss_rank_avg": 0.10973206907510757, "step": 1505, "valid_targets_mean": 10314.8, "valid_targets_min": 2799 }, { "epoch": 4.576923076923077, "grad_norm": 0.17897671753750052, "learning_rate": 8.832068588746945e-07, "loss": 0.3605, "loss_nan_ranks": 0, "loss_rank_avg": 0.11189191043376923, "step": 1510, "valid_targets_mean": 10190.8, "valid_targets_min": 1649 }, { "epoch": 4.592105263157895, "grad_norm": 0.18012360756114543, "learning_rate": 8.221038510731704e-07, "loss": 0.3714, "loss_nan_ranks": 0, "loss_rank_avg": 0.10856941342353821, "step": 1515, "valid_targets_mean": 8771.6, "valid_targets_min": 1901 }, { "epoch": 4.607287449392713, "grad_norm": 0.1547261000548381, "learning_rate": 7.631466161904821e-07, "loss": 0.3645, "loss_nan_ranks": 0, "loss_rank_avg": 0.12571071088314056, "step": 1520, "valid_targets_mean": 12577.9, "valid_targets_min": 1948 }, { "epoch": 4.62246963562753, "grad_norm": 0.17262392891010045, "learning_rate": 7.063417508228876e-07, "loss": 0.3654, "loss_nan_ranks": 0, "loss_rank_avg": 0.11583705246448517, "step": 1525, "valid_targets_mean": 9228.4, "valid_targets_min": 1600 }, { "epoch": 4.637651821862348, "grad_norm": 0.17161338010550106, "learning_rate": 6.516956107427241e-07, "loss": 0.3691, "loss_nan_ranks": 0, "loss_rank_avg": 0.12043744325637817, "step": 1530, "valid_targets_mean": 10210.4, "valid_targets_min": 1719 }, { "epoch": 4.652834008097166, "grad_norm": 0.18234822744394527, "learning_rate": 5.992143101872638e-07, "loss": 0.3648, "loss_nan_ranks": 0, "loss_rank_avg": 0.09593389928340912, "step": 1535, "valid_targets_mean": 7271.0, "valid_targets_min": 1735 }, { "epoch": 4.668016194331984, "grad_norm": 0.2162606197737493, "learning_rate": 5.489037211746184e-07, "loss": 0.3678, "loss_nan_ranks": 0, "loss_rank_avg": 0.1457173228263855, "step": 1540, "valid_targets_mean": 11809.0, "valid_targets_min": 4250 }, { "epoch": 4.683198380566802, "grad_norm": 0.16995590048199882, "learning_rate": 5.007694728467228e-07, "loss": 0.3676, "loss_nan_ranks": 0, "loss_rank_avg": 0.1253059357404709, "step": 1545, "valid_targets_mean": 10858.9, "valid_targets_min": 1230 }, { "epoch": 4.698380566801619, "grad_norm": 0.17734059670619579, "learning_rate": 4.548169508395028e-07, "loss": 0.3639, "loss_nan_ranks": 0, "loss_rank_avg": 0.12860271334648132, "step": 1550, "valid_targets_mean": 11737.7, "valid_targets_min": 3485 }, { "epoch": 4.713562753036437, "grad_norm": 0.18050233010338437, "learning_rate": 4.1105129668029595e-07, "loss": 0.3703, "loss_nan_ranks": 0, "loss_rank_avg": 0.1319669783115387, "step": 1555, "valid_targets_mean": 10501.0, "valid_targets_min": 1372 }, { "epoch": 4.728744939271255, "grad_norm": 0.18215765373981796, "learning_rate": 3.6947740721257066e-07, "loss": 0.3644, "loss_nan_ranks": 0, "loss_rank_avg": 0.12586212158203125, "step": 1560, "valid_targets_mean": 11023.1, "valid_targets_min": 2636 }, { "epoch": 4.743927125506072, "grad_norm": 0.18748325536702345, "learning_rate": 3.3009993404802486e-07, "loss": 0.3654, "loss_nan_ranks": 0, "loss_rank_avg": 0.10447224229574203, "step": 1565, "valid_targets_mean": 9109.5, "valid_targets_min": 1941 }, { "epoch": 4.7591093117408905, "grad_norm": 0.18050363980638168, "learning_rate": 2.929232830461404e-07, "loss": 0.3681, "loss_nan_ranks": 0, "loss_rank_avg": 0.14107687771320343, "step": 1570, "valid_targets_mean": 11744.7, "valid_targets_min": 2422 }, { "epoch": 4.7742914979757085, "grad_norm": 0.2023921728351548, "learning_rate": 2.579516138212101e-07, "loss": 0.3736, "loss_nan_ranks": 0, "loss_rank_avg": 0.12674914300441742, "step": 1575, "valid_targets_mean": 11620.3, "valid_targets_min": 1761 }, { "epoch": 4.7894736842105265, "grad_norm": 0.1622695741435569, "learning_rate": 2.2518883927692857e-07, "loss": 0.3763, "loss_nan_ranks": 0, "loss_rank_avg": 0.12355431914329529, "step": 1580, "valid_targets_mean": 11819.7, "valid_targets_min": 2489 }, { "epoch": 4.804655870445345, "grad_norm": 0.20101474821198356, "learning_rate": 1.9463862516859277e-07, "loss": 0.3674, "loss_nan_ranks": 0, "loss_rank_avg": 0.1243324801325798, "step": 1585, "valid_targets_mean": 10348.0, "valid_targets_min": 1587 }, { "epoch": 4.819838056680162, "grad_norm": 0.1675194942295015, "learning_rate": 1.6630438969294615e-07, "loss": 0.3729, "loss_nan_ranks": 0, "loss_rank_avg": 0.13866698741912842, "step": 1590, "valid_targets_mean": 12737.4, "valid_targets_min": 3208 }, { "epoch": 4.83502024291498, "grad_norm": 0.17086348937435433, "learning_rate": 1.4018930310571553e-07, "loss": 0.3691, "loss_nan_ranks": 0, "loss_rank_avg": 0.1291865110397339, "step": 1595, "valid_targets_mean": 11653.0, "valid_targets_min": 2275 }, { "epoch": 4.850202429149798, "grad_norm": 0.17068101788111856, "learning_rate": 1.1629628736690824e-07, "loss": 0.3576, "loss_nan_ranks": 0, "loss_rank_avg": 0.1036512702703476, "step": 1600, "valid_targets_mean": 9271.8, "valid_targets_min": 1083 }, { "epoch": 4.865384615384615, "grad_norm": 0.17230658064627632, "learning_rate": 9.46280158138757e-08, "loss": 0.3727, "loss_nan_ranks": 0, "loss_rank_avg": 0.11794915050268173, "step": 1605, "valid_targets_mean": 10087.9, "valid_targets_min": 1841 }, { "epoch": 4.880566801619433, "grad_norm": 0.17026402977551577, "learning_rate": 7.518691286220625e-08, "loss": 0.3684, "loss_nan_ranks": 0, "loss_rank_avg": 0.12925563752651215, "step": 1610, "valid_targets_mean": 11449.4, "valid_targets_min": 2402 }, { "epoch": 4.895748987854251, "grad_norm": 0.16996512113902293, "learning_rate": 5.797515373445084e-08, "loss": 0.3747, "loss_nan_ranks": 0, "loss_rank_avg": 0.12282838672399521, "step": 1615, "valid_targets_mean": 11293.9, "valid_targets_min": 2486 }, { "epoch": 4.910931174089069, "grad_norm": 0.1827513479377631, "learning_rate": 4.299466421675113e-08, "loss": 0.3668, "loss_nan_ranks": 0, "loss_rank_avg": 0.12446284294128418, "step": 1620, "valid_targets_mean": 10751.7, "valid_targets_min": 2692 }, { "epoch": 4.926113360323887, "grad_norm": 0.18688189150800386, "learning_rate": 3.0247120443362976e-08, "loss": 0.3719, "loss_nan_ranks": 0, "loss_rank_avg": 0.10103616118431091, "step": 1625, "valid_targets_mean": 9804.4, "valid_targets_min": 1363 }, { "epoch": 4.941295546558704, "grad_norm": 0.17387094857705057, "learning_rate": 1.973394870912193e-08, "loss": 0.3708, "loss_nan_ranks": 0, "loss_rank_avg": 0.12511810660362244, "step": 1630, "valid_targets_mean": 10579.7, "valid_targets_min": 1087 }, { "epoch": 4.956477732793522, "grad_norm": 0.18424632034257762, "learning_rate": 1.145632530985541e-08, "loss": 0.3659, "loss_nan_ranks": 0, "loss_rank_avg": 0.13365338742733002, "step": 1635, "valid_targets_mean": 10992.5, "valid_targets_min": 1896 }, { "epoch": 4.97165991902834, "grad_norm": 0.16930897925302424, "learning_rate": 5.415176410765721e-09, "loss": 0.3652, "loss_nan_ranks": 0, "loss_rank_avg": 0.14591088891029358, "step": 1640, "valid_targets_mean": 11615.2, "valid_targets_min": 1369 }, { "epoch": 4.9868421052631575, "grad_norm": 0.1867459162363072, "learning_rate": 1.611177942812958e-09, "loss": 0.365, "loss_nan_ranks": 0, "loss_rank_avg": 0.148482084274292, "step": 1645, "valid_targets_mean": 11537.9, "valid_targets_min": 850 }, { "epoch": 5.0, "grad_norm": 0.2764448693570558, "learning_rate": 4.475552707772224e-11, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.38277196884155273, "step": 1650, "valid_targets_mean": 11949.7, "valid_targets_min": 2234 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.38277196884155273, "step": 1650, "total_flos": 6.453442034819662e+18, "train_loss": 0.1001221828027205, "train_runtime": 13187.2715, "train_samples_per_second": 11.981, "train_steps_per_second": 0.125, "valid_targets_mean": 11949.7, "valid_targets_min": 2234 } ], "logging_steps": 5, "max_steps": 1650, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.453442034819662e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }