{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 2520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015873015873015872, "grad_norm": 1.8274213812789266, "learning_rate": 3.0769230769230774e-05, "loss": 0.7577, "loss_nan_ranks": 0, "loss_rank_avg": 0.6461434364318848, "step": 5, "valid_targets_mean": 3240.4, "valid_targets_min": 567 }, { "epoch": 0.031746031746031744, "grad_norm": 0.8909279252952478, "learning_rate": 6.923076923076924e-05, "loss": 0.5599, "loss_nan_ranks": 0, "loss_rank_avg": 0.5154889822006226, "step": 10, "valid_targets_mean": 2951.5, "valid_targets_min": 728 }, { "epoch": 0.047619047619047616, "grad_norm": 0.6539578705231934, "learning_rate": 9.99999607417416e-05, "loss": 0.4714, "loss_nan_ranks": 0, "loss_rank_avg": 0.44698208570480347, "step": 15, "valid_targets_mean": 3411.7, "valid_targets_min": 858 }, { "epoch": 0.06349206349206349, "grad_norm": 0.5869088454096476, "learning_rate": 9.999858670917045e-05, "loss": 0.4533, "loss_nan_ranks": 0, "loss_rank_avg": 0.4649924039840698, "step": 20, "valid_targets_mean": 3109.9, "valid_targets_min": 615 }, { "epoch": 0.07936507936507936, "grad_norm": 0.509457679261325, "learning_rate": 9.999524982532699e-05, "loss": 0.4412, "loss_nan_ranks": 0, "loss_rank_avg": 0.41344499588012695, "step": 25, "valid_targets_mean": 3829.7, "valid_targets_min": 1140 }, { "epoch": 0.09523809523809523, "grad_norm": 0.7062119296057406, "learning_rate": 9.998995022121104e-05, "loss": 0.4127, "loss_nan_ranks": 0, "loss_rank_avg": 0.4501193165779114, "step": 30, "valid_targets_mean": 2775.0, "valid_targets_min": 1050 }, { "epoch": 0.1111111111111111, "grad_norm": 0.499780548134097, "learning_rate": 9.998268810487518e-05, "loss": 0.4262, "loss_nan_ranks": 0, "loss_rank_avg": 0.42958372831344604, "step": 35, "valid_targets_mean": 3573.8, "valid_targets_min": 731 }, { "epoch": 0.12698412698412698, "grad_norm": 0.5092649924433299, "learning_rate": 9.997346376141656e-05, "loss": 0.4256, "loss_nan_ranks": 0, "loss_rank_avg": 0.46939408779144287, "step": 40, "valid_targets_mean": 3167.1, "valid_targets_min": 692 }, { "epoch": 0.14285714285714285, "grad_norm": 0.5017667346997713, "learning_rate": 9.99622775529657e-05, "loss": 0.4034, "loss_nan_ranks": 0, "loss_rank_avg": 0.3935692012310028, "step": 45, "valid_targets_mean": 3086.5, "valid_targets_min": 656 }, { "epoch": 0.15873015873015872, "grad_norm": 0.4948092196535038, "learning_rate": 9.994912991867228e-05, "loss": 0.3939, "loss_nan_ranks": 0, "loss_rank_avg": 0.3957802653312683, "step": 50, "valid_targets_mean": 3103.2, "valid_targets_min": 586 }, { "epoch": 0.1746031746031746, "grad_norm": 0.4506415984779397, "learning_rate": 9.99340213746879e-05, "loss": 0.3878, "loss_nan_ranks": 0, "loss_rank_avg": 0.3558445870876312, "step": 55, "valid_targets_mean": 3752.7, "valid_targets_min": 748 }, { "epoch": 0.19047619047619047, "grad_norm": 0.47637495247663253, "learning_rate": 9.991695251414583e-05, "loss": 0.3804, "loss_nan_ranks": 0, "loss_rank_avg": 0.3717506229877472, "step": 60, "valid_targets_mean": 2804.0, "valid_targets_min": 838 }, { "epoch": 0.20634920634920634, "grad_norm": 0.4697442701659542, "learning_rate": 9.989792400713771e-05, "loss": 0.3763, "loss_nan_ranks": 0, "loss_rank_avg": 0.3844413161277771, "step": 65, "valid_targets_mean": 3328.9, "valid_targets_min": 947 }, { "epoch": 0.2222222222222222, "grad_norm": 0.4029985555816782, "learning_rate": 9.987693660068722e-05, "loss": 0.3624, "loss_nan_ranks": 0, "loss_rank_avg": 0.361744225025177, "step": 70, "valid_targets_mean": 3788.7, "valid_targets_min": 1518 }, { "epoch": 0.23809523809523808, "grad_norm": 0.6171443115375874, "learning_rate": 9.985399111872081e-05, "loss": 0.3893, "loss_nan_ranks": 0, "loss_rank_avg": 0.40640202164649963, "step": 75, "valid_targets_mean": 3092.8, "valid_targets_min": 1045 }, { "epoch": 0.25396825396825395, "grad_norm": 0.4749873069576808, "learning_rate": 9.982908846203529e-05, "loss": 0.3697, "loss_nan_ranks": 0, "loss_rank_avg": 0.39057451486587524, "step": 80, "valid_targets_mean": 3140.4, "valid_targets_min": 723 }, { "epoch": 0.2698412698412698, "grad_norm": 0.48658927757571463, "learning_rate": 9.980222960826254e-05, "loss": 0.3755, "loss_nan_ranks": 0, "loss_rank_avg": 0.3794889450073242, "step": 85, "valid_targets_mean": 3091.6, "valid_targets_min": 849 }, { "epoch": 0.2857142857142857, "grad_norm": 0.5038556894604157, "learning_rate": 9.977341561183109e-05, "loss": 0.3701, "loss_nan_ranks": 0, "loss_rank_avg": 0.3605089783668518, "step": 90, "valid_targets_mean": 2912.1, "valid_targets_min": 741 }, { "epoch": 0.30158730158730157, "grad_norm": 0.4102412169121251, "learning_rate": 9.974264760392466e-05, "loss": 0.3525, "loss_nan_ranks": 0, "loss_rank_avg": 0.3408468961715698, "step": 95, "valid_targets_mean": 3674.1, "valid_targets_min": 694 }, { "epoch": 0.31746031746031744, "grad_norm": 0.5215429312356035, "learning_rate": 9.97099267924379e-05, "loss": 0.3809, "loss_nan_ranks": 0, "loss_rank_avg": 0.366327166557312, "step": 100, "valid_targets_mean": 3165.3, "valid_targets_min": 731 }, { "epoch": 0.3333333333333333, "grad_norm": 0.506147006488813, "learning_rate": 9.967525446192882e-05, "loss": 0.3725, "loss_nan_ranks": 0, "loss_rank_avg": 0.4026761054992676, "step": 105, "valid_targets_mean": 3033.7, "valid_targets_min": 786 }, { "epoch": 0.3492063492063492, "grad_norm": 0.4449016182522291, "learning_rate": 9.963863197356849e-05, "loss": 0.3646, "loss_nan_ranks": 0, "loss_rank_avg": 0.3816118538379669, "step": 110, "valid_targets_mean": 2767.7, "valid_targets_min": 907 }, { "epoch": 0.36507936507936506, "grad_norm": 0.4493995369276615, "learning_rate": 9.960006076508747e-05, "loss": 0.3507, "loss_nan_ranks": 0, "loss_rank_avg": 0.3474277853965759, "step": 115, "valid_targets_mean": 4193.3, "valid_targets_min": 1138 }, { "epoch": 0.38095238095238093, "grad_norm": 0.3995938765741062, "learning_rate": 9.95595423507195e-05, "loss": 0.3607, "loss_nan_ranks": 0, "loss_rank_avg": 0.34774190187454224, "step": 120, "valid_targets_mean": 3404.6, "valid_targets_min": 563 }, { "epoch": 0.3968253968253968, "grad_norm": 0.4078617144812389, "learning_rate": 9.951707832114193e-05, "loss": 0.3543, "loss_nan_ranks": 0, "loss_rank_avg": 0.3508348762989044, "step": 125, "valid_targets_mean": 3827.8, "valid_targets_min": 971 }, { "epoch": 0.4126984126984127, "grad_norm": 0.40638750589791245, "learning_rate": 9.947267034341341e-05, "loss": 0.3533, "loss_nan_ranks": 0, "loss_rank_avg": 0.35496804118156433, "step": 130, "valid_targets_mean": 3386.2, "valid_targets_min": 641 }, { "epoch": 0.42857142857142855, "grad_norm": 0.4234767970543296, "learning_rate": 9.942632016090832e-05, "loss": 0.3476, "loss_nan_ranks": 0, "loss_rank_avg": 0.33436018228530884, "step": 135, "valid_targets_mean": 3398.4, "valid_targets_min": 759 }, { "epoch": 0.4444444444444444, "grad_norm": 0.41265753362471513, "learning_rate": 9.937802959324838e-05, "loss": 0.3583, "loss_nan_ranks": 0, "loss_rank_avg": 0.3562977910041809, "step": 140, "valid_targets_mean": 3618.7, "valid_targets_min": 628 }, { "epoch": 0.4603174603174603, "grad_norm": 0.4757133092662119, "learning_rate": 9.932780053623121e-05, "loss": 0.3466, "loss_nan_ranks": 0, "loss_rank_avg": 0.37288549542427063, "step": 145, "valid_targets_mean": 2592.0, "valid_targets_min": 781 }, { "epoch": 0.47619047619047616, "grad_norm": 0.4239429827849855, "learning_rate": 9.927563496175593e-05, "loss": 0.3467, "loss_nan_ranks": 0, "loss_rank_avg": 0.35352587699890137, "step": 150, "valid_targets_mean": 3382.8, "valid_targets_min": 878 }, { "epoch": 0.49206349206349204, "grad_norm": 0.383979032896486, "learning_rate": 9.922153491774572e-05, "loss": 0.3495, "loss_nan_ranks": 0, "loss_rank_avg": 0.3456829786300659, "step": 155, "valid_targets_mean": 3461.6, "valid_targets_min": 908 }, { "epoch": 0.5079365079365079, "grad_norm": 0.424759371721176, "learning_rate": 9.91655025280674e-05, "loss": 0.3656, "loss_nan_ranks": 0, "loss_rank_avg": 0.3748525381088257, "step": 160, "valid_targets_mean": 3654.6, "valid_targets_min": 849 }, { "epoch": 0.5238095238095238, "grad_norm": 0.43022997649230643, "learning_rate": 9.910753999244811e-05, "loss": 0.3365, "loss_nan_ranks": 0, "loss_rank_avg": 0.3463315963745117, "step": 165, "valid_targets_mean": 3035.5, "valid_targets_min": 881 }, { "epoch": 0.5396825396825397, "grad_norm": 0.42157899649504554, "learning_rate": 9.904764958638889e-05, "loss": 0.3395, "loss_nan_ranks": 0, "loss_rank_avg": 0.38232117891311646, "step": 170, "valid_targets_mean": 2938.3, "valid_targets_min": 777 }, { "epoch": 0.5555555555555556, "grad_norm": 0.4128966508525992, "learning_rate": 9.898583366107538e-05, "loss": 0.343, "loss_nan_ranks": 0, "loss_rank_avg": 0.34359288215637207, "step": 175, "valid_targets_mean": 3082.7, "valid_targets_min": 762 }, { "epoch": 0.5714285714285714, "grad_norm": 0.3945850409315049, "learning_rate": 9.892209464328556e-05, "loss": 0.3625, "loss_nan_ranks": 0, "loss_rank_avg": 0.3760848045349121, "step": 180, "valid_targets_mean": 3291.0, "valid_targets_min": 955 }, { "epoch": 0.5873015873015873, "grad_norm": 0.46514818271074837, "learning_rate": 9.885643503529439e-05, "loss": 0.3498, "loss_nan_ranks": 0, "loss_rank_avg": 0.3739466667175293, "step": 185, "valid_targets_mean": 2763.4, "valid_targets_min": 677 }, { "epoch": 0.6031746031746031, "grad_norm": 0.5471372921378007, "learning_rate": 9.878885741477563e-05, "loss": 0.346, "loss_nan_ranks": 0, "loss_rank_avg": 0.349778950214386, "step": 190, "valid_targets_mean": 3169.9, "valid_targets_min": 852 }, { "epoch": 0.6190476190476191, "grad_norm": 0.37640397987573115, "learning_rate": 9.871936443470063e-05, "loss": 0.3365, "loss_nan_ranks": 0, "loss_rank_avg": 0.3495105504989624, "step": 195, "valid_targets_mean": 3473.1, "valid_targets_min": 802 }, { "epoch": 0.6349206349206349, "grad_norm": 0.34529912636962173, "learning_rate": 9.864795882323421e-05, "loss": 0.3307, "loss_nan_ranks": 0, "loss_rank_avg": 0.32995861768722534, "step": 200, "valid_targets_mean": 3907.2, "valid_targets_min": 718 }, { "epoch": 0.6507936507936508, "grad_norm": 0.33903980298913694, "learning_rate": 9.857464338362748e-05, "loss": 0.3373, "loss_nan_ranks": 0, "loss_rank_avg": 0.3237103521823883, "step": 205, "valid_targets_mean": 3551.8, "valid_targets_min": 1138 }, { "epoch": 0.6666666666666666, "grad_norm": 0.3816336636716071, "learning_rate": 9.849942099410792e-05, "loss": 0.3379, "loss_nan_ranks": 0, "loss_rank_avg": 0.34175223112106323, "step": 210, "valid_targets_mean": 3279.8, "valid_targets_min": 1022 }, { "epoch": 0.6825396825396826, "grad_norm": 0.3691401701105125, "learning_rate": 9.842229460776622e-05, "loss": 0.3428, "loss_nan_ranks": 0, "loss_rank_avg": 0.3277595043182373, "step": 215, "valid_targets_mean": 3653.1, "valid_targets_min": 1357 }, { "epoch": 0.6984126984126984, "grad_norm": 0.3920809302509506, "learning_rate": 9.834326725244049e-05, "loss": 0.3406, "loss_nan_ranks": 0, "loss_rank_avg": 0.3460606038570404, "step": 220, "valid_targets_mean": 3346.0, "valid_targets_min": 293 }, { "epoch": 0.7142857142857143, "grad_norm": 0.386844574098974, "learning_rate": 9.826234203059731e-05, "loss": 0.3462, "loss_nan_ranks": 0, "loss_rank_avg": 0.34869250655174255, "step": 225, "valid_targets_mean": 3427.1, "valid_targets_min": 702 }, { "epoch": 0.7301587301587301, "grad_norm": 0.41899529934934654, "learning_rate": 9.817952211921e-05, "loss": 0.3449, "loss_nan_ranks": 0, "loss_rank_avg": 0.37060266733169556, "step": 230, "valid_targets_mean": 2814.3, "valid_targets_min": 791 }, { "epoch": 0.746031746031746, "grad_norm": 0.34433423051134976, "learning_rate": 9.809481076963383e-05, "loss": 0.3313, "loss_nan_ranks": 0, "loss_rank_avg": 0.34181442856788635, "step": 235, "valid_targets_mean": 4166.8, "valid_targets_min": 971 }, { "epoch": 0.7619047619047619, "grad_norm": 0.4031506293847615, "learning_rate": 9.800821130747837e-05, "loss": 0.3196, "loss_nan_ranks": 0, "loss_rank_avg": 0.32658693194389343, "step": 240, "valid_targets_mean": 2723.3, "valid_targets_min": 270 }, { "epoch": 0.7777777777777778, "grad_norm": 0.34126070155625965, "learning_rate": 9.791972713247704e-05, "loss": 0.3231, "loss_nan_ranks": 0, "loss_rank_avg": 0.3253839910030365, "step": 245, "valid_targets_mean": 3959.7, "valid_targets_min": 1158 }, { "epoch": 0.7936507936507936, "grad_norm": 0.4139092354029092, "learning_rate": 9.782936171835353e-05, "loss": 0.3393, "loss_nan_ranks": 0, "loss_rank_avg": 0.35836488008499146, "step": 250, "valid_targets_mean": 3191.8, "valid_targets_min": 755 }, { "epoch": 0.8095238095238095, "grad_norm": 0.3545532506414492, "learning_rate": 9.773711861268549e-05, "loss": 0.3352, "loss_nan_ranks": 0, "loss_rank_avg": 0.3009134531021118, "step": 255, "valid_targets_mean": 3466.7, "valid_targets_min": 997 }, { "epoch": 0.8253968253968254, "grad_norm": 0.36975136926244245, "learning_rate": 9.764300143676518e-05, "loss": 0.337, "loss_nan_ranks": 0, "loss_rank_avg": 0.3257961869239807, "step": 260, "valid_targets_mean": 3094.9, "valid_targets_min": 782 }, { "epoch": 0.8412698412698413, "grad_norm": 0.37844454589688126, "learning_rate": 9.754701388545745e-05, "loss": 0.3403, "loss_nan_ranks": 0, "loss_rank_avg": 0.3239666223526001, "step": 265, "valid_targets_mean": 3849.2, "valid_targets_min": 741 }, { "epoch": 0.8571428571428571, "grad_norm": 0.428420791431183, "learning_rate": 9.744915972705453e-05, "loss": 0.3308, "loss_nan_ranks": 0, "loss_rank_avg": 0.335654616355896, "step": 270, "valid_targets_mean": 2919.1, "valid_targets_min": 700 }, { "epoch": 0.873015873015873, "grad_norm": 0.34170393330211485, "learning_rate": 9.734944280312824e-05, "loss": 0.3361, "loss_nan_ranks": 0, "loss_rank_avg": 0.3174281120300293, "step": 275, "valid_targets_mean": 3749.4, "valid_targets_min": 808 }, { "epoch": 0.8888888888888888, "grad_norm": 0.38182638697449783, "learning_rate": 9.7247867028379e-05, "loss": 0.3278, "loss_nan_ranks": 0, "loss_rank_avg": 0.35490620136260986, "step": 280, "valid_targets_mean": 3069.8, "valid_targets_min": 781 }, { "epoch": 0.9047619047619048, "grad_norm": 0.37442096963482363, "learning_rate": 9.714443639048232e-05, "loss": 0.34, "loss_nan_ranks": 0, "loss_rank_avg": 0.32899394631385803, "step": 285, "valid_targets_mean": 3468.4, "valid_targets_min": 693 }, { "epoch": 0.9206349206349206, "grad_norm": 0.3582639519502781, "learning_rate": 9.703915494993215e-05, "loss": 0.321, "loss_nan_ranks": 0, "loss_rank_avg": 0.33591094613075256, "step": 290, "valid_targets_mean": 3191.9, "valid_targets_min": 567 }, { "epoch": 0.9365079365079365, "grad_norm": 0.34417265042249817, "learning_rate": 9.693202683988151e-05, "loss": 0.3087, "loss_nan_ranks": 0, "loss_rank_avg": 0.3005759119987488, "step": 295, "valid_targets_mean": 3653.5, "valid_targets_min": 648 }, { "epoch": 0.9523809523809523, "grad_norm": 0.369249163786665, "learning_rate": 9.682305626598023e-05, "loss": 0.3366, "loss_nan_ranks": 0, "loss_rank_avg": 0.34282851219177246, "step": 300, "valid_targets_mean": 3090.8, "valid_targets_min": 920 }, { "epoch": 0.9682539682539683, "grad_norm": 0.31346439307676455, "learning_rate": 9.671224750620981e-05, "loss": 0.3188, "loss_nan_ranks": 0, "loss_rank_avg": 0.29791080951690674, "step": 305, "valid_targets_mean": 3806.2, "valid_targets_min": 851 }, { "epoch": 0.9841269841269841, "grad_norm": 0.3823674576068241, "learning_rate": 9.659960491071554e-05, "loss": 0.3349, "loss_nan_ranks": 0, "loss_rank_avg": 0.33889368176460266, "step": 310, "valid_targets_mean": 3091.2, "valid_targets_min": 948 }, { "epoch": 1.0, "grad_norm": 0.346262826029887, "learning_rate": 9.64851329016356e-05, "loss": 0.3238, "loss_nan_ranks": 0, "loss_rank_avg": 0.3250298500061035, "step": 315, "valid_targets_mean": 3626.7, "valid_targets_min": 879 }, { "epoch": 1.0158730158730158, "grad_norm": 0.43787454332305814, "learning_rate": 9.636883597292762e-05, "loss": 0.2469, "loss_nan_ranks": 0, "loss_rank_avg": 0.23794296383857727, "step": 320, "valid_targets_mean": 3565.3, "valid_targets_min": 485 }, { "epoch": 1.0317460317460316, "grad_norm": 0.3338170186100263, "learning_rate": 9.625071869019215e-05, "loss": 0.249, "loss_nan_ranks": 0, "loss_rank_avg": 0.2461809515953064, "step": 325, "valid_targets_mean": 3799.6, "valid_targets_min": 879 }, { "epoch": 1.0476190476190477, "grad_norm": 0.35543378491397704, "learning_rate": 9.613078569049344e-05, "loss": 0.2497, "loss_nan_ranks": 0, "loss_rank_avg": 0.2544521689414978, "step": 330, "valid_targets_mean": 3516.8, "valid_targets_min": 853 }, { "epoch": 1.0634920634920635, "grad_norm": 0.37748650521262617, "learning_rate": 9.600904168217734e-05, "loss": 0.2431, "loss_nan_ranks": 0, "loss_rank_avg": 0.23706725239753723, "step": 335, "valid_targets_mean": 3417.8, "valid_targets_min": 703 }, { "epoch": 1.0793650793650793, "grad_norm": 0.3435381967499918, "learning_rate": 9.588549144468664e-05, "loss": 0.2519, "loss_nan_ranks": 0, "loss_rank_avg": 0.24922573566436768, "step": 340, "valid_targets_mean": 3267.8, "valid_targets_min": 886 }, { "epoch": 1.0952380952380953, "grad_norm": 0.34815667356425745, "learning_rate": 9.576013982837324e-05, "loss": 0.2448, "loss_nan_ranks": 0, "loss_rank_avg": 0.2301144003868103, "step": 345, "valid_targets_mean": 3309.3, "valid_targets_min": 823 }, { "epoch": 1.1111111111111112, "grad_norm": 0.3412007000641619, "learning_rate": 9.563299175430782e-05, "loss": 0.2413, "loss_nan_ranks": 0, "loss_rank_avg": 0.23151162266731262, "step": 350, "valid_targets_mean": 3243.3, "valid_targets_min": 356 }, { "epoch": 1.126984126984127, "grad_norm": 0.35215519695863917, "learning_rate": 9.550405221408664e-05, "loss": 0.2523, "loss_nan_ranks": 0, "loss_rank_avg": 0.2685403823852539, "step": 355, "valid_targets_mean": 3297.7, "valid_targets_min": 700 }, { "epoch": 1.1428571428571428, "grad_norm": 0.31526351914206535, "learning_rate": 9.537332626963561e-05, "loss": 0.2449, "loss_nan_ranks": 0, "loss_rank_avg": 0.22305209934711456, "step": 360, "valid_targets_mean": 4274.9, "valid_targets_min": 742 }, { "epoch": 1.1587301587301586, "grad_norm": 0.3450806364783924, "learning_rate": 9.524081905301152e-05, "loss": 0.2521, "loss_nan_ranks": 0, "loss_rank_avg": 0.24462619423866272, "step": 365, "valid_targets_mean": 3424.6, "valid_targets_min": 663 }, { "epoch": 1.1746031746031746, "grad_norm": 0.33405132130300835, "learning_rate": 9.510653576620056e-05, "loss": 0.2443, "loss_nan_ranks": 0, "loss_rank_avg": 0.23065048456192017, "step": 370, "valid_targets_mean": 3583.2, "valid_targets_min": 1016 }, { "epoch": 1.1904761904761905, "grad_norm": 0.3992743258279796, "learning_rate": 9.497048168091418e-05, "loss": 0.2541, "loss_nan_ranks": 0, "loss_rank_avg": 0.2545672059059143, "step": 375, "valid_targets_mean": 2822.1, "valid_targets_min": 567 }, { "epoch": 1.2063492063492063, "grad_norm": 0.3786390758502515, "learning_rate": 9.483266213838202e-05, "loss": 0.2542, "loss_nan_ranks": 0, "loss_rank_avg": 0.26987892389297485, "step": 380, "valid_targets_mean": 3785.4, "valid_targets_min": 1214 }, { "epoch": 1.2222222222222223, "grad_norm": 0.32546227871241973, "learning_rate": 9.469308254914231e-05, "loss": 0.2446, "loss_nan_ranks": 0, "loss_rank_avg": 0.23422034084796906, "step": 385, "valid_targets_mean": 3780.2, "valid_targets_min": 1152 }, { "epoch": 1.2380952380952381, "grad_norm": 0.3065912912687044, "learning_rate": 9.455174839282941e-05, "loss": 0.2455, "loss_nan_ranks": 0, "loss_rank_avg": 0.22501501441001892, "step": 390, "valid_targets_mean": 3669.4, "valid_targets_min": 707 }, { "epoch": 1.253968253968254, "grad_norm": 0.37271041897458185, "learning_rate": 9.440866521795874e-05, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.2661711275577545, "step": 395, "valid_targets_mean": 3099.3, "valid_targets_min": 720 }, { "epoch": 1.2698412698412698, "grad_norm": 0.369136406791236, "learning_rate": 9.426383864170891e-05, "loss": 0.2561, "loss_nan_ranks": 0, "loss_rank_avg": 0.2433825135231018, "step": 400, "valid_targets_mean": 2825.9, "valid_targets_min": 848 }, { "epoch": 1.2857142857142856, "grad_norm": 0.37357502650745017, "learning_rate": 9.411727434970121e-05, "loss": 0.2566, "loss_nan_ranks": 0, "loss_rank_avg": 0.2653917372226715, "step": 405, "valid_targets_mean": 3194.1, "valid_targets_min": 486 }, { "epoch": 1.3015873015873016, "grad_norm": 0.3294343894160417, "learning_rate": 9.396897809577643e-05, "loss": 0.257, "loss_nan_ranks": 0, "loss_rank_avg": 0.23809459805488586, "step": 410, "valid_targets_mean": 3412.5, "valid_targets_min": 878 }, { "epoch": 1.3174603174603174, "grad_norm": 0.40670887974632824, "learning_rate": 9.381895570176893e-05, "loss": 0.2601, "loss_nan_ranks": 0, "loss_rank_avg": 0.2684550881385803, "step": 415, "valid_targets_mean": 2609.8, "valid_targets_min": 692 }, { "epoch": 1.3333333333333333, "grad_norm": 0.3848066802547911, "learning_rate": 9.366721305727813e-05, "loss": 0.2502, "loss_nan_ranks": 0, "loss_rank_avg": 0.270733505487442, "step": 420, "valid_targets_mean": 2846.2, "valid_targets_min": 794 }, { "epoch": 1.3492063492063493, "grad_norm": 0.31019131919407833, "learning_rate": 9.351375611943724e-05, "loss": 0.2382, "loss_nan_ranks": 0, "loss_rank_avg": 0.21716828644275665, "step": 425, "valid_targets_mean": 3436.6, "valid_targets_min": 1281 }, { "epoch": 1.3650793650793651, "grad_norm": 0.33361242153190557, "learning_rate": 9.335859091267952e-05, "loss": 0.2423, "loss_nan_ranks": 0, "loss_rank_avg": 0.25320854783058167, "step": 430, "valid_targets_mean": 3695.6, "valid_targets_min": 972 }, { "epoch": 1.380952380952381, "grad_norm": 0.36539490370574296, "learning_rate": 9.320172352850156e-05, "loss": 0.2546, "loss_nan_ranks": 0, "loss_rank_avg": 0.25512421131134033, "step": 435, "valid_targets_mean": 2833.2, "valid_targets_min": 655 }, { "epoch": 1.3968253968253967, "grad_norm": 0.3457757597947298, "learning_rate": 9.304316012522437e-05, "loss": 0.2486, "loss_nan_ranks": 0, "loss_rank_avg": 0.24607813358306885, "step": 440, "valid_targets_mean": 3310.0, "valid_targets_min": 891 }, { "epoch": 1.4126984126984126, "grad_norm": 0.348654246853821, "learning_rate": 9.288290692775143e-05, "loss": 0.2501, "loss_nan_ranks": 0, "loss_rank_avg": 0.25384521484375, "step": 445, "valid_targets_mean": 3137.3, "valid_targets_min": 974 }, { "epoch": 1.4285714285714286, "grad_norm": 0.3454233001057256, "learning_rate": 9.272097022732443e-05, "loss": 0.2518, "loss_nan_ranks": 0, "loss_rank_avg": 0.26066356897354126, "step": 450, "valid_targets_mean": 3637.1, "valid_targets_min": 677 }, { "epoch": 1.4444444444444444, "grad_norm": 0.33143501884931204, "learning_rate": 9.255735638127623e-05, "loss": 0.2505, "loss_nan_ranks": 0, "loss_rank_avg": 0.25016534328460693, "step": 455, "valid_targets_mean": 3601.1, "valid_targets_min": 854 }, { "epoch": 1.4603174603174602, "grad_norm": 0.35241836784306196, "learning_rate": 9.239207181278131e-05, "loss": 0.2615, "loss_nan_ranks": 0, "loss_rank_avg": 0.25485676527023315, "step": 460, "valid_targets_mean": 3419.8, "valid_targets_min": 1077 }, { "epoch": 1.4761904761904763, "grad_norm": 0.3604417859438151, "learning_rate": 9.222512301060358e-05, "loss": 0.2589, "loss_nan_ranks": 0, "loss_rank_avg": 0.2657771110534668, "step": 465, "valid_targets_mean": 3546.2, "valid_targets_min": 316 }, { "epoch": 1.492063492063492, "grad_norm": 0.37365401606945614, "learning_rate": 9.205651652884169e-05, "loss": 0.2419, "loss_nan_ranks": 0, "loss_rank_avg": 0.26828211545944214, "step": 470, "valid_targets_mean": 3005.4, "valid_targets_min": 859 }, { "epoch": 1.507936507936508, "grad_norm": 0.34227061343365994, "learning_rate": 9.188625898667165e-05, "loss": 0.2503, "loss_nan_ranks": 0, "loss_rank_avg": 0.2581418752670288, "step": 475, "valid_targets_mean": 3869.3, "valid_targets_min": 889 }, { "epoch": 1.5238095238095237, "grad_norm": 0.39954769934278406, "learning_rate": 9.171435706808709e-05, "loss": 0.2618, "loss_nan_ranks": 0, "loss_rank_avg": 0.27356404066085815, "step": 480, "valid_targets_mean": 2616.6, "valid_targets_min": 567 }, { "epoch": 1.5396825396825395, "grad_norm": 0.3779648783322647, "learning_rate": 9.154081752163675e-05, "loss": 0.2584, "loss_nan_ranks": 0, "loss_rank_avg": 0.2693216800689697, "step": 485, "valid_targets_mean": 2724.9, "valid_targets_min": 752 }, { "epoch": 1.5555555555555556, "grad_norm": 0.3409265230520423, "learning_rate": 9.136564716015956e-05, "loss": 0.2599, "loss_nan_ranks": 0, "loss_rank_avg": 0.2872137427330017, "step": 490, "valid_targets_mean": 3691.8, "valid_targets_min": 881 }, { "epoch": 1.5714285714285714, "grad_norm": 0.3335281180583056, "learning_rate": 9.118885286051726e-05, "loss": 0.2582, "loss_nan_ranks": 0, "loss_rank_avg": 0.2598532438278198, "step": 495, "valid_targets_mean": 3319.2, "valid_targets_min": 747 }, { "epoch": 1.5873015873015874, "grad_norm": 0.3302567574084486, "learning_rate": 9.101044156332437e-05, "loss": 0.2444, "loss_nan_ranks": 0, "loss_rank_avg": 0.2369944453239441, "step": 500, "valid_targets_mean": 3715.5, "valid_targets_min": 1155 }, { "epoch": 1.6031746031746033, "grad_norm": 0.37703989593478526, "learning_rate": 9.083042027267567e-05, "loss": 0.2615, "loss_nan_ranks": 0, "loss_rank_avg": 0.286998450756073, "step": 505, "valid_targets_mean": 2952.9, "valid_targets_min": 574 }, { "epoch": 1.619047619047619, "grad_norm": 0.35401957704413434, "learning_rate": 9.064879605587132e-05, "loss": 0.2537, "loss_nan_ranks": 0, "loss_rank_avg": 0.26032260060310364, "step": 510, "valid_targets_mean": 3504.2, "valid_targets_min": 773 }, { "epoch": 1.6349206349206349, "grad_norm": 0.3641434739283903, "learning_rate": 9.046557604313937e-05, "loss": 0.2506, "loss_nan_ranks": 0, "loss_rank_avg": 0.25402820110321045, "step": 515, "valid_targets_mean": 3138.6, "valid_targets_min": 726 }, { "epoch": 1.6507936507936507, "grad_norm": 0.3059033037354985, "learning_rate": 9.028076742735583e-05, "loss": 0.2503, "loss_nan_ranks": 0, "loss_rank_avg": 0.23979029059410095, "step": 520, "valid_targets_mean": 3641.1, "valid_targets_min": 900 }, { "epoch": 1.6666666666666665, "grad_norm": 0.3497297327249346, "learning_rate": 9.009437746376231e-05, "loss": 0.2483, "loss_nan_ranks": 0, "loss_rank_avg": 0.25244393944740295, "step": 525, "valid_targets_mean": 3473.1, "valid_targets_min": 319 }, { "epoch": 1.6825396825396826, "grad_norm": 0.3378221717798311, "learning_rate": 8.990641346968117e-05, "loss": 0.267, "loss_nan_ranks": 0, "loss_rank_avg": 0.27777156233787537, "step": 530, "valid_targets_mean": 3499.6, "valid_targets_min": 734 }, { "epoch": 1.6984126984126984, "grad_norm": 0.33873773306272287, "learning_rate": 8.97168828242283e-05, "loss": 0.2485, "loss_nan_ranks": 0, "loss_rank_avg": 0.2586287558078766, "step": 535, "valid_targets_mean": 3036.5, "valid_targets_min": 725 }, { "epoch": 1.7142857142857144, "grad_norm": 0.34047790836727093, "learning_rate": 8.952579296802339e-05, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.2612344026565552, "step": 540, "valid_targets_mean": 3229.4, "valid_targets_min": 825 }, { "epoch": 1.7301587301587302, "grad_norm": 0.30335154929897096, "learning_rate": 8.933315140289782e-05, "loss": 0.2426, "loss_nan_ranks": 0, "loss_rank_avg": 0.22845228016376495, "step": 545, "valid_targets_mean": 3568.5, "valid_targets_min": 702 }, { "epoch": 1.746031746031746, "grad_norm": 0.35688828900002534, "learning_rate": 8.91389656916002e-05, "loss": 0.2565, "loss_nan_ranks": 0, "loss_rank_avg": 0.2728371322154999, "step": 550, "valid_targets_mean": 3387.7, "valid_targets_min": 1036 }, { "epoch": 1.7619047619047619, "grad_norm": 0.35026171013621277, "learning_rate": 8.894324345749939e-05, "loss": 0.257, "loss_nan_ranks": 0, "loss_rank_avg": 0.2628636956214905, "step": 555, "valid_targets_mean": 2987.4, "valid_targets_min": 1389 }, { "epoch": 1.7777777777777777, "grad_norm": 0.364303792145663, "learning_rate": 8.874599238428533e-05, "loss": 0.252, "loss_nan_ranks": 0, "loss_rank_avg": 0.25474119186401367, "step": 560, "valid_targets_mean": 2906.6, "valid_targets_min": 332 }, { "epoch": 1.7936507936507935, "grad_norm": 0.3297047549410631, "learning_rate": 8.85472202156673e-05, "loss": 0.2416, "loss_nan_ranks": 0, "loss_rank_avg": 0.2573590874671936, "step": 565, "valid_targets_mean": 3229.9, "valid_targets_min": 719 }, { "epoch": 1.8095238095238095, "grad_norm": 0.35086501481207644, "learning_rate": 8.834693475506992e-05, "loss": 0.2465, "loss_nan_ranks": 0, "loss_rank_avg": 0.25695112347602844, "step": 570, "valid_targets_mean": 3061.9, "valid_targets_min": 586 }, { "epoch": 1.8253968253968254, "grad_norm": 0.3317762868716903, "learning_rate": 8.814514386532691e-05, "loss": 0.247, "loss_nan_ranks": 0, "loss_rank_avg": 0.24048689007759094, "step": 575, "valid_targets_mean": 2892.9, "valid_targets_min": 662 }, { "epoch": 1.8412698412698414, "grad_norm": 0.3474363106947119, "learning_rate": 8.794185546837224e-05, "loss": 0.2583, "loss_nan_ranks": 0, "loss_rank_avg": 0.26812222599983215, "step": 580, "valid_targets_mean": 2800.1, "valid_targets_min": 741 }, { "epoch": 1.8571428571428572, "grad_norm": 0.30940634479021384, "learning_rate": 8.773707754492928e-05, "loss": 0.2613, "loss_nan_ranks": 0, "loss_rank_avg": 0.25138765573501587, "step": 585, "valid_targets_mean": 3781.5, "valid_targets_min": 759 }, { "epoch": 1.873015873015873, "grad_norm": 0.34038821122242374, "learning_rate": 8.753081813419743e-05, "loss": 0.2572, "loss_nan_ranks": 0, "loss_rank_avg": 0.2681252360343933, "step": 590, "valid_targets_mean": 3191.2, "valid_targets_min": 881 }, { "epoch": 1.8888888888888888, "grad_norm": 0.3166643196960125, "learning_rate": 8.73230853335365e-05, "loss": 0.2548, "loss_nan_ranks": 0, "loss_rank_avg": 0.2620237469673157, "step": 595, "valid_targets_mean": 3481.8, "valid_targets_min": 493 }, { "epoch": 1.9047619047619047, "grad_norm": 0.30108357485678805, "learning_rate": 8.711388729814882e-05, "loss": 0.249, "loss_nan_ranks": 0, "loss_rank_avg": 0.24603253602981567, "step": 600, "valid_targets_mean": 3596.4, "valid_targets_min": 1012 }, { "epoch": 1.9206349206349205, "grad_norm": 0.3334463754433709, "learning_rate": 8.690323224075917e-05, "loss": 0.255, "loss_nan_ranks": 0, "loss_rank_avg": 0.2664140462875366, "step": 605, "valid_targets_mean": 3660.5, "valid_targets_min": 821 }, { "epoch": 1.9365079365079365, "grad_norm": 0.29345444440656204, "learning_rate": 8.669112843129221e-05, "loss": 0.2404, "loss_nan_ranks": 0, "loss_rank_avg": 0.23653852939605713, "step": 610, "valid_targets_mean": 3828.9, "valid_targets_min": 872 }, { "epoch": 1.9523809523809523, "grad_norm": 0.3545591460034767, "learning_rate": 8.647758419654798e-05, "loss": 0.2632, "loss_nan_ranks": 0, "loss_rank_avg": 0.28314149379730225, "step": 615, "valid_targets_mean": 3005.8, "valid_targets_min": 950 }, { "epoch": 1.9682539682539684, "grad_norm": 0.37518499037902897, "learning_rate": 8.626260791987488e-05, "loss": 0.264, "loss_nan_ranks": 0, "loss_rank_avg": 0.27982187271118164, "step": 620, "valid_targets_mean": 2737.7, "valid_targets_min": 655 }, { "epoch": 1.9841269841269842, "grad_norm": 0.3141946190884809, "learning_rate": 8.604620804084065e-05, "loss": 0.2604, "loss_nan_ranks": 0, "loss_rank_avg": 0.24237555265426636, "step": 625, "valid_targets_mean": 3285.4, "valid_targets_min": 782 }, { "epoch": 2.0, "grad_norm": 0.31685211804889135, "learning_rate": 8.582839305490094e-05, "loss": 0.246, "loss_nan_ranks": 0, "loss_rank_avg": 0.24083860218524933, "step": 630, "valid_targets_mean": 3718.9, "valid_targets_min": 628 }, { "epoch": 2.015873015873016, "grad_norm": 0.5759647548742526, "learning_rate": 8.560917151306593e-05, "loss": 0.1678, "loss_nan_ranks": 0, "loss_rank_avg": 0.17167878150939941, "step": 635, "valid_targets_mean": 3780.4, "valid_targets_min": 1167 }, { "epoch": 2.0317460317460316, "grad_norm": 0.3727656111117338, "learning_rate": 8.538855202156453e-05, "loss": 0.1622, "loss_nan_ranks": 0, "loss_rank_avg": 0.17625080049037933, "step": 640, "valid_targets_mean": 2937.9, "valid_targets_min": 798 }, { "epoch": 2.0476190476190474, "grad_norm": 0.3884320540436871, "learning_rate": 8.516654324150652e-05, "loss": 0.1567, "loss_nan_ranks": 0, "loss_rank_avg": 0.17556791007518768, "step": 645, "valid_targets_mean": 3192.7, "valid_targets_min": 538 }, { "epoch": 2.0634920634920633, "grad_norm": 0.3289674113366658, "learning_rate": 8.49431538885426e-05, "loss": 0.1614, "loss_nan_ranks": 0, "loss_rank_avg": 0.16414867341518402, "step": 650, "valid_targets_mean": 3249.7, "valid_targets_min": 844 }, { "epoch": 2.0793650793650795, "grad_norm": 0.3766522928896842, "learning_rate": 8.471839273252217e-05, "loss": 0.1619, "loss_nan_ranks": 0, "loss_rank_avg": 0.18375205993652344, "step": 655, "valid_targets_mean": 3164.0, "valid_targets_min": 947 }, { "epoch": 2.0952380952380953, "grad_norm": 0.3507144392463362, "learning_rate": 8.449226859714907e-05, "loss": 0.1519, "loss_nan_ranks": 0, "loss_rank_avg": 0.1520249843597412, "step": 660, "valid_targets_mean": 3328.9, "valid_targets_min": 674 }, { "epoch": 2.111111111111111, "grad_norm": 0.37178157182206373, "learning_rate": 8.426479035963513e-05, "loss": 0.1598, "loss_nan_ranks": 0, "loss_rank_avg": 0.17126235365867615, "step": 665, "valid_targets_mean": 3293.8, "valid_targets_min": 649 }, { "epoch": 2.126984126984127, "grad_norm": 0.3300159891010412, "learning_rate": 8.403596695035174e-05, "loss": 0.166, "loss_nan_ranks": 0, "loss_rank_avg": 0.16688695549964905, "step": 670, "valid_targets_mean": 3549.7, "valid_targets_min": 1096 }, { "epoch": 2.142857142857143, "grad_norm": 0.31856865543305996, "learning_rate": 8.380580735247925e-05, "loss": 0.1657, "loss_nan_ranks": 0, "loss_rank_avg": 0.16608603298664093, "step": 675, "valid_targets_mean": 3707.8, "valid_targets_min": 621 }, { "epoch": 2.1587301587301586, "grad_norm": 0.3256115736923054, "learning_rate": 8.35743206016542e-05, "loss": 0.1565, "loss_nan_ranks": 0, "loss_rank_avg": 0.17094911634922028, "step": 680, "valid_targets_mean": 3957.3, "valid_targets_min": 1029 }, { "epoch": 2.1746031746031744, "grad_norm": 0.34931477561977403, "learning_rate": 8.334151578561478e-05, "loss": 0.1535, "loss_nan_ranks": 0, "loss_rank_avg": 0.15973442792892456, "step": 685, "valid_targets_mean": 3375.2, "valid_targets_min": 787 }, { "epoch": 2.1904761904761907, "grad_norm": 0.32279390014201975, "learning_rate": 8.310740204384387e-05, "loss": 0.1596, "loss_nan_ranks": 0, "loss_rank_avg": 0.16139554977416992, "step": 690, "valid_targets_mean": 3563.2, "valid_targets_min": 698 }, { "epoch": 2.2063492063492065, "grad_norm": 0.35609221581271633, "learning_rate": 8.287198856721042e-05, "loss": 0.1637, "loss_nan_ranks": 0, "loss_rank_avg": 0.1595011055469513, "step": 695, "valid_targets_mean": 2980.4, "valid_targets_min": 1007 }, { "epoch": 2.2222222222222223, "grad_norm": 0.37271121800564905, "learning_rate": 8.263528459760844e-05, "loss": 0.168, "loss_nan_ranks": 0, "loss_rank_avg": 0.17944392561912537, "step": 700, "valid_targets_mean": 3019.2, "valid_targets_min": 551 }, { "epoch": 2.238095238095238, "grad_norm": 0.3492345769276772, "learning_rate": 8.23972994275944e-05, "loss": 0.1595, "loss_nan_ranks": 0, "loss_rank_avg": 0.15329068899154663, "step": 705, "valid_targets_mean": 2981.5, "valid_targets_min": 705 }, { "epoch": 2.253968253968254, "grad_norm": 0.37130802424546083, "learning_rate": 8.215804240002225e-05, "loss": 0.1623, "loss_nan_ranks": 0, "loss_rank_avg": 0.16565588116645813, "step": 710, "valid_targets_mean": 3265.3, "valid_targets_min": 601 }, { "epoch": 2.2698412698412698, "grad_norm": 0.3248478814141257, "learning_rate": 8.191752290767671e-05, "loss": 0.1628, "loss_nan_ranks": 0, "loss_rank_avg": 0.1539839506149292, "step": 715, "valid_targets_mean": 3446.7, "valid_targets_min": 858 }, { "epoch": 2.2857142857142856, "grad_norm": 0.34141572197660325, "learning_rate": 8.167575039290448e-05, "loss": 0.1632, "loss_nan_ranks": 0, "loss_rank_avg": 0.16221433877944946, "step": 720, "valid_targets_mean": 3580.3, "valid_targets_min": 903 }, { "epoch": 2.3015873015873014, "grad_norm": 0.3457143748578184, "learning_rate": 8.143273434724363e-05, "loss": 0.17, "loss_nan_ranks": 0, "loss_rank_avg": 0.16455700993537903, "step": 725, "valid_targets_mean": 3009.4, "valid_targets_min": 721 }, { "epoch": 2.317460317460317, "grad_norm": 0.4063997244980026, "learning_rate": 8.118848431105091e-05, "loss": 0.1687, "loss_nan_ranks": 0, "loss_rank_avg": 0.17688608169555664, "step": 730, "valid_targets_mean": 2648.5, "valid_targets_min": 655 }, { "epoch": 2.3333333333333335, "grad_norm": 0.36420318625821063, "learning_rate": 8.094300987312725e-05, "loss": 0.1658, "loss_nan_ranks": 0, "loss_rank_avg": 0.1653500646352768, "step": 735, "valid_targets_mean": 3228.5, "valid_targets_min": 671 }, { "epoch": 2.3492063492063493, "grad_norm": 0.3426356909940338, "learning_rate": 8.069632067034129e-05, "loss": 0.1593, "loss_nan_ranks": 0, "loss_rank_avg": 0.16358144581317902, "step": 740, "valid_targets_mean": 3252.1, "valid_targets_min": 318 }, { "epoch": 2.365079365079365, "grad_norm": 0.3362094215393869, "learning_rate": 8.044842638725107e-05, "loss": 0.162, "loss_nan_ranks": 0, "loss_rank_avg": 0.16318479180335999, "step": 745, "valid_targets_mean": 3503.7, "valid_targets_min": 1345 }, { "epoch": 2.380952380952381, "grad_norm": 0.35115213542541646, "learning_rate": 8.019933675572389e-05, "loss": 0.1688, "loss_nan_ranks": 0, "loss_rank_avg": 0.17881888151168823, "step": 750, "valid_targets_mean": 3436.2, "valid_targets_min": 833 }, { "epoch": 2.3968253968253967, "grad_norm": 0.33653063427971813, "learning_rate": 7.994906155455411e-05, "loss": 0.1597, "loss_nan_ranks": 0, "loss_rank_avg": 0.15470348298549652, "step": 755, "valid_targets_mean": 3408.1, "valid_targets_min": 662 }, { "epoch": 2.4126984126984126, "grad_norm": 0.41474724537380153, "learning_rate": 7.969761060907943e-05, "loss": 0.1757, "loss_nan_ranks": 0, "loss_rank_avg": 0.18444520235061646, "step": 760, "valid_targets_mean": 2565.2, "valid_targets_min": 799 }, { "epoch": 2.4285714285714284, "grad_norm": 0.3250725810449299, "learning_rate": 7.944499379079502e-05, "loss": 0.1713, "loss_nan_ranks": 0, "loss_rank_avg": 0.16023294627666473, "step": 765, "valid_targets_mean": 3494.1, "valid_targets_min": 673 }, { "epoch": 2.4444444444444446, "grad_norm": 0.33217697389608564, "learning_rate": 7.919122101696606e-05, "loss": 0.1691, "loss_nan_ranks": 0, "loss_rank_avg": 0.1604093611240387, "step": 770, "valid_targets_mean": 3532.6, "valid_targets_min": 1386 }, { "epoch": 2.4603174603174605, "grad_norm": 0.3158728058296167, "learning_rate": 7.893630225023842e-05, "loss": 0.1677, "loss_nan_ranks": 0, "loss_rank_avg": 0.17412611842155457, "step": 775, "valid_targets_mean": 3643.2, "valid_targets_min": 715 }, { "epoch": 2.4761904761904763, "grad_norm": 0.3777164615324642, "learning_rate": 7.868024749824745e-05, "loss": 0.1689, "loss_nan_ranks": 0, "loss_rank_avg": 0.17800076305866241, "step": 780, "valid_targets_mean": 2654.0, "valid_targets_min": 645 }, { "epoch": 2.492063492063492, "grad_norm": 0.3446838338319628, "learning_rate": 7.842306681322522e-05, "loss": 0.1655, "loss_nan_ranks": 0, "loss_rank_avg": 0.152607262134552, "step": 785, "valid_targets_mean": 2785.3, "valid_targets_min": 626 }, { "epoch": 2.507936507936508, "grad_norm": 0.3413803336284991, "learning_rate": 7.816477029160582e-05, "loss": 0.169, "loss_nan_ranks": 0, "loss_rank_avg": 0.169495090842247, "step": 790, "valid_targets_mean": 3203.3, "valid_targets_min": 680 }, { "epoch": 2.5238095238095237, "grad_norm": 0.3422251012343508, "learning_rate": 7.790536807362896e-05, "loss": 0.1668, "loss_nan_ranks": 0, "loss_rank_avg": 0.16888554394245148, "step": 795, "valid_targets_mean": 3368.9, "valid_targets_min": 762 }, { "epoch": 2.5396825396825395, "grad_norm": 0.35255729056148943, "learning_rate": 7.7644870342942e-05, "loss": 0.1767, "loss_nan_ranks": 0, "loss_rank_avg": 0.17574498057365417, "step": 800, "valid_targets_mean": 3200.3, "valid_targets_min": 744 }, { "epoch": 2.5555555555555554, "grad_norm": 0.3675191971330026, "learning_rate": 7.738328732620001e-05, "loss": 0.169, "loss_nan_ranks": 0, "loss_rank_avg": 0.18634290993213654, "step": 805, "valid_targets_mean": 3374.5, "valid_targets_min": 733 }, { "epoch": 2.571428571428571, "grad_norm": 0.37403677830549353, "learning_rate": 7.712062929266444e-05, "loss": 0.1721, "loss_nan_ranks": 0, "loss_rank_avg": 0.16618913412094116, "step": 810, "valid_targets_mean": 2706.5, "valid_targets_min": 924 }, { "epoch": 2.5873015873015874, "grad_norm": 0.3727371847270236, "learning_rate": 7.685690655379984e-05, "loss": 0.1736, "loss_nan_ranks": 0, "loss_rank_avg": 0.17995798587799072, "step": 815, "valid_targets_mean": 3017.0, "valid_targets_min": 748 }, { "epoch": 2.6031746031746033, "grad_norm": 0.30751639885441906, "learning_rate": 7.659212946286912e-05, "loss": 0.1753, "loss_nan_ranks": 0, "loss_rank_avg": 0.15746265649795532, "step": 820, "valid_targets_mean": 4256.9, "valid_targets_min": 374 }, { "epoch": 2.619047619047619, "grad_norm": 0.3310233627973631, "learning_rate": 7.632630841452709e-05, "loss": 0.1688, "loss_nan_ranks": 0, "loss_rank_avg": 0.17239421606063843, "step": 825, "valid_targets_mean": 3438.6, "valid_targets_min": 1014 }, { "epoch": 2.634920634920635, "grad_norm": 0.3796448669599942, "learning_rate": 7.605945384441238e-05, "loss": 0.1742, "loss_nan_ranks": 0, "loss_rank_avg": 0.17648935317993164, "step": 830, "valid_targets_mean": 2657.2, "valid_targets_min": 678 }, { "epoch": 2.6507936507936507, "grad_norm": 0.31336443027803734, "learning_rate": 7.579157622873779e-05, "loss": 0.1763, "loss_nan_ranks": 0, "loss_rank_avg": 0.18123339116573334, "step": 835, "valid_targets_mean": 3867.7, "valid_targets_min": 799 }, { "epoch": 2.6666666666666665, "grad_norm": 0.32297938325759856, "learning_rate": 7.552268608387889e-05, "loss": 0.1754, "loss_nan_ranks": 0, "loss_rank_avg": 0.17806392908096313, "step": 840, "valid_targets_mean": 3531.5, "valid_targets_min": 994 }, { "epoch": 2.682539682539683, "grad_norm": 0.3818920541656617, "learning_rate": 7.525279396596137e-05, "loss": 0.1668, "loss_nan_ranks": 0, "loss_rank_avg": 0.17371223866939545, "step": 845, "valid_targets_mean": 2356.9, "valid_targets_min": 463 }, { "epoch": 2.6984126984126986, "grad_norm": 0.3317772595112286, "learning_rate": 7.498191047044641e-05, "loss": 0.1727, "loss_nan_ranks": 0, "loss_rank_avg": 0.18115673959255219, "step": 850, "valid_targets_mean": 3477.0, "valid_targets_min": 982 }, { "epoch": 2.7142857142857144, "grad_norm": 0.31555206274921216, "learning_rate": 7.471004623171493e-05, "loss": 0.167, "loss_nan_ranks": 0, "loss_rank_avg": 0.16410109400749207, "step": 855, "valid_targets_mean": 3581.8, "valid_targets_min": 694 }, { "epoch": 2.7301587301587302, "grad_norm": 0.3339727492896053, "learning_rate": 7.443721192264991e-05, "loss": 0.1685, "loss_nan_ranks": 0, "loss_rank_avg": 0.1775779128074646, "step": 860, "valid_targets_mean": 3339.6, "valid_targets_min": 899 }, { "epoch": 2.746031746031746, "grad_norm": 0.3741712364122302, "learning_rate": 7.416341825421754e-05, "loss": 0.1747, "loss_nan_ranks": 0, "loss_rank_avg": 0.181113600730896, "step": 865, "valid_targets_mean": 2898.9, "valid_targets_min": 807 }, { "epoch": 2.761904761904762, "grad_norm": 0.3450601435101273, "learning_rate": 7.388867597504664e-05, "loss": 0.1755, "loss_nan_ranks": 0, "loss_rank_avg": 0.16140630841255188, "step": 870, "valid_targets_mean": 2982.1, "valid_targets_min": 777 }, { "epoch": 2.7777777777777777, "grad_norm": 0.30300100600429875, "learning_rate": 7.361299587100671e-05, "loss": 0.1664, "loss_nan_ranks": 0, "loss_rank_avg": 0.14763054251670837, "step": 875, "valid_targets_mean": 3546.3, "valid_targets_min": 331 }, { "epoch": 2.7936507936507935, "grad_norm": 0.33822865794587587, "learning_rate": 7.333638876478453e-05, "loss": 0.1663, "loss_nan_ranks": 0, "loss_rank_avg": 0.15383568406105042, "step": 880, "valid_targets_mean": 3383.7, "valid_targets_min": 965 }, { "epoch": 2.8095238095238093, "grad_norm": 0.33412569714189794, "learning_rate": 7.305886551545926e-05, "loss": 0.1742, "loss_nan_ranks": 0, "loss_rank_avg": 0.16151924431324005, "step": 885, "valid_targets_mean": 3373.7, "valid_targets_min": 735 }, { "epoch": 2.825396825396825, "grad_norm": 0.33412534025670526, "learning_rate": 7.27804370180761e-05, "loss": 0.1781, "loss_nan_ranks": 0, "loss_rank_avg": 0.1706407070159912, "step": 890, "valid_targets_mean": 3179.3, "valid_targets_min": 906 }, { "epoch": 2.8412698412698414, "grad_norm": 0.3339179547455438, "learning_rate": 7.250111420321863e-05, "loss": 0.1711, "loss_nan_ranks": 0, "loss_rank_avg": 0.16379770636558533, "step": 895, "valid_targets_mean": 3285.1, "valid_targets_min": 369 }, { "epoch": 2.857142857142857, "grad_norm": 0.34023627441151805, "learning_rate": 7.222090803657965e-05, "loss": 0.1647, "loss_nan_ranks": 0, "loss_rank_avg": 0.16814827919006348, "step": 900, "valid_targets_mean": 3231.8, "valid_targets_min": 1139 }, { "epoch": 2.873015873015873, "grad_norm": 0.3340218754644537, "learning_rate": 7.193982951853072e-05, "loss": 0.1661, "loss_nan_ranks": 0, "loss_rank_avg": 0.16534548997879028, "step": 905, "valid_targets_mean": 3287.0, "valid_targets_min": 1229 }, { "epoch": 2.888888888888889, "grad_norm": 0.3567976236306708, "learning_rate": 7.165788968369027e-05, "loss": 0.1671, "loss_nan_ranks": 0, "loss_rank_avg": 0.16220903396606445, "step": 910, "valid_targets_mean": 2799.1, "valid_targets_min": 645 }, { "epoch": 2.9047619047619047, "grad_norm": 0.323267924884269, "learning_rate": 7.137509960049043e-05, "loss": 0.1699, "loss_nan_ranks": 0, "loss_rank_avg": 0.1666952222585678, "step": 915, "valid_targets_mean": 3537.4, "valid_targets_min": 743 }, { "epoch": 2.9206349206349205, "grad_norm": 0.3344600534927337, "learning_rate": 7.109147037074249e-05, "loss": 0.1743, "loss_nan_ranks": 0, "loss_rank_avg": 0.16353312134742737, "step": 920, "valid_targets_mean": 3538.2, "valid_targets_min": 586 }, { "epoch": 2.9365079365079367, "grad_norm": 0.3519409005679982, "learning_rate": 7.080701312920106e-05, "loss": 0.1709, "loss_nan_ranks": 0, "loss_rank_avg": 0.17425547540187836, "step": 925, "valid_targets_mean": 2968.8, "valid_targets_min": 734 }, { "epoch": 2.9523809523809526, "grad_norm": 0.31842438352891295, "learning_rate": 7.052173904312699e-05, "loss": 0.1739, "loss_nan_ranks": 0, "loss_rank_avg": 0.1763269454240799, "step": 930, "valid_targets_mean": 3808.2, "valid_targets_min": 926 }, { "epoch": 2.9682539682539684, "grad_norm": 0.3597461145782737, "learning_rate": 7.023565931184888e-05, "loss": 0.1687, "loss_nan_ranks": 0, "loss_rank_avg": 0.17197969555854797, "step": 935, "valid_targets_mean": 2821.0, "valid_targets_min": 381 }, { "epoch": 2.984126984126984, "grad_norm": 0.3773619909688809, "learning_rate": 6.994878516632347e-05, "loss": 0.1722, "loss_nan_ranks": 0, "loss_rank_avg": 0.18970924615859985, "step": 940, "valid_targets_mean": 3117.7, "valid_targets_min": 332 }, { "epoch": 3.0, "grad_norm": 0.35904186673915295, "learning_rate": 6.966112786869471e-05, "loss": 0.169, "loss_nan_ranks": 0, "loss_rank_avg": 0.1610109508037567, "step": 945, "valid_targets_mean": 2840.3, "valid_targets_min": 1321 }, { "epoch": 3.015873015873016, "grad_norm": 0.51988600717683, "learning_rate": 6.937269871185171e-05, "loss": 0.0984, "loss_nan_ranks": 0, "loss_rank_avg": 0.09736981987953186, "step": 950, "valid_targets_mean": 3713.8, "valid_targets_min": 728 }, { "epoch": 3.0317460317460316, "grad_norm": 0.3566692041425496, "learning_rate": 6.908350901898522e-05, "loss": 0.0948, "loss_nan_ranks": 0, "loss_rank_avg": 0.09821034967899323, "step": 955, "valid_targets_mean": 3109.2, "valid_targets_min": 855 }, { "epoch": 3.0476190476190474, "grad_norm": 0.34988594260353406, "learning_rate": 6.87935701431433e-05, "loss": 0.0908, "loss_nan_ranks": 0, "loss_rank_avg": 0.08849053084850311, "step": 960, "valid_targets_mean": 3486.1, "valid_targets_min": 759 }, { "epoch": 3.0634920634920633, "grad_norm": 0.3684968880517492, "learning_rate": 6.850289346678552e-05, "loss": 0.0918, "loss_nan_ranks": 0, "loss_rank_avg": 0.0889333039522171, "step": 965, "valid_targets_mean": 3089.4, "valid_targets_min": 767 }, { "epoch": 3.0793650793650795, "grad_norm": 0.3406031062455158, "learning_rate": 6.821149040133608e-05, "loss": 0.0932, "loss_nan_ranks": 0, "loss_rank_avg": 0.08600161969661713, "step": 970, "valid_targets_mean": 2967.3, "valid_targets_min": 846 }, { "epoch": 3.0952380952380953, "grad_norm": 0.39035591213321374, "learning_rate": 6.791937238673592e-05, "loss": 0.0985, "loss_nan_ranks": 0, "loss_rank_avg": 0.10481473803520203, "step": 975, "valid_targets_mean": 2889.2, "valid_targets_min": 1068 }, { "epoch": 3.111111111111111, "grad_norm": 0.341213524572102, "learning_rate": 6.762655089099353e-05, "loss": 0.0927, "loss_nan_ranks": 0, "loss_rank_avg": 0.09173455089330673, "step": 980, "valid_targets_mean": 3260.5, "valid_targets_min": 889 }, { "epoch": 3.126984126984127, "grad_norm": 0.3104515295578226, "learning_rate": 6.733303740973476e-05, "loss": 0.0945, "loss_nan_ranks": 0, "loss_rank_avg": 0.09036482125520706, "step": 985, "valid_targets_mean": 3952.8, "valid_targets_min": 1409 }, { "epoch": 3.142857142857143, "grad_norm": 0.3551799137750828, "learning_rate": 6.703884346575147e-05, "loss": 0.0912, "loss_nan_ranks": 0, "loss_rank_avg": 0.08662883937358856, "step": 990, "valid_targets_mean": 3036.0, "valid_targets_min": 680 }, { "epoch": 3.1587301587301586, "grad_norm": 0.3713835912690338, "learning_rate": 6.674398060854931e-05, "loss": 0.1005, "loss_nan_ranks": 0, "loss_rank_avg": 0.1003386452794075, "step": 995, "valid_targets_mean": 2793.7, "valid_targets_min": 588 }, { "epoch": 3.1746031746031744, "grad_norm": 0.3471639600992559, "learning_rate": 6.644846041389414e-05, "loss": 0.0988, "loss_nan_ranks": 0, "loss_rank_avg": 0.09873468428850174, "step": 1000, "valid_targets_mean": 3826.8, "valid_targets_min": 563 }, { "epoch": 3.1904761904761907, "grad_norm": 0.3628044132628041, "learning_rate": 6.615229448335769e-05, "loss": 0.0931, "loss_nan_ranks": 0, "loss_rank_avg": 0.09047640860080719, "step": 1005, "valid_targets_mean": 3412.8, "valid_targets_min": 737 }, { "epoch": 3.2063492063492065, "grad_norm": 0.392140458241405, "learning_rate": 6.58554944438621e-05, "loss": 0.0944, "loss_nan_ranks": 0, "loss_rank_avg": 0.10625366866588593, "step": 1010, "valid_targets_mean": 2965.2, "valid_targets_min": 677 }, { "epoch": 3.2222222222222223, "grad_norm": 0.3768633246359088, "learning_rate": 6.555807194722339e-05, "loss": 0.0945, "loss_nan_ranks": 0, "loss_rank_avg": 0.09283259510993958, "step": 1015, "valid_targets_mean": 2806.7, "valid_targets_min": 551 }, { "epoch": 3.238095238095238, "grad_norm": 0.3398289365530458, "learning_rate": 6.526003866969412e-05, "loss": 0.096, "loss_nan_ranks": 0, "loss_rank_avg": 0.09495319426059723, "step": 1020, "valid_targets_mean": 3490.8, "valid_targets_min": 636 }, { "epoch": 3.253968253968254, "grad_norm": 0.3233141200068704, "learning_rate": 6.4961406311505e-05, "loss": 0.0984, "loss_nan_ranks": 0, "loss_rank_avg": 0.09404659271240234, "step": 1025, "valid_targets_mean": 3667.8, "valid_targets_min": 935 }, { "epoch": 3.2698412698412698, "grad_norm": 0.3320120444353232, "learning_rate": 6.466218659640545e-05, "loss": 0.0976, "loss_nan_ranks": 0, "loss_rank_avg": 0.09830652177333832, "step": 1030, "valid_targets_mean": 3334.6, "valid_targets_min": 821 }, { "epoch": 3.2857142857142856, "grad_norm": 0.34959529865828043, "learning_rate": 6.436239127120351e-05, "loss": 0.0988, "loss_nan_ranks": 0, "loss_rank_avg": 0.0943569615483284, "step": 1035, "valid_targets_mean": 3270.2, "valid_targets_min": 1175 }, { "epoch": 3.3015873015873014, "grad_norm": 0.3737089477134585, "learning_rate": 6.406203210530455e-05, "loss": 0.098, "loss_nan_ranks": 0, "loss_rank_avg": 0.10708297789096832, "step": 1040, "valid_targets_mean": 3081.0, "valid_targets_min": 318 }, { "epoch": 3.317460317460317, "grad_norm": 0.36981526277827687, "learning_rate": 6.376112089024928e-05, "loss": 0.0981, "loss_nan_ranks": 0, "loss_rank_avg": 0.10486898571252823, "step": 1045, "valid_targets_mean": 3050.3, "valid_targets_min": 781 }, { "epoch": 3.3333333333333335, "grad_norm": 0.3520581333414948, "learning_rate": 6.345966943925085e-05, "loss": 0.0967, "loss_nan_ranks": 0, "loss_rank_avg": 0.09494173526763916, "step": 1050, "valid_targets_mean": 3372.5, "valid_targets_min": 331 }, { "epoch": 3.3492063492063493, "grad_norm": 0.3044367023749068, "learning_rate": 6.315768958673103e-05, "loss": 0.0993, "loss_nan_ranks": 0, "loss_rank_avg": 0.09523281455039978, "step": 1055, "valid_targets_mean": 3607.8, "valid_targets_min": 671 }, { "epoch": 3.365079365079365, "grad_norm": 0.31184254040567083, "learning_rate": 6.285519318785568e-05, "loss": 0.0957, "loss_nan_ranks": 0, "loss_rank_avg": 0.08346079289913177, "step": 1060, "valid_targets_mean": 3521.0, "valid_targets_min": 971 }, { "epoch": 3.380952380952381, "grad_norm": 0.34386459703071554, "learning_rate": 6.25521921180693e-05, "loss": 0.0986, "loss_nan_ranks": 0, "loss_rank_avg": 0.09842342138290405, "step": 1065, "valid_targets_mean": 3383.3, "valid_targets_min": 789 }, { "epoch": 3.3968253968253967, "grad_norm": 0.3863719804907234, "learning_rate": 6.224869827262885e-05, "loss": 0.1017, "loss_nan_ranks": 0, "loss_rank_avg": 0.10565370321273804, "step": 1070, "valid_targets_mean": 2921.4, "valid_targets_min": 645 }, { "epoch": 3.4126984126984126, "grad_norm": 0.3420872873529232, "learning_rate": 6.194472356613667e-05, "loss": 0.1045, "loss_nan_ranks": 0, "loss_rank_avg": 0.10064145177602768, "step": 1075, "valid_targets_mean": 3480.2, "valid_targets_min": 750 }, { "epoch": 3.4285714285714284, "grad_norm": 0.3603530774863466, "learning_rate": 6.16402799320729e-05, "loss": 0.0996, "loss_nan_ranks": 0, "loss_rank_avg": 0.09136994183063507, "step": 1080, "valid_targets_mean": 3059.1, "valid_targets_min": 693 }, { "epoch": 3.4444444444444446, "grad_norm": 0.33018926023794154, "learning_rate": 6.133537932232684e-05, "loss": 0.0952, "loss_nan_ranks": 0, "loss_rank_avg": 0.0876457691192627, "step": 1085, "valid_targets_mean": 3516.0, "valid_targets_min": 964 }, { "epoch": 3.4603174603174605, "grad_norm": 0.36301475353411755, "learning_rate": 6.1030033706727815e-05, "loss": 0.1003, "loss_nan_ranks": 0, "loss_rank_avg": 0.10114194452762604, "step": 1090, "valid_targets_mean": 3181.5, "valid_targets_min": 905 }, { "epoch": 3.4761904761904763, "grad_norm": 0.36961623868161125, "learning_rate": 6.0724255072575275e-05, "loss": 0.1007, "loss_nan_ranks": 0, "loss_rank_avg": 0.10001328587532043, "step": 1095, "valid_targets_mean": 2927.5, "valid_targets_min": 1054 }, { "epoch": 3.492063492063492, "grad_norm": 0.3354884843924621, "learning_rate": 6.0418055424168154e-05, "loss": 0.0944, "loss_nan_ranks": 0, "loss_rank_avg": 0.0889013260602951, "step": 1100, "valid_targets_mean": 3379.9, "valid_targets_min": 770 }, { "epoch": 3.507936507936508, "grad_norm": 0.3471892332651075, "learning_rate": 6.011144678233359e-05, "loss": 0.097, "loss_nan_ranks": 0, "loss_rank_avg": 0.10150094330310822, "step": 1105, "valid_targets_mean": 3311.4, "valid_targets_min": 787 }, { "epoch": 3.5238095238095237, "grad_norm": 0.31078709832767126, "learning_rate": 5.9804441183955104e-05, "loss": 0.0987, "loss_nan_ranks": 0, "loss_rank_avg": 0.0964626893401146, "step": 1110, "valid_targets_mean": 3926.9, "valid_targets_min": 1073 }, { "epoch": 3.5396825396825395, "grad_norm": 0.3546542889462149, "learning_rate": 5.9497050681499955e-05, "loss": 0.106, "loss_nan_ranks": 0, "loss_rank_avg": 0.10847651958465576, "step": 1115, "valid_targets_mean": 3566.3, "valid_targets_min": 1081 }, { "epoch": 3.5555555555555554, "grad_norm": 0.3423493309583132, "learning_rate": 5.9189287342545996e-05, "loss": 0.0972, "loss_nan_ranks": 0, "loss_rank_avg": 0.09609606862068176, "step": 1120, "valid_targets_mean": 3299.1, "valid_targets_min": 574 }, { "epoch": 3.571428571428571, "grad_norm": 0.3621388313545609, "learning_rate": 5.888116324930798e-05, "loss": 0.1014, "loss_nan_ranks": 0, "loss_rank_avg": 0.09804192185401917, "step": 1125, "valid_targets_mean": 3195.3, "valid_targets_min": 806 }, { "epoch": 3.5873015873015874, "grad_norm": 0.34845940578989215, "learning_rate": 5.8572690498163205e-05, "loss": 0.1007, "loss_nan_ranks": 0, "loss_rank_avg": 0.10415421426296234, "step": 1130, "valid_targets_mean": 3283.8, "valid_targets_min": 849 }, { "epoch": 3.6031746031746033, "grad_norm": 0.3511248351881585, "learning_rate": 5.826388119917658e-05, "loss": 0.1001, "loss_nan_ranks": 0, "loss_rank_avg": 0.10305723547935486, "step": 1135, "valid_targets_mean": 3360.9, "valid_targets_min": 318 }, { "epoch": 3.619047619047619, "grad_norm": 0.3582642943749132, "learning_rate": 5.795474747562533e-05, "loss": 0.0981, "loss_nan_ranks": 0, "loss_rank_avg": 0.10681834071874619, "step": 1140, "valid_targets_mean": 3691.9, "valid_targets_min": 877 }, { "epoch": 3.634920634920635, "grad_norm": 0.38039584848017866, "learning_rate": 5.7645301463522895e-05, "loss": 0.101, "loss_nan_ranks": 0, "loss_rank_avg": 0.1044946163892746, "step": 1145, "valid_targets_mean": 3045.8, "valid_targets_min": 548 }, { "epoch": 3.6507936507936507, "grad_norm": 0.3843884369341132, "learning_rate": 5.7335555311142677e-05, "loss": 0.0957, "loss_nan_ranks": 0, "loss_rank_avg": 0.10071367025375366, "step": 1150, "valid_targets_mean": 2920.6, "valid_targets_min": 955 }, { "epoch": 3.6666666666666665, "grad_norm": 0.3766895411083596, "learning_rate": 5.702552117854093e-05, "loss": 0.1063, "loss_nan_ranks": 0, "loss_rank_avg": 0.10362906754016876, "step": 1155, "valid_targets_mean": 3138.1, "valid_targets_min": 950 }, { "epoch": 3.682539682539683, "grad_norm": 0.3562449704876102, "learning_rate": 5.671521123707955e-05, "loss": 0.0966, "loss_nan_ranks": 0, "loss_rank_avg": 0.10384504497051239, "step": 1160, "valid_targets_mean": 3039.8, "valid_targets_min": 958 }, { "epoch": 3.6984126984126986, "grad_norm": 0.38804734901154214, "learning_rate": 5.640463766894813e-05, "loss": 0.0996, "loss_nan_ranks": 0, "loss_rank_avg": 0.10218723118305206, "step": 1165, "valid_targets_mean": 2734.8, "valid_targets_min": 1405 }, { "epoch": 3.7142857142857144, "grad_norm": 0.3381745400442977, "learning_rate": 5.609381266668578e-05, "loss": 0.0986, "loss_nan_ranks": 0, "loss_rank_avg": 0.09848766773939133, "step": 1170, "valid_targets_mean": 3304.8, "valid_targets_min": 881 }, { "epoch": 3.7301587301587302, "grad_norm": 0.3509546095547907, "learning_rate": 5.5782748432702426e-05, "loss": 0.1037, "loss_nan_ranks": 0, "loss_rank_avg": 0.10381171107292175, "step": 1175, "valid_targets_mean": 3211.6, "valid_targets_min": 611 }, { "epoch": 3.746031746031746, "grad_norm": 0.37045066245950364, "learning_rate": 5.54714571787998e-05, "loss": 0.0967, "loss_nan_ranks": 0, "loss_rank_avg": 0.09512515366077423, "step": 1180, "valid_targets_mean": 3034.8, "valid_targets_min": 1448 }, { "epoch": 3.761904761904762, "grad_norm": 0.3392682530297764, "learning_rate": 5.5159951125692e-05, "loss": 0.1031, "loss_nan_ranks": 0, "loss_rank_avg": 0.09948498010635376, "step": 1185, "valid_targets_mean": 3234.0, "valid_targets_min": 733 }, { "epoch": 3.7777777777777777, "grad_norm": 0.36472000804314153, "learning_rate": 5.484824250252574e-05, "loss": 0.1056, "loss_nan_ranks": 0, "loss_rank_avg": 0.10936148464679718, "step": 1190, "valid_targets_mean": 3208.0, "valid_targets_min": 1007 }, { "epoch": 3.7936507936507935, "grad_norm": 0.3286424664187973, "learning_rate": 5.453634354640028e-05, "loss": 0.0956, "loss_nan_ranks": 0, "loss_rank_avg": 0.10229085385799408, "step": 1195, "valid_targets_mean": 3895.4, "valid_targets_min": 901 }, { "epoch": 3.8095238095238093, "grad_norm": 0.33558484161998225, "learning_rate": 5.422426650188698e-05, "loss": 0.1038, "loss_nan_ranks": 0, "loss_rank_avg": 0.10568304359912872, "step": 1200, "valid_targets_mean": 3686.3, "valid_targets_min": 811 }, { "epoch": 3.825396825396825, "grad_norm": 0.331818482885697, "learning_rate": 5.391202362054859e-05, "loss": 0.1039, "loss_nan_ranks": 0, "loss_rank_avg": 0.09697206318378448, "step": 1205, "valid_targets_mean": 3690.9, "valid_targets_min": 1105 }, { "epoch": 3.8412698412698414, "grad_norm": 0.34884233353948313, "learning_rate": 5.359962716045835e-05, "loss": 0.1012, "loss_nan_ranks": 0, "loss_rank_avg": 0.10613285005092621, "step": 1210, "valid_targets_mean": 3277.8, "valid_targets_min": 1366 }, { "epoch": 3.857142857142857, "grad_norm": 0.3377093966535472, "learning_rate": 5.328708938571872e-05, "loss": 0.0997, "loss_nan_ranks": 0, "loss_rank_avg": 0.09620672464370728, "step": 1215, "valid_targets_mean": 3225.2, "valid_targets_min": 586 }, { "epoch": 3.873015873015873, "grad_norm": 0.34963611135918393, "learning_rate": 5.2974422565979866e-05, "loss": 0.094, "loss_nan_ranks": 0, "loss_rank_avg": 0.09651202708482742, "step": 1220, "valid_targets_mean": 3314.6, "valid_targets_min": 615 }, { "epoch": 3.888888888888889, "grad_norm": 0.312982373502565, "learning_rate": 5.266163897595804e-05, "loss": 0.1005, "loss_nan_ranks": 0, "loss_rank_avg": 0.10331282019615173, "step": 1225, "valid_targets_mean": 4324.3, "valid_targets_min": 1346 }, { "epoch": 3.9047619047619047, "grad_norm": 0.3084921760199167, "learning_rate": 5.234875089495368e-05, "loss": 0.0957, "loss_nan_ranks": 0, "loss_rank_avg": 0.08868332207202911, "step": 1230, "valid_targets_mean": 4299.4, "valid_targets_min": 1377 }, { "epoch": 3.9206349206349205, "grad_norm": 0.31812977348828253, "learning_rate": 5.203577060636935e-05, "loss": 0.0985, "loss_nan_ranks": 0, "loss_rank_avg": 0.0956139788031578, "step": 1235, "valid_targets_mean": 3409.8, "valid_targets_min": 1189 }, { "epoch": 3.9365079365079367, "grad_norm": 0.36283301664873513, "learning_rate": 5.172271039722749e-05, "loss": 0.1054, "loss_nan_ranks": 0, "loss_rank_avg": 0.10029184818267822, "step": 1240, "valid_targets_mean": 2871.2, "valid_targets_min": 1119 }, { "epoch": 3.9523809523809526, "grad_norm": 0.3747003910929305, "learning_rate": 5.140958255768812e-05, "loss": 0.1009, "loss_nan_ranks": 0, "loss_rank_avg": 0.10342127829790115, "step": 1245, "valid_targets_mean": 2885.4, "valid_targets_min": 762 }, { "epoch": 3.9682539682539684, "grad_norm": 0.3158439672350005, "learning_rate": 5.109639938056625e-05, "loss": 0.0931, "loss_nan_ranks": 0, "loss_rank_avg": 0.08681650459766388, "step": 1250, "valid_targets_mean": 3408.3, "valid_targets_min": 586 }, { "epoch": 3.984126984126984, "grad_norm": 0.37808062071501214, "learning_rate": 5.078317316084935e-05, "loss": 0.1016, "loss_nan_ranks": 0, "loss_rank_avg": 0.10480676591396332, "step": 1255, "valid_targets_mean": 2720.9, "valid_targets_min": 373 }, { "epoch": 4.0, "grad_norm": 0.3684537274368403, "learning_rate": 5.0469916195214694e-05, "loss": 0.1014, "loss_nan_ranks": 0, "loss_rank_avg": 0.09857641160488129, "step": 1260, "valid_targets_mean": 2967.4, "valid_targets_min": 825 }, { "epoch": 4.015873015873016, "grad_norm": 0.4153943983019351, "learning_rate": 5.015664078154655e-05, "loss": 0.0531, "loss_nan_ranks": 0, "loss_rank_avg": 0.060983940958976746, "step": 1265, "valid_targets_mean": 3012.4, "valid_targets_min": 1052 }, { "epoch": 4.031746031746032, "grad_norm": 0.3581444583068178, "learning_rate": 4.9843359218453455e-05, "loss": 0.0541, "loss_nan_ranks": 0, "loss_rank_avg": 0.052729278802871704, "step": 1270, "valid_targets_mean": 2914.1, "valid_targets_min": 1004 }, { "epoch": 4.0476190476190474, "grad_norm": 0.31437918787892, "learning_rate": 4.953008380478532e-05, "loss": 0.0514, "loss_nan_ranks": 0, "loss_rank_avg": 0.054422155022621155, "step": 1275, "valid_targets_mean": 3480.0, "valid_targets_min": 1017 }, { "epoch": 4.063492063492063, "grad_norm": 0.351025328877443, "learning_rate": 4.921682683915066e-05, "loss": 0.0528, "loss_nan_ranks": 0, "loss_rank_avg": 0.0532863512635231, "step": 1280, "valid_targets_mean": 3265.0, "valid_targets_min": 684 }, { "epoch": 4.079365079365079, "grad_norm": 0.33069084731208065, "learning_rate": 4.8903600619433775e-05, "loss": 0.0503, "loss_nan_ranks": 0, "loss_rank_avg": 0.050467319786548615, "step": 1285, "valid_targets_mean": 3606.2, "valid_targets_min": 1179 }, { "epoch": 4.095238095238095, "grad_norm": 0.3380162738505722, "learning_rate": 4.85904174423119e-05, "loss": 0.0503, "loss_nan_ranks": 0, "loss_rank_avg": 0.0538782961666584, "step": 1290, "valid_targets_mean": 3495.3, "valid_targets_min": 777 }, { "epoch": 4.111111111111111, "grad_norm": 0.3275848802978787, "learning_rate": 4.8277289602772514e-05, "loss": 0.0501, "loss_nan_ranks": 0, "loss_rank_avg": 0.05028657615184784, "step": 1295, "valid_targets_mean": 3340.8, "valid_targets_min": 726 }, { "epoch": 4.1269841269841265, "grad_norm": 0.30731385336857986, "learning_rate": 4.796422939363066e-05, "loss": 0.0518, "loss_nan_ranks": 0, "loss_rank_avg": 0.05034211650490761, "step": 1300, "valid_targets_mean": 3559.4, "valid_targets_min": 994 }, { "epoch": 4.142857142857143, "grad_norm": 0.2886226480947433, "learning_rate": 4.7651249105046325e-05, "loss": 0.0514, "loss_nan_ranks": 0, "loss_rank_avg": 0.047623828053474426, "step": 1305, "valid_targets_mean": 3644.8, "valid_targets_min": 1056 }, { "epoch": 4.158730158730159, "grad_norm": 0.3292470966545804, "learning_rate": 4.733836102404197e-05, "loss": 0.053, "loss_nan_ranks": 0, "loss_rank_avg": 0.0555991530418396, "step": 1310, "valid_targets_mean": 3310.0, "valid_targets_min": 1148 }, { "epoch": 4.174603174603175, "grad_norm": 0.31671105253869936, "learning_rate": 4.7025577434020146e-05, "loss": 0.0515, "loss_nan_ranks": 0, "loss_rank_avg": 0.050965093076229095, "step": 1315, "valid_targets_mean": 3481.4, "valid_targets_min": 1080 }, { "epoch": 4.190476190476191, "grad_norm": 0.2691814268392951, "learning_rate": 4.671291061428129e-05, "loss": 0.0519, "loss_nan_ranks": 0, "loss_rank_avg": 0.048076000064611435, "step": 1320, "valid_targets_mean": 4136.2, "valid_targets_min": 563 }, { "epoch": 4.2063492063492065, "grad_norm": 0.3193716065061294, "learning_rate": 4.640037283954165e-05, "loss": 0.0488, "loss_nan_ranks": 0, "loss_rank_avg": 0.0491257943212986, "step": 1325, "valid_targets_mean": 3621.5, "valid_targets_min": 886 }, { "epoch": 4.222222222222222, "grad_norm": 0.3321338547756679, "learning_rate": 4.608797637945142e-05, "loss": 0.0503, "loss_nan_ranks": 0, "loss_rank_avg": 0.05201344937086105, "step": 1330, "valid_targets_mean": 3407.8, "valid_targets_min": 939 }, { "epoch": 4.238095238095238, "grad_norm": 0.3245396663647784, "learning_rate": 4.577573349811304e-05, "loss": 0.0551, "loss_nan_ranks": 0, "loss_rank_avg": 0.052470143884420395, "step": 1335, "valid_targets_mean": 3235.7, "valid_targets_min": 1073 }, { "epoch": 4.253968253968254, "grad_norm": 0.3395641998038192, "learning_rate": 4.5463656453599726e-05, "loss": 0.0528, "loss_nan_ranks": 0, "loss_rank_avg": 0.05411034822463989, "step": 1340, "valid_targets_mean": 3194.9, "valid_targets_min": 318 }, { "epoch": 4.26984126984127, "grad_norm": 0.29449840651246995, "learning_rate": 4.515175749747426e-05, "loss": 0.0511, "loss_nan_ranks": 0, "loss_rank_avg": 0.04776880145072937, "step": 1345, "valid_targets_mean": 3797.7, "valid_targets_min": 702 }, { "epoch": 4.285714285714286, "grad_norm": 0.34455860119114856, "learning_rate": 4.484004887430803e-05, "loss": 0.0544, "loss_nan_ranks": 0, "loss_rank_avg": 0.05883609876036644, "step": 1350, "valid_targets_mean": 3001.2, "valid_targets_min": 825 }, { "epoch": 4.301587301587301, "grad_norm": 0.3137045381914164, "learning_rate": 4.452854282120022e-05, "loss": 0.0539, "loss_nan_ranks": 0, "loss_rank_avg": 0.050319183617830276, "step": 1355, "valid_targets_mean": 3514.4, "valid_targets_min": 293 }, { "epoch": 4.317460317460317, "grad_norm": 0.3133895802149954, "learning_rate": 4.4217251567297586e-05, "loss": 0.0537, "loss_nan_ranks": 0, "loss_rank_avg": 0.0509931854903698, "step": 1360, "valid_targets_mean": 3800.1, "valid_targets_min": 1003 }, { "epoch": 4.333333333333333, "grad_norm": 0.29357417262063296, "learning_rate": 4.390618733331423e-05, "loss": 0.0543, "loss_nan_ranks": 0, "loss_rank_avg": 0.04734072834253311, "step": 1365, "valid_targets_mean": 3653.2, "valid_targets_min": 728 }, { "epoch": 4.349206349206349, "grad_norm": 0.3150075563320328, "learning_rate": 4.359536233105187e-05, "loss": 0.0541, "loss_nan_ranks": 0, "loss_rank_avg": 0.05183495208621025, "step": 1370, "valid_targets_mean": 3766.0, "valid_targets_min": 965 }, { "epoch": 4.365079365079365, "grad_norm": 0.3552891281644966, "learning_rate": 4.328478876292045e-05, "loss": 0.0516, "loss_nan_ranks": 0, "loss_rank_avg": 0.055399805307388306, "step": 1375, "valid_targets_mean": 2724.2, "valid_targets_min": 586 }, { "epoch": 4.380952380952381, "grad_norm": 0.33304071178653466, "learning_rate": 4.297447882145907e-05, "loss": 0.0555, "loss_nan_ranks": 0, "loss_rank_avg": 0.053134672343730927, "step": 1380, "valid_targets_mean": 2952.0, "valid_targets_min": 916 }, { "epoch": 4.396825396825397, "grad_norm": 0.34771402929173456, "learning_rate": 4.266444468885735e-05, "loss": 0.0534, "loss_nan_ranks": 0, "loss_rank_avg": 0.05659467354416847, "step": 1385, "valid_targets_mean": 2962.9, "valid_targets_min": 742 }, { "epoch": 4.412698412698413, "grad_norm": 0.30995705929679834, "learning_rate": 4.235469853647711e-05, "loss": 0.054, "loss_nan_ranks": 0, "loss_rank_avg": 0.0572165809571743, "step": 1390, "valid_targets_mean": 4091.7, "valid_targets_min": 984 }, { "epoch": 4.428571428571429, "grad_norm": 0.3413898378795505, "learning_rate": 4.204525252437469e-05, "loss": 0.0535, "loss_nan_ranks": 0, "loss_rank_avg": 0.05606987327337265, "step": 1395, "valid_targets_mean": 3297.2, "valid_targets_min": 916 }, { "epoch": 4.444444444444445, "grad_norm": 0.2938410889592773, "learning_rate": 4.173611880082342e-05, "loss": 0.0539, "loss_nan_ranks": 0, "loss_rank_avg": 0.0523369163274765, "step": 1400, "valid_targets_mean": 3989.2, "valid_targets_min": 615 }, { "epoch": 4.4603174603174605, "grad_norm": 0.3058932143351616, "learning_rate": 4.1427309501836806e-05, "loss": 0.0551, "loss_nan_ranks": 0, "loss_rank_avg": 0.048057764768600464, "step": 1405, "valid_targets_mean": 3375.5, "valid_targets_min": 942 }, { "epoch": 4.476190476190476, "grad_norm": 0.3475308691671556, "learning_rate": 4.111883675069202e-05, "loss": 0.0545, "loss_nan_ranks": 0, "loss_rank_avg": 0.05427522212266922, "step": 1410, "valid_targets_mean": 3109.4, "valid_targets_min": 1195 }, { "epoch": 4.492063492063492, "grad_norm": 0.3546866810389638, "learning_rate": 4.081071265745402e-05, "loss": 0.0546, "loss_nan_ranks": 0, "loss_rank_avg": 0.06188282370567322, "step": 1415, "valid_targets_mean": 2553.4, "valid_targets_min": 1181 }, { "epoch": 4.507936507936508, "grad_norm": 0.3549135864226603, "learning_rate": 4.0502949318500064e-05, "loss": 0.055, "loss_nan_ranks": 0, "loss_rank_avg": 0.05323749780654907, "step": 1420, "valid_targets_mean": 2530.2, "valid_targets_min": 846 }, { "epoch": 4.523809523809524, "grad_norm": 0.34447294739101914, "learning_rate": 4.01955588160449e-05, "loss": 0.0561, "loss_nan_ranks": 0, "loss_rank_avg": 0.057265084236860275, "step": 1425, "valid_targets_mean": 3020.2, "valid_targets_min": 873 }, { "epoch": 4.5396825396825395, "grad_norm": 0.32321348267609445, "learning_rate": 3.9888553217666415e-05, "loss": 0.0549, "loss_nan_ranks": 0, "loss_rank_avg": 0.05526804178953171, "step": 1430, "valid_targets_mean": 3486.1, "valid_targets_min": 1013 }, { "epoch": 4.555555555555555, "grad_norm": 0.33228527568112487, "learning_rate": 3.9581944575831864e-05, "loss": 0.0532, "loss_nan_ranks": 0, "loss_rank_avg": 0.04903782904148102, "step": 1435, "valid_targets_mean": 2660.8, "valid_targets_min": 601 }, { "epoch": 4.571428571428571, "grad_norm": 0.36386571786343064, "learning_rate": 3.927574492742473e-05, "loss": 0.0583, "loss_nan_ranks": 0, "loss_rank_avg": 0.06255079060792923, "step": 1440, "valid_targets_mean": 2925.6, "valid_targets_min": 862 }, { "epoch": 4.587301587301587, "grad_norm": 0.30325305282368664, "learning_rate": 3.896996629327219e-05, "loss": 0.0535, "loss_nan_ranks": 0, "loss_rank_avg": 0.050758376717567444, "step": 1445, "valid_targets_mean": 3914.9, "valid_targets_min": 936 }, { "epoch": 4.603174603174603, "grad_norm": 0.2877745700763886, "learning_rate": 3.8664620677673186e-05, "loss": 0.0501, "loss_nan_ranks": 0, "loss_rank_avg": 0.0491793192923069, "step": 1450, "valid_targets_mean": 4105.0, "valid_targets_min": 932 }, { "epoch": 4.619047619047619, "grad_norm": 0.33246313679919126, "learning_rate": 3.8359720067927115e-05, "loss": 0.053, "loss_nan_ranks": 0, "loss_rank_avg": 0.05500481277704239, "step": 1455, "valid_targets_mean": 3288.3, "valid_targets_min": 816 }, { "epoch": 4.634920634920634, "grad_norm": 0.3255731224642127, "learning_rate": 3.805527643386334e-05, "loss": 0.052, "loss_nan_ranks": 0, "loss_rank_avg": 0.05280488729476929, "step": 1460, "valid_targets_mean": 3135.0, "valid_targets_min": 1126 }, { "epoch": 4.650793650793651, "grad_norm": 0.30648161691148584, "learning_rate": 3.775130172737117e-05, "loss": 0.0523, "loss_nan_ranks": 0, "loss_rank_avg": 0.05356438457965851, "step": 1465, "valid_targets_mean": 3381.0, "valid_targets_min": 586 }, { "epoch": 4.666666666666667, "grad_norm": 0.31556097843679115, "learning_rate": 3.7447807881930694e-05, "loss": 0.0527, "loss_nan_ranks": 0, "loss_rank_avg": 0.049721553921699524, "step": 1470, "valid_targets_mean": 3391.8, "valid_targets_min": 956 }, { "epoch": 4.682539682539683, "grad_norm": 0.337509339024026, "learning_rate": 3.7144806812144324e-05, "loss": 0.0545, "loss_nan_ranks": 0, "loss_rank_avg": 0.056368205696344376, "step": 1475, "valid_targets_mean": 3378.8, "valid_targets_min": 821 }, { "epoch": 4.698412698412699, "grad_norm": 0.31522673917083804, "learning_rate": 3.6842310413269e-05, "loss": 0.0557, "loss_nan_ranks": 0, "loss_rank_avg": 0.05478835850954056, "step": 1480, "valid_targets_mean": 3541.2, "valid_targets_min": 1464 }, { "epoch": 4.714285714285714, "grad_norm": 0.31921752559672634, "learning_rate": 3.654033056074918e-05, "loss": 0.0525, "loss_nan_ranks": 0, "loss_rank_avg": 0.055470626801252365, "step": 1485, "valid_targets_mean": 3424.5, "valid_targets_min": 741 }, { "epoch": 4.73015873015873, "grad_norm": 0.2808688151765656, "learning_rate": 3.6238879109750735e-05, "loss": 0.053, "loss_nan_ranks": 0, "loss_rank_avg": 0.045721858739852905, "step": 1490, "valid_targets_mean": 3944.5, "valid_targets_min": 374 }, { "epoch": 4.746031746031746, "grad_norm": 0.3300324859616459, "learning_rate": 3.593796789469546e-05, "loss": 0.052, "loss_nan_ranks": 0, "loss_rank_avg": 0.05356018245220184, "step": 1495, "valid_targets_mean": 3003.5, "valid_targets_min": 844 }, { "epoch": 4.761904761904762, "grad_norm": 0.3032124702664025, "learning_rate": 3.563760872879649e-05, "loss": 0.0514, "loss_nan_ranks": 0, "loss_rank_avg": 0.047460850328207016, "step": 1500, "valid_targets_mean": 3525.9, "valid_targets_min": 1121 }, { "epoch": 4.777777777777778, "grad_norm": 0.3374039231803274, "learning_rate": 3.5337813403594545e-05, "loss": 0.0523, "loss_nan_ranks": 0, "loss_rank_avg": 0.05037646368145943, "step": 1505, "valid_targets_mean": 2918.6, "valid_targets_min": 1081 }, { "epoch": 4.7936507936507935, "grad_norm": 0.31170176986389236, "learning_rate": 3.5038593688495005e-05, "loss": 0.0541, "loss_nan_ranks": 0, "loss_rank_avg": 0.04973987489938736, "step": 1510, "valid_targets_mean": 3366.8, "valid_targets_min": 285 }, { "epoch": 4.809523809523809, "grad_norm": 0.3393101055826999, "learning_rate": 3.4739961330305894e-05, "loss": 0.0556, "loss_nan_ranks": 0, "loss_rank_avg": 0.05811101943254471, "step": 1515, "valid_targets_mean": 3200.0, "valid_targets_min": 1096 }, { "epoch": 4.825396825396825, "grad_norm": 0.33852940213510874, "learning_rate": 3.444192805277663e-05, "loss": 0.0506, "loss_nan_ranks": 0, "loss_rank_avg": 0.05156991630792618, "step": 1520, "valid_targets_mean": 2913.8, "valid_targets_min": 645 }, { "epoch": 4.841269841269841, "grad_norm": 0.3312924011892462, "learning_rate": 3.414450555613792e-05, "loss": 0.0518, "loss_nan_ranks": 0, "loss_rank_avg": 0.05578611418604851, "step": 1525, "valid_targets_mean": 3475.9, "valid_targets_min": 851 }, { "epoch": 4.857142857142857, "grad_norm": 0.2906890421902625, "learning_rate": 3.3847705516642317e-05, "loss": 0.0509, "loss_nan_ranks": 0, "loss_rank_avg": 0.04764970391988754, "step": 1530, "valid_targets_mean": 3311.6, "valid_targets_min": 1073 }, { "epoch": 4.8730158730158735, "grad_norm": 0.3058135467638653, "learning_rate": 3.355153958610587e-05, "loss": 0.0508, "loss_nan_ranks": 0, "loss_rank_avg": 0.04871264845132828, "step": 1535, "valid_targets_mean": 3466.6, "valid_targets_min": 858 }, { "epoch": 4.888888888888889, "grad_norm": 0.33436172037636874, "learning_rate": 3.325601939145069e-05, "loss": 0.0541, "loss_nan_ranks": 0, "loss_rank_avg": 0.05800117552280426, "step": 1540, "valid_targets_mean": 3362.7, "valid_targets_min": 1061 }, { "epoch": 4.904761904761905, "grad_norm": 0.3688653693932819, "learning_rate": 3.296115653424854e-05, "loss": 0.0586, "loss_nan_ranks": 0, "loss_rank_avg": 0.07181097567081451, "step": 1545, "valid_targets_mean": 2898.6, "valid_targets_min": 845 }, { "epoch": 4.920634920634921, "grad_norm": 0.3062268300108016, "learning_rate": 3.266696259026526e-05, "loss": 0.0538, "loss_nan_ranks": 0, "loss_rank_avg": 0.05916294455528259, "step": 1550, "valid_targets_mean": 3883.2, "valid_targets_min": 567 }, { "epoch": 4.936507936507937, "grad_norm": 0.36817501253465373, "learning_rate": 3.237344910900648e-05, "loss": 0.0532, "loss_nan_ranks": 0, "loss_rank_avg": 0.05916590616106987, "step": 1555, "valid_targets_mean": 2581.8, "valid_targets_min": 1036 }, { "epoch": 4.9523809523809526, "grad_norm": 0.3281489035503953, "learning_rate": 3.208062761326408e-05, "loss": 0.0508, "loss_nan_ranks": 0, "loss_rank_avg": 0.05242155119776726, "step": 1560, "valid_targets_mean": 3023.0, "valid_targets_min": 796 }, { "epoch": 4.968253968253968, "grad_norm": 0.33908599199518963, "learning_rate": 3.178850959866393e-05, "loss": 0.0525, "loss_nan_ranks": 0, "loss_rank_avg": 0.050644807517528534, "step": 1565, "valid_targets_mean": 3031.7, "valid_targets_min": 849 }, { "epoch": 4.984126984126984, "grad_norm": 0.3496241644407292, "learning_rate": 3.14971065332145e-05, "loss": 0.0518, "loss_nan_ranks": 0, "loss_rank_avg": 0.05183893069624901, "step": 1570, "valid_targets_mean": 2509.7, "valid_targets_min": 318 }, { "epoch": 5.0, "grad_norm": 0.3195463471966324, "learning_rate": 3.1206429856856706e-05, "loss": 0.0507, "loss_nan_ranks": 0, "loss_rank_avg": 0.051501572132110596, "step": 1575, "valid_targets_mean": 3284.1, "valid_targets_min": 677 }, { "epoch": 5.015873015873016, "grad_norm": 0.23550431411264172, "learning_rate": 3.091649098101479e-05, "loss": 0.024, "loss_nan_ranks": 0, "loss_rank_avg": 0.02023545652627945, "step": 1580, "valid_targets_mean": 3572.4, "valid_targets_min": 859 }, { "epoch": 5.031746031746032, "grad_norm": 0.33195372440897564, "learning_rate": 3.06273012881483e-05, "loss": 0.0252, "loss_nan_ranks": 0, "loss_rank_avg": 0.0263539906591177, "step": 1585, "valid_targets_mean": 3201.2, "valid_targets_min": 947 }, { "epoch": 5.0476190476190474, "grad_norm": 0.26127862712982075, "learning_rate": 3.0338872131305284e-05, "loss": 0.0254, "loss_nan_ranks": 0, "loss_rank_avg": 0.02652253583073616, "step": 1590, "valid_targets_mean": 3707.3, "valid_targets_min": 801 }, { "epoch": 5.063492063492063, "grad_norm": 0.2614335637158489, "learning_rate": 3.0051214833676545e-05, "loss": 0.0258, "loss_nan_ranks": 0, "loss_rank_avg": 0.02493833750486374, "step": 1595, "valid_targets_mean": 3296.4, "valid_targets_min": 726 }, { "epoch": 5.079365079365079, "grad_norm": 0.2674137096002863, "learning_rate": 2.9764340688151137e-05, "loss": 0.0256, "loss_nan_ranks": 0, "loss_rank_avg": 0.024273354560136795, "step": 1600, "valid_targets_mean": 3360.6, "valid_targets_min": 654 }, { "epoch": 5.095238095238095, "grad_norm": 0.2340596063464024, "learning_rate": 2.9478260956873028e-05, "loss": 0.0238, "loss_nan_ranks": 0, "loss_rank_avg": 0.02273944392800331, "step": 1605, "valid_targets_mean": 4419.8, "valid_targets_min": 810 }, { "epoch": 5.111111111111111, "grad_norm": 0.308811482193481, "learning_rate": 2.919298687079895e-05, "loss": 0.0266, "loss_nan_ranks": 0, "loss_rank_avg": 0.03129608556628227, "step": 1610, "valid_targets_mean": 2848.1, "valid_targets_min": 742 }, { "epoch": 5.1269841269841265, "grad_norm": 0.2842567967216506, "learning_rate": 2.8908529629257543e-05, "loss": 0.0265, "loss_nan_ranks": 0, "loss_rank_avg": 0.02591758593916893, "step": 1615, "valid_targets_mean": 3129.9, "valid_targets_min": 590 }, { "epoch": 5.142857142857143, "grad_norm": 0.24104336758904743, "learning_rate": 2.8624900399509603e-05, "loss": 0.0243, "loss_nan_ranks": 0, "loss_rank_avg": 0.025047089904546738, "step": 1620, "valid_targets_mean": 3620.5, "valid_targets_min": 877 }, { "epoch": 5.158730158730159, "grad_norm": 0.2523939190707324, "learning_rate": 2.8342110316309745e-05, "loss": 0.0259, "loss_nan_ranks": 0, "loss_rank_avg": 0.025033798068761826, "step": 1625, "valid_targets_mean": 3434.6, "valid_targets_min": 1271 }, { "epoch": 5.174603174603175, "grad_norm": 0.2495035494552693, "learning_rate": 2.8060170481469293e-05, "loss": 0.0249, "loss_nan_ranks": 0, "loss_rank_avg": 0.021112697198987007, "step": 1630, "valid_targets_mean": 3306.6, "valid_targets_min": 750 }, { "epoch": 5.190476190476191, "grad_norm": 0.2648511629409598, "learning_rate": 2.777909196342035e-05, "loss": 0.0277, "loss_nan_ranks": 0, "loss_rank_avg": 0.027787357568740845, "step": 1635, "valid_targets_mean": 3520.7, "valid_targets_min": 1071 }, { "epoch": 5.2063492063492065, "grad_norm": 0.2487265676626827, "learning_rate": 2.749888579678138e-05, "loss": 0.0255, "loss_nan_ranks": 0, "loss_rank_avg": 0.025035560131072998, "step": 1640, "valid_targets_mean": 3585.5, "valid_targets_min": 702 }, { "epoch": 5.222222222222222, "grad_norm": 0.27878036038709497, "learning_rate": 2.721956298192392e-05, "loss": 0.027, "loss_nan_ranks": 0, "loss_rank_avg": 0.026966020464897156, "step": 1645, "valid_targets_mean": 3309.6, "valid_targets_min": 725 }, { "epoch": 5.238095238095238, "grad_norm": 0.2707139283998997, "learning_rate": 2.6941134484540774e-05, "loss": 0.0267, "loss_nan_ranks": 0, "loss_rank_avg": 0.026516567915678024, "step": 1650, "valid_targets_mean": 3290.4, "valid_targets_min": 695 }, { "epoch": 5.253968253968254, "grad_norm": 0.3091277534101446, "learning_rate": 2.6663611235215486e-05, "loss": 0.0265, "loss_nan_ranks": 0, "loss_rank_avg": 0.027157654985785484, "step": 1655, "valid_targets_mean": 2847.1, "valid_targets_min": 1120 }, { "epoch": 5.26984126984127, "grad_norm": 0.2616349404434438, "learning_rate": 2.6387004128993314e-05, "loss": 0.0251, "loss_nan_ranks": 0, "loss_rank_avg": 0.024449646472930908, "step": 1660, "valid_targets_mean": 3586.6, "valid_targets_min": 1115 }, { "epoch": 5.285714285714286, "grad_norm": 0.27356996302919745, "learning_rate": 2.6111324024953378e-05, "loss": 0.0256, "loss_nan_ranks": 0, "loss_rank_avg": 0.025334272533655167, "step": 1665, "valid_targets_mean": 3436.2, "valid_targets_min": 963 }, { "epoch": 5.301587301587301, "grad_norm": 0.3214202340630028, "learning_rate": 2.5836581745782475e-05, "loss": 0.0256, "loss_nan_ranks": 0, "loss_rank_avg": 0.0278994832187891, "step": 1670, "valid_targets_mean": 2833.7, "valid_targets_min": 1031 }, { "epoch": 5.317460317460317, "grad_norm": 0.2695726847515144, "learning_rate": 2.556278807735008e-05, "loss": 0.0263, "loss_nan_ranks": 0, "loss_rank_avg": 0.02789219841361046, "step": 1675, "valid_targets_mean": 3449.8, "valid_targets_min": 924 }, { "epoch": 5.333333333333333, "grad_norm": 0.2727973520928148, "learning_rate": 2.5289953768285092e-05, "loss": 0.0257, "loss_nan_ranks": 0, "loss_rank_avg": 0.02461610920727253, "step": 1680, "valid_targets_mean": 3137.1, "valid_targets_min": 759 }, { "epoch": 5.349206349206349, "grad_norm": 0.24666589277394446, "learning_rate": 2.501808952955359e-05, "loss": 0.0262, "loss_nan_ranks": 0, "loss_rank_avg": 0.022942395880818367, "step": 1685, "valid_targets_mean": 3659.5, "valid_targets_min": 778 }, { "epoch": 5.365079365079365, "grad_norm": 0.2336284978989606, "learning_rate": 2.474720603403866e-05, "loss": 0.0242, "loss_nan_ranks": 0, "loss_rank_avg": 0.02202015370130539, "step": 1690, "valid_targets_mean": 4008.6, "valid_targets_min": 1621 }, { "epoch": 5.380952380952381, "grad_norm": 0.2801903131745413, "learning_rate": 2.447731391612112e-05, "loss": 0.0271, "loss_nan_ranks": 0, "loss_rank_avg": 0.02588559314608574, "step": 1695, "valid_targets_mean": 3414.8, "valid_targets_min": 744 }, { "epoch": 5.396825396825397, "grad_norm": 0.2181467290392336, "learning_rate": 2.4208423771262238e-05, "loss": 0.0249, "loss_nan_ranks": 0, "loss_rank_avg": 0.023064320906996727, "step": 1700, "valid_targets_mean": 4122.8, "valid_targets_min": 998 }, { "epoch": 5.412698412698413, "grad_norm": 0.2795289718838943, "learning_rate": 2.3940546155587618e-05, "loss": 0.0272, "loss_nan_ranks": 0, "loss_rank_avg": 0.030484072864055634, "step": 1705, "valid_targets_mean": 3116.4, "valid_targets_min": 356 }, { "epoch": 5.428571428571429, "grad_norm": 0.2949329786637831, "learning_rate": 2.367369158547292e-05, "loss": 0.0265, "loss_nan_ranks": 0, "loss_rank_avg": 0.030314363539218903, "step": 1710, "valid_targets_mean": 3290.7, "valid_targets_min": 1221 }, { "epoch": 5.444444444444445, "grad_norm": 0.2764043874434965, "learning_rate": 2.3407870537130898e-05, "loss": 0.0255, "loss_nan_ranks": 0, "loss_rank_avg": 0.026211140677332878, "step": 1715, "valid_targets_mean": 3457.7, "valid_targets_min": 1297 }, { "epoch": 5.4603174603174605, "grad_norm": 0.30290538424312186, "learning_rate": 2.314309344620019e-05, "loss": 0.0274, "loss_nan_ranks": 0, "loss_rank_avg": 0.028061866760253906, "step": 1720, "valid_targets_mean": 2804.1, "valid_targets_min": 742 }, { "epoch": 5.476190476190476, "grad_norm": 0.3107682447774674, "learning_rate": 2.287937070733557e-05, "loss": 0.0245, "loss_nan_ranks": 0, "loss_rank_avg": 0.02621731534600258, "step": 1725, "valid_targets_mean": 2719.9, "valid_targets_min": 373 }, { "epoch": 5.492063492063492, "grad_norm": 0.2544021308351876, "learning_rate": 2.2616712673799994e-05, "loss": 0.0254, "loss_nan_ranks": 0, "loss_rank_avg": 0.025156507268548012, "step": 1730, "valid_targets_mean": 3616.8, "valid_targets_min": 833 }, { "epoch": 5.507936507936508, "grad_norm": 0.26483540124283883, "learning_rate": 2.2355129657058004e-05, "loss": 0.0255, "loss_nan_ranks": 0, "loss_rank_avg": 0.022783182561397552, "step": 1735, "valid_targets_mean": 3544.8, "valid_targets_min": 576 }, { "epoch": 5.523809523809524, "grad_norm": 0.26901313523570725, "learning_rate": 2.2094631926371045e-05, "loss": 0.0248, "loss_nan_ranks": 0, "loss_rank_avg": 0.024442121386528015, "step": 1740, "valid_targets_mean": 3287.4, "valid_targets_min": 684 }, { "epoch": 5.5396825396825395, "grad_norm": 0.24704696828339448, "learning_rate": 2.18352297083942e-05, "loss": 0.0259, "loss_nan_ranks": 0, "loss_rank_avg": 0.023202896118164062, "step": 1745, "valid_targets_mean": 3396.6, "valid_targets_min": 782 }, { "epoch": 5.555555555555555, "grad_norm": 0.24636088299850367, "learning_rate": 2.1576933186774777e-05, "loss": 0.0258, "loss_nan_ranks": 0, "loss_rank_avg": 0.02773324027657509, "step": 1750, "valid_targets_mean": 3575.5, "valid_targets_min": 741 }, { "epoch": 5.571428571428571, "grad_norm": 0.25071236680671866, "learning_rate": 2.131975250175256e-05, "loss": 0.0265, "loss_nan_ranks": 0, "loss_rank_avg": 0.023207779973745346, "step": 1755, "valid_targets_mean": 3630.2, "valid_targets_min": 1033 }, { "epoch": 5.587301587301587, "grad_norm": 0.27275654624779627, "learning_rate": 2.1063697749761603e-05, "loss": 0.0256, "loss_nan_ranks": 0, "loss_rank_avg": 0.02427113801240921, "step": 1760, "valid_targets_mean": 3178.8, "valid_targets_min": 371 }, { "epoch": 5.603174603174603, "grad_norm": 0.2379523221904174, "learning_rate": 2.080877898303394e-05, "loss": 0.0252, "loss_nan_ranks": 0, "loss_rank_avg": 0.022033121436834335, "step": 1765, "valid_targets_mean": 3439.0, "valid_targets_min": 762 }, { "epoch": 5.619047619047619, "grad_norm": 0.2960282320406999, "learning_rate": 2.0555006209204997e-05, "loss": 0.0255, "loss_nan_ranks": 0, "loss_rank_avg": 0.026485547423362732, "step": 1770, "valid_targets_mean": 2969.6, "valid_targets_min": 655 }, { "epoch": 5.634920634920634, "grad_norm": 0.26944449671962667, "learning_rate": 2.030238939092059e-05, "loss": 0.0252, "loss_nan_ranks": 0, "loss_rank_avg": 0.02524230070412159, "step": 1775, "valid_targets_mean": 3050.5, "valid_targets_min": 1216 }, { "epoch": 5.650793650793651, "grad_norm": 0.2554660378162961, "learning_rate": 2.0050938445445894e-05, "loss": 0.0251, "loss_nan_ranks": 0, "loss_rank_avg": 0.023806503042578697, "step": 1780, "valid_targets_mean": 3239.5, "valid_targets_min": 338 }, { "epoch": 5.666666666666667, "grad_norm": 0.28458142143891874, "learning_rate": 1.980066324427613e-05, "loss": 0.0246, "loss_nan_ranks": 0, "loss_rank_avg": 0.027210228145122528, "step": 1785, "valid_targets_mean": 2952.8, "valid_targets_min": 262 }, { "epoch": 5.682539682539683, "grad_norm": 0.26739426156418444, "learning_rate": 1.9551573612748923e-05, "loss": 0.0256, "loss_nan_ranks": 0, "loss_rank_avg": 0.02571839839220047, "step": 1790, "valid_targets_mean": 3384.7, "valid_targets_min": 1216 }, { "epoch": 5.698412698412699, "grad_norm": 0.2631155742829386, "learning_rate": 1.9303679329658723e-05, "loss": 0.0245, "loss_nan_ranks": 0, "loss_rank_avg": 0.02573530748486519, "step": 1795, "valid_targets_mean": 3206.7, "valid_targets_min": 731 }, { "epoch": 5.714285714285714, "grad_norm": 0.2824175420225818, "learning_rate": 1.905699012687275e-05, "loss": 0.0241, "loss_nan_ranks": 0, "loss_rank_avg": 0.025772523134946823, "step": 1800, "valid_targets_mean": 3128.2, "valid_targets_min": 656 }, { "epoch": 5.73015873015873, "grad_norm": 0.2671365865327459, "learning_rate": 1.881151568894909e-05, "loss": 0.0245, "loss_nan_ranks": 0, "loss_rank_avg": 0.02606208436191082, "step": 1805, "valid_targets_mean": 3463.7, "valid_targets_min": 1332 }, { "epoch": 5.746031746031746, "grad_norm": 0.2460946861875537, "learning_rate": 1.8567265652756378e-05, "loss": 0.0249, "loss_nan_ranks": 0, "loss_rank_avg": 0.022165346890687943, "step": 1810, "valid_targets_mean": 3046.5, "valid_targets_min": 866 }, { "epoch": 5.761904761904762, "grad_norm": 0.25239884686693714, "learning_rate": 1.8324249607095534e-05, "loss": 0.025, "loss_nan_ranks": 0, "loss_rank_avg": 0.024087360128760338, "step": 1815, "valid_targets_mean": 3273.7, "valid_targets_min": 1115 }, { "epoch": 5.777777777777778, "grad_norm": 0.23790823566806268, "learning_rate": 1.8082477092323297e-05, "loss": 0.024, "loss_nan_ranks": 0, "loss_rank_avg": 0.02218548208475113, "step": 1820, "valid_targets_mean": 3519.1, "valid_targets_min": 318 }, { "epoch": 5.7936507936507935, "grad_norm": 0.24495804994980389, "learning_rate": 1.7841957599977755e-05, "loss": 0.0242, "loss_nan_ranks": 0, "loss_rank_avg": 0.022038817405700684, "step": 1825, "valid_targets_mean": 3439.0, "valid_targets_min": 941 }, { "epoch": 5.809523809523809, "grad_norm": 0.27584063642219875, "learning_rate": 1.760270057240559e-05, "loss": 0.0241, "loss_nan_ranks": 0, "loss_rank_avg": 0.027009010314941406, "step": 1830, "valid_targets_mean": 2943.4, "valid_targets_min": 319 }, { "epoch": 5.825396825396825, "grad_norm": 0.2827248862474914, "learning_rate": 1.736471540239156e-05, "loss": 0.0246, "loss_nan_ranks": 0, "loss_rank_avg": 0.02594105526804924, "step": 1835, "valid_targets_mean": 3063.8, "valid_targets_min": 992 }, { "epoch": 5.841269841269841, "grad_norm": 0.24374670667972242, "learning_rate": 1.712801143278961e-05, "loss": 0.0243, "loss_nan_ranks": 0, "loss_rank_avg": 0.02299753949046135, "step": 1840, "valid_targets_mean": 3784.0, "valid_targets_min": 586 }, { "epoch": 5.857142857142857, "grad_norm": 0.2655779019009014, "learning_rate": 1.6892597956156148e-05, "loss": 0.0244, "loss_nan_ranks": 0, "loss_rank_avg": 0.02573399990797043, "step": 1845, "valid_targets_mean": 3114.1, "valid_targets_min": 703 }, { "epoch": 5.8730158730158735, "grad_norm": 0.2723810167625872, "learning_rate": 1.6658484214385234e-05, "loss": 0.024, "loss_nan_ranks": 0, "loss_rank_avg": 0.0256446972489357, "step": 1850, "valid_targets_mean": 2782.9, "valid_targets_min": 1190 }, { "epoch": 5.888888888888889, "grad_norm": 0.223290705015297, "learning_rate": 1.6425679398345812e-05, "loss": 0.0241, "loss_nan_ranks": 0, "loss_rank_avg": 0.020922958850860596, "step": 1855, "valid_targets_mean": 3931.8, "valid_targets_min": 796 }, { "epoch": 5.904761904761905, "grad_norm": 0.27944419013119903, "learning_rate": 1.619419264752076e-05, "loss": 0.024, "loss_nan_ranks": 0, "loss_rank_avg": 0.025158777832984924, "step": 1860, "valid_targets_mean": 2923.1, "valid_targets_min": 466 }, { "epoch": 5.920634920634921, "grad_norm": 0.2681139770398924, "learning_rate": 1.5964033049648262e-05, "loss": 0.0249, "loss_nan_ranks": 0, "loss_rank_avg": 0.02556907758116722, "step": 1865, "valid_targets_mean": 3237.6, "valid_targets_min": 992 }, { "epoch": 5.936507936507937, "grad_norm": 0.26218394989898647, "learning_rate": 1.5735209640364873e-05, "loss": 0.0243, "loss_nan_ranks": 0, "loss_rank_avg": 0.023507630452513695, "step": 1870, "valid_targets_mean": 3332.7, "valid_targets_min": 838 }, { "epoch": 5.9523809523809526, "grad_norm": 0.24469964139348233, "learning_rate": 1.5507731402850956e-05, "loss": 0.0235, "loss_nan_ranks": 0, "loss_rank_avg": 0.021347498521208763, "step": 1875, "valid_targets_mean": 3352.9, "valid_targets_min": 820 }, { "epoch": 5.968253968253968, "grad_norm": 0.2440356190541872, "learning_rate": 1.528160726747783e-05, "loss": 0.0238, "loss_nan_ranks": 0, "loss_rank_avg": 0.02353665605187416, "step": 1880, "valid_targets_mean": 3424.4, "valid_targets_min": 895 }, { "epoch": 5.984126984126984, "grad_norm": 0.27079401801374, "learning_rate": 1.5056846111457407e-05, "loss": 0.0231, "loss_nan_ranks": 0, "loss_rank_avg": 0.023557720705866814, "step": 1885, "valid_targets_mean": 3352.8, "valid_targets_min": 825 }, { "epoch": 6.0, "grad_norm": 0.24564494829508438, "learning_rate": 1.483345675849348e-05, "loss": 0.0241, "loss_nan_ranks": 0, "loss_rank_avg": 0.02200917899608612, "step": 1890, "valid_targets_mean": 3229.2, "valid_targets_min": 752 }, { "epoch": 6.015873015873016, "grad_norm": 0.16169482529118429, "learning_rate": 1.4611447978435478e-05, "loss": 0.0116, "loss_nan_ranks": 0, "loss_rank_avg": 0.00997502077370882, "step": 1895, "valid_targets_mean": 3454.8, "valid_targets_min": 601 }, { "epoch": 6.031746031746032, "grad_norm": 0.24946653742799907, "learning_rate": 1.439082848693406e-05, "loss": 0.0107, "loss_nan_ranks": 0, "loss_rank_avg": 0.012786821462213993, "step": 1900, "valid_targets_mean": 2587.1, "valid_targets_min": 645 }, { "epoch": 6.0476190476190474, "grad_norm": 0.2180413946533584, "learning_rate": 1.4171606945099076e-05, "loss": 0.0104, "loss_nan_ranks": 0, "loss_rank_avg": 0.009333532303571701, "step": 1905, "valid_targets_mean": 3655.9, "valid_targets_min": 364 }, { "epoch": 6.063492063492063, "grad_norm": 0.19567558644860955, "learning_rate": 1.3953791959159368e-05, "loss": 0.0107, "loss_nan_ranks": 0, "loss_rank_avg": 0.009645476937294006, "step": 1910, "valid_targets_mean": 2975.1, "valid_targets_min": 723 }, { "epoch": 6.079365079365079, "grad_norm": 0.16598482793198757, "learning_rate": 1.3737392080125134e-05, "loss": 0.011, "loss_nan_ranks": 0, "loss_rank_avg": 0.010899921879172325, "step": 1915, "valid_targets_mean": 4373.3, "valid_targets_min": 776 }, { "epoch": 6.095238095238095, "grad_norm": 0.18594848404069328, "learning_rate": 1.3522415803452027e-05, "loss": 0.0112, "loss_nan_ranks": 0, "loss_rank_avg": 0.010842259973287582, "step": 1920, "valid_targets_mean": 3016.0, "valid_targets_min": 846 }, { "epoch": 6.111111111111111, "grad_norm": 0.15994392653373266, "learning_rate": 1.3308871568707798e-05, "loss": 0.0108, "loss_nan_ranks": 0, "loss_rank_avg": 0.009601420722901821, "step": 1925, "valid_targets_mean": 3397.0, "valid_targets_min": 845 }, { "epoch": 6.1269841269841265, "grad_norm": 0.18397230525918348, "learning_rate": 1.3096767759240836e-05, "loss": 0.0117, "loss_nan_ranks": 0, "loss_rank_avg": 0.012009745463728905, "step": 1930, "valid_targets_mean": 3255.9, "valid_targets_min": 1029 }, { "epoch": 6.142857142857143, "grad_norm": 0.15937200483272312, "learning_rate": 1.2886112701851178e-05, "loss": 0.0107, "loss_nan_ranks": 0, "loss_rank_avg": 0.008708149194717407, "step": 1935, "valid_targets_mean": 3998.8, "valid_targets_min": 1493 }, { "epoch": 6.158730158730159, "grad_norm": 0.22332299562406527, "learning_rate": 1.2676914666463508e-05, "loss": 0.011, "loss_nan_ranks": 0, "loss_rank_avg": 0.013267268426716328, "step": 1940, "valid_targets_mean": 3523.3, "valid_targets_min": 778 }, { "epoch": 6.174603174603175, "grad_norm": 0.2114465027443654, "learning_rate": 1.2469181865802576e-05, "loss": 0.0118, "loss_nan_ranks": 0, "loss_rank_avg": 0.01177770271897316, "step": 1945, "valid_targets_mean": 2789.5, "valid_targets_min": 775 }, { "epoch": 6.190476190476191, "grad_norm": 0.2002001975909518, "learning_rate": 1.2262922455070719e-05, "loss": 0.0105, "loss_nan_ranks": 0, "loss_rank_avg": 0.011541876941919327, "step": 1950, "valid_targets_mean": 3120.6, "valid_targets_min": 356 }, { "epoch": 6.2063492063492065, "grad_norm": 0.19841543508080267, "learning_rate": 1.2058144531627774e-05, "loss": 0.0112, "loss_nan_ranks": 0, "loss_rank_avg": 0.010849805548787117, "step": 1955, "valid_targets_mean": 3501.3, "valid_targets_min": 381 }, { "epoch": 6.222222222222222, "grad_norm": 0.22079617709202617, "learning_rate": 1.1854856134673097e-05, "loss": 0.0106, "loss_nan_ranks": 0, "loss_rank_avg": 0.011766679584980011, "step": 1960, "valid_targets_mean": 2914.3, "valid_targets_min": 706 }, { "epoch": 6.238095238095238, "grad_norm": 0.20366254465466968, "learning_rate": 1.1653065244930083e-05, "loss": 0.0111, "loss_nan_ranks": 0, "loss_rank_avg": 0.011541249230504036, "step": 1965, "valid_targets_mean": 3002.7, "valid_targets_min": 318 }, { "epoch": 6.253968253968254, "grad_norm": 0.21870013628872753, "learning_rate": 1.1452779784332718e-05, "loss": 0.0113, "loss_nan_ranks": 0, "loss_rank_avg": 0.011631770990788937, "step": 1970, "valid_targets_mean": 2956.9, "valid_targets_min": 955 }, { "epoch": 6.26984126984127, "grad_norm": 0.19876493233509734, "learning_rate": 1.1254007615714685e-05, "loss": 0.0108, "loss_nan_ranks": 0, "loss_rank_avg": 0.010932897217571735, "step": 1975, "valid_targets_mean": 2878.1, "valid_targets_min": 990 }, { "epoch": 6.285714285714286, "grad_norm": 0.22127790602397174, "learning_rate": 1.1056756542500613e-05, "loss": 0.0124, "loss_nan_ranks": 0, "loss_rank_avg": 0.0136713907122612, "step": 1980, "valid_targets_mean": 3617.0, "valid_targets_min": 723 }, { "epoch": 6.301587301587301, "grad_norm": 0.2107125195160337, "learning_rate": 1.086103430839982e-05, "loss": 0.0111, "loss_nan_ranks": 0, "loss_rank_avg": 0.012097819708287716, "step": 1985, "valid_targets_mean": 3181.6, "valid_targets_min": 270 }, { "epoch": 6.317460317460317, "grad_norm": 0.1816728141260571, "learning_rate": 1.066684859710218e-05, "loss": 0.0101, "loss_nan_ranks": 0, "loss_rank_avg": 0.009708253666758537, "step": 1990, "valid_targets_mean": 3332.8, "valid_targets_min": 641 }, { "epoch": 6.333333333333333, "grad_norm": 0.16887535290523142, "learning_rate": 1.0474207031976618e-05, "loss": 0.01, "loss_nan_ranks": 0, "loss_rank_avg": 0.010005680844187737, "step": 1995, "valid_targets_mean": 3711.7, "valid_targets_min": 728 }, { "epoch": 6.349206349206349, "grad_norm": 0.21411607846162323, "learning_rate": 1.0283117175771701e-05, "loss": 0.012, "loss_nan_ranks": 0, "loss_rank_avg": 0.011855946853756905, "step": 2000, "valid_targets_mean": 2993.4, "valid_targets_min": 900 }, { "epoch": 6.365079365079365, "grad_norm": 0.17645356487975158, "learning_rate": 1.0093586530318849e-05, "loss": 0.0108, "loss_nan_ranks": 0, "loss_rank_avg": 0.009679011069238186, "step": 2005, "valid_targets_mean": 3481.0, "valid_targets_min": 634 }, { "epoch": 6.380952380952381, "grad_norm": 0.17743382869891283, "learning_rate": 9.905622536237708e-06, "loss": 0.0115, "loss_nan_ranks": 0, "loss_rank_avg": 0.010511040687561035, "step": 2010, "valid_targets_mean": 3653.9, "valid_targets_min": 463 }, { "epoch": 6.396825396825397, "grad_norm": 0.1748858090662512, "learning_rate": 9.719232572644187e-06, "loss": 0.0109, "loss_nan_ranks": 0, "loss_rank_avg": 0.010769926011562347, "step": 2015, "valid_targets_mean": 3555.8, "valid_targets_min": 473 }, { "epoch": 6.412698412698413, "grad_norm": 0.21446394993733692, "learning_rate": 9.534423956860638e-06, "loss": 0.0116, "loss_nan_ranks": 0, "loss_rank_avg": 0.012041926383972168, "step": 2020, "valid_targets_mean": 3285.3, "valid_targets_min": 819 }, { "epoch": 6.428571428571429, "grad_norm": 0.20144880581326097, "learning_rate": 9.351203944128694e-06, "loss": 0.0112, "loss_nan_ranks": 0, "loss_rank_avg": 0.011740963906049728, "step": 2025, "valid_targets_mean": 3020.3, "valid_targets_min": 906 }, { "epoch": 6.444444444444445, "grad_norm": 0.19143766850758767, "learning_rate": 9.16957972732434e-06, "loss": 0.0109, "loss_nan_ranks": 0, "loss_rank_avg": 0.010045674629509449, "step": 2030, "valid_targets_mean": 3397.0, "valid_targets_min": 680 }, { "epoch": 6.4603174603174605, "grad_norm": 0.2086923194209106, "learning_rate": 8.989558436675643e-06, "loss": 0.011, "loss_nan_ranks": 0, "loss_rank_avg": 0.011913447640836239, "step": 2035, "valid_targets_mean": 3015.7, "valid_targets_min": 785 }, { "epoch": 6.476190476190476, "grad_norm": 0.22807360258432707, "learning_rate": 8.811147139482745e-06, "loss": 0.0105, "loss_nan_ranks": 0, "loss_rank_avg": 0.011762842535972595, "step": 2040, "valid_targets_mean": 3092.6, "valid_targets_min": 590 }, { "epoch": 6.492063492063492, "grad_norm": 0.23354176928347664, "learning_rate": 8.634352839840459e-06, "loss": 0.0115, "loss_nan_ranks": 0, "loss_rank_avg": 0.013229792937636375, "step": 2045, "valid_targets_mean": 3058.2, "valid_targets_min": 678 }, { "epoch": 6.507936507936508, "grad_norm": 0.1844983037827689, "learning_rate": 8.45918247836327e-06, "loss": 0.0099, "loss_nan_ranks": 0, "loss_rank_avg": 0.010129349306225777, "step": 2050, "valid_targets_mean": 3581.6, "valid_targets_min": 1165 }, { "epoch": 6.523809523809524, "grad_norm": 0.19399825454673583, "learning_rate": 8.285642931912918e-06, "loss": 0.0108, "loss_nan_ranks": 0, "loss_rank_avg": 0.009755829349160194, "step": 2055, "valid_targets_mean": 2613.1, "valid_targets_min": 834 }, { "epoch": 6.5396825396825395, "grad_norm": 0.1389749321198228, "learning_rate": 8.113741013328352e-06, "loss": 0.0103, "loss_nan_ranks": 0, "loss_rank_avg": 0.007903524674475193, "step": 2060, "valid_targets_mean": 4402.2, "valid_targets_min": 847 }, { "epoch": 6.555555555555555, "grad_norm": 0.18758006176253833, "learning_rate": 7.943483471158326e-06, "loss": 0.0104, "loss_nan_ranks": 0, "loss_rank_avg": 0.010629910975694656, "step": 2065, "valid_targets_mean": 4012.3, "valid_targets_min": 834 }, { "epoch": 6.571428571428571, "grad_norm": 0.19682939141810737, "learning_rate": 7.774876989396434e-06, "loss": 0.0104, "loss_nan_ranks": 0, "loss_rank_avg": 0.011159196496009827, "step": 2070, "valid_targets_mean": 2739.4, "valid_targets_min": 725 }, { "epoch": 6.587301587301587, "grad_norm": 0.22211885260493783, "learning_rate": 7.607928187218699e-06, "loss": 0.0112, "loss_nan_ranks": 0, "loss_rank_avg": 0.011714112013578415, "step": 2075, "valid_targets_mean": 2769.2, "valid_targets_min": 645 }, { "epoch": 6.603174603174603, "grad_norm": 0.18954660156576889, "learning_rate": 7.442643618723777e-06, "loss": 0.0102, "loss_nan_ranks": 0, "loss_rank_avg": 0.010226866230368614, "step": 2080, "valid_targets_mean": 3419.1, "valid_targets_min": 659 }, { "epoch": 6.619047619047619, "grad_norm": 0.18370859857232805, "learning_rate": 7.2790297726755716e-06, "loss": 0.0104, "loss_nan_ranks": 0, "loss_rank_avg": 0.010806581936776638, "step": 2085, "valid_targets_mean": 3737.0, "valid_targets_min": 1297 }, { "epoch": 6.634920634920634, "grad_norm": 0.18918121566352425, "learning_rate": 7.117093072248571e-06, "loss": 0.0116, "loss_nan_ranks": 0, "loss_rank_avg": 0.010280042886734009, "step": 2090, "valid_targets_mean": 3381.7, "valid_targets_min": 825 }, { "epoch": 6.650793650793651, "grad_norm": 0.16521970564313165, "learning_rate": 6.9568398747756396e-06, "loss": 0.0104, "loss_nan_ranks": 0, "loss_rank_avg": 0.009875962510704994, "step": 2095, "valid_targets_mean": 3724.8, "valid_targets_min": 695 }, { "epoch": 6.666666666666667, "grad_norm": 0.17760484054865824, "learning_rate": 6.798276471498444e-06, "loss": 0.0106, "loss_nan_ranks": 0, "loss_rank_avg": 0.008955635130405426, "step": 2100, "valid_targets_mean": 3321.4, "valid_targets_min": 676 }, { "epoch": 6.682539682539683, "grad_norm": 0.19520090654719327, "learning_rate": 6.6414090873204886e-06, "loss": 0.0096, "loss_nan_ranks": 0, "loss_rank_avg": 0.01039792224764824, "step": 2105, "valid_targets_mean": 3116.7, "valid_targets_min": 486 }, { "epoch": 6.698412698412699, "grad_norm": 0.18028684323612768, "learning_rate": 6.486243880562759e-06, "loss": 0.01, "loss_nan_ranks": 0, "loss_rank_avg": 0.009589990600943565, "step": 2110, "valid_targets_mean": 3307.2, "valid_targets_min": 1014 }, { "epoch": 6.714285714285714, "grad_norm": 0.1818626090504096, "learning_rate": 6.3327869427218855e-06, "loss": 0.0096, "loss_nan_ranks": 0, "loss_rank_avg": 0.008870855905115604, "step": 2115, "valid_targets_mean": 3770.0, "valid_targets_min": 671 }, { "epoch": 6.73015873015873, "grad_norm": 0.20259355193852477, "learning_rate": 6.181044298231081e-06, "loss": 0.011, "loss_nan_ranks": 0, "loss_rank_avg": 0.011193148791790009, "step": 2120, "valid_targets_mean": 3412.4, "valid_targets_min": 1066 }, { "epoch": 6.746031746031746, "grad_norm": 0.1826233680836296, "learning_rate": 6.031021904223572e-06, "loss": 0.01, "loss_nan_ranks": 0, "loss_rank_avg": 0.009859883226454258, "step": 2125, "valid_targets_mean": 3204.7, "valid_targets_min": 909 }, { "epoch": 6.761904761904762, "grad_norm": 0.19159375445872537, "learning_rate": 5.882725650298787e-06, "loss": 0.0101, "loss_nan_ranks": 0, "loss_rank_avg": 0.010744025930762291, "step": 2130, "valid_targets_mean": 3296.4, "valid_targets_min": 782 }, { "epoch": 6.777777777777778, "grad_norm": 0.19885905304525853, "learning_rate": 5.736161358291092e-06, "loss": 0.0101, "loss_nan_ranks": 0, "loss_rank_avg": 0.010427623987197876, "step": 2135, "valid_targets_mean": 3111.9, "valid_targets_min": 677 }, { "epoch": 6.7936507936507935, "grad_norm": 0.17789784161152683, "learning_rate": 5.5913347820412635e-06, "loss": 0.0117, "loss_nan_ranks": 0, "loss_rank_avg": 0.010946325957775116, "step": 2140, "valid_targets_mean": 3855.3, "valid_targets_min": 1147 }, { "epoch": 6.809523809523809, "grad_norm": 0.15707297813715498, "learning_rate": 5.44825160717059e-06, "loss": 0.0094, "loss_nan_ranks": 0, "loss_rank_avg": 0.00876244530081749, "step": 2145, "valid_targets_mean": 3554.3, "valid_targets_min": 1052 }, { "epoch": 6.825396825396825, "grad_norm": 0.18254202956390367, "learning_rate": 5.306917450857702e-06, "loss": 0.0106, "loss_nan_ranks": 0, "loss_rank_avg": 0.010324838571250439, "step": 2150, "valid_targets_mean": 3432.2, "valid_targets_min": 1151 }, { "epoch": 6.841269841269841, "grad_norm": 0.1930099772388768, "learning_rate": 5.167337861617982e-06, "loss": 0.0106, "loss_nan_ranks": 0, "loss_rank_avg": 0.010216512717306614, "step": 2155, "valid_targets_mean": 3203.4, "valid_targets_min": 627 }, { "epoch": 6.857142857142857, "grad_norm": 0.19005087618453437, "learning_rate": 5.029518319085824e-06, "loss": 0.0107, "loss_nan_ranks": 0, "loss_rank_avg": 0.009590497240424156, "step": 2160, "valid_targets_mean": 2832.6, "valid_targets_min": 567 }, { "epoch": 6.8730158730158735, "grad_norm": 0.1909793872450157, "learning_rate": 4.893464233799433e-06, "loss": 0.0105, "loss_nan_ranks": 0, "loss_rank_avg": 0.009531540796160698, "step": 2165, "valid_targets_mean": 3204.7, "valid_targets_min": 1214 }, { "epoch": 6.888888888888889, "grad_norm": 0.1793904616196824, "learning_rate": 4.759180946988495e-06, "loss": 0.0095, "loss_nan_ranks": 0, "loss_rank_avg": 0.009257054887712002, "step": 2170, "valid_targets_mean": 3621.2, "valid_targets_min": 648 }, { "epoch": 6.904761904761905, "grad_norm": 0.1667931999548248, "learning_rate": 4.626673730364395e-06, "loss": 0.0104, "loss_nan_ranks": 0, "loss_rank_avg": 0.01003570482134819, "step": 2175, "valid_targets_mean": 3463.8, "valid_targets_min": 285 }, { "epoch": 6.920634920634921, "grad_norm": 0.20660624187240578, "learning_rate": 4.495947785913368e-06, "loss": 0.0104, "loss_nan_ranks": 0, "loss_rank_avg": 0.011296918615698814, "step": 2180, "valid_targets_mean": 3009.5, "valid_targets_min": 1065 }, { "epoch": 6.936507936507937, "grad_norm": 0.20045998659363987, "learning_rate": 4.367008245692189e-06, "loss": 0.0107, "loss_nan_ranks": 0, "loss_rank_avg": 0.010871585458517075, "step": 2185, "valid_targets_mean": 2747.8, "valid_targets_min": 944 }, { "epoch": 6.9523809523809526, "grad_norm": 0.18038151471049843, "learning_rate": 4.239860171626769e-06, "loss": 0.0096, "loss_nan_ranks": 0, "loss_rank_avg": 0.010644408874213696, "step": 2190, "valid_targets_mean": 3555.1, "valid_targets_min": 1412 }, { "epoch": 6.968253968253968, "grad_norm": 0.2067773581962787, "learning_rate": 4.114508555313351e-06, "loss": 0.0105, "loss_nan_ranks": 0, "loss_rank_avg": 0.011355595663189888, "step": 2195, "valid_targets_mean": 2859.0, "valid_targets_min": 1016 }, { "epoch": 6.984126984126984, "grad_norm": 0.17103297550131963, "learning_rate": 3.990958317822663e-06, "loss": 0.0099, "loss_nan_ranks": 0, "loss_rank_avg": 0.008938717655837536, "step": 2200, "valid_targets_mean": 3593.5, "valid_targets_min": 755 }, { "epoch": 7.0, "grad_norm": 0.1851720566559619, "learning_rate": 3.86921430950658e-06, "loss": 0.0097, "loss_nan_ranks": 0, "loss_rank_avg": 0.009520391002297401, "step": 2205, "valid_targets_mean": 3075.6, "valid_targets_min": 319 }, { "epoch": 7.015873015873016, "grad_norm": 0.10035776473927605, "learning_rate": 3.7492813098078506e-06, "loss": 0.0055, "loss_nan_ranks": 0, "loss_rank_avg": 0.005469363648444414, "step": 2210, "valid_targets_mean": 3383.3, "valid_targets_min": 904 }, { "epoch": 7.031746031746032, "grad_norm": 0.11216010325309983, "learning_rate": 3.6311640270723757e-06, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.0050615957006812096, "step": 2215, "valid_targets_mean": 3358.8, "valid_targets_min": 906 }, { "epoch": 7.0476190476190474, "grad_norm": 0.10929451815598369, "learning_rate": 3.5148670983644104e-06, "loss": 0.0045, "loss_nan_ranks": 0, "loss_rank_avg": 0.004745697136968374, "step": 2220, "valid_targets_mean": 3131.4, "valid_targets_min": 641 }, { "epoch": 7.063492063492063, "grad_norm": 0.12271903497817556, "learning_rate": 3.400395089284475e-06, "loss": 0.0051, "loss_nan_ranks": 0, "loss_rank_avg": 0.005019777454435825, "step": 2225, "valid_targets_mean": 3120.6, "valid_targets_min": 1083 }, { "epoch": 7.079365079365079, "grad_norm": 0.12730389484499585, "learning_rate": 3.287752493790186e-06, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.005130327306687832, "step": 2230, "valid_targets_mean": 3167.2, "valid_targets_min": 1097 }, { "epoch": 7.095238095238095, "grad_norm": 0.11182416285479005, "learning_rate": 3.1769437340197715e-06, "loss": 0.005, "loss_nan_ranks": 0, "loss_rank_avg": 0.005524179898202419, "step": 2235, "valid_targets_mean": 3423.6, "valid_targets_min": 724 }, { "epoch": 7.111111111111111, "grad_norm": 0.11695963016448252, "learning_rate": 3.067973160118498e-06, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.004500727169215679, "step": 2240, "valid_targets_mean": 3632.2, "valid_targets_min": 854 }, { "epoch": 7.1269841269841265, "grad_norm": 0.11470576194363803, "learning_rate": 2.9608450500678565e-06, "loss": 0.005, "loss_nan_ranks": 0, "loss_rank_avg": 0.004933122545480728, "step": 2245, "valid_targets_mean": 3541.1, "valid_targets_min": 955 }, { "epoch": 7.142857142857143, "grad_norm": 0.12111224092063874, "learning_rate": 2.8555636095176975e-06, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.004573405720293522, "step": 2250, "valid_targets_mean": 3104.1, "valid_targets_min": 656 }, { "epoch": 7.158730158730159, "grad_norm": 0.12261137209357191, "learning_rate": 2.7521329716210074e-06, "loss": 0.0048, "loss_nan_ranks": 0, "loss_rank_avg": 0.005299875512719154, "step": 2255, "valid_targets_mean": 4327.8, "valid_targets_min": 993 }, { "epoch": 7.174603174603175, "grad_norm": 0.12983398943430338, "learning_rate": 2.6505571968717725e-06, "loss": 0.0051, "loss_nan_ranks": 0, "loss_rank_avg": 0.005398509092628956, "step": 2260, "valid_targets_mean": 3107.6, "valid_targets_min": 786 }, { "epoch": 7.190476190476191, "grad_norm": 0.10675934532792695, "learning_rate": 2.550840272945465e-06, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.004216499626636505, "step": 2265, "valid_targets_mean": 3529.8, "valid_targets_min": 911 }, { "epoch": 7.2063492063492065, "grad_norm": 0.1299523571452913, "learning_rate": 2.4529861145425605e-06, "loss": 0.0053, "loss_nan_ranks": 0, "loss_rank_avg": 0.004921115003526211, "step": 2270, "valid_targets_mean": 2665.1, "valid_targets_min": 778 }, { "epoch": 7.222222222222222, "grad_norm": 0.09718138220360564, "learning_rate": 2.3569985632348247e-06, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004324613604694605, "step": 2275, "valid_targets_mean": 3938.3, "valid_targets_min": 770 }, { "epoch": 7.238095238095238, "grad_norm": 0.15050121932011226, "learning_rate": 2.2628813873145303e-06, "loss": 0.0053, "loss_nan_ranks": 0, "loss_rank_avg": 0.006321427412331104, "step": 2280, "valid_targets_mean": 2946.2, "valid_targets_min": 293 }, { "epoch": 7.253968253968254, "grad_norm": 0.10028028833360875, "learning_rate": 2.17063828164647e-06, "loss": 0.0048, "loss_nan_ranks": 0, "loss_rank_avg": 0.0037990009877830744, "step": 2285, "valid_targets_mean": 4333.9, "valid_targets_min": 1228 }, { "epoch": 7.26984126984127, "grad_norm": 0.12203494313480417, "learning_rate": 2.0802728675229587e-06, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.004578214138746262, "step": 2290, "valid_targets_mean": 3254.3, "valid_targets_min": 741 }, { "epoch": 7.285714285714286, "grad_norm": 0.12085258195724963, "learning_rate": 1.9917886925216234e-06, "loss": 0.0045, "loss_nan_ranks": 0, "loss_rank_avg": 0.0045486027374863625, "step": 2295, "valid_targets_mean": 3183.5, "valid_targets_min": 702 }, { "epoch": 7.301587301587301, "grad_norm": 0.10039022352128651, "learning_rate": 1.9051892303661834e-06, "loss": 0.0053, "loss_nan_ranks": 0, "loss_rank_avg": 0.004291100427508354, "step": 2300, "valid_targets_mean": 3600.1, "valid_targets_min": 1324 }, { "epoch": 7.317460317460317, "grad_norm": 0.12218249251554697, "learning_rate": 1.8204778807900003e-06, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.005083533935248852, "step": 2305, "valid_targets_mean": 3657.6, "valid_targets_min": 1242 }, { "epoch": 7.333333333333333, "grad_norm": 0.10958599619723258, "learning_rate": 1.7376579694026896e-06, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.004395646043121815, "step": 2310, "valid_targets_mean": 2992.5, "valid_targets_min": 676 }, { "epoch": 7.349206349206349, "grad_norm": 0.11998147910250385, "learning_rate": 1.6567327475595195e-06, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.00454556941986084, "step": 2315, "valid_targets_mean": 3329.6, "valid_targets_min": 318 }, { "epoch": 7.365079365079365, "grad_norm": 0.13220362888562553, "learning_rate": 1.577705392233797e-06, "loss": 0.005, "loss_nan_ranks": 0, "loss_rank_avg": 0.005032036453485489, "step": 2320, "valid_targets_mean": 3215.4, "valid_targets_min": 1121 }, { "epoch": 7.380952380952381, "grad_norm": 0.10580151354074074, "learning_rate": 1.5005790058920943e-06, "loss": 0.005, "loss_nan_ranks": 0, "loss_rank_avg": 0.00551568390801549, "step": 2325, "valid_targets_mean": 4641.6, "valid_targets_min": 879 }, { "epoch": 7.396825396825397, "grad_norm": 0.11928758266665618, "learning_rate": 1.4253566163725252e-06, "loss": 0.0048, "loss_nan_ranks": 0, "loss_rank_avg": 0.005063705146312714, "step": 2330, "valid_targets_mean": 3024.2, "valid_targets_min": 654 }, { "epoch": 7.412698412698413, "grad_norm": 0.12921524618139682, "learning_rate": 1.3520411767658059e-06, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.005293884314596653, "step": 2335, "valid_targets_mean": 3078.6, "valid_targets_min": 886 }, { "epoch": 7.428571428571429, "grad_norm": 0.11765595069380638, "learning_rate": 1.2806355652993762e-06, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.0043135518208146095, "step": 2340, "valid_targets_mean": 3240.3, "valid_targets_min": 905 }, { "epoch": 7.444444444444445, "grad_norm": 0.14268732952024096, "learning_rate": 1.2111425852243785e-06, "loss": 0.0051, "loss_nan_ranks": 0, "loss_rank_avg": 0.005359090864658356, "step": 2345, "valid_targets_mean": 2711.0, "valid_targets_min": 900 }, { "epoch": 7.4603174603174605, "grad_norm": 0.11255760360881431, "learning_rate": 1.143564964705618e-06, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.005487900227308273, "step": 2350, "valid_targets_mean": 3677.5, "valid_targets_min": 446 }, { "epoch": 7.476190476190476, "grad_norm": 0.12881208811316566, "learning_rate": 1.0779053567144427e-06, "loss": 0.005, "loss_nan_ranks": 0, "loss_rank_avg": 0.00521219614893198, "step": 2355, "valid_targets_mean": 3273.8, "valid_targets_min": 1278 }, { "epoch": 7.492063492063492, "grad_norm": 0.10843979278626246, "learning_rate": 1.014166338924627e-06, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.004336035810410976, "step": 2360, "valid_targets_mean": 3976.3, "valid_targets_min": 1259 }, { "epoch": 7.507936507936508, "grad_norm": 0.11503295790694797, "learning_rate": 9.523504136111306e-07, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004730178043246269, "step": 2365, "valid_targets_mean": 3674.6, "valid_targets_min": 791 }, { "epoch": 7.523809523809524, "grad_norm": 0.11155461084205073, "learning_rate": 8.92460007551904e-07, "loss": 0.0043, "loss_nan_ranks": 0, "loss_rank_avg": 0.004000429529696703, "step": 2370, "valid_targets_mean": 3718.4, "valid_targets_min": 1046 }, { "epoch": 7.5396825396825395, "grad_norm": 0.11157625198691828, "learning_rate": 8.344974719326104e-07, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.004754913039505482, "step": 2375, "valid_targets_mean": 3638.6, "valid_targets_min": 1131 }, { "epoch": 7.555555555555555, "grad_norm": 0.12422499624429512, "learning_rate": 7.784650822542871e-07, "loss": 0.0048, "loss_nan_ranks": 0, "loss_rank_avg": 0.0053131962195038795, "step": 2380, "valid_targets_mean": 3119.1, "valid_targets_min": 835 }, { "epoch": 7.571428571428571, "grad_norm": 0.11431681627624184, "learning_rate": 7.243650382440736e-07, "loss": 0.0054, "loss_nan_ranks": 0, "loss_rank_avg": 0.005361621268093586, "step": 2385, "valid_targets_mean": 3909.5, "valid_targets_min": 741 }, { "epoch": 7.587301587301587, "grad_norm": 0.11004296496426871, "learning_rate": 6.721994637687967e-07, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004509164020419121, "step": 2390, "valid_targets_mean": 3389.0, "valid_targets_min": 984 }, { "epoch": 7.603174603174603, "grad_norm": 0.11863839089336183, "learning_rate": 6.219704067516374e-07, "loss": 0.0048, "loss_nan_ranks": 0, "loss_rank_avg": 0.004457586910575628, "step": 2395, "valid_targets_mean": 3174.5, "valid_targets_min": 655 }, { "epoch": 7.619047619047619, "grad_norm": 0.10878395533070197, "learning_rate": 5.736798390916898e-07, "loss": 0.0045, "loss_nan_ranks": 0, "loss_rank_avg": 0.005195206962525845, "step": 2400, "valid_targets_mean": 2997.7, "valid_targets_min": 331 }, { "epoch": 7.634920634920634, "grad_norm": 0.10609110020177216, "learning_rate": 5.273296565865948e-07, "loss": 0.0051, "loss_nan_ranks": 0, "loss_rank_avg": 0.004106806591153145, "step": 2405, "valid_targets_mean": 3824.8, "valid_targets_min": 586 }, { "epoch": 7.650793650793651, "grad_norm": 0.1089309599479779, "learning_rate": 4.829216788580726e-07, "loss": 0.005, "loss_nan_ranks": 0, "loss_rank_avg": 0.004249934572726488, "step": 2410, "valid_targets_mean": 3445.7, "valid_targets_min": 723 }, { "epoch": 7.666666666666667, "grad_norm": 0.10831204625157581, "learning_rate": 4.404576492805179e-07, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.005328735336661339, "step": 2415, "valid_targets_mean": 3627.3, "valid_targets_min": 1129 }, { "epoch": 7.682539682539683, "grad_norm": 0.09343002059771495, "learning_rate": 3.999392349125386e-07, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004198350943624973, "step": 2420, "valid_targets_mean": 4037.8, "valid_targets_min": 802 }, { "epoch": 7.698412698412699, "grad_norm": 0.11986503661890716, "learning_rate": 3.613680264315189e-07, "loss": 0.0045, "loss_nan_ranks": 0, "loss_rank_avg": 0.0049739317037165165, "step": 2425, "valid_targets_mean": 2951.8, "valid_targets_min": 671 }, { "epoch": 7.714285714285714, "grad_norm": 0.12864393943302033, "learning_rate": 3.247455380711806e-07, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.004533006809651852, "step": 2430, "valid_targets_mean": 3775.6, "valid_targets_min": 965 }, { "epoch": 7.73015873015873, "grad_norm": 0.10330729032751995, "learning_rate": 2.900732075621082e-07, "loss": 0.0048, "loss_nan_ranks": 0, "loss_rank_avg": 0.004784930497407913, "step": 2435, "valid_targets_mean": 3709.4, "valid_targets_min": 1062 }, { "epoch": 7.746031746031746, "grad_norm": 0.10498794605153543, "learning_rate": 2.5735239607534434e-07, "loss": 0.0045, "loss_nan_ranks": 0, "loss_rank_avg": 0.0041520013473927975, "step": 2440, "valid_targets_mean": 3549.4, "valid_targets_min": 728 }, { "epoch": 7.761904761904762, "grad_norm": 0.14249588185715686, "learning_rate": 2.2658438816892112e-07, "loss": 0.0052, "loss_nan_ranks": 0, "loss_rank_avg": 0.006070821080356836, "step": 2445, "valid_targets_mean": 2594.3, "valid_targets_min": 964 }, { "epoch": 7.777777777777778, "grad_norm": 0.11000276713367281, "learning_rate": 1.9777039173746182e-07, "loss": 0.0044, "loss_nan_ranks": 0, "loss_rank_avg": 0.004534910432994366, "step": 2450, "valid_targets_mean": 3489.1, "valid_targets_min": 993 }, { "epoch": 7.7936507936507935, "grad_norm": 0.11795354828909771, "learning_rate": 1.709115379647186e-07, "loss": 0.0044, "loss_nan_ranks": 0, "loss_rank_avg": 0.004670111462473869, "step": 2455, "valid_targets_mean": 2994.7, "valid_targets_min": 1170 }, { "epoch": 7.809523809523809, "grad_norm": 0.11938780754001632, "learning_rate": 1.460088812792082e-07, "loss": 0.0052, "loss_nan_ranks": 0, "loss_rank_avg": 0.0043688188306987286, "step": 2460, "valid_targets_mean": 3415.9, "valid_targets_min": 1507 }, { "epoch": 7.825396825396825, "grad_norm": 0.09803097399786176, "learning_rate": 1.2306339931279499e-07, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004104914143681526, "step": 2465, "valid_targets_mean": 3690.8, "valid_targets_min": 789 }, { "epoch": 7.841269841269841, "grad_norm": 0.1190925478495514, "learning_rate": 1.0207599286229941e-07, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004594314843416214, "step": 2470, "valid_targets_mean": 3170.5, "valid_targets_min": 974 }, { "epoch": 7.857142857142857, "grad_norm": 0.11013026165583094, "learning_rate": 8.304748585417078e-08, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004138472955673933, "step": 2475, "valid_targets_mean": 3476.1, "valid_targets_min": 897 }, { "epoch": 7.8730158730158735, "grad_norm": 0.13041391832556282, "learning_rate": 6.597862531210197e-08, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004992269910871983, "step": 2480, "valid_targets_mean": 3190.1, "valid_targets_min": 810 }, { "epoch": 7.888888888888889, "grad_norm": 0.17400697758449113, "learning_rate": 5.0870081327725194e-08, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.005131562706083059, "step": 2485, "valid_targets_mean": 2452.6, "valid_targets_min": 705 }, { "epoch": 7.904761904761905, "grad_norm": 0.1270973871093473, "learning_rate": 3.7722447034305164e-08, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.004991158843040466, "step": 2490, "valid_targets_mean": 2931.3, "valid_targets_min": 735 }, { "epoch": 7.920634920634921, "grad_norm": 0.15159667114358516, "learning_rate": 2.653623858344667e-08, "loss": 0.0053, "loss_nan_ranks": 0, "loss_rank_avg": 0.00546233681961894, "step": 2495, "valid_targets_mean": 2654.0, "valid_targets_min": 622 }, { "epoch": 7.936507936507937, "grad_norm": 0.11113963222959865, "learning_rate": 1.731189512482745e-08, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.004757697228342295, "step": 2500, "valid_targets_mean": 3359.4, "valid_targets_min": 373 }, { "epoch": 7.9523809523809526, "grad_norm": 0.11728609394092665, "learning_rate": 1.0049778788967513e-08, "loss": 0.0047, "loss_nan_ranks": 0, "loss_rank_avg": 0.005282067693769932, "step": 2505, "valid_targets_mean": 3915.2, "valid_targets_min": 1487 }, { "epoch": 7.968253968253968, "grad_norm": 0.11591915545705475, "learning_rate": 4.750174673018304e-09, "loss": 0.0054, "loss_nan_ranks": 0, "loss_rank_avg": 0.0053293961100280285, "step": 2510, "valid_targets_mean": 3678.9, "valid_targets_min": 1378 }, { "epoch": 7.984126984126984, "grad_norm": 0.1296960881375092, "learning_rate": 1.4132908295549919e-09, "loss": 0.0049, "loss_nan_ranks": 0, "loss_rank_avg": 0.00557930301874876, "step": 2515, "valid_targets_mean": 3071.5, "valid_targets_min": 798 }, { "epoch": 8.0, "grad_norm": 0.10517511047841067, "learning_rate": 3.925825840522812e-11, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.004353777505457401, "step": 2520, "valid_targets_mean": 3003.7, "valid_targets_min": 671 }, { "epoch": 8.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.004353777505457401, "step": 2520, "total_flos": 1257015450664960.0, "train_loss": 0.07835676025835768, "train_runtime": 38657.4963, "train_samples_per_second": 2.085, "train_steps_per_second": 0.065, "valid_targets_mean": 3003.7, "valid_targets_min": 671 } ], "logging_steps": 5, "max_steps": 2520, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1257015450664960.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }