{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 2506, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013986013986013986, "grad_norm": 13.08118363303711, "learning_rate": 6.374501992031873e-07, "loss": 1.0535, "loss_nan_ranks": 0, "loss_rank_avg": 0.5296926498413086, "step": 5, "valid_targets_mean": 4239.0, "valid_targets_min": 3298 }, { "epoch": 0.027972027972027972, "grad_norm": 11.914950011402933, "learning_rate": 1.4342629482071716e-06, "loss": 1.0403, "loss_nan_ranks": 0, "loss_rank_avg": 0.4918077290058136, "step": 10, "valid_targets_mean": 3680.8, "valid_targets_min": 2534 }, { "epoch": 0.04195804195804196, "grad_norm": 8.748664702844085, "learning_rate": 2.2310756972111555e-06, "loss": 1.0073, "loss_nan_ranks": 0, "loss_rank_avg": 0.5401319265365601, "step": 15, "valid_targets_mean": 4520.0, "valid_targets_min": 3358 }, { "epoch": 0.055944055944055944, "grad_norm": 6.578134131681192, "learning_rate": 3.0278884462151397e-06, "loss": 0.9661, "loss_nan_ranks": 0, "loss_rank_avg": 0.5008518695831299, "step": 20, "valid_targets_mean": 4016.0, "valid_targets_min": 3201 }, { "epoch": 0.06993006993006994, "grad_norm": 4.64457041164554, "learning_rate": 3.824701195219123e-06, "loss": 0.9017, "loss_nan_ranks": 0, "loss_rank_avg": 0.43356165289878845, "step": 25, "valid_targets_mean": 4458.5, "valid_targets_min": 2740 }, { "epoch": 0.08391608391608392, "grad_norm": 3.2906721720118743, "learning_rate": 4.621513944223108e-06, "loss": 0.8338, "loss_nan_ranks": 0, "loss_rank_avg": 0.43590834736824036, "step": 30, "valid_targets_mean": 4182.0, "valid_targets_min": 3145 }, { "epoch": 0.0979020979020979, "grad_norm": 2.2213280960302737, "learning_rate": 5.418326693227092e-06, "loss": 0.7852, "loss_nan_ranks": 0, "loss_rank_avg": 0.3904917240142822, "step": 35, "valid_targets_mean": 3757.9, "valid_targets_min": 3232 }, { "epoch": 0.11188811188811189, "grad_norm": 1.517797734360262, "learning_rate": 6.215139442231076e-06, "loss": 0.7225, "loss_nan_ranks": 0, "loss_rank_avg": 0.3458019495010376, "step": 40, "valid_targets_mean": 3985.0, "valid_targets_min": 2319 }, { "epoch": 0.1258741258741259, "grad_norm": 1.3322026291958011, "learning_rate": 7.011952191235061e-06, "loss": 0.686, "loss_nan_ranks": 0, "loss_rank_avg": 0.33076441287994385, "step": 45, "valid_targets_mean": 3783.4, "valid_targets_min": 2961 }, { "epoch": 0.13986013986013987, "grad_norm": 1.1653038557461999, "learning_rate": 7.808764940239044e-06, "loss": 0.6457, "loss_nan_ranks": 0, "loss_rank_avg": 0.320470929145813, "step": 50, "valid_targets_mean": 4048.5, "valid_targets_min": 2644 }, { "epoch": 0.15384615384615385, "grad_norm": 0.9815989525162515, "learning_rate": 8.605577689243028e-06, "loss": 0.6121, "loss_nan_ranks": 0, "loss_rank_avg": 0.2778295576572418, "step": 55, "valid_targets_mean": 3791.1, "valid_targets_min": 2987 }, { "epoch": 0.16783216783216784, "grad_norm": 0.8997344993173911, "learning_rate": 9.402390438247013e-06, "loss": 0.596, "loss_nan_ranks": 0, "loss_rank_avg": 0.28497567772865295, "step": 60, "valid_targets_mean": 4066.9, "valid_targets_min": 3234 }, { "epoch": 0.18181818181818182, "grad_norm": 0.8616110554113583, "learning_rate": 1.0199203187250997e-05, "loss": 0.5676, "loss_nan_ranks": 0, "loss_rank_avg": 0.248211070895195, "step": 65, "valid_targets_mean": 3500.8, "valid_targets_min": 2803 }, { "epoch": 0.1958041958041958, "grad_norm": 0.7612355436071168, "learning_rate": 1.099601593625498e-05, "loss": 0.5405, "loss_nan_ranks": 0, "loss_rank_avg": 0.26128190755844116, "step": 70, "valid_targets_mean": 3998.1, "valid_targets_min": 2923 }, { "epoch": 0.2097902097902098, "grad_norm": 0.7653586307537427, "learning_rate": 1.1792828685258967e-05, "loss": 0.5166, "loss_nan_ranks": 0, "loss_rank_avg": 0.24031201004981995, "step": 75, "valid_targets_mean": 3749.9, "valid_targets_min": 3074 }, { "epoch": 0.22377622377622378, "grad_norm": 0.7206061108519799, "learning_rate": 1.258964143426295e-05, "loss": 0.5094, "loss_nan_ranks": 0, "loss_rank_avg": 0.23275572061538696, "step": 80, "valid_targets_mean": 3978.4, "valid_targets_min": 3212 }, { "epoch": 0.23776223776223776, "grad_norm": 0.7248405909913119, "learning_rate": 1.3386454183266932e-05, "loss": 0.4822, "loss_nan_ranks": 0, "loss_rank_avg": 0.24045932292938232, "step": 85, "valid_targets_mean": 3960.0, "valid_targets_min": 3165 }, { "epoch": 0.2517482517482518, "grad_norm": 0.642102398369385, "learning_rate": 1.4183266932270919e-05, "loss": 0.4835, "loss_nan_ranks": 0, "loss_rank_avg": 0.24778877198696136, "step": 90, "valid_targets_mean": 4437.4, "valid_targets_min": 3689 }, { "epoch": 0.26573426573426573, "grad_norm": 0.6779030700031962, "learning_rate": 1.4980079681274901e-05, "loss": 0.4679, "loss_nan_ranks": 0, "loss_rank_avg": 0.24229872226715088, "step": 95, "valid_targets_mean": 4241.8, "valid_targets_min": 3712 }, { "epoch": 0.27972027972027974, "grad_norm": 0.6919316585268371, "learning_rate": 1.5776892430278886e-05, "loss": 0.4653, "loss_nan_ranks": 0, "loss_rank_avg": 0.23130342364311218, "step": 100, "valid_targets_mean": 4174.5, "valid_targets_min": 3400 }, { "epoch": 0.2937062937062937, "grad_norm": 0.6430352865799613, "learning_rate": 1.6573705179282872e-05, "loss": 0.4423, "loss_nan_ranks": 0, "loss_rank_avg": 0.22104957699775696, "step": 105, "valid_targets_mean": 3760.4, "valid_targets_min": 992 }, { "epoch": 0.3076923076923077, "grad_norm": 0.6362542473505411, "learning_rate": 1.7370517928286855e-05, "loss": 0.4317, "loss_nan_ranks": 0, "loss_rank_avg": 0.20805756747722626, "step": 110, "valid_targets_mean": 4144.9, "valid_targets_min": 3061 }, { "epoch": 0.32167832167832167, "grad_norm": 0.6737054371286955, "learning_rate": 1.8167330677290838e-05, "loss": 0.4364, "loss_nan_ranks": 0, "loss_rank_avg": 0.22847774624824524, "step": 115, "valid_targets_mean": 3954.5, "valid_targets_min": 2889 }, { "epoch": 0.3356643356643357, "grad_norm": 0.7266322768815212, "learning_rate": 1.8964143426294824e-05, "loss": 0.4368, "loss_nan_ranks": 0, "loss_rank_avg": 0.20634719729423523, "step": 120, "valid_targets_mean": 3737.1, "valid_targets_min": 3060 }, { "epoch": 0.34965034965034963, "grad_norm": 0.6829819207654488, "learning_rate": 1.9760956175298807e-05, "loss": 0.4183, "loss_nan_ranks": 0, "loss_rank_avg": 0.19363188743591309, "step": 125, "valid_targets_mean": 3675.4, "valid_targets_min": 2831 }, { "epoch": 0.36363636363636365, "grad_norm": 0.7250977015774923, "learning_rate": 2.055776892430279e-05, "loss": 0.4161, "loss_nan_ranks": 0, "loss_rank_avg": 0.18788361549377441, "step": 130, "valid_targets_mean": 3879.5, "valid_targets_min": 3277 }, { "epoch": 0.3776223776223776, "grad_norm": 0.7210165535643743, "learning_rate": 2.1354581673306773e-05, "loss": 0.4174, "loss_nan_ranks": 0, "loss_rank_avg": 0.17633922398090363, "step": 135, "valid_targets_mean": 3757.0, "valid_targets_min": 3110 }, { "epoch": 0.3916083916083916, "grad_norm": 0.6747533826387861, "learning_rate": 2.2151394422310756e-05, "loss": 0.4071, "loss_nan_ranks": 0, "loss_rank_avg": 0.20017263293266296, "step": 140, "valid_targets_mean": 4174.1, "valid_targets_min": 2672 }, { "epoch": 0.40559440559440557, "grad_norm": 0.7402646690744179, "learning_rate": 2.2948207171314745e-05, "loss": 0.4142, "loss_nan_ranks": 0, "loss_rank_avg": 0.19815121591091156, "step": 145, "valid_targets_mean": 3800.5, "valid_targets_min": 2344 }, { "epoch": 0.4195804195804196, "grad_norm": 0.6882905212492071, "learning_rate": 2.3745019920318728e-05, "loss": 0.4044, "loss_nan_ranks": 0, "loss_rank_avg": 0.23232436180114746, "step": 150, "valid_targets_mean": 4320.9, "valid_targets_min": 3029 }, { "epoch": 0.43356643356643354, "grad_norm": 0.6867230259268453, "learning_rate": 2.454183266932271e-05, "loss": 0.3983, "loss_nan_ranks": 0, "loss_rank_avg": 0.21659091114997864, "step": 155, "valid_targets_mean": 4310.1, "valid_targets_min": 3283 }, { "epoch": 0.44755244755244755, "grad_norm": 0.6859829511382938, "learning_rate": 2.5338645418326694e-05, "loss": 0.3926, "loss_nan_ranks": 0, "loss_rank_avg": 0.18998664617538452, "step": 160, "valid_targets_mean": 3856.8, "valid_targets_min": 2810 }, { "epoch": 0.46153846153846156, "grad_norm": 0.6986164805285058, "learning_rate": 2.6135458167330677e-05, "loss": 0.3901, "loss_nan_ranks": 0, "loss_rank_avg": 0.18579256534576416, "step": 165, "valid_targets_mean": 4134.0, "valid_targets_min": 3603 }, { "epoch": 0.4755244755244755, "grad_norm": 0.6783298773224011, "learning_rate": 2.6932270916334663e-05, "loss": 0.3933, "loss_nan_ranks": 0, "loss_rank_avg": 0.184840589761734, "step": 170, "valid_targets_mean": 4108.1, "valid_targets_min": 3187 }, { "epoch": 0.48951048951048953, "grad_norm": 0.7488260178884795, "learning_rate": 2.772908366533865e-05, "loss": 0.3982, "loss_nan_ranks": 0, "loss_rank_avg": 0.19532346725463867, "step": 175, "valid_targets_mean": 4101.9, "valid_targets_min": 2898 }, { "epoch": 0.5034965034965035, "grad_norm": 0.6861710567678492, "learning_rate": 2.8525896414342632e-05, "loss": 0.3773, "loss_nan_ranks": 0, "loss_rank_avg": 0.1921890676021576, "step": 180, "valid_targets_mean": 4188.2, "valid_targets_min": 3493 }, { "epoch": 0.5174825174825175, "grad_norm": 0.6691851908083688, "learning_rate": 2.9322709163346615e-05, "loss": 0.3861, "loss_nan_ranks": 0, "loss_rank_avg": 0.18755507469177246, "step": 185, "valid_targets_mean": 3822.2, "valid_targets_min": 3149 }, { "epoch": 0.5314685314685315, "grad_norm": 0.7064984838573337, "learning_rate": 3.01195219123506e-05, "loss": 0.3855, "loss_nan_ranks": 0, "loss_rank_avg": 0.20090565085411072, "step": 190, "valid_targets_mean": 4206.1, "valid_targets_min": 2981 }, { "epoch": 0.5454545454545454, "grad_norm": 0.7715107860489125, "learning_rate": 3.0916334661354584e-05, "loss": 0.3825, "loss_nan_ranks": 0, "loss_rank_avg": 0.20451734960079193, "step": 195, "valid_targets_mean": 4249.6, "valid_targets_min": 3523 }, { "epoch": 0.5594405594405595, "grad_norm": 0.7242629319106181, "learning_rate": 3.1713147410358564e-05, "loss": 0.3744, "loss_nan_ranks": 0, "loss_rank_avg": 0.1759917438030243, "step": 200, "valid_targets_mean": 3672.8, "valid_targets_min": 1933 }, { "epoch": 0.5734265734265734, "grad_norm": 0.7583819688407337, "learning_rate": 3.250996015936256e-05, "loss": 0.3894, "loss_nan_ranks": 0, "loss_rank_avg": 0.19551926851272583, "step": 205, "valid_targets_mean": 4001.5, "valid_targets_min": 3657 }, { "epoch": 0.5874125874125874, "grad_norm": 0.7683757040502193, "learning_rate": 3.3306772908366536e-05, "loss": 0.3757, "loss_nan_ranks": 0, "loss_rank_avg": 0.18955138325691223, "step": 210, "valid_targets_mean": 4289.0, "valid_targets_min": 3312 }, { "epoch": 0.6013986013986014, "grad_norm": 0.7244739994581784, "learning_rate": 3.410358565737052e-05, "loss": 0.371, "loss_nan_ranks": 0, "loss_rank_avg": 0.17240570485591888, "step": 215, "valid_targets_mean": 3773.9, "valid_targets_min": 3243 }, { "epoch": 0.6153846153846154, "grad_norm": 0.7405062652243871, "learning_rate": 3.49003984063745e-05, "loss": 0.3841, "loss_nan_ranks": 0, "loss_rank_avg": 0.17839646339416504, "step": 220, "valid_targets_mean": 3992.4, "valid_targets_min": 2738 }, { "epoch": 0.6293706293706294, "grad_norm": 0.7087430013485309, "learning_rate": 3.569721115537849e-05, "loss": 0.3705, "loss_nan_ranks": 0, "loss_rank_avg": 0.2037450224161148, "step": 225, "valid_targets_mean": 4216.2, "valid_targets_min": 3731 }, { "epoch": 0.6433566433566433, "grad_norm": 0.7332815327859702, "learning_rate": 3.6494023904382475e-05, "loss": 0.3643, "loss_nan_ranks": 0, "loss_rank_avg": 0.18066954612731934, "step": 230, "valid_targets_mean": 4160.8, "valid_targets_min": 3559 }, { "epoch": 0.6573426573426573, "grad_norm": 0.6970517350021399, "learning_rate": 3.7290836653386454e-05, "loss": 0.3619, "loss_nan_ranks": 0, "loss_rank_avg": 0.17745010554790497, "step": 235, "valid_targets_mean": 4144.8, "valid_targets_min": 3663 }, { "epoch": 0.6713286713286714, "grad_norm": 0.7451282096768427, "learning_rate": 3.808764940239044e-05, "loss": 0.3613, "loss_nan_ranks": 0, "loss_rank_avg": 0.18617768585681915, "step": 240, "valid_targets_mean": 4251.4, "valid_targets_min": 3508 }, { "epoch": 0.6853146853146853, "grad_norm": 0.7068045251723751, "learning_rate": 3.8884462151394427e-05, "loss": 0.356, "loss_nan_ranks": 0, "loss_rank_avg": 0.1815338283777237, "step": 245, "valid_targets_mean": 4038.9, "valid_targets_min": 3101 }, { "epoch": 0.6993006993006993, "grad_norm": 0.7858813502425421, "learning_rate": 3.968127490039841e-05, "loss": 0.3674, "loss_nan_ranks": 0, "loss_rank_avg": 0.19982239603996277, "step": 250, "valid_targets_mean": 4397.8, "valid_targets_min": 3461 }, { "epoch": 0.7132867132867133, "grad_norm": 0.7176304870172697, "learning_rate": 3.999982531784936e-05, "loss": 0.3586, "loss_nan_ranks": 0, "loss_rank_avg": 0.166753888130188, "step": 255, "valid_targets_mean": 3866.0, "valid_targets_min": 3096 }, { "epoch": 0.7272727272727273, "grad_norm": 0.6347194324198923, "learning_rate": 3.9998757826867935e-05, "loss": 0.3638, "loss_nan_ranks": 0, "loss_rank_avg": 0.17435021698474884, "step": 260, "valid_targets_mean": 3974.8, "valid_targets_min": 2832 }, { "epoch": 0.7412587412587412, "grad_norm": 0.739929776169395, "learning_rate": 3.9996719942279066e-05, "loss": 0.3542, "loss_nan_ranks": 0, "loss_rank_avg": 0.16606023907661438, "step": 265, "valid_targets_mean": 3664.4, "valid_targets_min": 2917 }, { "epoch": 0.7552447552447552, "grad_norm": 0.755272340890183, "learning_rate": 3.999371176296642e-05, "loss": 0.3611, "loss_nan_ranks": 0, "loss_rank_avg": 0.17609718441963196, "step": 270, "valid_targets_mean": 3968.5, "valid_targets_min": 2829 }, { "epoch": 0.7692307692307693, "grad_norm": 0.7497354502032663, "learning_rate": 3.998973343489495e-05, "loss": 0.3623, "loss_nan_ranks": 0, "loss_rank_avg": 0.19211605191230774, "step": 275, "valid_targets_mean": 4043.2, "valid_targets_min": 3490 }, { "epoch": 0.7832167832167832, "grad_norm": 0.7343575604706798, "learning_rate": 3.998478515110385e-05, "loss": 0.352, "loss_nan_ranks": 0, "loss_rank_avg": 0.18720020353794098, "step": 280, "valid_targets_mean": 4071.0, "valid_targets_min": 3224 }, { "epoch": 0.7972027972027972, "grad_norm": 0.68409074012476, "learning_rate": 3.99788671516972e-05, "loss": 0.3534, "loss_nan_ranks": 0, "loss_rank_avg": 0.1671791970729828, "step": 285, "valid_targets_mean": 3782.9, "valid_targets_min": 2959 }, { "epoch": 0.8111888111888111, "grad_norm": 0.7792445633536357, "learning_rate": 3.9971979723832254e-05, "loss": 0.3558, "loss_nan_ranks": 0, "loss_rank_avg": 0.15965257585048676, "step": 290, "valid_targets_mean": 3967.0, "valid_targets_min": 3180 }, { "epoch": 0.8251748251748252, "grad_norm": 0.6610159058798779, "learning_rate": 3.9964123201705584e-05, "loss": 0.3524, "loss_nan_ranks": 0, "loss_rank_avg": 0.18202170729637146, "step": 295, "valid_targets_mean": 4038.8, "valid_targets_min": 3244 }, { "epoch": 0.8391608391608392, "grad_norm": 0.6972532901113331, "learning_rate": 3.995529796653679e-05, "loss": 0.3553, "loss_nan_ranks": 0, "loss_rank_avg": 0.1697453409433365, "step": 300, "valid_targets_mean": 3957.4, "valid_targets_min": 3113 }, { "epoch": 0.8531468531468531, "grad_norm": 0.7127394676926205, "learning_rate": 3.9945504446550074e-05, "loss": 0.3641, "loss_nan_ranks": 0, "loss_rank_avg": 0.1838894933462143, "step": 305, "valid_targets_mean": 4152.6, "valid_targets_min": 3360 }, { "epoch": 0.8671328671328671, "grad_norm": 0.6808745132127945, "learning_rate": 3.99347431169534e-05, "loss": 0.3461, "loss_nan_ranks": 0, "loss_rank_avg": 0.18923607468605042, "step": 310, "valid_targets_mean": 4593.0, "valid_targets_min": 3545 }, { "epoch": 0.8811188811188811, "grad_norm": 0.687007172307545, "learning_rate": 3.992301449991548e-05, "loss": 0.3673, "loss_nan_ranks": 0, "loss_rank_avg": 0.16615572571754456, "step": 315, "valid_targets_mean": 3800.6, "valid_targets_min": 3001 }, { "epoch": 0.8951048951048951, "grad_norm": 0.6816977872307991, "learning_rate": 3.991031916454041e-05, "loss": 0.3366, "loss_nan_ranks": 0, "loss_rank_avg": 0.16688376665115356, "step": 320, "valid_targets_mean": 3923.0, "valid_targets_min": 2836 }, { "epoch": 0.9090909090909091, "grad_norm": 0.729891603562499, "learning_rate": 3.989665772684006e-05, "loss": 0.3495, "loss_nan_ranks": 0, "loss_rank_avg": 0.19064228236675262, "step": 325, "valid_targets_mean": 4230.6, "valid_targets_min": 3136 }, { "epoch": 0.9230769230769231, "grad_norm": 0.6676103880290066, "learning_rate": 3.988203084970418e-05, "loss": 0.3434, "loss_nan_ranks": 0, "loss_rank_avg": 0.18094299733638763, "step": 330, "valid_targets_mean": 3959.9, "valid_targets_min": 2931 }, { "epoch": 0.9370629370629371, "grad_norm": 0.7376874440458068, "learning_rate": 3.9866439242868275e-05, "loss": 0.3442, "loss_nan_ranks": 0, "loss_rank_avg": 0.17640212178230286, "step": 335, "valid_targets_mean": 4032.8, "valid_targets_min": 3145 }, { "epoch": 0.951048951048951, "grad_norm": 0.66910614530988, "learning_rate": 3.98498836628791e-05, "loss": 0.3365, "loss_nan_ranks": 0, "loss_rank_avg": 0.1491106152534485, "step": 340, "valid_targets_mean": 4081.6, "valid_targets_min": 3248 }, { "epoch": 0.965034965034965, "grad_norm": 0.7616870298472965, "learning_rate": 3.983236491305801e-05, "loss": 0.3389, "loss_nan_ranks": 0, "loss_rank_avg": 0.17968814074993134, "step": 345, "valid_targets_mean": 4023.8, "valid_targets_min": 2892 }, { "epoch": 0.9790209790209791, "grad_norm": 0.632714729243677, "learning_rate": 3.981388384346193e-05, "loss": 0.3478, "loss_nan_ranks": 0, "loss_rank_avg": 0.1566416621208191, "step": 350, "valid_targets_mean": 3693.1, "valid_targets_min": 2343 }, { "epoch": 0.993006993006993, "grad_norm": 0.6609906108291348, "learning_rate": 3.979444135084215e-05, "loss": 0.3346, "loss_nan_ranks": 0, "loss_rank_avg": 0.15235170722007751, "step": 355, "valid_targets_mean": 3772.6, "valid_targets_min": 2905 }, { "epoch": 1.0055944055944055, "grad_norm": 0.6785452885452187, "learning_rate": 3.9774038378600796e-05, "loss": 0.3392, "loss_nan_ranks": 0, "loss_rank_avg": 0.16667693853378296, "step": 360, "valid_targets_mean": 3899.9, "valid_targets_min": 3402 }, { "epoch": 1.0195804195804197, "grad_norm": 0.7216286836712269, "learning_rate": 3.975267591674504e-05, "loss": 0.3389, "loss_nan_ranks": 0, "loss_rank_avg": 0.16874702274799347, "step": 365, "valid_targets_mean": 3837.2, "valid_targets_min": 2694 }, { "epoch": 1.0335664335664336, "grad_norm": 0.7538286002527794, "learning_rate": 3.973035500183909e-05, "loss": 0.3425, "loss_nan_ranks": 0, "loss_rank_avg": 0.1855105757713318, "step": 370, "valid_targets_mean": 4314.4, "valid_targets_min": 3164 }, { "epoch": 1.0475524475524476, "grad_norm": 0.6855638013823442, "learning_rate": 3.9707076716953866e-05, "loss": 0.3303, "loss_nan_ranks": 0, "loss_rank_avg": 0.17090822756290436, "step": 375, "valid_targets_mean": 3918.8, "valid_targets_min": 3060 }, { "epoch": 1.0615384615384615, "grad_norm": 0.6406278133349705, "learning_rate": 3.9682842191614466e-05, "loss": 0.3413, "loss_nan_ranks": 0, "loss_rank_avg": 0.18505993485450745, "step": 380, "valid_targets_mean": 4170.4, "valid_targets_min": 2313 }, { "epoch": 1.0755244755244755, "grad_norm": 0.624336684500911, "learning_rate": 3.965765260174534e-05, "loss": 0.3344, "loss_nan_ranks": 0, "loss_rank_avg": 0.14859825372695923, "step": 385, "valid_targets_mean": 3817.0, "valid_targets_min": 2413 }, { "epoch": 1.0895104895104895, "grad_norm": 0.6815493498131907, "learning_rate": 3.9631509169613265e-05, "loss": 0.3403, "loss_nan_ranks": 0, "loss_rank_avg": 0.1760796457529068, "step": 390, "valid_targets_mean": 4037.1, "valid_targets_min": 3431 }, { "epoch": 1.1034965034965034, "grad_norm": 0.6427970727384303, "learning_rate": 3.9604413163767985e-05, "loss": 0.3338, "loss_nan_ranks": 0, "loss_rank_avg": 0.14617708325386047, "step": 395, "valid_targets_mean": 3709.0, "valid_targets_min": 2929 }, { "epoch": 1.1174825174825176, "grad_norm": 0.6901088697837418, "learning_rate": 3.957636589898072e-05, "loss": 0.3249, "loss_nan_ranks": 0, "loss_rank_avg": 0.1801576465368271, "step": 400, "valid_targets_mean": 4472.5, "valid_targets_min": 3444 }, { "epoch": 1.1314685314685315, "grad_norm": 0.7016517054603091, "learning_rate": 3.95473687361803e-05, "loss": 0.327, "loss_nan_ranks": 0, "loss_rank_avg": 0.16067080199718475, "step": 405, "valid_targets_mean": 3830.8, "valid_targets_min": 3285 }, { "epoch": 1.1454545454545455, "grad_norm": 0.663970593232612, "learning_rate": 3.951742308238719e-05, "loss": 0.3364, "loss_nan_ranks": 0, "loss_rank_avg": 0.1717248409986496, "step": 410, "valid_targets_mean": 4072.2, "valid_targets_min": 3187 }, { "epoch": 1.1594405594405595, "grad_norm": 0.6484212894219356, "learning_rate": 3.948653039064519e-05, "loss": 0.3255, "loss_nan_ranks": 0, "loss_rank_avg": 0.1626005917787552, "step": 415, "valid_targets_mean": 3676.9, "valid_targets_min": 3259 }, { "epoch": 1.1734265734265734, "grad_norm": 0.6922369837367284, "learning_rate": 3.9454692159950935e-05, "loss": 0.3275, "loss_nan_ranks": 0, "loss_rank_avg": 0.16085588932037354, "step": 420, "valid_targets_mean": 4151.0, "valid_targets_min": 3364 }, { "epoch": 1.1874125874125874, "grad_norm": 0.6510823882497541, "learning_rate": 3.9421909935181146e-05, "loss": 0.3274, "loss_nan_ranks": 0, "loss_rank_avg": 0.19022603332996368, "step": 425, "valid_targets_mean": 4349.9, "valid_targets_min": 3851 }, { "epoch": 1.2013986013986013, "grad_norm": 0.6443695433649274, "learning_rate": 3.938818530701768e-05, "loss": 0.3322, "loss_nan_ranks": 0, "loss_rank_avg": 0.17442061007022858, "step": 430, "valid_targets_mean": 4019.0, "valid_targets_min": 3361 }, { "epoch": 1.2153846153846155, "grad_norm": 0.6810759757069667, "learning_rate": 3.935351991187035e-05, "loss": 0.3253, "loss_nan_ranks": 0, "loss_rank_avg": 0.16698354482650757, "step": 435, "valid_targets_mean": 4215.0, "valid_targets_min": 3192 }, { "epoch": 1.2293706293706295, "grad_norm": 0.6464880936746293, "learning_rate": 3.9317915431797535e-05, "loss": 0.3276, "loss_nan_ranks": 0, "loss_rank_avg": 0.1752656102180481, "step": 440, "valid_targets_mean": 4225.0, "valid_targets_min": 2949 }, { "epoch": 1.2433566433566434, "grad_norm": 0.7740804090329195, "learning_rate": 3.928137359442452e-05, "loss": 0.3276, "loss_nan_ranks": 0, "loss_rank_avg": 0.1801951825618744, "step": 445, "valid_targets_mean": 4177.0, "valid_targets_min": 2728 }, { "epoch": 1.2573426573426574, "grad_norm": 0.6433269129380625, "learning_rate": 3.924389617285969e-05, "loss": 0.3328, "loss_nan_ranks": 0, "loss_rank_avg": 0.14021332561969757, "step": 450, "valid_targets_mean": 3671.8, "valid_targets_min": 2837 }, { "epoch": 1.2713286713286713, "grad_norm": 0.6724022246014172, "learning_rate": 3.920548498560852e-05, "loss": 0.3278, "loss_nan_ranks": 0, "loss_rank_avg": 0.1560121476650238, "step": 455, "valid_targets_mean": 3616.0, "valid_targets_min": 2346 }, { "epoch": 1.2853146853146853, "grad_norm": 0.7049576278984316, "learning_rate": 3.9166141896485295e-05, "loss": 0.3256, "loss_nan_ranks": 0, "loss_rank_avg": 0.16156238317489624, "step": 460, "valid_targets_mean": 4018.1, "valid_targets_min": 3278 }, { "epoch": 1.2993006993006992, "grad_norm": 0.6610441492885364, "learning_rate": 3.912586881452268e-05, "loss": 0.319, "loss_nan_ranks": 0, "loss_rank_avg": 0.16865435242652893, "step": 465, "valid_targets_mean": 4217.9, "valid_targets_min": 3455 }, { "epoch": 1.3132867132867134, "grad_norm": 0.6857457459046699, "learning_rate": 3.9084667693879116e-05, "loss": 0.3269, "loss_nan_ranks": 0, "loss_rank_avg": 0.1624636948108673, "step": 470, "valid_targets_mean": 4248.5, "valid_targets_min": 3029 }, { "epoch": 1.3272727272727272, "grad_norm": 0.6748828329687094, "learning_rate": 3.904254053374398e-05, "loss": 0.3263, "loss_nan_ranks": 0, "loss_rank_avg": 0.1654050052165985, "step": 475, "valid_targets_mean": 4087.2, "valid_targets_min": 3125 }, { "epoch": 1.3412587412587413, "grad_norm": 0.6130205692601665, "learning_rate": 3.899948937824058e-05, "loss": 0.3255, "loss_nan_ranks": 0, "loss_rank_avg": 0.1577146351337433, "step": 480, "valid_targets_mean": 4006.1, "valid_targets_min": 3390 }, { "epoch": 1.3552447552447553, "grad_norm": 0.6849941983340869, "learning_rate": 3.895551631632694e-05, "loss": 0.3298, "loss_nan_ranks": 0, "loss_rank_avg": 0.17559069395065308, "step": 485, "valid_targets_mean": 4253.0, "valid_targets_min": 3799 }, { "epoch": 1.3692307692307693, "grad_norm": 0.6672737622478275, "learning_rate": 3.8910623481694514e-05, "loss": 0.3334, "loss_nan_ranks": 0, "loss_rank_avg": 0.15144488215446472, "step": 490, "valid_targets_mean": 3746.4, "valid_targets_min": 2415 }, { "epoch": 1.3832167832167832, "grad_norm": 0.6386020019411028, "learning_rate": 3.886481305266456e-05, "loss": 0.3249, "loss_nan_ranks": 0, "loss_rank_avg": 0.16336670517921448, "step": 495, "valid_targets_mean": 4049.5, "valid_targets_min": 3218 }, { "epoch": 1.3972027972027972, "grad_norm": 0.6442787344362436, "learning_rate": 3.881808725208253e-05, "loss": 0.33, "loss_nan_ranks": 0, "loss_rank_avg": 0.17428100109100342, "step": 500, "valid_targets_mean": 4064.1, "valid_targets_min": 3023 }, { "epoch": 1.4111888111888111, "grad_norm": 0.6125443310145898, "learning_rate": 3.8770448347210144e-05, "loss": 0.3352, "loss_nan_ranks": 0, "loss_rank_avg": 0.1777600347995758, "step": 505, "valid_targets_mean": 4344.0, "valid_targets_min": 3361 }, { "epoch": 1.425174825174825, "grad_norm": 0.6329017642332787, "learning_rate": 3.87218986496154e-05, "loss": 0.3196, "loss_nan_ranks": 0, "loss_rank_avg": 0.18501895666122437, "step": 510, "valid_targets_mean": 4310.1, "valid_targets_min": 3364 }, { "epoch": 1.4391608391608393, "grad_norm": 0.666727217355154, "learning_rate": 3.867244051506042e-05, "loss": 0.3244, "loss_nan_ranks": 0, "loss_rank_avg": 0.1838594377040863, "step": 515, "valid_targets_mean": 4075.8, "valid_targets_min": 2931 }, { "epoch": 1.4531468531468532, "grad_norm": 0.6441646180577535, "learning_rate": 3.862207634338715e-05, "loss": 0.3285, "loss_nan_ranks": 0, "loss_rank_avg": 0.15423941612243652, "step": 520, "valid_targets_mean": 3791.9, "valid_targets_min": 3202 }, { "epoch": 1.4671328671328672, "grad_norm": 0.6668894524119863, "learning_rate": 3.857080857840087e-05, "loss": 0.3234, "loss_nan_ranks": 0, "loss_rank_avg": 0.1552126258611679, "step": 525, "valid_targets_mean": 3679.8, "valid_targets_min": 2938 }, { "epoch": 1.4811188811188811, "grad_norm": 0.6590831356215007, "learning_rate": 3.851863970775166e-05, "loss": 0.3267, "loss_nan_ranks": 0, "loss_rank_avg": 0.16087287664413452, "step": 530, "valid_targets_mean": 3947.5, "valid_targets_min": 2770 }, { "epoch": 1.495104895104895, "grad_norm": 0.6287181845606609, "learning_rate": 3.846557226281367e-05, "loss": 0.3293, "loss_nan_ranks": 0, "loss_rank_avg": 0.15328553318977356, "step": 535, "valid_targets_mean": 3824.4, "valid_targets_min": 2878 }, { "epoch": 1.509090909090909, "grad_norm": 0.633987975154039, "learning_rate": 3.84116088185623e-05, "loss": 0.3271, "loss_nan_ranks": 0, "loss_rank_avg": 0.17237834632396698, "step": 540, "valid_targets_mean": 4145.6, "valid_targets_min": 3694 }, { "epoch": 1.523076923076923, "grad_norm": 0.616935636790922, "learning_rate": 3.835675199344923e-05, "loss": 0.3202, "loss_nan_ranks": 0, "loss_rank_avg": 0.17144149541854858, "step": 545, "valid_targets_mean": 4156.9, "valid_targets_min": 2985 }, { "epoch": 1.5370629370629372, "grad_norm": 0.6334911296670876, "learning_rate": 3.830100444927542e-05, "loss": 0.3184, "loss_nan_ranks": 0, "loss_rank_avg": 0.15144981443881989, "step": 550, "valid_targets_mean": 3769.9, "valid_targets_min": 3100 }, { "epoch": 1.551048951048951, "grad_norm": 0.588231502791246, "learning_rate": 3.8244368891061884e-05, "loss": 0.3118, "loss_nan_ranks": 0, "loss_rank_avg": 0.16937857866287231, "step": 555, "valid_targets_mean": 4288.6, "valid_targets_min": 3352 }, { "epoch": 1.565034965034965, "grad_norm": 0.5853188773467621, "learning_rate": 3.81868480669185e-05, "loss": 0.3229, "loss_nan_ranks": 0, "loss_rank_avg": 0.14558354020118713, "step": 560, "valid_targets_mean": 4037.2, "valid_targets_min": 3164 }, { "epoch": 1.579020979020979, "grad_norm": 0.6125020767553089, "learning_rate": 3.812844476791061e-05, "loss": 0.3255, "loss_nan_ranks": 0, "loss_rank_avg": 0.17109696567058563, "step": 565, "valid_targets_mean": 4062.5, "valid_targets_min": 3222 }, { "epoch": 1.593006993006993, "grad_norm": 0.6151869304117249, "learning_rate": 3.8069161827923624e-05, "loss": 0.3258, "loss_nan_ranks": 0, "loss_rank_avg": 0.17402294278144836, "step": 570, "valid_targets_mean": 4408.9, "valid_targets_min": 3318 }, { "epoch": 1.606993006993007, "grad_norm": 0.6266101504112581, "learning_rate": 3.80090021235255e-05, "loss": 0.3301, "loss_nan_ranks": 0, "loss_rank_avg": 0.17710405588150024, "step": 575, "valid_targets_mean": 3975.9, "valid_targets_min": 2527 }, { "epoch": 1.620979020979021, "grad_norm": 0.6836774754436181, "learning_rate": 3.794796857382717e-05, "loss": 0.3256, "loss_nan_ranks": 0, "loss_rank_avg": 0.17877398431301117, "step": 580, "valid_targets_mean": 4186.1, "valid_targets_min": 3490 }, { "epoch": 1.634965034965035, "grad_norm": 0.6161301970228785, "learning_rate": 3.7886064140340896e-05, "loss": 0.3277, "loss_nan_ranks": 0, "loss_rank_avg": 0.16287347674369812, "step": 585, "valid_targets_mean": 3867.0, "valid_targets_min": 2643 }, { "epoch": 1.6489510489510488, "grad_norm": 0.6460775180383357, "learning_rate": 3.782329182683657e-05, "loss": 0.3154, "loss_nan_ranks": 0, "loss_rank_avg": 0.15202784538269043, "step": 590, "valid_targets_mean": 3923.5, "valid_targets_min": 3167 }, { "epoch": 1.662937062937063, "grad_norm": 0.5995808420248436, "learning_rate": 3.775965467919594e-05, "loss": 0.3219, "loss_nan_ranks": 0, "loss_rank_avg": 0.18219563364982605, "step": 595, "valid_targets_mean": 3959.5, "valid_targets_min": 3053 }, { "epoch": 1.676923076923077, "grad_norm": 0.6386284637394193, "learning_rate": 3.769515578526486e-05, "loss": 0.3237, "loss_nan_ranks": 0, "loss_rank_avg": 0.15771865844726562, "step": 600, "valid_targets_mean": 3612.4, "valid_targets_min": 992 }, { "epoch": 1.690909090909091, "grad_norm": 0.6279213610200182, "learning_rate": 3.762979827470343e-05, "loss": 0.3209, "loss_nan_ranks": 0, "loss_rank_avg": 0.14989188313484192, "step": 605, "valid_targets_mean": 3997.4, "valid_targets_min": 2683 }, { "epoch": 1.7048951048951049, "grad_norm": 0.6277447589292829, "learning_rate": 3.756358531883413e-05, "loss": 0.3158, "loss_nan_ranks": 0, "loss_rank_avg": 0.1424318104982376, "step": 610, "valid_targets_mean": 3939.2, "valid_targets_min": 3266 }, { "epoch": 1.7188811188811188, "grad_norm": 0.6035263117336757, "learning_rate": 3.749652013048797e-05, "loss": 0.3248, "loss_nan_ranks": 0, "loss_rank_avg": 0.16539938747882843, "step": 615, "valid_targets_mean": 4345.2, "valid_targets_min": 3593 }, { "epoch": 1.732867132867133, "grad_norm": 0.622614038645928, "learning_rate": 3.742860596384856e-05, "loss": 0.3259, "loss_nan_ranks": 0, "loss_rank_avg": 0.15031281113624573, "step": 620, "valid_targets_mean": 4044.1, "valid_targets_min": 3318 }, { "epoch": 1.7468531468531467, "grad_norm": 0.639965638141473, "learning_rate": 3.735984611429423e-05, "loss": 0.3144, "loss_nan_ranks": 0, "loss_rank_avg": 0.13543468713760376, "step": 625, "valid_targets_mean": 3618.5, "valid_targets_min": 3066 }, { "epoch": 1.760839160839161, "grad_norm": 0.6730183016192768, "learning_rate": 3.7290243918238117e-05, "loss": 0.3227, "loss_nan_ranks": 0, "loss_rank_avg": 0.15004126727581024, "step": 630, "valid_targets_mean": 3738.6, "valid_targets_min": 2904 }, { "epoch": 1.7748251748251749, "grad_norm": 0.6099293222360501, "learning_rate": 3.72198027529663e-05, "loss": 0.3298, "loss_nan_ranks": 0, "loss_rank_avg": 0.1613060086965561, "step": 635, "valid_targets_mean": 4154.8, "valid_targets_min": 3562 }, { "epoch": 1.7888111888111888, "grad_norm": 0.6824934179672323, "learning_rate": 3.714852603647387e-05, "loss": 0.3252, "loss_nan_ranks": 0, "loss_rank_avg": 0.16229523718357086, "step": 640, "valid_targets_mean": 4170.4, "valid_targets_min": 2457 }, { "epoch": 1.8027972027972028, "grad_norm": 0.5866660922114889, "learning_rate": 3.707641722729915e-05, "loss": 0.3218, "loss_nan_ranks": 0, "loss_rank_avg": 0.16064469516277313, "step": 645, "valid_targets_mean": 4250.9, "valid_targets_min": 2766 }, { "epoch": 1.8167832167832167, "grad_norm": 0.6021573321793868, "learning_rate": 3.700347982435583e-05, "loss": 0.3166, "loss_nan_ranks": 0, "loss_rank_avg": 0.16888031363487244, "step": 650, "valid_targets_mean": 3838.9, "valid_targets_min": 2576 }, { "epoch": 1.830769230769231, "grad_norm": 0.6215416842905932, "learning_rate": 3.6929717366763186e-05, "loss": 0.3233, "loss_nan_ranks": 0, "loss_rank_avg": 0.16497664153575897, "step": 655, "valid_targets_mean": 3762.6, "valid_targets_min": 2730 }, { "epoch": 1.8447552447552447, "grad_norm": 0.6395491001752005, "learning_rate": 3.685513343367438e-05, "loss": 0.3106, "loss_nan_ranks": 0, "loss_rank_avg": 0.16028185188770294, "step": 660, "valid_targets_mean": 4035.4, "valid_targets_min": 3159 }, { "epoch": 1.8587412587412588, "grad_norm": 0.6160971784063584, "learning_rate": 3.677973164410278e-05, "loss": 0.3192, "loss_nan_ranks": 0, "loss_rank_avg": 0.16289816796779633, "step": 665, "valid_targets_mean": 3981.8, "valid_targets_min": 2859 }, { "epoch": 1.8727272727272726, "grad_norm": 0.5826891645612171, "learning_rate": 3.6703515656746365e-05, "loss": 0.3164, "loss_nan_ranks": 0, "loss_rank_avg": 0.15475299954414368, "step": 670, "valid_targets_mean": 4015.0, "valid_targets_min": 3141 }, { "epoch": 1.8867132867132868, "grad_norm": 0.5808959508873444, "learning_rate": 3.662648916981015e-05, "loss": 0.3156, "loss_nan_ranks": 0, "loss_rank_avg": 0.1405600607395172, "step": 675, "valid_targets_mean": 3794.0, "valid_targets_min": 2621 }, { "epoch": 1.9006993006993007, "grad_norm": 0.6044157534723815, "learning_rate": 3.654865592082681e-05, "loss": 0.3198, "loss_nan_ranks": 0, "loss_rank_avg": 0.15282100439071655, "step": 680, "valid_targets_mean": 4143.5, "valid_targets_min": 2980 }, { "epoch": 1.9146853146853147, "grad_norm": 0.6392815281243518, "learning_rate": 3.647001968647527e-05, "loss": 0.3144, "loss_nan_ranks": 0, "loss_rank_avg": 0.14917393028736115, "step": 685, "valid_targets_mean": 3979.6, "valid_targets_min": 2717 }, { "epoch": 1.9286713286713286, "grad_norm": 0.6286155160321347, "learning_rate": 3.6390584282397464e-05, "loss": 0.3216, "loss_nan_ranks": 0, "loss_rank_avg": 0.17181769013404846, "step": 690, "valid_targets_mean": 4348.1, "valid_targets_min": 3396 }, { "epoch": 1.9426573426573426, "grad_norm": 0.6298464119641867, "learning_rate": 3.631035356301321e-05, "loss": 0.3267, "loss_nan_ranks": 0, "loss_rank_avg": 0.153742253780365, "step": 695, "valid_targets_mean": 3986.4, "valid_targets_min": 2569 }, { "epoch": 1.9566433566433568, "grad_norm": 0.6339318759105212, "learning_rate": 3.6229331421333155e-05, "loss": 0.317, "loss_nan_ranks": 0, "loss_rank_avg": 0.14185550808906555, "step": 700, "valid_targets_mean": 3847.1, "valid_targets_min": 2589 }, { "epoch": 1.9706293706293705, "grad_norm": 0.6343178715607247, "learning_rate": 3.6147521788769884e-05, "loss": 0.3146, "loss_nan_ranks": 0, "loss_rank_avg": 0.13274836540222168, "step": 705, "valid_targets_mean": 3321.9, "valid_targets_min": 2781 }, { "epoch": 1.9846153846153847, "grad_norm": 0.6110252636645802, "learning_rate": 3.606492863494718e-05, "loss": 0.3136, "loss_nan_ranks": 0, "loss_rank_avg": 0.15173494815826416, "step": 710, "valid_targets_mean": 3732.9, "valid_targets_min": 3067 }, { "epoch": 1.9986013986013986, "grad_norm": 0.5812066041833913, "learning_rate": 3.598155596750736e-05, "loss": 0.3158, "loss_nan_ranks": 0, "loss_rank_avg": 0.16258171200752258, "step": 715, "valid_targets_mean": 3896.4, "valid_targets_min": 2959 }, { "epoch": 2.011188811188811, "grad_norm": 0.5743614871834676, "learning_rate": 3.589740783191688e-05, "loss": 0.3132, "loss_nan_ranks": 0, "loss_rank_avg": 0.14478623867034912, "step": 720, "valid_targets_mean": 3748.2, "valid_targets_min": 2964 }, { "epoch": 2.025174825174825, "grad_norm": 0.595189094441731, "learning_rate": 3.581248831126996e-05, "loss": 0.3026, "loss_nan_ranks": 0, "loss_rank_avg": 0.16492405533790588, "step": 725, "valid_targets_mean": 4154.8, "valid_targets_min": 3265 }, { "epoch": 2.0391608391608393, "grad_norm": 0.6172798210211731, "learning_rate": 3.572680152609053e-05, "loss": 0.3088, "loss_nan_ranks": 0, "loss_rank_avg": 0.1609179973602295, "step": 730, "valid_targets_mean": 4198.1, "valid_targets_min": 3168 }, { "epoch": 2.053146853146853, "grad_norm": 0.5691716424074825, "learning_rate": 3.564035163413225e-05, "loss": 0.3129, "loss_nan_ranks": 0, "loss_rank_avg": 0.16421687602996826, "step": 735, "valid_targets_mean": 4140.0, "valid_targets_min": 3504 }, { "epoch": 2.0671328671328673, "grad_norm": 0.5937600015154407, "learning_rate": 3.555314283017677e-05, "loss": 0.3041, "loss_nan_ranks": 0, "loss_rank_avg": 0.15526092052459717, "step": 740, "valid_targets_mean": 4100.5, "valid_targets_min": 3536 }, { "epoch": 2.081118881118881, "grad_norm": 0.6223941210676391, "learning_rate": 3.546517934583021e-05, "loss": 0.3097, "loss_nan_ranks": 0, "loss_rank_avg": 0.16442987322807312, "step": 745, "valid_targets_mean": 4481.6, "valid_targets_min": 3849 }, { "epoch": 2.095104895104895, "grad_norm": 0.6087190275417665, "learning_rate": 3.5376465449317816e-05, "loss": 0.3057, "loss_nan_ranks": 0, "loss_rank_avg": 0.13107624650001526, "step": 750, "valid_targets_mean": 3853.0, "valid_targets_min": 3042 }, { "epoch": 2.109090909090909, "grad_norm": 0.588449529918364, "learning_rate": 3.5287005445276835e-05, "loss": 0.2998, "loss_nan_ranks": 0, "loss_rank_avg": 0.13993586599826813, "step": 755, "valid_targets_mean": 3786.8, "valid_targets_min": 3101 }, { "epoch": 2.123076923076923, "grad_norm": 0.589351215775367, "learning_rate": 3.5196803674547674e-05, "loss": 0.2969, "loss_nan_ranks": 0, "loss_rank_avg": 0.14328411221504211, "step": 760, "valid_targets_mean": 4038.6, "valid_targets_min": 2740 }, { "epoch": 2.1370629370629373, "grad_norm": 0.8156144735369695, "learning_rate": 3.510586451396326e-05, "loss": 0.3083, "loss_nan_ranks": 0, "loss_rank_avg": 0.1407259702682495, "step": 765, "valid_targets_mean": 3942.2, "valid_targets_min": 3268 }, { "epoch": 2.151048951048951, "grad_norm": 0.6002072237961282, "learning_rate": 3.5014192376136655e-05, "loss": 0.3125, "loss_nan_ranks": 0, "loss_rank_avg": 0.14943557977676392, "step": 770, "valid_targets_mean": 4074.9, "valid_targets_min": 3462 }, { "epoch": 2.165034965034965, "grad_norm": 0.635268094736542, "learning_rate": 3.492179170924696e-05, "loss": 0.3028, "loss_nan_ranks": 0, "loss_rank_avg": 0.16214363276958466, "step": 775, "valid_targets_mean": 4062.0, "valid_targets_min": 3218 }, { "epoch": 2.179020979020979, "grad_norm": 0.5881271243993743, "learning_rate": 3.482866699682347e-05, "loss": 0.3072, "loss_nan_ranks": 0, "loss_rank_avg": 0.15315216779708862, "step": 780, "valid_targets_mean": 3918.9, "valid_targets_min": 2894 }, { "epoch": 2.193006993006993, "grad_norm": 0.6422135086829801, "learning_rate": 3.47348227575281e-05, "loss": 0.3084, "loss_nan_ranks": 0, "loss_rank_avg": 0.1634099781513214, "step": 785, "valid_targets_mean": 4611.0, "valid_targets_min": 3717 }, { "epoch": 2.206993006993007, "grad_norm": 0.5906081895645069, "learning_rate": 3.464026354493617e-05, "loss": 0.3042, "loss_nan_ranks": 0, "loss_rank_avg": 0.14315278828144073, "step": 790, "valid_targets_mean": 3942.5, "valid_targets_min": 3405 }, { "epoch": 2.220979020979021, "grad_norm": 0.7105849807417328, "learning_rate": 3.454499394731543e-05, "loss": 0.3008, "loss_nan_ranks": 0, "loss_rank_avg": 0.13363346457481384, "step": 795, "valid_targets_mean": 3611.0, "valid_targets_min": 2723 }, { "epoch": 2.234965034965035, "grad_norm": 0.6098423245269216, "learning_rate": 3.4449018587403414e-05, "loss": 0.3169, "loss_nan_ranks": 0, "loss_rank_avg": 0.14147688448429108, "step": 800, "valid_targets_mean": 3944.2, "valid_targets_min": 2635 }, { "epoch": 2.248951048951049, "grad_norm": 0.5863096258549487, "learning_rate": 3.435234212218313e-05, "loss": 0.301, "loss_nan_ranks": 0, "loss_rank_avg": 0.13860483467578888, "step": 805, "valid_targets_mean": 3520.5, "valid_targets_min": 2908 }, { "epoch": 2.262937062937063, "grad_norm": 0.598037261266258, "learning_rate": 3.425496924265714e-05, "loss": 0.3069, "loss_nan_ranks": 0, "loss_rank_avg": 0.15808618068695068, "step": 810, "valid_targets_mean": 4074.6, "valid_targets_min": 3061 }, { "epoch": 2.276923076923077, "grad_norm": 0.6073448842298433, "learning_rate": 3.415690467361989e-05, "loss": 0.3061, "loss_nan_ranks": 0, "loss_rank_avg": 0.16971629858016968, "step": 815, "valid_targets_mean": 4395.5, "valid_targets_min": 3602 }, { "epoch": 2.290909090909091, "grad_norm": 0.6548317072525282, "learning_rate": 3.405815317342844e-05, "loss": 0.3064, "loss_nan_ranks": 0, "loss_rank_avg": 0.164471834897995, "step": 820, "valid_targets_mean": 4100.1, "valid_targets_min": 3447 }, { "epoch": 2.3048951048951047, "grad_norm": 0.618089201274623, "learning_rate": 3.395871953377164e-05, "loss": 0.2918, "loss_nan_ranks": 0, "loss_rank_avg": 0.1507072150707245, "step": 825, "valid_targets_mean": 4090.2, "valid_targets_min": 2831 }, { "epoch": 2.318881118881119, "grad_norm": 0.6031388487536505, "learning_rate": 3.3858608579437556e-05, "loss": 0.3025, "loss_nan_ranks": 0, "loss_rank_avg": 0.13351577520370483, "step": 830, "valid_targets_mean": 3678.9, "valid_targets_min": 2680 }, { "epoch": 2.3328671328671327, "grad_norm": 0.6045729601671705, "learning_rate": 3.3757825168079396e-05, "loss": 0.3034, "loss_nan_ranks": 0, "loss_rank_avg": 0.15121564269065857, "step": 835, "valid_targets_mean": 3937.5, "valid_targets_min": 2920 }, { "epoch": 2.346853146853147, "grad_norm": 0.5731552691739368, "learning_rate": 3.365637418997981e-05, "loss": 0.3054, "loss_nan_ranks": 0, "loss_rank_avg": 0.1540403664112091, "step": 840, "valid_targets_mean": 4148.5, "valid_targets_min": 3554 }, { "epoch": 2.360839160839161, "grad_norm": 0.6292676398227383, "learning_rate": 3.3554260567813546e-05, "loss": 0.2999, "loss_nan_ranks": 0, "loss_rank_avg": 0.1413697451353073, "step": 845, "valid_targets_mean": 3904.2, "valid_targets_min": 3167 }, { "epoch": 2.3748251748251747, "grad_norm": 0.5972678726345569, "learning_rate": 3.3451489256408664e-05, "loss": 0.3011, "loss_nan_ranks": 0, "loss_rank_avg": 0.13888861238956451, "step": 850, "valid_targets_mean": 3959.9, "valid_targets_min": 2950 }, { "epoch": 2.388811188811189, "grad_norm": 0.6192844037159877, "learning_rate": 3.3348065242506066e-05, "loss": 0.3045, "loss_nan_ranks": 0, "loss_rank_avg": 0.15562085807323456, "step": 855, "valid_targets_mean": 3538.9, "valid_targets_min": 1327 }, { "epoch": 2.4027972027972027, "grad_norm": 0.595293999532859, "learning_rate": 3.3243993544517525e-05, "loss": 0.3014, "loss_nan_ranks": 0, "loss_rank_avg": 0.1708168387413025, "step": 860, "valid_targets_mean": 4071.6, "valid_targets_min": 3415 }, { "epoch": 2.416783216783217, "grad_norm": 0.5779936717582573, "learning_rate": 3.313927921228221e-05, "loss": 0.304, "loss_nan_ranks": 0, "loss_rank_avg": 0.15482652187347412, "step": 865, "valid_targets_mean": 4141.9, "valid_targets_min": 3504 }, { "epoch": 2.430769230769231, "grad_norm": 0.5924911850269354, "learning_rate": 3.303392732682163e-05, "loss": 0.3004, "loss_nan_ranks": 0, "loss_rank_avg": 0.14046156406402588, "step": 870, "valid_targets_mean": 3971.1, "valid_targets_min": 2608 }, { "epoch": 2.4447552447552447, "grad_norm": 0.5649308938190039, "learning_rate": 3.292794300009309e-05, "loss": 0.3092, "loss_nan_ranks": 0, "loss_rank_avg": 0.1662825047969818, "step": 875, "valid_targets_mean": 4169.5, "valid_targets_min": 3295 }, { "epoch": 2.458741258741259, "grad_norm": 0.6038324946866227, "learning_rate": 3.282133137474164e-05, "loss": 0.3091, "loss_nan_ranks": 0, "loss_rank_avg": 0.14935675263404846, "step": 880, "valid_targets_mean": 3792.0, "valid_targets_min": 3233 }, { "epoch": 2.4727272727272727, "grad_norm": 0.5628142894986014, "learning_rate": 3.271409762385057e-05, "loss": 0.2987, "loss_nan_ranks": 0, "loss_rank_avg": 0.1566285341978073, "step": 885, "valid_targets_mean": 4116.4, "valid_targets_min": 3233 }, { "epoch": 2.486713286713287, "grad_norm": 0.5690126323587896, "learning_rate": 3.2606246950690365e-05, "loss": 0.3052, "loss_nan_ranks": 0, "loss_rank_avg": 0.14359772205352783, "step": 890, "valid_targets_mean": 4127.1, "valid_targets_min": 2999 }, { "epoch": 2.5006993006993006, "grad_norm": 0.5786323924138527, "learning_rate": 3.2497784588466235e-05, "loss": 0.3095, "loss_nan_ranks": 0, "loss_rank_avg": 0.13480177521705627, "step": 895, "valid_targets_mean": 3744.5, "valid_targets_min": 3057 }, { "epoch": 2.5146853146853148, "grad_norm": 0.6135926415957672, "learning_rate": 3.23887158000642e-05, "loss": 0.3064, "loss_nan_ranks": 0, "loss_rank_avg": 0.1602180004119873, "step": 900, "valid_targets_mean": 4174.6, "valid_targets_min": 3046 }, { "epoch": 2.5286713286713285, "grad_norm": 0.6166674089503721, "learning_rate": 3.2279045877795724e-05, "loss": 0.3007, "loss_nan_ranks": 0, "loss_rank_avg": 0.15190884470939636, "step": 905, "valid_targets_mean": 4146.0, "valid_targets_min": 2904 }, { "epoch": 2.5426573426573427, "grad_norm": 0.591655929505564, "learning_rate": 3.216878014314088e-05, "loss": 0.3016, "loss_nan_ranks": 0, "loss_rank_avg": 0.14331448078155518, "step": 910, "valid_targets_mean": 3779.4, "valid_targets_min": 3294 }, { "epoch": 2.556643356643357, "grad_norm": 0.5722745458923197, "learning_rate": 3.205792394649017e-05, "loss": 0.2981, "loss_nan_ranks": 0, "loss_rank_avg": 0.14977261424064636, "step": 915, "valid_targets_mean": 3924.5, "valid_targets_min": 2738 }, { "epoch": 2.5706293706293706, "grad_norm": 0.5714914092502298, "learning_rate": 3.194648266688492e-05, "loss": 0.3028, "loss_nan_ranks": 0, "loss_rank_avg": 0.15439298748970032, "step": 920, "valid_targets_mean": 4133.9, "valid_targets_min": 3535 }, { "epoch": 2.5846153846153848, "grad_norm": 0.6157264534450223, "learning_rate": 3.183446171175623e-05, "loss": 0.3003, "loss_nan_ranks": 0, "loss_rank_avg": 0.15904788672924042, "step": 925, "valid_targets_mean": 3680.8, "valid_targets_min": 2910 }, { "epoch": 2.5986013986013985, "grad_norm": 0.6096304017907245, "learning_rate": 3.1721866516662646e-05, "loss": 0.296, "loss_nan_ranks": 0, "loss_rank_avg": 0.1429840326309204, "step": 930, "valid_targets_mean": 3547.1, "valid_targets_min": 2803 }, { "epoch": 2.6125874125874127, "grad_norm": 0.5939141460355332, "learning_rate": 3.160870254502637e-05, "loss": 0.3065, "loss_nan_ranks": 0, "loss_rank_avg": 0.16182993352413177, "step": 935, "valid_targets_mean": 4263.1, "valid_targets_min": 3364 }, { "epoch": 2.626573426573427, "grad_norm": 0.6078547754682735, "learning_rate": 3.1494975287868166e-05, "loss": 0.3006, "loss_nan_ranks": 0, "loss_rank_avg": 0.16442373394966125, "step": 940, "valid_targets_mean": 4124.8, "valid_targets_min": 3573 }, { "epoch": 2.6405594405594406, "grad_norm": 0.5332110011961554, "learning_rate": 3.138069026354095e-05, "loss": 0.298, "loss_nan_ranks": 0, "loss_rank_avg": 0.13804768025875092, "step": 945, "valid_targets_mean": 3926.1, "valid_targets_min": 3110 }, { "epoch": 2.6545454545454543, "grad_norm": 0.5992219248556111, "learning_rate": 3.1265853017461984e-05, "loss": 0.2966, "loss_nan_ranks": 0, "loss_rank_avg": 0.14446228742599487, "step": 950, "valid_targets_mean": 3793.1, "valid_targets_min": 2634 }, { "epoch": 2.6685314685314685, "grad_norm": 0.5906808715236577, "learning_rate": 3.115046912184382e-05, "loss": 0.3065, "loss_nan_ranks": 0, "loss_rank_avg": 0.15945465862751007, "step": 955, "valid_targets_mean": 4146.2, "valid_targets_min": 3248 }, { "epoch": 2.6825174825174827, "grad_norm": 0.5851720383851202, "learning_rate": 3.103454417542394e-05, "loss": 0.3064, "loss_nan_ranks": 0, "loss_rank_avg": 0.13522455096244812, "step": 960, "valid_targets_mean": 3813.8, "valid_targets_min": 2961 }, { "epoch": 2.6965034965034964, "grad_norm": 0.5896699725800286, "learning_rate": 3.091808380319305e-05, "loss": 0.3062, "loss_nan_ranks": 0, "loss_rank_avg": 0.1530534029006958, "step": 965, "valid_targets_mean": 4099.0, "valid_targets_min": 2977 }, { "epoch": 2.7104895104895106, "grad_norm": 0.582215290493422, "learning_rate": 3.0801093656122136e-05, "loss": 0.2989, "loss_nan_ranks": 0, "loss_rank_avg": 0.14571411907672882, "step": 970, "valid_targets_mean": 3914.6, "valid_targets_min": 3250 }, { "epoch": 2.7244755244755243, "grad_norm": 0.558341621419776, "learning_rate": 3.0683579410888345e-05, "loss": 0.3048, "loss_nan_ranks": 0, "loss_rank_avg": 0.14048753678798676, "step": 975, "valid_targets_mean": 3710.1, "valid_targets_min": 2534 }, { "epoch": 2.7384615384615385, "grad_norm": 0.5656858687504162, "learning_rate": 3.056554676959942e-05, "loss": 0.299, "loss_nan_ranks": 0, "loss_rank_avg": 0.12521235644817352, "step": 980, "valid_targets_mean": 3584.2, "valid_targets_min": 2981 }, { "epoch": 2.7524475524475527, "grad_norm": 0.5449736994099068, "learning_rate": 3.0447001459517117e-05, "loss": 0.3031, "loss_nan_ranks": 0, "loss_rank_avg": 0.14620693027973175, "step": 985, "valid_targets_mean": 3956.8, "valid_targets_min": 3004 }, { "epoch": 2.7664335664335664, "grad_norm": 2.4950395364840245, "learning_rate": 3.0327949232779242e-05, "loss": 0.3043, "loss_nan_ranks": 0, "loss_rank_avg": 0.13793346285820007, "step": 990, "valid_targets_mean": 3818.4, "valid_targets_min": 3323 }, { "epoch": 2.78041958041958, "grad_norm": 0.6273958669859198, "learning_rate": 3.020839586612057e-05, "loss": 0.3034, "loss_nan_ranks": 0, "loss_rank_avg": 0.15779882669448853, "step": 995, "valid_targets_mean": 3840.5, "valid_targets_min": 2683 }, { "epoch": 2.7944055944055943, "grad_norm": 0.5873562506440816, "learning_rate": 3.0088347160592534e-05, "loss": 0.2919, "loss_nan_ranks": 0, "loss_rank_avg": 0.1517733931541443, "step": 1000, "valid_targets_mean": 4189.0, "valid_targets_min": 3177 }, { "epoch": 2.8083916083916085, "grad_norm": 0.5952889862867294, "learning_rate": 2.996780894128174e-05, "loss": 0.3059, "loss_nan_ranks": 0, "loss_rank_avg": 0.15184283256530762, "step": 1005, "valid_targets_mean": 3989.8, "valid_targets_min": 2859 }, { "epoch": 2.8223776223776222, "grad_norm": 0.5772743752732764, "learning_rate": 2.9846787057027335e-05, "loss": 0.3046, "loss_nan_ranks": 0, "loss_rank_avg": 0.13652929663658142, "step": 1010, "valid_targets_mean": 3563.9, "valid_targets_min": 2926 }, { "epoch": 2.8363636363636364, "grad_norm": 0.6153984951792437, "learning_rate": 2.972528738013717e-05, "loss": 0.303, "loss_nan_ranks": 0, "loss_rank_avg": 0.14372235536575317, "step": 1015, "valid_targets_mean": 3852.0, "valid_targets_min": 3140 }, { "epoch": 2.85034965034965, "grad_norm": 0.6175706903658995, "learning_rate": 2.960331580610291e-05, "loss": 0.2978, "loss_nan_ranks": 0, "loss_rank_avg": 0.1684339940547943, "step": 1020, "valid_targets_mean": 4381.4, "valid_targets_min": 3633 }, { "epoch": 2.8643356643356643, "grad_norm": 0.6013087024609713, "learning_rate": 2.9480878253313908e-05, "loss": 0.303, "loss_nan_ranks": 0, "loss_rank_avg": 0.13773339986801147, "step": 1025, "valid_targets_mean": 3716.9, "valid_targets_min": 2800 }, { "epoch": 2.8783216783216785, "grad_norm": 0.5690562225384614, "learning_rate": 2.9357980662770082e-05, "loss": 0.3073, "loss_nan_ranks": 0, "loss_rank_avg": 0.15410861372947693, "step": 1030, "valid_targets_mean": 4014.1, "valid_targets_min": 3246 }, { "epoch": 2.8923076923076922, "grad_norm": 0.5836767309730673, "learning_rate": 2.923462899779363e-05, "loss": 0.2955, "loss_nan_ranks": 0, "loss_rank_avg": 0.1419641524553299, "step": 1035, "valid_targets_mean": 3847.4, "valid_targets_min": 660 }, { "epoch": 2.9062937062937064, "grad_norm": 0.5943150762666415, "learning_rate": 2.9110829243739638e-05, "loss": 0.2922, "loss_nan_ranks": 0, "loss_rank_avg": 0.14206726849079132, "step": 1040, "valid_targets_mean": 3915.0, "valid_targets_min": 3165 }, { "epoch": 2.92027972027972, "grad_norm": 0.5901244471485346, "learning_rate": 2.8986587407705698e-05, "loss": 0.3146, "loss_nan_ranks": 0, "loss_rank_avg": 0.15809054672718048, "step": 1045, "valid_targets_mean": 4720.0, "valid_targets_min": 3354 }, { "epoch": 2.9342657342657343, "grad_norm": 0.574348712454776, "learning_rate": 2.8861909518240412e-05, "loss": 0.3047, "loss_nan_ranks": 0, "loss_rank_avg": 0.13096821308135986, "step": 1050, "valid_targets_mean": 3712.1, "valid_targets_min": 2726 }, { "epoch": 2.9482517482517485, "grad_norm": 0.5775146916340026, "learning_rate": 2.873680162505087e-05, "loss": 0.2988, "loss_nan_ranks": 0, "loss_rank_avg": 0.15335050225257874, "step": 1055, "valid_targets_mean": 4055.0, "valid_targets_min": 2802 }, { "epoch": 2.9622377622377623, "grad_norm": 0.5886462559759911, "learning_rate": 2.8611269798709088e-05, "loss": 0.2979, "loss_nan_ranks": 0, "loss_rank_avg": 0.15094557404518127, "step": 1060, "valid_targets_mean": 3997.2, "valid_targets_min": 3389 }, { "epoch": 2.976223776223776, "grad_norm": 0.5792930097643623, "learning_rate": 2.8485320130357467e-05, "loss": 0.3048, "loss_nan_ranks": 0, "loss_rank_avg": 0.15749546885490417, "step": 1065, "valid_targets_mean": 4244.0, "valid_targets_min": 3307 }, { "epoch": 2.99020979020979, "grad_norm": 0.5412531316096433, "learning_rate": 2.8358958731413237e-05, "loss": 0.3053, "loss_nan_ranks": 0, "loss_rank_avg": 0.14990413188934326, "step": 1070, "valid_targets_mean": 4070.8, "valid_targets_min": 2959 }, { "epoch": 3.0027972027972027, "grad_norm": 0.5987773847286868, "learning_rate": 2.8232191733271902e-05, "loss": 0.2893, "loss_nan_ranks": 0, "loss_rank_avg": 0.15005648136138916, "step": 1075, "valid_targets_mean": 4235.8, "valid_targets_min": 3358 }, { "epoch": 3.016783216783217, "grad_norm": 0.6102100675399175, "learning_rate": 2.8105025287009722e-05, "loss": 0.289, "loss_nan_ranks": 0, "loss_rank_avg": 0.13793258368968964, "step": 1080, "valid_targets_mean": 3900.8, "valid_targets_min": 2882 }, { "epoch": 3.0307692307692307, "grad_norm": 0.6062042303419031, "learning_rate": 2.7977465563085266e-05, "loss": 0.2849, "loss_nan_ranks": 0, "loss_rank_avg": 0.13424527645111084, "step": 1085, "valid_targets_mean": 3554.8, "valid_targets_min": 756 }, { "epoch": 3.044755244755245, "grad_norm": 0.5903347227932119, "learning_rate": 2.7849518751039988e-05, "loss": 0.2889, "loss_nan_ranks": 0, "loss_rank_avg": 0.13443614542484283, "step": 1090, "valid_targets_mean": 4138.4, "valid_targets_min": 2779 }, { "epoch": 3.0587412587412586, "grad_norm": 0.5512429822848546, "learning_rate": 2.7721191059197906e-05, "loss": 0.2936, "loss_nan_ranks": 0, "loss_rank_avg": 0.14363616704940796, "step": 1095, "valid_targets_mean": 3785.6, "valid_targets_min": 3061 }, { "epoch": 3.0727272727272728, "grad_norm": 0.5873538300440081, "learning_rate": 2.7592488714364346e-05, "loss": 0.2839, "loss_nan_ranks": 0, "loss_rank_avg": 0.14701727032661438, "step": 1100, "valid_targets_mean": 4083.5, "valid_targets_min": 3004 }, { "epoch": 3.0867132867132865, "grad_norm": 0.572390188505052, "learning_rate": 2.7463417961523818e-05, "loss": 0.2921, "loss_nan_ranks": 0, "loss_rank_avg": 0.13609379529953003, "step": 1105, "valid_targets_mean": 3602.6, "valid_targets_min": 2527 }, { "epoch": 3.1006993006993007, "grad_norm": 0.5622134342613668, "learning_rate": 2.7333985063536963e-05, "loss": 0.29, "loss_nan_ranks": 0, "loss_rank_avg": 0.1393313705921173, "step": 1110, "valid_targets_mean": 4095.5, "valid_targets_min": 3101 }, { "epoch": 3.114685314685315, "grad_norm": 0.5593515284390895, "learning_rate": 2.72041963008367e-05, "loss": 0.2882, "loss_nan_ranks": 0, "loss_rank_avg": 0.14072662591934204, "step": 1115, "valid_targets_mean": 4279.5, "valid_targets_min": 3243 }, { "epoch": 3.1286713286713286, "grad_norm": 0.5610889742269053, "learning_rate": 2.707405797112344e-05, "loss": 0.2912, "loss_nan_ranks": 0, "loss_rank_avg": 0.158025324344635, "step": 1120, "valid_targets_mean": 4263.9, "valid_targets_min": 3177 }, { "epoch": 3.1426573426573428, "grad_norm": 0.6024251945209658, "learning_rate": 2.6943576389059555e-05, "loss": 0.286, "loss_nan_ranks": 0, "loss_rank_avg": 0.1297697126865387, "step": 1125, "valid_targets_mean": 3528.4, "valid_targets_min": 1980 }, { "epoch": 3.1566433566433565, "grad_norm": 0.5597392486313498, "learning_rate": 2.6812757885962925e-05, "loss": 0.2911, "loss_nan_ranks": 0, "loss_rank_avg": 0.13107940554618835, "step": 1130, "valid_targets_mean": 4273.8, "valid_targets_min": 3953 }, { "epoch": 3.1706293706293707, "grad_norm": 0.5876810531953238, "learning_rate": 2.6681608809499742e-05, "loss": 0.2859, "loss_nan_ranks": 0, "loss_rank_avg": 0.15600530803203583, "step": 1135, "valid_targets_mean": 4078.9, "valid_targets_min": 3364 }, { "epoch": 3.184615384615385, "grad_norm": 0.5783426546002428, "learning_rate": 2.6550135523376536e-05, "loss": 0.2925, "loss_nan_ranks": 0, "loss_rank_avg": 0.15904384851455688, "step": 1140, "valid_targets_mean": 4351.2, "valid_targets_min": 3517 }, { "epoch": 3.1986013986013986, "grad_norm": 0.5813694968263312, "learning_rate": 2.641834440703133e-05, "loss": 0.2838, "loss_nan_ranks": 0, "loss_rank_avg": 0.15258988738059998, "step": 1145, "valid_targets_mean": 4117.5, "valid_targets_min": 3063 }, { "epoch": 3.2125874125874128, "grad_norm": 0.5905046846776977, "learning_rate": 2.6286241855324148e-05, "loss": 0.2869, "loss_nan_ranks": 0, "loss_rank_avg": 0.14545059204101562, "step": 1150, "valid_targets_mean": 4036.8, "valid_targets_min": 2770 }, { "epoch": 3.2265734265734265, "grad_norm": 0.6251524497566068, "learning_rate": 2.615383427822669e-05, "loss": 0.2945, "loss_nan_ranks": 0, "loss_rank_avg": 0.14009219408035278, "step": 1155, "valid_targets_mean": 4025.4, "valid_targets_min": 3274 }, { "epoch": 3.2405594405594407, "grad_norm": 0.5833207093013281, "learning_rate": 2.6021128100511312e-05, "loss": 0.2948, "loss_nan_ranks": 0, "loss_rank_avg": 0.1496381163597107, "step": 1160, "valid_targets_mean": 3876.6, "valid_targets_min": 3294 }, { "epoch": 3.2545454545454544, "grad_norm": 0.5869118064695467, "learning_rate": 2.5888129761439268e-05, "loss": 0.2854, "loss_nan_ranks": 0, "loss_rank_avg": 0.12796083092689514, "step": 1165, "valid_targets_mean": 4000.5, "valid_targets_min": 3198 }, { "epoch": 3.2685314685314686, "grad_norm": 0.6105334860746966, "learning_rate": 2.575484571444828e-05, "loss": 0.2947, "loss_nan_ranks": 0, "loss_rank_avg": 0.15768763422966003, "step": 1170, "valid_targets_mean": 4069.2, "valid_targets_min": 3275 }, { "epoch": 3.2825174825174823, "grad_norm": 0.6148135538144459, "learning_rate": 2.5621282426839376e-05, "loss": 0.2966, "loss_nan_ranks": 0, "loss_rank_avg": 0.14051169157028198, "step": 1175, "valid_targets_mean": 3816.0, "valid_targets_min": 3018 }, { "epoch": 3.2965034965034965, "grad_norm": 0.5784403352707399, "learning_rate": 2.5487446379463095e-05, "loss": 0.2859, "loss_nan_ranks": 0, "loss_rank_avg": 0.14809316396713257, "step": 1180, "valid_targets_mean": 4309.2, "valid_targets_min": 3524 }, { "epoch": 3.3104895104895107, "grad_norm": 0.5661451133427263, "learning_rate": 2.535334406640503e-05, "loss": 0.2888, "loss_nan_ranks": 0, "loss_rank_avg": 0.14418381452560425, "step": 1185, "valid_targets_mean": 3979.0, "valid_targets_min": 3214 }, { "epoch": 3.3244755244755244, "grad_norm": 0.5459169548362879, "learning_rate": 2.5218981994670683e-05, "loss": 0.2918, "loss_nan_ranks": 0, "loss_rank_avg": 0.16877073049545288, "step": 1190, "valid_targets_mean": 4564.4, "valid_targets_min": 2955 }, { "epoch": 3.3384615384615386, "grad_norm": 0.5750076988938344, "learning_rate": 2.5084366683869746e-05, "loss": 0.2838, "loss_nan_ranks": 0, "loss_rank_avg": 0.16434863209724426, "step": 1195, "valid_targets_mean": 4575.8, "valid_targets_min": 3817 }, { "epoch": 3.3524475524475523, "grad_norm": 0.5796812607678961, "learning_rate": 2.494950466589976e-05, "loss": 0.2944, "loss_nan_ranks": 0, "loss_rank_avg": 0.1434839367866516, "step": 1200, "valid_targets_mean": 4157.1, "valid_targets_min": 3372 }, { "epoch": 3.3664335664335665, "grad_norm": 0.5571773596093456, "learning_rate": 2.4814402484629172e-05, "loss": 0.2874, "loss_nan_ranks": 0, "loss_rank_avg": 0.16117364168167114, "step": 1205, "valid_targets_mean": 4443.8, "valid_targets_min": 3930 }, { "epoch": 3.3804195804195802, "grad_norm": 0.542012095877534, "learning_rate": 2.4679066695579783e-05, "loss": 0.2912, "loss_nan_ranks": 0, "loss_rank_avg": 0.13552315533161163, "step": 1210, "valid_targets_mean": 3729.8, "valid_targets_min": 2346 }, { "epoch": 3.3944055944055944, "grad_norm": 0.6262546928082594, "learning_rate": 2.454350386560868e-05, "loss": 0.2958, "loss_nan_ranks": 0, "loss_rank_avg": 0.132088303565979, "step": 1215, "valid_targets_mean": 3667.1, "valid_targets_min": 3007 }, { "epoch": 3.408391608391608, "grad_norm": 0.5654793675821718, "learning_rate": 2.440772057258958e-05, "loss": 0.293, "loss_nan_ranks": 0, "loss_rank_avg": 0.12824289500713348, "step": 1220, "valid_targets_mean": 3630.8, "valid_targets_min": 2659 }, { "epoch": 3.4223776223776223, "grad_norm": 0.5847485061544787, "learning_rate": 2.4271723405093683e-05, "loss": 0.2905, "loss_nan_ranks": 0, "loss_rank_avg": 0.13168518245220184, "step": 1225, "valid_targets_mean": 3625.9, "valid_targets_min": 3142 }, { "epoch": 3.4363636363636365, "grad_norm": 0.5821821351030247, "learning_rate": 2.4135518962069924e-05, "loss": 0.2953, "loss_nan_ranks": 0, "loss_rank_avg": 0.12825189530849457, "step": 1230, "valid_targets_mean": 3588.0, "valid_targets_min": 2988 }, { "epoch": 3.4503496503496502, "grad_norm": 0.5659166757325212, "learning_rate": 2.3999113852524825e-05, "loss": 0.2874, "loss_nan_ranks": 0, "loss_rank_avg": 0.12714150547981262, "step": 1235, "valid_targets_mean": 3578.1, "valid_targets_min": 2938 }, { "epoch": 3.4643356643356644, "grad_norm": 0.5861208282116811, "learning_rate": 2.386251469520179e-05, "loss": 0.2846, "loss_nan_ranks": 0, "loss_rank_avg": 0.15201061964035034, "step": 1240, "valid_targets_mean": 4156.2, "valid_targets_min": 3405 }, { "epoch": 3.478321678321678, "grad_norm": 0.5535911039436388, "learning_rate": 2.3725728118259927e-05, "loss": 0.2851, "loss_nan_ranks": 0, "loss_rank_avg": 0.1573452353477478, "step": 1245, "valid_targets_mean": 4481.2, "valid_targets_min": 3900 }, { "epoch": 3.4923076923076923, "grad_norm": 0.5346692692232985, "learning_rate": 2.358876075895247e-05, "loss": 0.2904, "loss_nan_ranks": 0, "loss_rank_avg": 0.13130052387714386, "step": 1250, "valid_targets_mean": 3746.9, "valid_targets_min": 2357 }, { "epoch": 3.5062937062937065, "grad_norm": 0.5590784293301762, "learning_rate": 2.345161926330468e-05, "loss": 0.2895, "loss_nan_ranks": 0, "loss_rank_avg": 0.1504519134759903, "step": 1255, "valid_targets_mean": 3962.4, "valid_targets_min": 3050 }, { "epoch": 3.5202797202797202, "grad_norm": 0.5526105682767487, "learning_rate": 2.3314310285791395e-05, "loss": 0.2878, "loss_nan_ranks": 0, "loss_rank_avg": 0.14131543040275574, "step": 1260, "valid_targets_mean": 3988.5, "valid_targets_min": 3365 }, { "epoch": 3.5342657342657344, "grad_norm": 0.5605267073009159, "learning_rate": 2.3176840489014127e-05, "loss": 0.2851, "loss_nan_ranks": 0, "loss_rank_avg": 0.14908947050571442, "step": 1265, "valid_targets_mean": 4193.6, "valid_targets_min": 3234 }, { "epoch": 3.548251748251748, "grad_norm": 0.5567496368325469, "learning_rate": 2.303921654337776e-05, "loss": 0.29, "loss_nan_ranks": 0, "loss_rank_avg": 0.15985560417175293, "step": 1270, "valid_targets_mean": 4427.0, "valid_targets_min": 3686 }, { "epoch": 3.5622377622377623, "grad_norm": 0.580201073915128, "learning_rate": 2.29014451267669e-05, "loss": 0.2948, "loss_nan_ranks": 0, "loss_rank_avg": 0.1479303240776062, "step": 1275, "valid_targets_mean": 3910.6, "valid_targets_min": 3425 }, { "epoch": 3.576223776223776, "grad_norm": 0.5329574918906276, "learning_rate": 2.276353292422185e-05, "loss": 0.2919, "loss_nan_ranks": 0, "loss_rank_avg": 0.15829353034496307, "step": 1280, "valid_targets_mean": 3959.4, "valid_targets_min": 3298 }, { "epoch": 3.5902097902097903, "grad_norm": 0.6871295147458576, "learning_rate": 2.2625486627614223e-05, "loss": 0.282, "loss_nan_ranks": 0, "loss_rank_avg": 0.14807263016700745, "step": 1285, "valid_targets_mean": 3968.4, "valid_targets_min": 2854 }, { "epoch": 3.604195804195804, "grad_norm": 0.5781576024288366, "learning_rate": 2.248731293532222e-05, "loss": 0.2799, "loss_nan_ranks": 0, "loss_rank_avg": 0.14441080391407013, "step": 1290, "valid_targets_mean": 4244.2, "valid_targets_min": 3643 }, { "epoch": 3.618181818181818, "grad_norm": 0.5642792543631863, "learning_rate": 2.2349018551905653e-05, "loss": 0.2911, "loss_nan_ranks": 0, "loss_rank_avg": 0.15375354886054993, "step": 1295, "valid_targets_mean": 3981.8, "valid_targets_min": 2294 }, { "epoch": 3.6321678321678323, "grad_norm": 0.5603604594097068, "learning_rate": 2.221061018778058e-05, "loss": 0.2847, "loss_nan_ranks": 0, "loss_rank_avg": 0.147846981883049, "step": 1300, "valid_targets_mean": 3968.8, "valid_targets_min": 3391 }, { "epoch": 3.646153846153846, "grad_norm": 0.5673580651732941, "learning_rate": 2.207209455889368e-05, "loss": 0.2885, "loss_nan_ranks": 0, "loss_rank_avg": 0.126178577542305, "step": 1305, "valid_targets_mean": 3585.2, "valid_targets_min": 2635 }, { "epoch": 3.6601398601398603, "grad_norm": 0.5367686648454142, "learning_rate": 2.193347838639647e-05, "loss": 0.2906, "loss_nan_ranks": 0, "loss_rank_avg": 0.13222312927246094, "step": 1310, "valid_targets_mean": 3554.9, "valid_targets_min": 495 }, { "epoch": 3.674125874125874, "grad_norm": 0.5576957411476551, "learning_rate": 2.1794768396319058e-05, "loss": 0.2825, "loss_nan_ranks": 0, "loss_rank_avg": 0.12126507610082626, "step": 1315, "valid_targets_mean": 3548.1, "valid_targets_min": 2486 }, { "epoch": 3.688111888111888, "grad_norm": 0.5614984976925238, "learning_rate": 2.1655971319243853e-05, "loss": 0.2924, "loss_nan_ranks": 0, "loss_rank_avg": 0.13723014295101166, "step": 1320, "valid_targets_mean": 3631.2, "valid_targets_min": 2436 }, { "epoch": 3.7020979020979023, "grad_norm": 0.5682898573966657, "learning_rate": 2.1517093889978966e-05, "loss": 0.2817, "loss_nan_ranks": 0, "loss_rank_avg": 0.13498768210411072, "step": 1325, "valid_targets_mean": 3797.8, "valid_targets_min": 3111 }, { "epoch": 3.716083916083916, "grad_norm": 0.5563945992386156, "learning_rate": 2.1378142847231417e-05, "loss": 0.284, "loss_nan_ranks": 0, "loss_rank_avg": 0.14077872037887573, "step": 1330, "valid_targets_mean": 3898.6, "valid_targets_min": 2740 }, { "epoch": 3.73006993006993, "grad_norm": 0.5525569479616117, "learning_rate": 2.123912493328013e-05, "loss": 0.283, "loss_nan_ranks": 0, "loss_rank_avg": 0.15129277110099792, "step": 1335, "valid_targets_mean": 4313.1, "valid_targets_min": 3386 }, { "epoch": 3.744055944055944, "grad_norm": 0.5613543756192618, "learning_rate": 2.1100046893648813e-05, "loss": 0.2929, "loss_nan_ranks": 0, "loss_rank_avg": 0.1311524659395218, "step": 1340, "valid_targets_mean": 3788.1, "valid_targets_min": 3177 }, { "epoch": 3.758041958041958, "grad_norm": 0.5960495305158579, "learning_rate": 2.096091547677864e-05, "loss": 0.2932, "loss_nan_ranks": 0, "loss_rank_avg": 0.13484007120132446, "step": 1345, "valid_targets_mean": 3836.5, "valid_targets_min": 3397 }, { "epoch": 3.772027972027972, "grad_norm": 0.534714708744089, "learning_rate": 2.0821737433700773e-05, "loss": 0.2889, "loss_nan_ranks": 0, "loss_rank_avg": 0.13426387310028076, "step": 1350, "valid_targets_mean": 3965.5, "valid_targets_min": 3316 }, { "epoch": 3.786013986013986, "grad_norm": 0.5608719066575932, "learning_rate": 2.068251951770882e-05, "loss": 0.2922, "loss_nan_ranks": 0, "loss_rank_avg": 0.14681580662727356, "step": 1355, "valid_targets_mean": 3656.8, "valid_targets_min": 609 }, { "epoch": 3.8, "grad_norm": 0.5392596511784064, "learning_rate": 2.054326848403113e-05, "loss": 0.2847, "loss_nan_ranks": 0, "loss_rank_avg": 0.1456853449344635, "step": 1360, "valid_targets_mean": 3982.4, "valid_targets_min": 3106 }, { "epoch": 3.813986013986014, "grad_norm": 0.5824843228693691, "learning_rate": 2.0403991089502995e-05, "loss": 0.2865, "loss_nan_ranks": 0, "loss_rank_avg": 0.1382858157157898, "step": 1365, "valid_targets_mean": 3885.2, "valid_targets_min": 3067 }, { "epoch": 3.827972027972028, "grad_norm": 0.5447761110635077, "learning_rate": 2.026469409223883e-05, "loss": 0.291, "loss_nan_ranks": 0, "loss_rank_avg": 0.15115302801132202, "step": 1370, "valid_targets_mean": 3895.8, "valid_targets_min": 3321 }, { "epoch": 3.841958041958042, "grad_norm": 0.5264471128691284, "learning_rate": 2.012538425130421e-05, "loss": 0.2891, "loss_nan_ranks": 0, "loss_rank_avg": 0.15932296216487885, "step": 1375, "valid_targets_mean": 4400.5, "valid_targets_min": 3458 }, { "epoch": 3.855944055944056, "grad_norm": 0.553572290453652, "learning_rate": 1.998606832638792e-05, "loss": 0.295, "loss_nan_ranks": 0, "loss_rank_avg": 0.14538070559501648, "step": 1380, "valid_targets_mean": 3952.4, "valid_targets_min": 3027 }, { "epoch": 3.86993006993007, "grad_norm": 0.5503890705860522, "learning_rate": 1.984675307747397e-05, "loss": 0.2978, "loss_nan_ranks": 0, "loss_rank_avg": 0.13637414574623108, "step": 1385, "valid_targets_mean": 3990.6, "valid_targets_min": 3045 }, { "epoch": 3.883916083916084, "grad_norm": 0.5695849028067055, "learning_rate": 1.970744526451356e-05, "loss": 0.2876, "loss_nan_ranks": 0, "loss_rank_avg": 0.15931588411331177, "step": 1390, "valid_targets_mean": 4204.8, "valid_targets_min": 3360 }, { "epoch": 3.8979020979020977, "grad_norm": 0.5065264632685823, "learning_rate": 1.956815164709707e-05, "loss": 0.2926, "loss_nan_ranks": 0, "loss_rank_avg": 0.14099319279193878, "step": 1395, "valid_targets_mean": 4106.9, "valid_targets_min": 3364 }, { "epoch": 3.911888111888112, "grad_norm": 0.5552657926969193, "learning_rate": 1.942887898412608e-05, "loss": 0.2826, "loss_nan_ranks": 0, "loss_rank_avg": 0.1472746580839157, "step": 1400, "valid_targets_mean": 3892.6, "valid_targets_min": 3214 }, { "epoch": 3.9258741258741257, "grad_norm": 0.5873648605403345, "learning_rate": 1.928963403348541e-05, "loss": 0.2872, "loss_nan_ranks": 0, "loss_rank_avg": 0.15483799576759338, "step": 1405, "valid_targets_mean": 4065.4, "valid_targets_min": 2955 }, { "epoch": 3.93986013986014, "grad_norm": 0.5571666073607908, "learning_rate": 1.91504235517152e-05, "loss": 0.2941, "loss_nan_ranks": 0, "loss_rank_avg": 0.14902883768081665, "step": 1410, "valid_targets_mean": 4142.4, "valid_targets_min": 3177 }, { "epoch": 3.953846153846154, "grad_norm": 0.5534221036668195, "learning_rate": 1.9011254293683067e-05, "loss": 0.2879, "loss_nan_ranks": 0, "loss_rank_avg": 0.14316719770431519, "step": 1415, "valid_targets_mean": 4087.0, "valid_targets_min": 3266 }, { "epoch": 3.9678321678321677, "grad_norm": 0.5596823503493599, "learning_rate": 1.8872133012256328e-05, "loss": 0.2929, "loss_nan_ranks": 0, "loss_rank_avg": 0.14076735079288483, "step": 1420, "valid_targets_mean": 3920.1, "valid_targets_min": 3317 }, { "epoch": 3.981818181818182, "grad_norm": 0.5765862283093903, "learning_rate": 1.8733066457974373e-05, "loss": 0.2831, "loss_nan_ranks": 0, "loss_rank_avg": 0.12725147604942322, "step": 1425, "valid_targets_mean": 3559.5, "valid_targets_min": 2868 }, { "epoch": 3.9958041958041957, "grad_norm": 0.5779364420855677, "learning_rate": 1.8594061378721057e-05, "loss": 0.2897, "loss_nan_ranks": 0, "loss_rank_avg": 0.13181552290916443, "step": 1430, "valid_targets_mean": 3682.4, "valid_targets_min": 2829 }, { "epoch": 4.008391608391609, "grad_norm": 0.5346526605133614, "learning_rate": 1.8455124519397308e-05, "loss": 0.2839, "loss_nan_ranks": 0, "loss_rank_avg": 0.15485987067222595, "step": 1435, "valid_targets_mean": 4141.9, "valid_targets_min": 3243 }, { "epoch": 4.022377622377622, "grad_norm": 0.5766020959293648, "learning_rate": 1.831626262159386e-05, "loss": 0.2728, "loss_nan_ranks": 0, "loss_rank_avg": 0.13374757766723633, "step": 1440, "valid_targets_mean": 3724.5, "valid_targets_min": 3272 }, { "epoch": 4.036363636363636, "grad_norm": 0.6086344578776043, "learning_rate": 1.817748242326409e-05, "loss": 0.2832, "loss_nan_ranks": 0, "loss_rank_avg": 0.15734803676605225, "step": 1445, "valid_targets_mean": 4231.8, "valid_targets_min": 3283 }, { "epoch": 4.05034965034965, "grad_norm": 0.5838271193374448, "learning_rate": 1.8038790658397097e-05, "loss": 0.2803, "loss_nan_ranks": 0, "loss_rank_avg": 0.13393522799015045, "step": 1450, "valid_targets_mean": 3853.5, "valid_targets_min": 2955 }, { "epoch": 4.0643356643356645, "grad_norm": 0.5866672557191704, "learning_rate": 1.7900194056690955e-05, "loss": 0.2771, "loss_nan_ranks": 0, "loss_rank_avg": 0.13227057456970215, "step": 1455, "valid_targets_mean": 4068.1, "valid_targets_min": 3364 }, { "epoch": 4.078321678321679, "grad_norm": 0.5829979033149572, "learning_rate": 1.7761699343226167e-05, "loss": 0.2771, "loss_nan_ranks": 0, "loss_rank_avg": 0.13871875405311584, "step": 1460, "valid_targets_mean": 4002.0, "valid_targets_min": 2979 }, { "epoch": 4.092307692307692, "grad_norm": 0.563177618637755, "learning_rate": 1.7623313238139335e-05, "loss": 0.2767, "loss_nan_ranks": 0, "loss_rank_avg": 0.12607219815254211, "step": 1465, "valid_targets_mean": 3888.6, "valid_targets_min": 3177 }, { "epoch": 4.106293706293706, "grad_norm": 0.5860307723730551, "learning_rate": 1.748504245629711e-05, "loss": 0.2811, "loss_nan_ranks": 0, "loss_rank_avg": 0.1451467126607895, "step": 1470, "valid_targets_mean": 4002.6, "valid_targets_min": 3262 }, { "epoch": 4.12027972027972, "grad_norm": 0.5551191446812656, "learning_rate": 1.7346893706970333e-05, "loss": 0.2858, "loss_nan_ranks": 0, "loss_rank_avg": 0.14970549941062927, "step": 1475, "valid_targets_mean": 4383.4, "valid_targets_min": 3061 }, { "epoch": 4.1342657342657345, "grad_norm": 0.5581366796607292, "learning_rate": 1.7208873693508493e-05, "loss": 0.2794, "loss_nan_ranks": 0, "loss_rank_avg": 0.15094107389450073, "step": 1480, "valid_targets_mean": 4262.2, "valid_targets_min": 3051 }, { "epoch": 4.148251748251749, "grad_norm": 0.5729297719466728, "learning_rate": 1.7070989113014483e-05, "loss": 0.2777, "loss_nan_ranks": 0, "loss_rank_avg": 0.15359677374362946, "step": 1485, "valid_targets_mean": 4198.6, "valid_targets_min": 3511 }, { "epoch": 4.162237762237762, "grad_norm": 0.5634444934272489, "learning_rate": 1.6933246656019613e-05, "loss": 0.2772, "loss_nan_ranks": 0, "loss_rank_avg": 0.1567789912223816, "step": 1490, "valid_targets_mean": 4295.5, "valid_targets_min": 3127 }, { "epoch": 4.176223776223776, "grad_norm": 0.5581693723899023, "learning_rate": 1.6795653006158977e-05, "loss": 0.2806, "loss_nan_ranks": 0, "loss_rank_avg": 0.13514573872089386, "step": 1495, "valid_targets_mean": 3887.1, "valid_targets_min": 3602 }, { "epoch": 4.19020979020979, "grad_norm": 0.5863814075958925, "learning_rate": 1.6658214839847168e-05, "loss": 0.2804, "loss_nan_ranks": 0, "loss_rank_avg": 0.1320951133966446, "step": 1500, "valid_targets_mean": 3769.1, "valid_targets_min": 3233 }, { "epoch": 4.2041958041958045, "grad_norm": 0.5635215900428802, "learning_rate": 1.6520938825954265e-05, "loss": 0.28, "loss_nan_ranks": 0, "loss_rank_avg": 0.14185290038585663, "step": 1505, "valid_targets_mean": 4110.9, "valid_targets_min": 3167 }, { "epoch": 4.218181818181818, "grad_norm": 0.5423622664089363, "learning_rate": 1.638383162548229e-05, "loss": 0.2794, "loss_nan_ranks": 0, "loss_rank_avg": 0.13250473141670227, "step": 1510, "valid_targets_mean": 4070.2, "valid_targets_min": 3372 }, { "epoch": 4.232167832167832, "grad_norm": 0.5675193264978978, "learning_rate": 1.6246899891241995e-05, "loss": 0.2806, "loss_nan_ranks": 0, "loss_rank_avg": 0.14088118076324463, "step": 1515, "valid_targets_mean": 4075.2, "valid_targets_min": 2977 }, { "epoch": 4.246153846153846, "grad_norm": 0.6175459473635595, "learning_rate": 1.6110150267530017e-05, "loss": 0.2765, "loss_nan_ranks": 0, "loss_rank_avg": 0.14475172758102417, "step": 1520, "valid_targets_mean": 4005.4, "valid_targets_min": 3295 }, { "epoch": 4.26013986013986, "grad_norm": 0.5504368434307025, "learning_rate": 1.597358938980651e-05, "loss": 0.2774, "loss_nan_ranks": 0, "loss_rank_avg": 0.1377810835838318, "step": 1525, "valid_targets_mean": 3906.4, "valid_targets_min": 3145 }, { "epoch": 4.2741258741258745, "grad_norm": 0.5735624376122325, "learning_rate": 1.583722388437317e-05, "loss": 0.2782, "loss_nan_ranks": 0, "loss_rank_avg": 0.14436104893684387, "step": 1530, "valid_targets_mean": 3985.4, "valid_targets_min": 3227 }, { "epoch": 4.288111888111888, "grad_norm": 0.5295048011764109, "learning_rate": 1.570106036805169e-05, "loss": 0.2732, "loss_nan_ranks": 0, "loss_rank_avg": 0.12768390774726868, "step": 1535, "valid_targets_mean": 3741.1, "valid_targets_min": 3090 }, { "epoch": 4.302097902097902, "grad_norm": 0.5946384554538933, "learning_rate": 1.5565105447862716e-05, "loss": 0.2835, "loss_nan_ranks": 0, "loss_rank_avg": 0.15328478813171387, "step": 1540, "valid_targets_mean": 4330.8, "valid_targets_min": 3690 }, { "epoch": 4.316083916083916, "grad_norm": 0.5527044105773028, "learning_rate": 1.5429365720705247e-05, "loss": 0.2794, "loss_nan_ranks": 0, "loss_rank_avg": 0.14041993021965027, "step": 1545, "valid_targets_mean": 4291.0, "valid_targets_min": 3663 }, { "epoch": 4.33006993006993, "grad_norm": 0.5932036773483532, "learning_rate": 1.5293847773036526e-05, "loss": 0.2788, "loss_nan_ranks": 0, "loss_rank_avg": 0.14854060113430023, "step": 1550, "valid_targets_mean": 4029.6, "valid_targets_min": 3392 }, { "epoch": 4.344055944055944, "grad_norm": 0.5756488887636685, "learning_rate": 1.5158558180552467e-05, "loss": 0.2809, "loss_nan_ranks": 0, "loss_rank_avg": 0.14178456366062164, "step": 1555, "valid_targets_mean": 3695.5, "valid_targets_min": 2346 }, { "epoch": 4.358041958041958, "grad_norm": 0.5551445420921324, "learning_rate": 1.5023503507868586e-05, "loss": 0.2802, "loss_nan_ranks": 0, "loss_rank_avg": 0.11629106104373932, "step": 1560, "valid_targets_mean": 3662.4, "valid_targets_min": 2917 }, { "epoch": 4.372027972027972, "grad_norm": 0.5551336199610265, "learning_rate": 1.4888690308201442e-05, "loss": 0.2817, "loss_nan_ranks": 0, "loss_rank_avg": 0.1290501058101654, "step": 1565, "valid_targets_mean": 3783.2, "valid_targets_min": 3226 }, { "epoch": 4.386013986013986, "grad_norm": 0.5403841543747977, "learning_rate": 1.4754125123050668e-05, "loss": 0.2805, "loss_nan_ranks": 0, "loss_rank_avg": 0.13973750174045563, "step": 1570, "valid_targets_mean": 4077.0, "valid_targets_min": 2524 }, { "epoch": 4.4, "grad_norm": 0.5332210520239432, "learning_rate": 1.4619814481881582e-05, "loss": 0.277, "loss_nan_ranks": 0, "loss_rank_avg": 0.16121509671211243, "step": 1575, "valid_targets_mean": 4338.4, "valid_targets_min": 3256 }, { "epoch": 4.413986013986014, "grad_norm": 0.5722141726370856, "learning_rate": 1.4485764901808328e-05, "loss": 0.2703, "loss_nan_ranks": 0, "loss_rank_avg": 0.13942039012908936, "step": 1580, "valid_targets_mean": 3827.6, "valid_targets_min": 2615 }, { "epoch": 4.427972027972028, "grad_norm": 0.5578695901658584, "learning_rate": 1.435198288727766e-05, "loss": 0.2743, "loss_nan_ranks": 0, "loss_rank_avg": 0.14053496718406677, "step": 1585, "valid_targets_mean": 4390.1, "valid_targets_min": 3765 }, { "epoch": 4.441958041958042, "grad_norm": 0.5694976114609359, "learning_rate": 1.4218474929753358e-05, "loss": 0.2768, "loss_nan_ranks": 0, "loss_rank_avg": 0.13226018846035004, "step": 1590, "valid_targets_mean": 4177.5, "valid_targets_min": 3413 }, { "epoch": 4.455944055944056, "grad_norm": 0.5771053577968344, "learning_rate": 1.4085247507401188e-05, "loss": 0.2878, "loss_nan_ranks": 0, "loss_rank_avg": 0.16159263253211975, "step": 1595, "valid_targets_mean": 4184.8, "valid_targets_min": 3045 }, { "epoch": 4.46993006993007, "grad_norm": 0.5281811897589362, "learning_rate": 1.3952307084774599e-05, "loss": 0.2738, "loss_nan_ranks": 0, "loss_rank_avg": 0.1455651968717575, "step": 1600, "valid_targets_mean": 3982.0, "valid_targets_min": 2879 }, { "epoch": 4.483916083916084, "grad_norm": 0.5470105044154708, "learning_rate": 1.3819660112501054e-05, "loss": 0.2785, "loss_nan_ranks": 0, "loss_rank_avg": 0.1340590864419937, "step": 1605, "valid_targets_mean": 3864.2, "valid_targets_min": 2878 }, { "epoch": 4.497902097902098, "grad_norm": 0.5516752626597579, "learning_rate": 1.3687313026969003e-05, "loss": 0.2811, "loss_nan_ranks": 0, "loss_rank_avg": 0.1467994898557663, "step": 1610, "valid_targets_mean": 4225.1, "valid_targets_min": 3888 }, { "epoch": 4.511888111888112, "grad_norm": 0.5312163411468419, "learning_rate": 1.3555272250015575e-05, "loss": 0.2769, "loss_nan_ranks": 0, "loss_rank_avg": 0.12923194468021393, "step": 1615, "valid_targets_mean": 3883.1, "valid_targets_min": 3304 }, { "epoch": 4.525874125874126, "grad_norm": 0.5771253783673834, "learning_rate": 1.342354418861501e-05, "loss": 0.2898, "loss_nan_ranks": 0, "loss_rank_avg": 0.13594885170459747, "step": 1620, "valid_targets_mean": 3983.5, "valid_targets_min": 3446 }, { "epoch": 4.5398601398601395, "grad_norm": 0.5781191215370589, "learning_rate": 1.329213523456772e-05, "loss": 0.2807, "loss_nan_ranks": 0, "loss_rank_avg": 0.15987467765808105, "step": 1625, "valid_targets_mean": 4329.5, "valid_targets_min": 3487 }, { "epoch": 4.553846153846154, "grad_norm": 0.529526946498919, "learning_rate": 1.316105176419018e-05, "loss": 0.273, "loss_nan_ranks": 0, "loss_rank_avg": 0.13953280448913574, "step": 1630, "valid_targets_mean": 4159.1, "valid_targets_min": 3214 }, { "epoch": 4.567832167832168, "grad_norm": 0.5921815456958419, "learning_rate": 1.3030300138005516e-05, "loss": 0.277, "loss_nan_ranks": 0, "loss_rank_avg": 0.14543133974075317, "step": 1635, "valid_targets_mean": 4053.8, "valid_targets_min": 3316 }, { "epoch": 4.581818181818182, "grad_norm": 0.5523405721602177, "learning_rate": 1.2899886700434885e-05, "loss": 0.2783, "loss_nan_ranks": 0, "loss_rank_avg": 0.13520309329032898, "step": 1640, "valid_targets_mean": 4057.2, "valid_targets_min": 2859 }, { "epoch": 4.595804195804196, "grad_norm": 0.566729641970418, "learning_rate": 1.2769817779489606e-05, "loss": 0.2793, "loss_nan_ranks": 0, "loss_rank_avg": 0.1414514183998108, "step": 1645, "valid_targets_mean": 3926.2, "valid_targets_min": 2998 }, { "epoch": 4.6097902097902095, "grad_norm": 0.5298872696100833, "learning_rate": 1.2640099686464157e-05, "loss": 0.2834, "loss_nan_ranks": 0, "loss_rank_avg": 0.13758346438407898, "step": 1650, "valid_targets_mean": 4043.6, "valid_targets_min": 3057 }, { "epoch": 4.623776223776224, "grad_norm": 0.5428239821038428, "learning_rate": 1.2510738715629866e-05, "loss": 0.2759, "loss_nan_ranks": 0, "loss_rank_avg": 0.1377483308315277, "step": 1655, "valid_targets_mean": 4040.9, "valid_targets_min": 3152 }, { "epoch": 4.637762237762238, "grad_norm": 0.5376319363172789, "learning_rate": 1.2381741143929547e-05, "loss": 0.2828, "loss_nan_ranks": 0, "loss_rank_avg": 0.13625864684581757, "step": 1660, "valid_targets_mean": 4123.6, "valid_targets_min": 3342 }, { "epoch": 4.651748251748252, "grad_norm": 0.5376602142844261, "learning_rate": 1.22531132306729e-05, "loss": 0.2778, "loss_nan_ranks": 0, "loss_rank_avg": 0.1259552240371704, "step": 1665, "valid_targets_mean": 4075.9, "valid_targets_min": 3132 }, { "epoch": 4.665734265734265, "grad_norm": 0.5540515888363535, "learning_rate": 1.212486121723281e-05, "loss": 0.2704, "loss_nan_ranks": 0, "loss_rank_avg": 0.14987067878246307, "step": 1670, "valid_targets_mean": 4072.9, "valid_targets_min": 3441 }, { "epoch": 4.6797202797202795, "grad_norm": 0.5164474972749298, "learning_rate": 1.1996991326742484e-05, "loss": 0.2813, "loss_nan_ranks": 0, "loss_rank_avg": 0.14604651927947998, "step": 1675, "valid_targets_mean": 4272.6, "valid_targets_min": 3428 }, { "epoch": 4.693706293706294, "grad_norm": 0.5824465695451619, "learning_rate": 1.1869509763793497e-05, "loss": 0.2775, "loss_nan_ranks": 0, "loss_rank_avg": 0.14033998548984528, "step": 1680, "valid_targets_mean": 4183.6, "valid_targets_min": 3243 }, { "epoch": 4.707692307692308, "grad_norm": 0.5404895105893867, "learning_rate": 1.174242271413473e-05, "loss": 0.2708, "loss_nan_ranks": 0, "loss_rank_avg": 0.15117552876472473, "step": 1685, "valid_targets_mean": 4291.9, "valid_targets_min": 3409 }, { "epoch": 4.721678321678322, "grad_norm": 0.5489265801598253, "learning_rate": 1.1615736344372203e-05, "loss": 0.2764, "loss_nan_ranks": 0, "loss_rank_avg": 0.1279614418745041, "step": 1690, "valid_targets_mean": 3617.1, "valid_targets_min": 3062 }, { "epoch": 4.735664335664335, "grad_norm": 0.5308545623191452, "learning_rate": 1.148945680166989e-05, "loss": 0.2704, "loss_nan_ranks": 0, "loss_rank_avg": 0.1290333867073059, "step": 1695, "valid_targets_mean": 3886.9, "valid_targets_min": 2923 }, { "epoch": 4.7496503496503495, "grad_norm": 0.5503407817873848, "learning_rate": 1.136359021345139e-05, "loss": 0.2818, "loss_nan_ranks": 0, "loss_rank_avg": 0.13935747742652893, "step": 1700, "valid_targets_mean": 4160.4, "valid_targets_min": 3091 }, { "epoch": 4.763636363636364, "grad_norm": 0.5674354574812547, "learning_rate": 1.123814268710267e-05, "loss": 0.2822, "loss_nan_ranks": 0, "loss_rank_avg": 0.154280424118042, "step": 1705, "valid_targets_mean": 4431.9, "valid_targets_min": 3089 }, { "epoch": 4.777622377622378, "grad_norm": 0.5078199113419997, "learning_rate": 1.1113120309675645e-05, "loss": 0.2754, "loss_nan_ranks": 0, "loss_rank_avg": 0.12909035384655, "step": 1710, "valid_targets_mean": 4203.4, "valid_targets_min": 3700 }, { "epoch": 4.791608391608392, "grad_norm": 0.528464450299718, "learning_rate": 1.098852914759292e-05, "loss": 0.2755, "loss_nan_ranks": 0, "loss_rank_avg": 0.12253694981336594, "step": 1715, "valid_targets_mean": 3791.1, "valid_targets_min": 3212 }, { "epoch": 4.805594405594405, "grad_norm": 0.5539863408262431, "learning_rate": 1.086437524635331e-05, "loss": 0.2748, "loss_nan_ranks": 0, "loss_rank_avg": 0.1329549103975296, "step": 1720, "valid_targets_mean": 4068.2, "valid_targets_min": 2964 }, { "epoch": 4.8195804195804195, "grad_norm": 0.564749576404987, "learning_rate": 1.0740664630238592e-05, "loss": 0.2689, "loss_nan_ranks": 0, "loss_rank_avg": 0.12435588240623474, "step": 1725, "valid_targets_mean": 3838.9, "valid_targets_min": 2756 }, { "epoch": 4.833566433566434, "grad_norm": 0.5470450188336874, "learning_rate": 1.0617403302021128e-05, "loss": 0.2758, "loss_nan_ranks": 0, "loss_rank_avg": 0.14149603247642517, "step": 1730, "valid_targets_mean": 4215.6, "valid_targets_min": 3468 }, { "epoch": 4.847552447552448, "grad_norm": 0.5565904255417349, "learning_rate": 1.0494597242672647e-05, "loss": 0.2777, "loss_nan_ranks": 0, "loss_rank_avg": 0.13995948433876038, "step": 1735, "valid_targets_mean": 4206.4, "valid_targets_min": 3317 }, { "epoch": 4.861538461538462, "grad_norm": 0.6516067057690171, "learning_rate": 1.037225241107399e-05, "loss": 0.2772, "loss_nan_ranks": 0, "loss_rank_avg": 0.13070757687091827, "step": 1740, "valid_targets_mean": 3938.9, "valid_targets_min": 2717 }, { "epoch": 4.875524475524475, "grad_norm": 0.5643871855173305, "learning_rate": 1.025037474372599e-05, "loss": 0.2817, "loss_nan_ranks": 0, "loss_rank_avg": 0.15128248929977417, "step": 1745, "valid_targets_mean": 4264.1, "valid_targets_min": 3654 }, { "epoch": 4.8895104895104895, "grad_norm": 0.5377462969057915, "learning_rate": 1.0128970154461424e-05, "loss": 0.2763, "loss_nan_ranks": 0, "loss_rank_avg": 0.14507511258125305, "step": 1750, "valid_targets_mean": 4460.8, "valid_targets_min": 3433 }, { "epoch": 4.903496503496504, "grad_norm": 0.6056957063528053, "learning_rate": 1.000804453415801e-05, "loss": 0.2827, "loss_nan_ranks": 0, "loss_rank_avg": 0.1173890233039856, "step": 1755, "valid_targets_mean": 3437.2, "valid_targets_min": 2078 }, { "epoch": 4.917482517482518, "grad_norm": 0.5482919546590974, "learning_rate": 9.887603750452646e-06, "loss": 0.2756, "loss_nan_ranks": 0, "loss_rank_avg": 0.13681714236736298, "step": 1760, "valid_targets_mean": 3923.8, "valid_targets_min": 3110 }, { "epoch": 4.931468531468531, "grad_norm": 0.5724368546420713, "learning_rate": 9.767653647456614e-06, "loss": 0.2823, "loss_nan_ranks": 0, "loss_rank_avg": 0.14031797647476196, "step": 1765, "valid_targets_mean": 3936.8, "valid_targets_min": 3029 }, { "epoch": 4.945454545454545, "grad_norm": 0.5162764043379312, "learning_rate": 9.648200045472071e-06, "loss": 0.2825, "loss_nan_ranks": 0, "loss_rank_avg": 0.13154445588588715, "step": 1770, "valid_targets_mean": 4408.4, "valid_targets_min": 3414 }, { "epoch": 4.9594405594405595, "grad_norm": 0.5574455444195064, "learning_rate": 9.5292487407096e-06, "loss": 0.2774, "loss_nan_ranks": 0, "loss_rank_avg": 0.1369532346725464, "step": 1775, "valid_targets_mean": 3946.4, "valid_targets_min": 3012 }, { "epoch": 4.973426573426574, "grad_norm": 0.5516667838189067, "learning_rate": 9.410805505006974e-06, "loss": 0.2691, "loss_nan_ranks": 0, "loss_rank_avg": 0.14430870115756989, "step": 1780, "valid_targets_mean": 3940.4, "valid_targets_min": 3259 }, { "epoch": 4.987412587412587, "grad_norm": 0.5624377252296943, "learning_rate": 9.29287608554907e-06, "loss": 0.2825, "loss_nan_ranks": 0, "loss_rank_avg": 0.13863155245780945, "step": 1785, "valid_targets_mean": 3757.0, "valid_targets_min": 3237 }, { "epoch": 5.0, "grad_norm": 0.7795219641149852, "learning_rate": 9.175466204589039e-06, "loss": 0.2778, "loss_nan_ranks": 0, "loss_rank_avg": 0.2640479803085327, "step": 1790, "valid_targets_mean": 3898.2, "valid_targets_min": 3124 }, { "epoch": 5.013986013986014, "grad_norm": 0.5219102843986503, "learning_rate": 9.0585815591706e-06, "loss": 0.271, "loss_nan_ranks": 0, "loss_rank_avg": 0.12575533986091614, "step": 1795, "valid_targets_mean": 4108.4, "valid_targets_min": 3131 }, { "epoch": 5.027972027972028, "grad_norm": 0.5652738851475124, "learning_rate": 8.942227820851653e-06, "loss": 0.2696, "loss_nan_ranks": 0, "loss_rank_avg": 0.1312795877456665, "step": 1800, "valid_targets_mean": 4253.5, "valid_targets_min": 3575 }, { "epoch": 5.041958041958042, "grad_norm": 0.5488102769899961, "learning_rate": 8.82641063542904e-06, "loss": 0.2672, "loss_nan_ranks": 0, "loss_rank_avg": 0.13220839202404022, "step": 1805, "valid_targets_mean": 3746.8, "valid_targets_min": 2958 }, { "epoch": 5.055944055944056, "grad_norm": 0.5326044748082528, "learning_rate": 8.711135622664622e-06, "loss": 0.2677, "loss_nan_ranks": 0, "loss_rank_avg": 0.13870134949684143, "step": 1810, "valid_targets_mean": 4063.0, "valid_targets_min": 2738 }, { "epoch": 5.06993006993007, "grad_norm": 0.5286347807264584, "learning_rate": 8.596408376012562e-06, "loss": 0.2678, "loss_nan_ranks": 0, "loss_rank_avg": 0.14026141166687012, "step": 1815, "valid_targets_mean": 4341.8, "valid_targets_min": 3070 }, { "epoch": 5.083916083916084, "grad_norm": 0.5694766098358943, "learning_rate": 8.482234462347955e-06, "loss": 0.2629, "loss_nan_ranks": 0, "loss_rank_avg": 0.1287498027086258, "step": 1820, "valid_targets_mean": 3671.8, "valid_targets_min": 2751 }, { "epoch": 5.0979020979020975, "grad_norm": 0.5253387879455069, "learning_rate": 8.368619421696693e-06, "loss": 0.2686, "loss_nan_ranks": 0, "loss_rank_avg": 0.13443228602409363, "step": 1825, "valid_targets_mean": 3833.9, "valid_targets_min": 2874 }, { "epoch": 5.111888111888112, "grad_norm": 0.5620562693740041, "learning_rate": 8.255568766966613e-06, "loss": 0.2734, "loss_nan_ranks": 0, "loss_rank_avg": 0.14100167155265808, "step": 1830, "valid_targets_mean": 4265.2, "valid_targets_min": 3186 }, { "epoch": 5.125874125874126, "grad_norm": 0.5796942165125105, "learning_rate": 8.143087983680061e-06, "loss": 0.2705, "loss_nan_ranks": 0, "loss_rank_avg": 0.12822787463665009, "step": 1835, "valid_targets_mean": 4018.0, "valid_targets_min": 2579 }, { "epoch": 5.13986013986014, "grad_norm": 0.5690161371950347, "learning_rate": 8.031182529707664e-06, "loss": 0.2716, "loss_nan_ranks": 0, "loss_rank_avg": 0.1354401558637619, "step": 1840, "valid_targets_mean": 4168.8, "valid_targets_min": 3519 }, { "epoch": 5.153846153846154, "grad_norm": 0.5629144093332551, "learning_rate": 7.919857835003537e-06, "loss": 0.2711, "loss_nan_ranks": 0, "loss_rank_avg": 0.15186838805675507, "step": 1845, "valid_targets_mean": 4165.1, "valid_targets_min": 3368 }, { "epoch": 5.1678321678321675, "grad_norm": 0.563743984339271, "learning_rate": 7.80911930134177e-06, "loss": 0.2728, "loss_nan_ranks": 0, "loss_rank_avg": 0.14788247644901276, "step": 1850, "valid_targets_mean": 4042.1, "valid_targets_min": 3037 }, { "epoch": 5.181818181818182, "grad_norm": 0.5709261686227755, "learning_rate": 7.698972302054363e-06, "loss": 0.2737, "loss_nan_ranks": 0, "loss_rank_avg": 0.12226757407188416, "step": 1855, "valid_targets_mean": 3660.8, "valid_targets_min": 2672 }, { "epoch": 5.195804195804196, "grad_norm": 0.5230511431966958, "learning_rate": 7.589422181770445e-06, "loss": 0.2733, "loss_nan_ranks": 0, "loss_rank_avg": 0.1555660367012024, "step": 1860, "valid_targets_mean": 4756.2, "valid_targets_min": 3550 }, { "epoch": 5.20979020979021, "grad_norm": 0.5355198645927597, "learning_rate": 7.480474256157009e-06, "loss": 0.2736, "loss_nan_ranks": 0, "loss_rank_avg": 0.13351857662200928, "step": 1865, "valid_targets_mean": 4117.5, "valid_targets_min": 3253 }, { "epoch": 5.223776223776224, "grad_norm": 0.5608540652136996, "learning_rate": 7.3721338116609e-06, "loss": 0.2747, "loss_nan_ranks": 0, "loss_rank_avg": 0.11535479128360748, "step": 1870, "valid_targets_mean": 3693.8, "valid_targets_min": 3153 }, { "epoch": 5.2377622377622375, "grad_norm": 0.5154018340638021, "learning_rate": 7.264406105252371e-06, "loss": 0.2665, "loss_nan_ranks": 0, "loss_rank_avg": 0.13983562588691711, "step": 1875, "valid_targets_mean": 4123.9, "valid_targets_min": 3040 }, { "epoch": 5.251748251748252, "grad_norm": 0.5467856713482777, "learning_rate": 7.15729636416995e-06, "loss": 0.2632, "loss_nan_ranks": 0, "loss_rank_avg": 0.1238892674446106, "step": 1880, "valid_targets_mean": 3791.5, "valid_targets_min": 2608 }, { "epoch": 5.265734265734266, "grad_norm": 0.54842359149951, "learning_rate": 7.050809785666843e-06, "loss": 0.2653, "loss_nan_ranks": 0, "loss_rank_avg": 0.13005605340003967, "step": 1885, "valid_targets_mean": 4164.2, "valid_targets_min": 3396 }, { "epoch": 5.27972027972028, "grad_norm": 0.545252910180718, "learning_rate": 6.944951536758704e-06, "loss": 0.2694, "loss_nan_ranks": 0, "loss_rank_avg": 0.13909177482128143, "step": 1890, "valid_targets_mean": 4063.8, "valid_targets_min": 2827 }, { "epoch": 5.293706293706293, "grad_norm": 0.5397346270972334, "learning_rate": 6.83972675397298e-06, "loss": 0.2772, "loss_nan_ranks": 0, "loss_rank_avg": 0.13181471824645996, "step": 1895, "valid_targets_mean": 3866.4, "valid_targets_min": 2803 }, { "epoch": 5.3076923076923075, "grad_norm": 0.5548237621459055, "learning_rate": 6.7351405430995945e-06, "loss": 0.2692, "loss_nan_ranks": 0, "loss_rank_avg": 0.13173869252204895, "step": 1900, "valid_targets_mean": 3641.2, "valid_targets_min": 3145 }, { "epoch": 5.321678321678322, "grad_norm": 0.5345974972781379, "learning_rate": 6.631197978943273e-06, "loss": 0.2717, "loss_nan_ranks": 0, "loss_rank_avg": 0.13525938987731934, "step": 1905, "valid_targets_mean": 4209.6, "valid_targets_min": 3261 }, { "epoch": 5.335664335664336, "grad_norm": 0.5454110327227513, "learning_rate": 6.527904105077243e-06, "loss": 0.2747, "loss_nan_ranks": 0, "loss_rank_avg": 0.13628993928432465, "step": 1910, "valid_targets_mean": 3950.0, "valid_targets_min": 3165 }, { "epoch": 5.34965034965035, "grad_norm": 0.5592856850404151, "learning_rate": 6.425263933598549e-06, "loss": 0.2733, "loss_nan_ranks": 0, "loss_rank_avg": 0.14988678693771362, "step": 1915, "valid_targets_mean": 4385.8, "valid_targets_min": 3143 }, { "epoch": 5.363636363636363, "grad_norm": 0.5382247504745173, "learning_rate": 6.323282444884826e-06, "loss": 0.2751, "loss_nan_ranks": 0, "loss_rank_avg": 0.12678101658821106, "step": 1920, "valid_targets_mean": 3971.8, "valid_targets_min": 3394 }, { "epoch": 5.3776223776223775, "grad_norm": 0.5598315390711681, "learning_rate": 6.221964587352653e-06, "loss": 0.2798, "loss_nan_ranks": 0, "loss_rank_avg": 0.14666910469532013, "step": 1925, "valid_targets_mean": 4419.0, "valid_targets_min": 3410 }, { "epoch": 5.391608391608392, "grad_norm": 0.5706554507694481, "learning_rate": 6.121315277217441e-06, "loss": 0.2642, "loss_nan_ranks": 0, "loss_rank_avg": 0.12421803176403046, "step": 1930, "valid_targets_mean": 3574.9, "valid_targets_min": 2831 }, { "epoch": 5.405594405594406, "grad_norm": 0.544445641577291, "learning_rate": 6.0213393982548555e-06, "loss": 0.2729, "loss_nan_ranks": 0, "loss_rank_avg": 0.13498976826667786, "step": 1935, "valid_targets_mean": 3970.4, "valid_targets_min": 2889 }, { "epoch": 5.41958041958042, "grad_norm": 0.5301613281703389, "learning_rate": 5.922041801563898e-06, "loss": 0.2712, "loss_nan_ranks": 0, "loss_rank_avg": 0.1342143714427948, "step": 1940, "valid_targets_mean": 4135.8, "valid_targets_min": 2934 }, { "epoch": 5.433566433566433, "grad_norm": 0.5854176960773179, "learning_rate": 5.823427305331461e-06, "loss": 0.2693, "loss_nan_ranks": 0, "loss_rank_avg": 0.1379438191652298, "step": 1945, "valid_targets_mean": 3838.6, "valid_targets_min": 3264 }, { "epoch": 5.4475524475524475, "grad_norm": 0.5436336083045578, "learning_rate": 5.72550069459858e-06, "loss": 0.2733, "loss_nan_ranks": 0, "loss_rank_avg": 0.11156716197729111, "step": 1950, "valid_targets_mean": 3711.8, "valid_targets_min": 2618 }, { "epoch": 5.461538461538462, "grad_norm": 0.5441899128462344, "learning_rate": 5.628266721028226e-06, "loss": 0.2718, "loss_nan_ranks": 0, "loss_rank_avg": 0.13238012790679932, "step": 1955, "valid_targets_mean": 4007.5, "valid_targets_min": 2541 }, { "epoch": 5.475524475524476, "grad_norm": 0.5509661923488149, "learning_rate": 5.5317301026747575e-06, "loss": 0.2701, "loss_nan_ranks": 0, "loss_rank_avg": 0.122523233294487, "step": 1960, "valid_targets_mean": 3560.5, "valid_targets_min": 2805 }, { "epoch": 5.489510489510489, "grad_norm": 0.5387065769733951, "learning_rate": 5.435895523754957e-06, "loss": 0.2714, "loss_nan_ranks": 0, "loss_rank_avg": 0.11541329324245453, "step": 1965, "valid_targets_mean": 3595.0, "valid_targets_min": 2677 }, { "epoch": 5.503496503496503, "grad_norm": 0.5689105133041431, "learning_rate": 5.340767634420794e-06, "loss": 0.2701, "loss_nan_ranks": 0, "loss_rank_avg": 0.14338526129722595, "step": 1970, "valid_targets_mean": 4151.4, "valid_targets_min": 3121 }, { "epoch": 5.5174825174825175, "grad_norm": 0.5512275132765089, "learning_rate": 5.24635105053372e-06, "loss": 0.2721, "loss_nan_ranks": 0, "loss_rank_avg": 0.12350888550281525, "step": 1975, "valid_targets_mean": 3481.2, "valid_targets_min": 1555 }, { "epoch": 5.531468531468532, "grad_norm": 0.5431318474984762, "learning_rate": 5.15265035344076e-06, "loss": 0.2709, "loss_nan_ranks": 0, "loss_rank_avg": 0.12556716799736023, "step": 1980, "valid_targets_mean": 4025.4, "valid_targets_min": 3229 }, { "epoch": 5.545454545454545, "grad_norm": 0.5234687553570591, "learning_rate": 5.059670089752166e-06, "loss": 0.2728, "loss_nan_ranks": 0, "loss_rank_avg": 0.13217531144618988, "step": 1985, "valid_targets_mean": 4185.8, "valid_targets_min": 3466 }, { "epoch": 5.559440559440559, "grad_norm": 0.5398701726946824, "learning_rate": 4.967414771120837e-06, "loss": 0.2746, "loss_nan_ranks": 0, "loss_rank_avg": 0.12115156650543213, "step": 1990, "valid_targets_mean": 3713.0, "valid_targets_min": 3033 }, { "epoch": 5.573426573426573, "grad_norm": 0.5233699630630368, "learning_rate": 4.875888874023358e-06, "loss": 0.2719, "loss_nan_ranks": 0, "loss_rank_avg": 0.13094103336334229, "step": 1995, "valid_targets_mean": 3981.8, "valid_targets_min": 2770 }, { "epoch": 5.5874125874125875, "grad_norm": 0.5586978374619914, "learning_rate": 4.78509683954284e-06, "loss": 0.2802, "loss_nan_ranks": 0, "loss_rank_avg": 0.12762457132339478, "step": 2000, "valid_targets_mean": 3771.9, "valid_targets_min": 2910 }, { "epoch": 5.601398601398602, "grad_norm": 0.5548139517512097, "learning_rate": 4.695043073153398e-06, "loss": 0.2645, "loss_nan_ranks": 0, "loss_rank_avg": 0.12287722527980804, "step": 2005, "valid_targets_mean": 3610.1, "valid_targets_min": 2849 }, { "epoch": 5.615384615384615, "grad_norm": 0.5591205721510735, "learning_rate": 4.605731944506377e-06, "loss": 0.2749, "loss_nan_ranks": 0, "loss_rank_avg": 0.14226460456848145, "step": 2010, "valid_targets_mean": 4057.4, "valid_targets_min": 3171 }, { "epoch": 5.629370629370629, "grad_norm": 0.5834660820002847, "learning_rate": 4.5171677872183506e-06, "loss": 0.266, "loss_nan_ranks": 0, "loss_rank_avg": 0.12792709469795227, "step": 2015, "valid_targets_mean": 3787.1, "valid_targets_min": 3070 }, { "epoch": 5.643356643356643, "grad_norm": 0.5665120389034493, "learning_rate": 4.429354898660829e-06, "loss": 0.2699, "loss_nan_ranks": 0, "loss_rank_avg": 0.12466013431549072, "step": 2020, "valid_targets_mean": 3776.8, "valid_targets_min": 2837 }, { "epoch": 5.6573426573426575, "grad_norm": 0.5403150986710322, "learning_rate": 4.3422975397517455e-06, "loss": 0.2601, "loss_nan_ranks": 0, "loss_rank_avg": 0.12709736824035645, "step": 2025, "valid_targets_mean": 3963.9, "valid_targets_min": 2884 }, { "epoch": 5.671328671328672, "grad_norm": 0.566316186869702, "learning_rate": 4.255999934748673e-06, "loss": 0.2729, "loss_nan_ranks": 0, "loss_rank_avg": 0.13081523776054382, "step": 2030, "valid_targets_mean": 3995.1, "valid_targets_min": 2928 }, { "epoch": 5.685314685314685, "grad_norm": 0.5427526585310279, "learning_rate": 4.1704662710439156e-06, "loss": 0.269, "loss_nan_ranks": 0, "loss_rank_avg": 0.1348644345998764, "step": 2035, "valid_targets_mean": 3812.4, "valid_targets_min": 2977 }, { "epoch": 5.699300699300699, "grad_norm": 0.543804002042524, "learning_rate": 4.085700698961252e-06, "loss": 0.2763, "loss_nan_ranks": 0, "loss_rank_avg": 0.1415868103504181, "step": 2040, "valid_targets_mean": 4173.8, "valid_targets_min": 3361 }, { "epoch": 5.713286713286713, "grad_norm": 0.5598561184404056, "learning_rate": 4.00170733155461e-06, "loss": 0.2662, "loss_nan_ranks": 0, "loss_rank_avg": 0.14282873272895813, "step": 2045, "valid_targets_mean": 4030.9, "valid_targets_min": 3101 }, { "epoch": 5.7272727272727275, "grad_norm": 0.5358578632226371, "learning_rate": 3.9184902444084575e-06, "loss": 0.2729, "loss_nan_ranks": 0, "loss_rank_avg": 0.16486287117004395, "step": 2050, "valid_targets_mean": 4656.9, "valid_targets_min": 3610 }, { "epoch": 5.741258741258742, "grad_norm": 0.5307718718193138, "learning_rate": 3.836053475440058e-06, "loss": 0.2702, "loss_nan_ranks": 0, "loss_rank_avg": 0.1442933827638626, "step": 2055, "valid_targets_mean": 4207.2, "valid_targets_min": 3318 }, { "epoch": 5.755244755244755, "grad_norm": 0.5732719806023284, "learning_rate": 3.7544010247035247e-06, "loss": 0.2698, "loss_nan_ranks": 0, "loss_rank_avg": 0.12907767295837402, "step": 2060, "valid_targets_mean": 3727.5, "valid_targets_min": 3049 }, { "epoch": 5.769230769230769, "grad_norm": 0.5276593358765387, "learning_rate": 3.6735368541957494e-06, "loss": 0.2675, "loss_nan_ranks": 0, "loss_rank_avg": 0.12089847773313522, "step": 2065, "valid_targets_mean": 3708.5, "valid_targets_min": 1757 }, { "epoch": 5.783216783216783, "grad_norm": 0.5519567254529144, "learning_rate": 3.5934648876641287e-06, "loss": 0.2671, "loss_nan_ranks": 0, "loss_rank_avg": 0.11965890228748322, "step": 2070, "valid_targets_mean": 3717.1, "valid_targets_min": 2343 }, { "epoch": 5.7972027972027975, "grad_norm": 0.5322446259091591, "learning_rate": 3.5141890104162e-06, "loss": 0.2717, "loss_nan_ranks": 0, "loss_rank_avg": 0.12779703736305237, "step": 2075, "valid_targets_mean": 4028.6, "valid_targets_min": 3267 }, { "epoch": 5.811188811188811, "grad_norm": 0.5334986962890785, "learning_rate": 3.4357130691311057e-06, "loss": 0.2792, "loss_nan_ranks": 0, "loss_rank_avg": 0.13747793436050415, "step": 2080, "valid_targets_mean": 4066.5, "valid_targets_min": 3061 }, { "epoch": 5.825174825174825, "grad_norm": 0.5476187220706545, "learning_rate": 3.3580408716729342e-06, "loss": 0.2763, "loss_nan_ranks": 0, "loss_rank_avg": 0.13098829984664917, "step": 2085, "valid_targets_mean": 4036.9, "valid_targets_min": 3111 }, { "epoch": 5.839160839160839, "grad_norm": 0.5266622418784924, "learning_rate": 3.2811761869059524e-06, "loss": 0.2755, "loss_nan_ranks": 0, "loss_rank_avg": 0.13867492973804474, "step": 2090, "valid_targets_mean": 4135.0, "valid_targets_min": 3449 }, { "epoch": 5.853146853146853, "grad_norm": 0.5437713900346135, "learning_rate": 3.205122744511746e-06, "loss": 0.2723, "loss_nan_ranks": 0, "loss_rank_avg": 0.11924904584884644, "step": 2095, "valid_targets_mean": 3806.0, "valid_targets_min": 2644 }, { "epoch": 5.867132867132867, "grad_norm": 0.5349444010988446, "learning_rate": 3.129884234808238e-06, "loss": 0.2695, "loss_nan_ranks": 0, "loss_rank_avg": 0.13737419247627258, "step": 2100, "valid_targets_mean": 3994.4, "valid_targets_min": 2756 }, { "epoch": 5.881118881118881, "grad_norm": 0.5553025674903422, "learning_rate": 3.0554643085706037e-06, "loss": 0.2644, "loss_nan_ranks": 0, "loss_rank_avg": 0.12544646859169006, "step": 2105, "valid_targets_mean": 4000.6, "valid_targets_min": 2457 }, { "epoch": 5.895104895104895, "grad_norm": 0.6817629822110741, "learning_rate": 2.981866576854164e-06, "loss": 0.2693, "loss_nan_ranks": 0, "loss_rank_avg": 0.12345971167087555, "step": 2110, "valid_targets_mean": 3586.5, "valid_targets_min": 992 }, { "epoch": 5.909090909090909, "grad_norm": 0.5590147260294989, "learning_rate": 2.909094610819134e-06, "loss": 0.2698, "loss_nan_ranks": 0, "loss_rank_avg": 0.1251392960548401, "step": 2115, "valid_targets_mean": 3674.2, "valid_targets_min": 2820 }, { "epoch": 5.923076923076923, "grad_norm": 0.557366303536245, "learning_rate": 2.8371519415573635e-06, "loss": 0.2653, "loss_nan_ranks": 0, "loss_rank_avg": 0.13488712906837463, "step": 2120, "valid_targets_mean": 3840.5, "valid_targets_min": 2575 }, { "epoch": 5.937062937062937, "grad_norm": 0.5544828872161118, "learning_rate": 2.7660420599209726e-06, "loss": 0.2711, "loss_nan_ranks": 0, "loss_rank_avg": 0.14264677464962006, "step": 2125, "valid_targets_mean": 4114.9, "valid_targets_min": 3232 }, { "epoch": 5.951048951048951, "grad_norm": 0.524463252308108, "learning_rate": 2.6957684163530017e-06, "loss": 0.2733, "loss_nan_ranks": 0, "loss_rank_avg": 0.1365211009979248, "step": 2130, "valid_targets_mean": 4064.0, "valid_targets_min": 3214 }, { "epoch": 5.965034965034965, "grad_norm": 0.5565069001442264, "learning_rate": 2.6263344207199446e-06, "loss": 0.2681, "loss_nan_ranks": 0, "loss_rank_avg": 0.12422733008861542, "step": 2135, "valid_targets_mean": 3880.2, "valid_targets_min": 3098 }, { "epoch": 5.979020979020979, "grad_norm": 0.5546179099722904, "learning_rate": 2.557743442146343e-06, "loss": 0.2713, "loss_nan_ranks": 0, "loss_rank_avg": 0.12977087497711182, "step": 2140, "valid_targets_mean": 3723.1, "valid_targets_min": 3051 }, { "epoch": 5.993006993006993, "grad_norm": 0.5418205062024356, "learning_rate": 2.489998808851255e-06, "loss": 0.2654, "loss_nan_ranks": 0, "loss_rank_avg": 0.12254351377487183, "step": 2145, "valid_targets_mean": 3896.9, "valid_targets_min": 3199 }, { "epoch": 6.0055944055944055, "grad_norm": 0.552957468149782, "learning_rate": 2.423103807986802e-06, "loss": 0.2756, "loss_nan_ranks": 0, "loss_rank_avg": 0.14880408346652985, "step": 2150, "valid_targets_mean": 4114.1, "valid_targets_min": 3250 }, { "epoch": 6.01958041958042, "grad_norm": 0.5492004448838709, "learning_rate": 2.3570616854786364e-06, "loss": 0.269, "loss_nan_ranks": 0, "loss_rank_avg": 0.14903533458709717, "step": 2155, "valid_targets_mean": 4255.5, "valid_targets_min": 2806 }, { "epoch": 6.033566433566434, "grad_norm": 0.571435073296382, "learning_rate": 2.291875645868471e-06, "loss": 0.2639, "loss_nan_ranks": 0, "loss_rank_avg": 0.13312679529190063, "step": 2160, "valid_targets_mean": 3886.4, "valid_targets_min": 3200 }, { "epoch": 6.047552447552447, "grad_norm": 0.5374462445034749, "learning_rate": 2.227548852158552e-06, "loss": 0.2663, "loss_nan_ranks": 0, "loss_rank_avg": 0.13961821794509888, "step": 2165, "valid_targets_mean": 4129.4, "valid_targets_min": 2926 }, { "epoch": 6.061538461538461, "grad_norm": 0.5438203346611344, "learning_rate": 2.1640844256582262e-06, "loss": 0.2679, "loss_nan_ranks": 0, "loss_rank_avg": 0.12883234024047852, "step": 2170, "valid_targets_mean": 4176.5, "valid_targets_min": 2854 }, { "epoch": 6.0755244755244755, "grad_norm": 0.5505312743429058, "learning_rate": 2.10148544583243e-06, "loss": 0.2658, "loss_nan_ranks": 0, "loss_rank_avg": 0.1427292674779892, "step": 2175, "valid_targets_mean": 4324.2, "valid_targets_min": 3172 }, { "epoch": 6.08951048951049, "grad_norm": 0.5667608880027779, "learning_rate": 2.039754950152313e-06, "loss": 0.2728, "loss_nan_ranks": 0, "loss_rank_avg": 0.1438840627670288, "step": 2180, "valid_targets_mean": 4165.2, "valid_targets_min": 3731 }, { "epoch": 6.103496503496504, "grad_norm": 0.5400616178402221, "learning_rate": 1.978895933947835e-06, "loss": 0.2698, "loss_nan_ranks": 0, "loss_rank_avg": 0.11797113716602325, "step": 2185, "valid_targets_mean": 3583.9, "valid_targets_min": 2896 }, { "epoch": 6.117482517482517, "grad_norm": 0.5821801793796898, "learning_rate": 1.918911350262411e-06, "loss": 0.2598, "loss_nan_ranks": 0, "loss_rank_avg": 0.1384894847869873, "step": 2190, "valid_targets_mean": 3854.6, "valid_targets_min": 3037 }, { "epoch": 6.131468531468531, "grad_norm": 0.5502620689863138, "learning_rate": 1.859804109709651e-06, "loss": 0.2668, "loss_nan_ranks": 0, "loss_rank_avg": 0.13089479506015778, "step": 2195, "valid_targets_mean": 4039.4, "valid_targets_min": 3085 }, { "epoch": 6.1454545454545455, "grad_norm": 0.5322934652272718, "learning_rate": 1.8015770803320997e-06, "loss": 0.265, "loss_nan_ranks": 0, "loss_rank_avg": 0.13279101252555847, "step": 2200, "valid_targets_mean": 4039.9, "valid_targets_min": 3100 }, { "epoch": 6.15944055944056, "grad_norm": 0.5632067853325199, "learning_rate": 1.744233087462095e-06, "loss": 0.2721, "loss_nan_ranks": 0, "loss_rank_avg": 0.13131995499134064, "step": 2205, "valid_targets_mean": 4003.1, "valid_targets_min": 2486 }, { "epoch": 6.173426573426573, "grad_norm": 0.553891728108681, "learning_rate": 1.6877749135846521e-06, "loss": 0.2651, "loss_nan_ranks": 0, "loss_rank_avg": 0.12857134640216827, "step": 2210, "valid_targets_mean": 4070.9, "valid_targets_min": 3413 }, { "epoch": 6.187412587412587, "grad_norm": 0.5454514353907892, "learning_rate": 1.6322052982024739e-06, "loss": 0.2686, "loss_nan_ranks": 0, "loss_rank_avg": 0.1367558240890503, "step": 2215, "valid_targets_mean": 4020.5, "valid_targets_min": 3350 }, { "epoch": 6.201398601398601, "grad_norm": 0.5198130923177857, "learning_rate": 1.577526937703e-06, "loss": 0.2679, "loss_nan_ranks": 0, "loss_rank_avg": 0.13713058829307556, "step": 2220, "valid_targets_mean": 4349.2, "valid_targets_min": 3645 }, { "epoch": 6.2153846153846155, "grad_norm": 0.5571955144273739, "learning_rate": 1.5237424852275905e-06, "loss": 0.2694, "loss_nan_ranks": 0, "loss_rank_avg": 0.13345694541931152, "step": 2225, "valid_targets_mean": 3902.6, "valid_targets_min": 3360 }, { "epoch": 6.22937062937063, "grad_norm": 0.5457269649577497, "learning_rate": 1.4708545505427796e-06, "loss": 0.2715, "loss_nan_ranks": 0, "loss_rank_avg": 0.13142569363117218, "step": 2230, "valid_targets_mean": 4007.6, "valid_targets_min": 2861 }, { "epoch": 6.243356643356643, "grad_norm": 0.5554927561165469, "learning_rate": 1.418865699913643e-06, "loss": 0.2698, "loss_nan_ranks": 0, "loss_rank_avg": 0.138395756483078, "step": 2235, "valid_targets_mean": 4075.2, "valid_targets_min": 3224 }, { "epoch": 6.257342657342657, "grad_norm": 0.5404990692630535, "learning_rate": 1.3677784559792672e-06, "loss": 0.2695, "loss_nan_ranks": 0, "loss_rank_avg": 0.11688215285539627, "step": 2240, "valid_targets_mean": 3285.1, "valid_targets_min": 1616 }, { "epoch": 6.271328671328671, "grad_norm": 0.531430730417053, "learning_rate": 1.3175952976303675e-06, "loss": 0.2649, "loss_nan_ranks": 0, "loss_rank_avg": 0.138889878988266, "step": 2245, "valid_targets_mean": 4228.5, "valid_targets_min": 2868 }, { "epoch": 6.2853146853146855, "grad_norm": 0.5253694275916504, "learning_rate": 1.268318659888974e-06, "loss": 0.2641, "loss_nan_ranks": 0, "loss_rank_avg": 0.129787415266037, "step": 2250, "valid_targets_mean": 4033.1, "valid_targets_min": 3074 }, { "epoch": 6.2993006993007, "grad_norm": 0.5497783467552961, "learning_rate": 1.2199509337903103e-06, "loss": 0.2646, "loss_nan_ranks": 0, "loss_rank_avg": 0.13305220007896423, "step": 2255, "valid_targets_mean": 3959.0, "valid_targets_min": 3331 }, { "epoch": 6.313286713286713, "grad_norm": 0.5448391159677953, "learning_rate": 1.172494466266747e-06, "loss": 0.2733, "loss_nan_ranks": 0, "loss_rank_avg": 0.14321637153625488, "step": 2260, "valid_targets_mean": 4348.4, "valid_targets_min": 3067 }, { "epoch": 6.327272727272727, "grad_norm": 0.5238073262215436, "learning_rate": 1.1259515600339465e-06, "loss": 0.263, "loss_nan_ranks": 0, "loss_rank_avg": 0.13795027136802673, "step": 2265, "valid_targets_mean": 4254.9, "valid_targets_min": 3826 }, { "epoch": 6.341258741258741, "grad_norm": 0.5784905306811993, "learning_rate": 1.0803244734790996e-06, "loss": 0.2637, "loss_nan_ranks": 0, "loss_rank_avg": 0.1256517767906189, "step": 2270, "valid_targets_mean": 4051.8, "valid_targets_min": 3172 }, { "epoch": 6.3552447552447555, "grad_norm": 0.5269490201207044, "learning_rate": 1.0356154205513724e-06, "loss": 0.2645, "loss_nan_ranks": 0, "loss_rank_avg": 0.13470402359962463, "step": 2275, "valid_targets_mean": 4274.8, "valid_targets_min": 3491 }, { "epoch": 6.36923076923077, "grad_norm": 0.538753432570666, "learning_rate": 9.918265706544617e-07, "loss": 0.2684, "loss_nan_ranks": 0, "loss_rank_avg": 0.12407108396291733, "step": 2280, "valid_targets_mean": 3773.9, "valid_targets_min": 2717 }, { "epoch": 6.383216783216783, "grad_norm": 0.5368620159533775, "learning_rate": 9.489600485413297e-07, "loss": 0.2725, "loss_nan_ranks": 0, "loss_rank_avg": 0.13677144050598145, "step": 2285, "valid_targets_mean": 4002.6, "valid_targets_min": 3199 }, { "epoch": 6.397202797202797, "grad_norm": 0.5426039569577848, "learning_rate": 9.070179342111163e-07, "loss": 0.2666, "loss_nan_ranks": 0, "loss_rank_avg": 0.13231492042541504, "step": 2290, "valid_targets_mean": 3965.5, "valid_targets_min": 3125 }, { "epoch": 6.411188811188811, "grad_norm": 0.5749418323529093, "learning_rate": 8.660022628082033e-07, "loss": 0.2745, "loss_nan_ranks": 0, "loss_rank_avg": 0.1334465742111206, "step": 2295, "valid_targets_mean": 3806.8, "valid_targets_min": 2858 }, { "epoch": 6.4251748251748255, "grad_norm": 0.5390735537017816, "learning_rate": 8.259150245234671e-07, "loss": 0.2642, "loss_nan_ranks": 0, "loss_rank_avg": 0.13433882594108582, "step": 2300, "valid_targets_mean": 4025.5, "valid_targets_min": 3216 }, { "epoch": 6.439160839160839, "grad_norm": 0.5333577613609506, "learning_rate": 7.867581644977029e-07, "loss": 0.2627, "loss_nan_ranks": 0, "loss_rank_avg": 0.12350418418645859, "step": 2305, "valid_targets_mean": 3507.1, "valid_targets_min": 1980 }, { "epoch": 6.453146853146853, "grad_norm": 0.5202092545268794, "learning_rate": 7.485335827272555e-07, "loss": 0.2615, "loss_nan_ranks": 0, "loss_rank_avg": 0.12309978902339935, "step": 2310, "valid_targets_mean": 3788.5, "valid_targets_min": 3092 }, { "epoch": 6.467132867132867, "grad_norm": 0.5503621055173982, "learning_rate": 7.11243133971804e-07, "loss": 0.2648, "loss_nan_ranks": 0, "loss_rank_avg": 0.11553049087524414, "step": 2315, "valid_targets_mean": 3731.9, "valid_targets_min": 3035 }, { "epoch": 6.481118881118881, "grad_norm": 0.5575585617975486, "learning_rate": 6.748886276643874e-07, "loss": 0.2592, "loss_nan_ranks": 0, "loss_rank_avg": 0.14074815809726715, "step": 2320, "valid_targets_mean": 3874.0, "valid_targets_min": 2985 }, { "epoch": 6.495104895104895, "grad_norm": 0.5620344157367718, "learning_rate": 6.394718278235923e-07, "loss": 0.2678, "loss_nan_ranks": 0, "loss_rank_avg": 0.13184072077274323, "step": 2325, "valid_targets_mean": 4030.6, "valid_targets_min": 3225 }, { "epoch": 6.509090909090909, "grad_norm": 0.5346237968435054, "learning_rate": 6.049944529679641e-07, "loss": 0.2651, "loss_nan_ranks": 0, "loss_rank_avg": 0.130045086145401, "step": 2330, "valid_targets_mean": 4099.5, "valid_targets_min": 3545 }, { "epoch": 6.523076923076923, "grad_norm": 0.5410265786955848, "learning_rate": 5.714581760326133e-07, "loss": 0.2699, "loss_nan_ranks": 0, "loss_rank_avg": 0.14165914058685303, "step": 2335, "valid_targets_mean": 4237.1, "valid_targets_min": 3637 }, { "epoch": 6.537062937062937, "grad_norm": 0.5346944294313545, "learning_rate": 5.388646242880446e-07, "loss": 0.2636, "loss_nan_ranks": 0, "loss_rank_avg": 0.14222806692123413, "step": 2340, "valid_targets_mean": 4174.2, "valid_targets_min": 3276 }, { "epoch": 6.551048951048951, "grad_norm": 0.5386590991971009, "learning_rate": 5.072153792611967e-07, "loss": 0.269, "loss_nan_ranks": 0, "loss_rank_avg": 0.147568941116333, "step": 2345, "valid_targets_mean": 4274.8, "valid_targets_min": 2955 }, { "epoch": 6.565034965034965, "grad_norm": 0.5331131790767714, "learning_rate": 4.765119766587023e-07, "loss": 0.2662, "loss_nan_ranks": 0, "loss_rank_avg": 0.13249267637729645, "step": 2350, "valid_targets_mean": 3939.5, "valid_targets_min": 3084 }, { "epoch": 6.579020979020979, "grad_norm": 0.5390748002008539, "learning_rate": 4.4675590629237543e-07, "loss": 0.2715, "loss_nan_ranks": 0, "loss_rank_avg": 0.14052125811576843, "step": 2355, "valid_targets_mean": 4178.8, "valid_targets_min": 3183 }, { "epoch": 6.593006993006993, "grad_norm": 0.5409625226651458, "learning_rate": 4.1794861200691317e-07, "loss": 0.26, "loss_nan_ranks": 0, "loss_rank_avg": 0.1431492567062378, "step": 2360, "valid_targets_mean": 4139.4, "valid_targets_min": 2476 }, { "epoch": 6.606993006993007, "grad_norm": 0.5264929203806954, "learning_rate": 3.9009149160984305e-07, "loss": 0.2641, "loss_nan_ranks": 0, "loss_rank_avg": 0.14085371792316437, "step": 2365, "valid_targets_mean": 4276.2, "valid_targets_min": 3364 }, { "epoch": 6.620979020979021, "grad_norm": 0.527456064000642, "learning_rate": 3.6318589680369276e-07, "loss": 0.2683, "loss_nan_ranks": 0, "loss_rank_avg": 0.12847009301185608, "step": 2370, "valid_targets_mean": 4058.8, "valid_targets_min": 3111 }, { "epoch": 6.634965034965035, "grad_norm": 0.535149286903409, "learning_rate": 3.3723313312040927e-07, "loss": 0.2656, "loss_nan_ranks": 0, "loss_rank_avg": 0.1440798044204712, "step": 2375, "valid_targets_mean": 4225.2, "valid_targets_min": 3191 }, { "epoch": 6.648951048951049, "grad_norm": 0.5572363983642188, "learning_rate": 3.1223445985800294e-07, "loss": 0.2656, "loss_nan_ranks": 0, "loss_rank_avg": 0.11801092326641083, "step": 2380, "valid_targets_mean": 3499.2, "valid_targets_min": 2751 }, { "epoch": 6.662937062937063, "grad_norm": 0.5441750509803539, "learning_rate": 2.88191090019454e-07, "loss": 0.2678, "loss_nan_ranks": 0, "loss_rank_avg": 0.14033466577529907, "step": 2385, "valid_targets_mean": 3916.5, "valid_targets_min": 3171 }, { "epoch": 6.676923076923077, "grad_norm": 0.5320750039573556, "learning_rate": 2.651041902538332e-07, "loss": 0.2602, "loss_nan_ranks": 0, "loss_rank_avg": 0.13491207361221313, "step": 2390, "valid_targets_mean": 3886.1, "valid_targets_min": 2984 }, { "epoch": 6.690909090909091, "grad_norm": 0.5549125626831143, "learning_rate": 2.429748807997201e-07, "loss": 0.275, "loss_nan_ranks": 0, "loss_rank_avg": 0.1348915696144104, "step": 2395, "valid_targets_mean": 3911.8, "valid_targets_min": 3358 }, { "epoch": 6.704895104895105, "grad_norm": 0.527522284417272, "learning_rate": 2.2180423543082253e-07, "loss": 0.2653, "loss_nan_ranks": 0, "loss_rank_avg": 0.14493533968925476, "step": 2400, "valid_targets_mean": 4229.1, "valid_targets_min": 3291 }, { "epoch": 6.718881118881119, "grad_norm": 0.5422104630142606, "learning_rate": 2.0159328140389346e-07, "loss": 0.2661, "loss_nan_ranks": 0, "loss_rank_avg": 0.1292484998703003, "step": 2405, "valid_targets_mean": 3970.4, "valid_targets_min": 3076 }, { "epoch": 6.732867132867133, "grad_norm": 0.5413238372734106, "learning_rate": 1.8234299940886434e-07, "loss": 0.2658, "loss_nan_ranks": 0, "loss_rank_avg": 0.14486880600452423, "step": 2410, "valid_targets_mean": 4450.5, "valid_targets_min": 3504 }, { "epoch": 6.746853146853147, "grad_norm": 0.5611932644120156, "learning_rate": 1.640543235212877e-07, "loss": 0.2629, "loss_nan_ranks": 0, "loss_rank_avg": 0.1367993950843811, "step": 2415, "valid_targets_mean": 3955.9, "valid_targets_min": 2917 }, { "epoch": 6.7608391608391605, "grad_norm": 0.5335539255184142, "learning_rate": 1.467281411569821e-07, "loss": 0.269, "loss_nan_ranks": 0, "loss_rank_avg": 0.1371554732322693, "step": 2420, "valid_targets_mean": 3969.2, "valid_targets_min": 3193 }, { "epoch": 6.774825174825175, "grad_norm": 0.5248690412448175, "learning_rate": 1.303652930289956e-07, "loss": 0.2622, "loss_nan_ranks": 0, "loss_rank_avg": 0.12784817814826965, "step": 2425, "valid_targets_mean": 4070.5, "valid_targets_min": 2717 }, { "epoch": 6.788811188811189, "grad_norm": 0.5406486609963523, "learning_rate": 1.1496657310680282e-07, "loss": 0.2701, "loss_nan_ranks": 0, "loss_rank_avg": 0.12824086844921112, "step": 2430, "valid_targets_mean": 3958.2, "valid_targets_min": 2986 }, { "epoch": 6.802797202797203, "grad_norm": 0.5626681678696616, "learning_rate": 1.0053272857777797e-07, "loss": 0.2721, "loss_nan_ranks": 0, "loss_rank_avg": 0.13599160313606262, "step": 2435, "valid_targets_mean": 4058.1, "valid_targets_min": 3411 }, { "epoch": 6.816783216783216, "grad_norm": 0.5283427444759582, "learning_rate": 8.706445981093937e-08, "loss": 0.2604, "loss_nan_ranks": 0, "loss_rank_avg": 0.1412394791841507, "step": 2440, "valid_targets_mean": 4395.0, "valid_targets_min": 3198 }, { "epoch": 6.8307692307692305, "grad_norm": 0.5438460618102079, "learning_rate": 7.45624203229789e-08, "loss": 0.2713, "loss_nan_ranks": 0, "loss_rank_avg": 0.13004207611083984, "step": 2445, "valid_targets_mean": 3813.1, "valid_targets_min": 2914 }, { "epoch": 6.844755244755245, "grad_norm": 0.5429046624514684, "learning_rate": 6.302721674652957e-08, "loss": 0.2603, "loss_nan_ranks": 0, "loss_rank_avg": 0.14121410250663757, "step": 2450, "valid_targets_mean": 3919.0, "valid_targets_min": 3132 }, { "epoch": 6.858741258741259, "grad_norm": 0.5441887195893792, "learning_rate": 5.2459408800744626e-08, "loss": 0.2624, "loss_nan_ranks": 0, "loss_rank_avg": 0.1358700692653656, "step": 2455, "valid_targets_mean": 3912.2, "valid_targets_min": 1919 }, { "epoch": 6.872727272727273, "grad_norm": 0.5288312054094918, "learning_rate": 4.285950926413929e-08, "loss": 0.2674, "loss_nan_ranks": 0, "loss_rank_avg": 0.11977240443229675, "step": 2460, "valid_targets_mean": 3656.6, "valid_targets_min": 3087 }, { "epoch": 6.886713286713286, "grad_norm": 0.5639120371598475, "learning_rate": 3.4227983949699506e-08, "loss": 0.2689, "loss_nan_ranks": 0, "loss_rank_avg": 0.12470817565917969, "step": 2465, "valid_targets_mean": 3575.1, "valid_targets_min": 2499 }, { "epoch": 6.9006993006993005, "grad_norm": 0.5268719788697689, "learning_rate": 2.656525168228674e-08, "loss": 0.2713, "loss_nan_ranks": 0, "loss_rank_avg": 0.13214604556560516, "step": 2470, "valid_targets_mean": 4328.4, "valid_targets_min": 3577 }, { "epoch": 6.914685314685315, "grad_norm": 0.5451610230902841, "learning_rate": 1.9871684278314207e-08, "loss": 0.2687, "loss_nan_ranks": 0, "loss_rank_avg": 0.15296560525894165, "step": 2475, "valid_targets_mean": 4383.1, "valid_targets_min": 3174 }, { "epoch": 6.928671328671329, "grad_norm": 0.5363716539542387, "learning_rate": 1.4147606527707969e-08, "loss": 0.2678, "loss_nan_ranks": 0, "loss_rank_avg": 0.14485123753547668, "step": 2480, "valid_targets_mean": 4384.8, "valid_targets_min": 3575 }, { "epoch": 6.942657342657343, "grad_norm": 0.552159799619343, "learning_rate": 9.393296178137334e-09, "loss": 0.2635, "loss_nan_ranks": 0, "loss_rank_avg": 0.141590878367424, "step": 2485, "valid_targets_mean": 3966.6, "valid_targets_min": 3358 }, { "epoch": 6.956643356643356, "grad_norm": 0.5470148363304371, "learning_rate": 5.6089839215522916e-09, "loss": 0.2609, "loss_nan_ranks": 0, "loss_rank_avg": 0.14139097929000854, "step": 2490, "valid_targets_mean": 4122.8, "valid_targets_min": 3202 }, { "epoch": 6.9706293706293705, "grad_norm": 0.5343799935594152, "learning_rate": 2.794853382976914e-09, "loss": 0.2668, "loss_nan_ranks": 0, "loss_rank_avg": 0.14910870790481567, "step": 2495, "valid_targets_mean": 4249.1, "valid_targets_min": 3352 }, { "epoch": 6.984615384615385, "grad_norm": 0.5388994673635329, "learning_rate": 9.510411116075978e-10, "loss": 0.2702, "loss_nan_ranks": 0, "loss_rank_avg": 0.12203998863697052, "step": 2500, "valid_targets_mean": 3729.9, "valid_targets_min": 2964 }, { "epoch": 6.998601398601399, "grad_norm": 0.5557446952727101, "learning_rate": 7.763657418280446e-11, "loss": 0.2672, "loss_nan_ranks": 0, "loss_rank_avg": 0.14027032256126404, "step": 2505, "valid_targets_mean": 3807.5, "valid_targets_min": 2712 }, { "epoch": 7.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.26545536518096924, "step": 2506, "total_flos": 9.890960502181724e+17, "train_loss": 0.3139860285869714, "train_runtime": 19951.9657, "train_samples_per_second": 2.007, "train_steps_per_second": 0.126, "valid_targets_mean": 4094.0, "valid_targets_min": 3070 } ], "logging_steps": 5, "max_steps": 2506, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.890960502181724e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }