{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1246, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02819548872180451, "grad_norm": 11.27667292176756, "learning_rate": 1.28e-06, "loss": 0.9036, "loss_nan_ranks": 0, "loss_rank_avg": 0.29199543595314026, "step": 5, "valid_targets_mean": 9628.2, "valid_targets_min": 1853 }, { "epoch": 0.05639097744360902, "grad_norm": 8.317911958432434, "learning_rate": 2.88e-06, "loss": 0.8814, "loss_nan_ranks": 0, "loss_rank_avg": 0.3067687451839447, "step": 10, "valid_targets_mean": 10587.8, "valid_targets_min": 6075 }, { "epoch": 0.08458646616541353, "grad_norm": 4.357377107906101, "learning_rate": 4.48e-06, "loss": 0.8293, "loss_nan_ranks": 0, "loss_rank_avg": 0.28829988837242126, "step": 15, "valid_targets_mean": 10112.8, "valid_targets_min": 4070 }, { "epoch": 0.11278195488721804, "grad_norm": 1.930567122176637, "learning_rate": 6.08e-06, "loss": 0.7689, "loss_nan_ranks": 0, "loss_rank_avg": 0.254660964012146, "step": 20, "valid_targets_mean": 9978.3, "valid_targets_min": 4467 }, { "epoch": 0.14097744360902256, "grad_norm": 1.4457544018909307, "learning_rate": 7.680000000000001e-06, "loss": 0.7462, "loss_nan_ranks": 0, "loss_rank_avg": 0.25300565361976624, "step": 25, "valid_targets_mean": 9490.9, "valid_targets_min": 3438 }, { "epoch": 0.16917293233082706, "grad_norm": 1.1826535917994672, "learning_rate": 9.280000000000001e-06, "loss": 0.7089, "loss_nan_ranks": 0, "loss_rank_avg": 0.22363917529582977, "step": 30, "valid_targets_mean": 9377.6, "valid_targets_min": 2854 }, { "epoch": 0.19736842105263158, "grad_norm": 0.735665641953498, "learning_rate": 1.0880000000000001e-05, "loss": 0.678, "loss_nan_ranks": 0, "loss_rank_avg": 0.2022905796766281, "step": 35, "valid_targets_mean": 8942.0, "valid_targets_min": 2253 }, { "epoch": 0.22556390977443608, "grad_norm": 0.6077005768579393, "learning_rate": 1.248e-05, "loss": 0.6568, "loss_nan_ranks": 0, "loss_rank_avg": 0.23237602412700653, "step": 40, "valid_targets_mean": 10732.1, "valid_targets_min": 4297 }, { "epoch": 0.25375939849624063, "grad_norm": 0.5156231779831542, "learning_rate": 1.408e-05, "loss": 0.6408, "loss_nan_ranks": 0, "loss_rank_avg": 0.20694395899772644, "step": 45, "valid_targets_mean": 9659.8, "valid_targets_min": 3259 }, { "epoch": 0.2819548872180451, "grad_norm": 0.37161737532708955, "learning_rate": 1.5680000000000002e-05, "loss": 0.6238, "loss_nan_ranks": 0, "loss_rank_avg": 0.209224671125412, "step": 50, "valid_targets_mean": 10202.5, "valid_targets_min": 5347 }, { "epoch": 0.3101503759398496, "grad_norm": 0.3417973572153747, "learning_rate": 1.728e-05, "loss": 0.6053, "loss_nan_ranks": 0, "loss_rank_avg": 0.2059696614742279, "step": 55, "valid_targets_mean": 9660.9, "valid_targets_min": 4023 }, { "epoch": 0.3383458646616541, "grad_norm": 0.31758682305547137, "learning_rate": 1.8880000000000002e-05, "loss": 0.5907, "loss_nan_ranks": 0, "loss_rank_avg": 0.20821066200733185, "step": 60, "valid_targets_mean": 9809.2, "valid_targets_min": 4066 }, { "epoch": 0.36654135338345867, "grad_norm": 0.2507740265564074, "learning_rate": 2.048e-05, "loss": 0.5641, "loss_nan_ranks": 0, "loss_rank_avg": 0.18758416175842285, "step": 65, "valid_targets_mean": 9209.8, "valid_targets_min": 3945 }, { "epoch": 0.39473684210526316, "grad_norm": 0.23718427206276319, "learning_rate": 2.2080000000000002e-05, "loss": 0.5576, "loss_nan_ranks": 0, "loss_rank_avg": 0.19377034902572632, "step": 70, "valid_targets_mean": 9540.7, "valid_targets_min": 5447 }, { "epoch": 0.42293233082706766, "grad_norm": 0.24872982035759736, "learning_rate": 2.368e-05, "loss": 0.5446, "loss_nan_ranks": 0, "loss_rank_avg": 0.18347474932670593, "step": 75, "valid_targets_mean": 9611.9, "valid_targets_min": 2717 }, { "epoch": 0.45112781954887216, "grad_norm": 0.21797546092426584, "learning_rate": 2.5280000000000002e-05, "loss": 0.5433, "loss_nan_ranks": 0, "loss_rank_avg": 0.18492883443832397, "step": 80, "valid_targets_mean": 9892.9, "valid_targets_min": 2107 }, { "epoch": 0.4793233082706767, "grad_norm": 0.2468075467899746, "learning_rate": 2.6880000000000004e-05, "loss": 0.5308, "loss_nan_ranks": 0, "loss_rank_avg": 0.1767086386680603, "step": 85, "valid_targets_mean": 9605.6, "valid_targets_min": 4655 }, { "epoch": 0.5075187969924813, "grad_norm": 0.22619547032239734, "learning_rate": 2.8480000000000002e-05, "loss": 0.5262, "loss_nan_ranks": 0, "loss_rank_avg": 0.15464217960834503, "step": 90, "valid_targets_mean": 8574.6, "valid_targets_min": 4031 }, { "epoch": 0.5357142857142857, "grad_norm": 0.23131634119706504, "learning_rate": 3.0080000000000003e-05, "loss": 0.5173, "loss_nan_ranks": 0, "loss_rank_avg": 0.16377687454223633, "step": 95, "valid_targets_mean": 9045.4, "valid_targets_min": 4294 }, { "epoch": 0.5639097744360902, "grad_norm": 0.2240444930912022, "learning_rate": 3.168e-05, "loss": 0.5054, "loss_nan_ranks": 0, "loss_rank_avg": 0.14757883548736572, "step": 100, "valid_targets_mean": 8858.5, "valid_targets_min": 2554 }, { "epoch": 0.5921052631578947, "grad_norm": 0.22516374944025624, "learning_rate": 3.328e-05, "loss": 0.4985, "loss_nan_ranks": 0, "loss_rank_avg": 0.1508697122335434, "step": 105, "valid_targets_mean": 8304.1, "valid_targets_min": 242 }, { "epoch": 0.6203007518796992, "grad_norm": 0.25125223132051416, "learning_rate": 3.4880000000000005e-05, "loss": 0.4935, "loss_nan_ranks": 0, "loss_rank_avg": 0.163669615983963, "step": 110, "valid_targets_mean": 9637.2, "valid_targets_min": 1700 }, { "epoch": 0.6484962406015038, "grad_norm": 0.2741917627559633, "learning_rate": 3.648e-05, "loss": 0.4863, "loss_nan_ranks": 0, "loss_rank_avg": 0.17761027812957764, "step": 115, "valid_targets_mean": 10324.5, "valid_targets_min": 5329 }, { "epoch": 0.6766917293233082, "grad_norm": 0.5530142596392317, "learning_rate": 3.808e-05, "loss": 0.4853, "loss_nan_ranks": 0, "loss_rank_avg": 0.16129770874977112, "step": 120, "valid_targets_mean": 9438.1, "valid_targets_min": 2586 }, { "epoch": 0.7048872180451128, "grad_norm": 0.2501311718678911, "learning_rate": 3.9680000000000006e-05, "loss": 0.4774, "loss_nan_ranks": 0, "loss_rank_avg": 0.16897393763065338, "step": 125, "valid_targets_mean": 9593.8, "valid_targets_min": 4187 }, { "epoch": 0.7330827067669173, "grad_norm": 0.2574918108729956, "learning_rate": 3.9998743380036454e-05, "loss": 0.4779, "loss_nan_ranks": 0, "loss_rank_avg": 0.14019961655139923, "step": 130, "valid_targets_mean": 8366.4, "valid_targets_min": 321 }, { "epoch": 0.7612781954887218, "grad_norm": 0.24407543075435603, "learning_rate": 3.999363863206902e-05, "loss": 0.4714, "loss_nan_ranks": 0, "loss_rank_avg": 0.15657010674476624, "step": 135, "valid_targets_mean": 9552.6, "valid_targets_min": 1814 }, { "epoch": 0.7894736842105263, "grad_norm": 0.2894802340984338, "learning_rate": 3.9984608218874315e-05, "loss": 0.4789, "loss_nan_ranks": 0, "loss_rank_avg": 0.1610604226589203, "step": 140, "valid_targets_mean": 9847.2, "valid_targets_min": 6162 }, { "epoch": 0.8176691729323309, "grad_norm": 0.2845790780592853, "learning_rate": 3.997165391353524e-05, "loss": 0.468, "loss_nan_ranks": 0, "loss_rank_avg": 0.1624469757080078, "step": 145, "valid_targets_mean": 10109.4, "valid_targets_min": 4090 }, { "epoch": 0.8458646616541353, "grad_norm": 0.2572172583431577, "learning_rate": 3.995477825957412e-05, "loss": 0.4555, "loss_nan_ranks": 0, "loss_rank_avg": 0.15889598429203033, "step": 150, "valid_targets_mean": 9374.6, "valid_targets_min": 2968 }, { "epoch": 0.8740601503759399, "grad_norm": 0.2612824650214882, "learning_rate": 3.9933984570453255e-05, "loss": 0.4551, "loss_nan_ranks": 0, "loss_rank_avg": 0.13706836104393005, "step": 155, "valid_targets_mean": 9366.4, "valid_targets_min": 4764 }, { "epoch": 0.9022556390977443, "grad_norm": 0.25358505200342496, "learning_rate": 3.990927692892435e-05, "loss": 0.4576, "loss_nan_ranks": 0, "loss_rank_avg": 0.14485809206962585, "step": 160, "valid_targets_mean": 9978.2, "valid_targets_min": 4852 }, { "epoch": 0.9304511278195489, "grad_norm": 0.22521949735204938, "learning_rate": 3.9880660186226905e-05, "loss": 0.4587, "loss_nan_ranks": 0, "loss_rank_avg": 0.1584489345550537, "step": 165, "valid_targets_mean": 9612.8, "valid_targets_min": 2895 }, { "epoch": 0.9586466165413534, "grad_norm": 0.25106154655786905, "learning_rate": 3.9848139961135647e-05, "loss": 0.4559, "loss_nan_ranks": 0, "loss_rank_avg": 0.15277701616287231, "step": 170, "valid_targets_mean": 10049.1, "valid_targets_min": 5349 }, { "epoch": 0.9868421052631579, "grad_norm": 0.2928102281837378, "learning_rate": 3.981172263885736e-05, "loss": 0.4576, "loss_nan_ranks": 0, "loss_rank_avg": 0.15941108763217926, "step": 175, "valid_targets_mean": 9364.9, "valid_targets_min": 3674 }, { "epoch": 1.0112781954887218, "grad_norm": 0.2718097968773888, "learning_rate": 3.977141536977713e-05, "loss": 0.4489, "loss_nan_ranks": 0, "loss_rank_avg": 0.14890170097351074, "step": 180, "valid_targets_mean": 9873.9, "valid_targets_min": 5207 }, { "epoch": 1.0394736842105263, "grad_norm": 0.27830371358135386, "learning_rate": 3.972722606805445e-05, "loss": 0.4386, "loss_nan_ranks": 0, "loss_rank_avg": 0.14617493748664856, "step": 185, "valid_targets_mean": 9708.8, "valid_targets_min": 2309 }, { "epoch": 1.0676691729323309, "grad_norm": 0.24888473802447664, "learning_rate": 3.967916341006925e-05, "loss": 0.4524, "loss_nan_ranks": 0, "loss_rank_avg": 0.16539250314235687, "step": 190, "valid_targets_mean": 9782.2, "valid_targets_min": 4150 }, { "epoch": 1.0958646616541354, "grad_norm": 0.2536978404456141, "learning_rate": 3.962723683271837e-05, "loss": 0.4476, "loss_nan_ranks": 0, "loss_rank_avg": 0.15597505867481232, "step": 195, "valid_targets_mean": 10103.2, "valid_targets_min": 6501 }, { "epoch": 1.1240601503759398, "grad_norm": 0.2712191334628464, "learning_rate": 3.957145653156265e-05, "loss": 0.4506, "loss_nan_ranks": 0, "loss_rank_avg": 0.1572057008743286, "step": 200, "valid_targets_mean": 10415.6, "valid_targets_min": 3634 }, { "epoch": 1.1522556390977443, "grad_norm": 0.26617835772104925, "learning_rate": 3.9511833458825076e-05, "loss": 0.4416, "loss_nan_ranks": 0, "loss_rank_avg": 0.15055301785469055, "step": 205, "valid_targets_mean": 9808.2, "valid_targets_min": 4399 }, { "epoch": 1.1804511278195489, "grad_norm": 0.26894103365631217, "learning_rate": 3.944837932124036e-05, "loss": 0.4442, "loss_nan_ranks": 0, "loss_rank_avg": 0.1498723179101944, "step": 210, "valid_targets_mean": 9847.2, "valid_targets_min": 5140 }, { "epoch": 1.2086466165413534, "grad_norm": 0.29824687817690704, "learning_rate": 3.9381106577756374e-05, "loss": 0.4487, "loss_nan_ranks": 0, "loss_rank_avg": 0.14083534479141235, "step": 215, "valid_targets_mean": 9044.2, "valid_targets_min": 3147 }, { "epoch": 1.236842105263158, "grad_norm": 0.2371578018995009, "learning_rate": 3.931002843708788e-05, "loss": 0.4455, "loss_nan_ranks": 0, "loss_rank_avg": 0.1637556552886963, "step": 220, "valid_targets_mean": 10195.7, "valid_targets_min": 5971 }, { "epoch": 1.2650375939849625, "grad_norm": 0.2641963474548977, "learning_rate": 3.923515885512307e-05, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.13960033655166626, "step": 225, "valid_targets_mean": 9160.0, "valid_targets_min": 731 }, { "epoch": 1.2932330827067668, "grad_norm": 0.2407752036577108, "learning_rate": 3.9156512532183384e-05, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.14932051301002502, "step": 230, "valid_targets_mean": 9700.4, "valid_targets_min": 5426 }, { "epoch": 1.3214285714285714, "grad_norm": 0.27828486846847406, "learning_rate": 3.907410491013714e-05, "loss": 0.4364, "loss_nan_ranks": 0, "loss_rank_avg": 0.14444586634635925, "step": 235, "valid_targets_mean": 9815.5, "valid_targets_min": 2947 }, { "epoch": 1.349624060150376, "grad_norm": 0.2776790733337406, "learning_rate": 3.898795216936763e-05, "loss": 0.436, "loss_nan_ranks": 0, "loss_rank_avg": 0.14298999309539795, "step": 240, "valid_targets_mean": 9667.4, "valid_targets_min": 312 }, { "epoch": 1.3778195488721805, "grad_norm": 0.2478141720344162, "learning_rate": 3.889807122559614e-05, "loss": 0.4335, "loss_nan_ranks": 0, "loss_rank_avg": 0.15552732348442078, "step": 245, "valid_targets_mean": 10064.5, "valid_targets_min": 1095 }, { "epoch": 1.4060150375939848, "grad_norm": 0.2881202815268935, "learning_rate": 3.8804479726560644e-05, "loss": 0.4383, "loss_nan_ranks": 0, "loss_rank_avg": 0.14759336411952972, "step": 250, "valid_targets_mean": 10201.8, "valid_targets_min": 298 }, { "epoch": 1.4342105263157894, "grad_norm": 0.2360033680760221, "learning_rate": 3.870719604855071e-05, "loss": 0.4279, "loss_nan_ranks": 0, "loss_rank_avg": 0.1278318464756012, "step": 255, "valid_targets_mean": 9452.0, "valid_targets_min": 3097 }, { "epoch": 1.462406015037594, "grad_norm": 0.2678985162825098, "learning_rate": 3.860623929279946e-05, "loss": 0.4301, "loss_nan_ranks": 0, "loss_rank_avg": 0.13829311728477478, "step": 260, "valid_targets_mean": 9852.8, "valid_targets_min": 2718 }, { "epoch": 1.4906015037593985, "grad_norm": 0.24111892462438012, "learning_rate": 3.850162928173303e-05, "loss": 0.4407, "loss_nan_ranks": 0, "loss_rank_avg": 0.13607658445835114, "step": 265, "valid_targets_mean": 9477.2, "valid_targets_min": 5643 }, { "epoch": 1.518796992481203, "grad_norm": 0.29578707903937657, "learning_rate": 3.839338655507861e-05, "loss": 0.4342, "loss_nan_ranks": 0, "loss_rank_avg": 0.13879016041755676, "step": 270, "valid_targets_mean": 9750.3, "valid_targets_min": 1886 }, { "epoch": 1.5469924812030076, "grad_norm": 0.2669302765773925, "learning_rate": 3.828153236583152e-05, "loss": 0.4401, "loss_nan_ranks": 0, "loss_rank_avg": 0.14289501309394836, "step": 275, "valid_targets_mean": 9182.9, "valid_targets_min": 1758 }, { "epoch": 1.5751879699248121, "grad_norm": 0.25183783099282825, "learning_rate": 3.816608867608227e-05, "loss": 0.4293, "loss_nan_ranks": 0, "loss_rank_avg": 0.1422910988330841, "step": 280, "valid_targets_mean": 9582.1, "valid_targets_min": 2641 }, { "epoch": 1.6033834586466167, "grad_norm": 0.25792139048892554, "learning_rate": 3.80470781527044e-05, "loss": 0.4347, "loss_nan_ranks": 0, "loss_rank_avg": 0.14068448543548584, "step": 285, "valid_targets_mean": 9446.0, "valid_targets_min": 1282 }, { "epoch": 1.631578947368421, "grad_norm": 0.22282061010532125, "learning_rate": 3.792452416290393e-05, "loss": 0.4354, "loss_nan_ranks": 0, "loss_rank_avg": 0.15323126316070557, "step": 290, "valid_targets_mean": 9909.8, "valid_targets_min": 3343 }, { "epoch": 1.6597744360902256, "grad_norm": 0.25037386293951636, "learning_rate": 3.779845076963136e-05, "loss": 0.4319, "loss_nan_ranks": 0, "loss_rank_avg": 0.1571081131696701, "step": 295, "valid_targets_mean": 10085.2, "valid_targets_min": 3558 }, { "epoch": 1.6879699248120301, "grad_norm": 0.2324474919615337, "learning_rate": 3.766888272685693e-05, "loss": 0.4255, "loss_nan_ranks": 0, "loss_rank_avg": 0.15331974625587463, "step": 300, "valid_targets_mean": 9570.4, "valid_targets_min": 3686 }, { "epoch": 1.7161654135338344, "grad_norm": 0.252366647005355, "learning_rate": 3.753584547471036e-05, "loss": 0.4271, "loss_nan_ranks": 0, "loss_rank_avg": 0.14724957942962646, "step": 305, "valid_targets_mean": 9747.0, "valid_targets_min": 2611 }, { "epoch": 1.744360902255639, "grad_norm": 0.22612296362484413, "learning_rate": 3.739936513448573e-05, "loss": 0.4202, "loss_nan_ranks": 0, "loss_rank_avg": 0.13358333706855774, "step": 310, "valid_targets_mean": 9892.2, "valid_targets_min": 3969 }, { "epoch": 1.7725563909774436, "grad_norm": 0.242275248738654, "learning_rate": 3.725946850351272e-05, "loss": 0.4274, "loss_nan_ranks": 0, "loss_rank_avg": 0.14728665351867676, "step": 315, "valid_targets_mean": 9580.7, "valid_targets_min": 5246 }, { "epoch": 1.800751879699248, "grad_norm": 0.2301098778262513, "learning_rate": 3.7116183049895054e-05, "loss": 0.4233, "loss_nan_ranks": 0, "loss_rank_avg": 0.14626328647136688, "step": 320, "valid_targets_mean": 9595.5, "valid_targets_min": 3599 }, { "epoch": 1.8289473684210527, "grad_norm": 0.255145468595074, "learning_rate": 3.6969536907117234e-05, "loss": 0.4139, "loss_nan_ranks": 0, "loss_rank_avg": 0.1345970779657364, "step": 325, "valid_targets_mean": 8776.2, "valid_targets_min": 354 }, { "epoch": 1.8571428571428572, "grad_norm": 0.2536680403230651, "learning_rate": 3.681955886852069e-05, "loss": 0.4206, "loss_nan_ranks": 0, "loss_rank_avg": 0.13223111629486084, "step": 330, "valid_targets_mean": 9203.2, "valid_targets_min": 3441 }, { "epoch": 1.8853383458646618, "grad_norm": 0.23460226413834012, "learning_rate": 3.66662783816503e-05, "loss": 0.4206, "loss_nan_ranks": 0, "loss_rank_avg": 0.15391016006469727, "step": 335, "valid_targets_mean": 10040.4, "valid_targets_min": 2118 }, { "epoch": 1.9135338345864663, "grad_norm": 0.2445266751763312, "learning_rate": 3.6509725542472516e-05, "loss": 0.4211, "loss_nan_ranks": 0, "loss_rank_avg": 0.14127874374389648, "step": 340, "valid_targets_mean": 9436.1, "valid_targets_min": 352 }, { "epoch": 1.9417293233082706, "grad_norm": 0.2671155341941523, "learning_rate": 3.6349931089466114e-05, "loss": 0.4154, "loss_nan_ranks": 0, "loss_rank_avg": 0.14088255167007446, "step": 345, "valid_targets_mean": 10083.5, "valid_targets_min": 1785 }, { "epoch": 1.9699248120300752, "grad_norm": 0.24327846719266866, "learning_rate": 3.6186926397586866e-05, "loss": 0.4224, "loss_nan_ranks": 0, "loss_rank_avg": 0.15060530602931976, "step": 350, "valid_targets_mean": 9973.8, "valid_targets_min": 3180 }, { "epoch": 1.9981203007518797, "grad_norm": 0.2515435073960223, "learning_rate": 3.602074347210717e-05, "loss": 0.4214, "loss_nan_ranks": 0, "loss_rank_avg": 0.1467684507369995, "step": 355, "valid_targets_mean": 10120.1, "valid_targets_min": 5849 }, { "epoch": 2.0225563909774436, "grad_norm": 0.27460864105225735, "learning_rate": 3.5851414942331986e-05, "loss": 0.4123, "loss_nan_ranks": 0, "loss_rank_avg": 0.1371781826019287, "step": 360, "valid_targets_mean": 9121.8, "valid_targets_min": 3424 }, { "epoch": 2.050751879699248, "grad_norm": 0.30961309307690715, "learning_rate": 3.5678974055192176e-05, "loss": 0.4121, "loss_nan_ranks": 0, "loss_rank_avg": 0.1294780969619751, "step": 365, "valid_targets_mean": 8944.1, "valid_targets_min": 2724 }, { "epoch": 2.0789473684210527, "grad_norm": 0.2381008546148629, "learning_rate": 3.550345466871662e-05, "loss": 0.4133, "loss_nan_ranks": 0, "loss_rank_avg": 0.1437140703201294, "step": 370, "valid_targets_mean": 9402.6, "valid_targets_min": 3163 }, { "epoch": 2.107142857142857, "grad_norm": 0.23784456731163375, "learning_rate": 3.5324891245384354e-05, "loss": 0.4109, "loss_nan_ranks": 0, "loss_rank_avg": 0.11915447562932968, "step": 375, "valid_targets_mean": 9617.0, "valid_targets_min": 303 }, { "epoch": 2.1353383458646618, "grad_norm": 0.236728125374762, "learning_rate": 3.5143318845358006e-05, "loss": 0.4197, "loss_nan_ranks": 0, "loss_rank_avg": 0.14139775931835175, "step": 380, "valid_targets_mean": 9871.7, "valid_targets_min": 5207 }, { "epoch": 2.1635338345864663, "grad_norm": 0.2504925152689405, "learning_rate": 3.4958773119599874e-05, "loss": 0.4108, "loss_nan_ranks": 0, "loss_rank_avg": 0.13846886157989502, "step": 385, "valid_targets_mean": 9910.7, "valid_targets_min": 2559 }, { "epoch": 2.191729323308271, "grad_norm": 0.25485707228406046, "learning_rate": 3.4771290302872e-05, "loss": 0.4062, "loss_nan_ranks": 0, "loss_rank_avg": 0.13879907131195068, "step": 390, "valid_targets_mean": 9667.2, "valid_targets_min": 5697 }, { "epoch": 2.219924812030075, "grad_norm": 0.2898577935562, "learning_rate": 3.458090720662167e-05, "loss": 0.4192, "loss_nan_ranks": 0, "loss_rank_avg": 0.14320047199726105, "step": 395, "valid_targets_mean": 9772.4, "valid_targets_min": 3686 }, { "epoch": 2.2481203007518795, "grad_norm": 0.2919595914258059, "learning_rate": 3.438766121175361e-05, "loss": 0.403, "loss_nan_ranks": 0, "loss_rank_avg": 0.13252058625221252, "step": 400, "valid_targets_mean": 9276.8, "valid_targets_min": 3180 }, { "epoch": 2.276315789473684, "grad_norm": 0.2500124988262427, "learning_rate": 3.4191590261290405e-05, "loss": 0.4067, "loss_nan_ranks": 0, "loss_rank_avg": 0.161848783493042, "step": 405, "valid_targets_mean": 10478.8, "valid_targets_min": 2726 }, { "epoch": 2.3045112781954886, "grad_norm": 0.2687590505702195, "learning_rate": 3.399273285292258e-05, "loss": 0.4129, "loss_nan_ranks": 0, "loss_rank_avg": 0.13902103900909424, "step": 410, "valid_targets_mean": 9363.7, "valid_targets_min": 3981 }, { "epoch": 2.332706766917293, "grad_norm": 0.22847314394479787, "learning_rate": 3.379112803144971e-05, "loss": 0.4145, "loss_nan_ranks": 0, "loss_rank_avg": 0.14047273993492126, "step": 415, "valid_targets_mean": 9385.0, "valid_targets_min": 1673 }, { "epoch": 2.3609022556390977, "grad_norm": 0.26353674089580303, "learning_rate": 3.358681538111415e-05, "loss": 0.4172, "loss_nan_ranks": 0, "loss_rank_avg": 0.1435493379831314, "step": 420, "valid_targets_mean": 9179.6, "valid_targets_min": 2170 }, { "epoch": 2.3890977443609023, "grad_norm": 0.24163690514511912, "learning_rate": 3.3379835017828855e-05, "loss": 0.4157, "loss_nan_ranks": 0, "loss_rank_avg": 0.1342683732509613, "step": 425, "valid_targets_mean": 9376.6, "valid_targets_min": 5213 }, { "epoch": 2.417293233082707, "grad_norm": 0.23583527410814714, "learning_rate": 3.317022758130078e-05, "loss": 0.4146, "loss_nan_ranks": 0, "loss_rank_avg": 0.1486685574054718, "step": 430, "valid_targets_mean": 9923.1, "valid_targets_min": 3855 }, { "epoch": 2.4454887218045114, "grad_norm": 0.23323844774390975, "learning_rate": 3.295803422705143e-05, "loss": 0.4137, "loss_nan_ranks": 0, "loss_rank_avg": 0.14215630292892456, "step": 435, "valid_targets_mean": 9026.8, "valid_targets_min": 1700 }, { "epoch": 2.473684210526316, "grad_norm": 0.29945443437999814, "learning_rate": 3.27432966183362e-05, "loss": 0.4132, "loss_nan_ranks": 0, "loss_rank_avg": 0.12867683172225952, "step": 440, "valid_targets_mean": 9211.3, "valid_targets_min": 4959 }, { "epoch": 2.5018796992481205, "grad_norm": 0.25272960687873497, "learning_rate": 3.25260569179639e-05, "loss": 0.4104, "loss_nan_ranks": 0, "loss_rank_avg": 0.1396941989660263, "step": 445, "valid_targets_mean": 9191.3, "valid_targets_min": 282 }, { "epoch": 2.530075187969925, "grad_norm": 0.2457426246972927, "learning_rate": 3.230635778001836e-05, "loss": 0.4017, "loss_nan_ranks": 0, "loss_rank_avg": 0.14244124293327332, "step": 450, "valid_targets_mean": 9817.1, "valid_targets_min": 2712 }, { "epoch": 2.5582706766917296, "grad_norm": 0.25887288298744265, "learning_rate": 3.208424234148338e-05, "loss": 0.3996, "loss_nan_ranks": 0, "loss_rank_avg": 0.1393296867609024, "step": 455, "valid_targets_mean": 9903.1, "valid_targets_min": 3147 }, { "epoch": 2.5864661654135337, "grad_norm": 0.21877812272153066, "learning_rate": 3.185975421377307e-05, "loss": 0.4088, "loss_nan_ranks": 0, "loss_rank_avg": 0.12278774380683899, "step": 460, "valid_targets_mean": 8609.7, "valid_targets_min": 274 }, { "epoch": 2.6146616541353382, "grad_norm": 0.23735497326800922, "learning_rate": 3.1632937474168855e-05, "loss": 0.4118, "loss_nan_ranks": 0, "loss_rank_avg": 0.13202975690364838, "step": 465, "valid_targets_mean": 10006.2, "valid_targets_min": 3324 }, { "epoch": 2.642857142857143, "grad_norm": 0.22193176122691816, "learning_rate": 3.140383665716512e-05, "loss": 0.396, "loss_nan_ranks": 0, "loss_rank_avg": 0.1347397416830063, "step": 470, "valid_targets_mean": 8921.9, "valid_targets_min": 3634 }, { "epoch": 2.6710526315789473, "grad_norm": 0.23139797768847337, "learning_rate": 3.1172496745725085e-05, "loss": 0.409, "loss_nan_ranks": 0, "loss_rank_avg": 0.13778147101402283, "step": 475, "valid_targets_mean": 9567.4, "valid_targets_min": 4070 }, { "epoch": 2.699248120300752, "grad_norm": 0.21530585156050375, "learning_rate": 3.093896316244855e-05, "loss": 0.4046, "loss_nan_ranks": 0, "loss_rank_avg": 0.1352767050266266, "step": 480, "valid_targets_mean": 10076.9, "valid_targets_min": 4043 }, { "epoch": 2.7274436090225564, "grad_norm": 0.2582975552308644, "learning_rate": 3.0703281760653336e-05, "loss": 0.4083, "loss_nan_ranks": 0, "loss_rank_avg": 0.1417762041091919, "step": 485, "valid_targets_mean": 9484.2, "valid_targets_min": 3106 }, { "epoch": 2.755639097744361, "grad_norm": 0.25989182642732045, "learning_rate": 3.0465498815372285e-05, "loss": 0.4031, "loss_nan_ranks": 0, "loss_rank_avg": 0.12462268769741058, "step": 490, "valid_targets_mean": 9462.6, "valid_targets_min": 1853 }, { "epoch": 2.7838345864661656, "grad_norm": 0.22331231185945719, "learning_rate": 3.0225661014267255e-05, "loss": 0.4025, "loss_nan_ranks": 0, "loss_rank_avg": 0.14205169677734375, "step": 495, "valid_targets_mean": 9931.2, "valid_targets_min": 4449 }, { "epoch": 2.8120300751879697, "grad_norm": 0.237834758537334, "learning_rate": 2.9983815448462245e-05, "loss": 0.4072, "loss_nan_ranks": 0, "loss_rank_avg": 0.1147095113992691, "step": 500, "valid_targets_mean": 8174.8, "valid_targets_min": 312 }, { "epoch": 2.840225563909774, "grad_norm": 0.23790363756246555, "learning_rate": 2.9740009603297236e-05, "loss": 0.4127, "loss_nan_ranks": 0, "loss_rank_avg": 0.13740326464176178, "step": 505, "valid_targets_mean": 9327.2, "valid_targets_min": 4517 }, { "epoch": 2.8684210526315788, "grad_norm": 0.2658433889855018, "learning_rate": 2.949429134900468e-05, "loss": 0.4017, "loss_nan_ranks": 0, "loss_rank_avg": 0.15330830216407776, "step": 510, "valid_targets_mean": 10222.3, "valid_targets_min": 4176 }, { "epoch": 2.8966165413533833, "grad_norm": 0.2129277840732677, "learning_rate": 2.924670893131033e-05, "loss": 0.4039, "loss_nan_ranks": 0, "loss_rank_avg": 0.1338801383972168, "step": 515, "valid_targets_mean": 9623.5, "valid_targets_min": 5138 }, { "epoch": 2.924812030075188, "grad_norm": 0.23260012738615085, "learning_rate": 2.8997310961960456e-05, "loss": 0.4065, "loss_nan_ranks": 0, "loss_rank_avg": 0.12857681512832642, "step": 520, "valid_targets_mean": 9521.6, "valid_targets_min": 2854 }, { "epoch": 2.9530075187969924, "grad_norm": 0.23948753412160725, "learning_rate": 2.8746146409177112e-05, "loss": 0.4017, "loss_nan_ranks": 0, "loss_rank_avg": 0.13795359432697296, "step": 525, "valid_targets_mean": 9622.8, "valid_targets_min": 1144 }, { "epoch": 2.981203007518797, "grad_norm": 0.2434249410932302, "learning_rate": 2.849326458804341e-05, "loss": 0.4079, "loss_nan_ranks": 0, "loss_rank_avg": 0.13328373432159424, "step": 530, "valid_targets_mean": 9911.9, "valid_targets_min": 4039 }, { "epoch": 3.005639097744361, "grad_norm": 0.26324430203522414, "learning_rate": 2.8238715150820764e-05, "loss": 0.3958, "loss_nan_ranks": 0, "loss_rank_avg": 0.11944051831960678, "step": 535, "valid_targets_mean": 9227.2, "valid_targets_min": 4837 }, { "epoch": 3.0338345864661656, "grad_norm": 0.23159723969729487, "learning_rate": 2.7982548077199853e-05, "loss": 0.3961, "loss_nan_ranks": 0, "loss_rank_avg": 0.13192662596702576, "step": 540, "valid_targets_mean": 9961.9, "valid_targets_min": 5917 }, { "epoch": 3.06203007518797, "grad_norm": 0.23925085896994178, "learning_rate": 2.7724813664487323e-05, "loss": 0.4004, "loss_nan_ranks": 0, "loss_rank_avg": 0.13217054307460785, "step": 545, "valid_targets_mean": 10060.8, "valid_targets_min": 4260 }, { "epoch": 3.090225563909774, "grad_norm": 0.23759165570511362, "learning_rate": 2.7465562517730132e-05, "loss": 0.3946, "loss_nan_ranks": 0, "loss_rank_avg": 0.14225870370864868, "step": 550, "valid_targets_mean": 10172.3, "valid_targets_min": 2940 }, { "epoch": 3.1184210526315788, "grad_norm": 0.23673756939271293, "learning_rate": 2.7204845539779468e-05, "loss": 0.4026, "loss_nan_ranks": 0, "loss_rank_avg": 0.14777497947216034, "step": 555, "valid_targets_mean": 9553.6, "valid_targets_min": 2366 }, { "epoch": 3.1466165413533833, "grad_norm": 0.25165539406420895, "learning_rate": 2.6942713921296186e-05, "loss": 0.3974, "loss_nan_ranks": 0, "loss_rank_avg": 0.13649114966392517, "step": 560, "valid_targets_mean": 10204.3, "valid_targets_min": 5230 }, { "epoch": 3.174812030075188, "grad_norm": 0.23177453783787236, "learning_rate": 2.6679219130699747e-05, "loss": 0.4, "loss_nan_ranks": 0, "loss_rank_avg": 0.1317581832408905, "step": 565, "valid_targets_mean": 9459.1, "valid_targets_min": 3125 }, { "epoch": 3.2030075187969924, "grad_norm": 0.2439671619550467, "learning_rate": 2.641441290406261e-05, "loss": 0.4012, "loss_nan_ranks": 0, "loss_rank_avg": 0.13911797106266022, "step": 570, "valid_targets_mean": 9394.8, "valid_targets_min": 4771 }, { "epoch": 3.231203007518797, "grad_norm": 0.24705170121255965, "learning_rate": 2.614834723495208e-05, "loss": 0.3962, "loss_nan_ranks": 0, "loss_rank_avg": 0.1319943219423294, "step": 575, "valid_targets_mean": 10358.8, "valid_targets_min": 5759 }, { "epoch": 3.2593984962406015, "grad_norm": 0.2431113202848758, "learning_rate": 2.5881074364221593e-05, "loss": 0.3926, "loss_nan_ranks": 0, "loss_rank_avg": 0.13881921768188477, "step": 580, "valid_targets_mean": 9148.1, "valid_targets_min": 3053 }, { "epoch": 3.287593984962406, "grad_norm": 0.22917367826880933, "learning_rate": 2.5612646769753454e-05, "loss": 0.3984, "loss_nan_ranks": 0, "loss_rank_avg": 0.12870250642299652, "step": 585, "valid_targets_mean": 9295.9, "valid_targets_min": 264 }, { "epoch": 3.3157894736842106, "grad_norm": 0.23874522846122448, "learning_rate": 2.5343117156154994e-05, "loss": 0.395, "loss_nan_ranks": 0, "loss_rank_avg": 0.13996782898902893, "step": 590, "valid_targets_mean": 9605.2, "valid_targets_min": 2517 }, { "epoch": 3.343984962406015, "grad_norm": 0.22969310162018472, "learning_rate": 2.507253844441031e-05, "loss": 0.3913, "loss_nan_ranks": 0, "loss_rank_avg": 0.1319817304611206, "step": 595, "valid_targets_mean": 9670.2, "valid_targets_min": 2436 }, { "epoch": 3.3721804511278197, "grad_norm": 0.22621680783522202, "learning_rate": 2.480096376148941e-05, "loss": 0.4003, "loss_nan_ranks": 0, "loss_rank_avg": 0.12083794176578522, "step": 600, "valid_targets_mean": 9294.9, "valid_targets_min": 2342 }, { "epoch": 3.4003759398496243, "grad_norm": 0.23285299892205277, "learning_rate": 2.4528446429916973e-05, "loss": 0.3988, "loss_nan_ranks": 0, "loss_rank_avg": 0.1262476146221161, "step": 605, "valid_targets_mean": 8696.2, "valid_targets_min": 306 }, { "epoch": 3.4285714285714284, "grad_norm": 0.23236710961218587, "learning_rate": 2.4255039957302692e-05, "loss": 0.3978, "loss_nan_ranks": 0, "loss_rank_avg": 0.1298486590385437, "step": 610, "valid_targets_mean": 9753.8, "valid_targets_min": 2766 }, { "epoch": 3.456766917293233, "grad_norm": 0.23202738403502496, "learning_rate": 2.3980798025835298e-05, "loss": 0.3966, "loss_nan_ranks": 0, "loss_rank_avg": 0.12037086486816406, "step": 615, "valid_targets_mean": 8639.8, "valid_targets_min": 3035 }, { "epoch": 3.4849624060150375, "grad_norm": 0.22584889839089625, "learning_rate": 2.370577448174229e-05, "loss": 0.3954, "loss_nan_ranks": 0, "loss_rank_avg": 0.13931915163993835, "step": 620, "valid_targets_mean": 10037.7, "valid_targets_min": 3089 }, { "epoch": 3.513157894736842, "grad_norm": 0.22178066562941928, "learning_rate": 2.3430023324717443e-05, "loss": 0.3894, "loss_nan_ranks": 0, "loss_rank_avg": 0.12872684001922607, "step": 625, "valid_targets_mean": 9377.9, "valid_targets_min": 1095 }, { "epoch": 3.5413533834586466, "grad_norm": 0.2568058966490674, "learning_rate": 2.3153598697318237e-05, "loss": 0.4027, "loss_nan_ranks": 0, "loss_rank_avg": 0.1503409445285797, "step": 630, "valid_targets_mean": 10004.6, "valid_targets_min": 3068 }, { "epoch": 3.569548872180451, "grad_norm": 0.2118375304003317, "learning_rate": 2.2876554874335124e-05, "loss": 0.3926, "loss_nan_ranks": 0, "loss_rank_avg": 0.13170304894447327, "step": 635, "valid_targets_mean": 9806.5, "valid_targets_min": 4963 }, { "epoch": 3.5977443609022557, "grad_norm": 0.22824112120822312, "learning_rate": 2.2598946252135017e-05, "loss": 0.4018, "loss_nan_ranks": 0, "loss_rank_avg": 0.13064341247081757, "step": 640, "valid_targets_mean": 9978.8, "valid_targets_min": 4497 }, { "epoch": 3.6259398496240602, "grad_norm": 0.22511608795603558, "learning_rate": 2.2320827337980676e-05, "loss": 0.3914, "loss_nan_ranks": 0, "loss_rank_avg": 0.12164842337369919, "step": 645, "valid_targets_mean": 9501.5, "valid_targets_min": 5364 }, { "epoch": 3.654135338345865, "grad_norm": 0.21962275837845524, "learning_rate": 2.204225273932855e-05, "loss": 0.3946, "loss_nan_ranks": 0, "loss_rank_avg": 0.12767906486988068, "step": 650, "valid_targets_mean": 9994.6, "valid_targets_min": 5580 }, { "epoch": 3.682330827067669, "grad_norm": 0.22326981379224142, "learning_rate": 2.176327715310673e-05, "loss": 0.3916, "loss_nan_ranks": 0, "loss_rank_avg": 0.13035039603710175, "step": 655, "valid_targets_mean": 9763.4, "valid_targets_min": 1826 }, { "epoch": 3.7105263157894735, "grad_norm": 0.22785142761097762, "learning_rate": 2.1483955354975557e-05, "loss": 0.3968, "loss_nan_ranks": 0, "loss_rank_avg": 0.13587889075279236, "step": 660, "valid_targets_mean": 9339.4, "valid_targets_min": 4732 }, { "epoch": 3.738721804511278, "grad_norm": 0.2338735231717096, "learning_rate": 2.120434218857254e-05, "loss": 0.3949, "loss_nan_ranks": 0, "loss_rank_avg": 0.14184817671775818, "step": 665, "valid_targets_mean": 9305.5, "valid_targets_min": 3570 }, { "epoch": 3.7669172932330826, "grad_norm": 0.23510291425793847, "learning_rate": 2.0924492554744145e-05, "loss": 0.3946, "loss_nan_ranks": 0, "loss_rank_avg": 0.1337651014328003, "step": 670, "valid_targets_mean": 8838.2, "valid_targets_min": 1853 }, { "epoch": 3.795112781954887, "grad_norm": 0.22779287321668726, "learning_rate": 2.0644461400766174e-05, "loss": 0.3958, "loss_nan_ranks": 0, "loss_rank_avg": 0.14175552129745483, "step": 675, "valid_targets_mean": 10304.8, "valid_targets_min": 5382 }, { "epoch": 3.8233082706766917, "grad_norm": 0.22017841967874427, "learning_rate": 2.036430370955514e-05, "loss": 0.3893, "loss_nan_ranks": 0, "loss_rank_avg": 0.13052602112293243, "step": 680, "valid_targets_mean": 9796.6, "valid_targets_min": 5183 }, { "epoch": 3.851503759398496, "grad_norm": 0.24234384303043424, "learning_rate": 2.0084074488872606e-05, "loss": 0.3942, "loss_nan_ranks": 0, "loss_rank_avg": 0.13533687591552734, "step": 685, "valid_targets_mean": 10036.1, "valid_targets_min": 5742 }, { "epoch": 3.8796992481203008, "grad_norm": 0.23247406840250695, "learning_rate": 1.9803828760524627e-05, "loss": 0.3928, "loss_nan_ranks": 0, "loss_rank_avg": 0.13567492365837097, "step": 690, "valid_targets_mean": 9282.2, "valid_targets_min": 2939 }, { "epoch": 3.9078947368421053, "grad_norm": 0.2217230300549226, "learning_rate": 1.952362154955848e-05, "loss": 0.3918, "loss_nan_ranks": 0, "loss_rank_avg": 0.13407818973064423, "step": 695, "valid_targets_mean": 10740.7, "valid_targets_min": 5872 }, { "epoch": 3.93609022556391, "grad_norm": 0.2103692494148384, "learning_rate": 1.9243507873458717e-05, "loss": 0.3943, "loss_nan_ranks": 0, "loss_rank_avg": 0.12526118755340576, "step": 700, "valid_targets_mean": 9699.6, "valid_targets_min": 3534 }, { "epoch": 3.9642857142857144, "grad_norm": 0.21604893038726108, "learning_rate": 1.896354273134472e-05, "loss": 0.3905, "loss_nan_ranks": 0, "loss_rank_avg": 0.13225769996643066, "step": 705, "valid_targets_mean": 10412.7, "valid_targets_min": 5378 }, { "epoch": 3.992481203007519, "grad_norm": 0.23178482273025192, "learning_rate": 1.8683781093171846e-05, "loss": 0.3884, "loss_nan_ranks": 0, "loss_rank_avg": 0.12075187265872955, "step": 710, "valid_targets_mean": 8626.9, "valid_targets_min": 4149 }, { "epoch": 4.0169172932330826, "grad_norm": 0.21899831151451, "learning_rate": 1.8404277888938337e-05, "loss": 0.3899, "loss_nan_ranks": 0, "loss_rank_avg": 0.12334593385457993, "step": 715, "valid_targets_mean": 9348.6, "valid_targets_min": 3053 }, { "epoch": 4.045112781954887, "grad_norm": 0.22782795872601927, "learning_rate": 1.8125087997900054e-05, "loss": 0.386, "loss_nan_ranks": 0, "loss_rank_avg": 0.13175661861896515, "step": 720, "valid_targets_mean": 9545.6, "valid_targets_min": 3147 }, { "epoch": 4.073308270676692, "grad_norm": 0.2215031525141966, "learning_rate": 1.784626623779512e-05, "loss": 0.3907, "loss_nan_ranks": 0, "loss_rank_avg": 0.12877237796783447, "step": 725, "valid_targets_mean": 9193.4, "valid_targets_min": 3835 }, { "epoch": 4.101503759398496, "grad_norm": 0.21240434399860283, "learning_rate": 1.7567867354080766e-05, "loss": 0.3793, "loss_nan_ranks": 0, "loss_rank_avg": 0.11605602502822876, "step": 730, "valid_targets_mean": 9114.1, "valid_targets_min": 3085 }, { "epoch": 4.129699248120301, "grad_norm": 0.20674610974143964, "learning_rate": 1.7289946009184217e-05, "loss": 0.3889, "loss_nan_ranks": 0, "loss_rank_avg": 0.11780436336994171, "step": 735, "valid_targets_mean": 9190.0, "valid_targets_min": 1095 }, { "epoch": 4.157894736842105, "grad_norm": 0.2151999993530917, "learning_rate": 1.701255677177004e-05, "loss": 0.3857, "loss_nan_ranks": 0, "loss_rank_avg": 0.13381031155586243, "step": 740, "valid_targets_mean": 10586.0, "valid_targets_min": 4279 }, { "epoch": 4.18609022556391, "grad_norm": 0.24136733794847645, "learning_rate": 1.6735754106025747e-05, "loss": 0.3856, "loss_nan_ranks": 0, "loss_rank_avg": 0.14070633053779602, "step": 745, "valid_targets_mean": 9712.8, "valid_targets_min": 4449 }, { "epoch": 4.214285714285714, "grad_norm": 0.2689899308894728, "learning_rate": 1.6459592360968036e-05, "loss": 0.3837, "loss_nan_ranks": 0, "loss_rank_avg": 0.13649453222751617, "step": 750, "valid_targets_mean": 9510.1, "valid_targets_min": 4483 }, { "epoch": 4.242481203007519, "grad_norm": 0.21065172047325978, "learning_rate": 1.618412575977156e-05, "loss": 0.3798, "loss_nan_ranks": 0, "loss_rank_avg": 0.11696235090494156, "step": 755, "valid_targets_mean": 9127.5, "valid_targets_min": 4573 }, { "epoch": 4.2706766917293235, "grad_norm": 0.22547082757713885, "learning_rate": 1.5909408389122473e-05, "loss": 0.39, "loss_nan_ranks": 0, "loss_rank_avg": 0.12373727560043335, "step": 760, "valid_targets_mean": 8933.2, "valid_targets_min": 3761 }, { "epoch": 4.298872180451128, "grad_norm": 0.2224972608121181, "learning_rate": 1.5635494188598713e-05, "loss": 0.3892, "loss_nan_ranks": 0, "loss_rank_avg": 0.12517967820167542, "step": 765, "valid_targets_mean": 8647.8, "valid_targets_min": 3420 }, { "epoch": 4.327067669172933, "grad_norm": 0.21014542918768556, "learning_rate": 1.5362436940079194e-05, "loss": 0.3841, "loss_nan_ranks": 0, "loss_rank_avg": 0.12938112020492554, "step": 770, "valid_targets_mean": 10605.2, "valid_targets_min": 3102 }, { "epoch": 4.355263157894737, "grad_norm": 0.22326364891523268, "learning_rate": 1.5090290257184019e-05, "loss": 0.3938, "loss_nan_ranks": 0, "loss_rank_avg": 0.12240595370531082, "step": 775, "valid_targets_mean": 9136.8, "valid_targets_min": 3968 }, { "epoch": 4.383458646616542, "grad_norm": 0.2290266501731758, "learning_rate": 1.481910757474759e-05, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.11885623633861542, "step": 780, "valid_targets_mean": 8942.6, "valid_targets_min": 328 }, { "epoch": 4.411654135338346, "grad_norm": 0.20583422703018228, "learning_rate": 1.4548942138326978e-05, "loss": 0.3875, "loss_nan_ranks": 0, "loss_rank_avg": 0.12395472824573517, "step": 785, "valid_targets_mean": 8851.2, "valid_targets_min": 758 }, { "epoch": 4.43984962406015, "grad_norm": 0.21949993262902937, "learning_rate": 1.4279846993747342e-05, "loss": 0.3859, "loss_nan_ranks": 0, "loss_rank_avg": 0.12866610288619995, "step": 790, "valid_targets_mean": 9402.3, "valid_targets_min": 2886 }, { "epoch": 4.4680451127819545, "grad_norm": 0.1981370750992788, "learning_rate": 1.4011874976686648e-05, "loss": 0.3842, "loss_nan_ranks": 0, "loss_rank_avg": 0.12139122933149338, "step": 795, "valid_targets_mean": 10267.0, "valid_targets_min": 3276 }, { "epoch": 4.496240601503759, "grad_norm": 0.22529503663433614, "learning_rate": 1.3745078702301569e-05, "loss": 0.3855, "loss_nan_ranks": 0, "loss_rank_avg": 0.12190576642751694, "step": 800, "valid_targets_mean": 9045.6, "valid_targets_min": 283 }, { "epoch": 4.524436090225564, "grad_norm": 0.21131753037367373, "learning_rate": 1.3479510554896762e-05, "loss": 0.3877, "loss_nan_ranks": 0, "loss_rank_avg": 0.13228657841682434, "step": 805, "valid_targets_mean": 9695.1, "valid_targets_min": 2237 }, { "epoch": 4.552631578947368, "grad_norm": 0.2228431849996671, "learning_rate": 1.3215222677639394e-05, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.13006101548671722, "step": 810, "valid_targets_mean": 10069.1, "valid_targets_min": 5674 }, { "epoch": 4.580827067669173, "grad_norm": 0.21469936302557577, "learning_rate": 1.2952266962321106e-05, "loss": 0.391, "loss_nan_ranks": 0, "loss_rank_avg": 0.12173780798912048, "step": 815, "valid_targets_mean": 8536.1, "valid_targets_min": 255 }, { "epoch": 4.609022556390977, "grad_norm": 0.21888380673587698, "learning_rate": 1.2690695039169231e-05, "loss": 0.3929, "loss_nan_ranks": 0, "loss_rank_avg": 0.13081592321395874, "step": 820, "valid_targets_mean": 9528.8, "valid_targets_min": 1502 }, { "epoch": 4.637218045112782, "grad_norm": 0.20661151271918127, "learning_rate": 1.2430558266709451e-05, "loss": 0.3849, "loss_nan_ranks": 0, "loss_rank_avg": 0.1282854676246643, "step": 825, "valid_targets_mean": 9321.7, "valid_targets_min": 2724 }, { "epoch": 4.665413533834586, "grad_norm": 0.21221277245667652, "learning_rate": 1.2171907721681755e-05, "loss": 0.383, "loss_nan_ranks": 0, "loss_rank_avg": 0.12876039743423462, "step": 830, "valid_targets_mean": 9454.6, "valid_targets_min": 3387 }, { "epoch": 4.693609022556391, "grad_norm": 0.21708808777941638, "learning_rate": 1.1914794189011767e-05, "loss": 0.3786, "loss_nan_ranks": 0, "loss_rank_avg": 0.12404484301805496, "step": 835, "valid_targets_mean": 8156.2, "valid_targets_min": 233 }, { "epoch": 4.7218045112781954, "grad_norm": 0.20897527096483418, "learning_rate": 1.1659268151839305e-05, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.1307717263698578, "step": 840, "valid_targets_mean": 9895.8, "valid_targets_min": 4674 }, { "epoch": 4.75, "grad_norm": 0.22210410229590297, "learning_rate": 1.1405379781606264e-05, "loss": 0.3874, "loss_nan_ranks": 0, "loss_rank_avg": 0.13114789128303528, "step": 845, "valid_targets_mean": 10139.0, "valid_targets_min": 3599 }, { "epoch": 4.7781954887218046, "grad_norm": 0.20204916090873393, "learning_rate": 1.115317892820564e-05, "loss": 0.3912, "loss_nan_ranks": 0, "loss_rank_avg": 0.13144543766975403, "step": 850, "valid_targets_mean": 9591.1, "valid_targets_min": 1761 }, { "epoch": 4.806390977443609, "grad_norm": 0.21947799398719045, "learning_rate": 1.0902715110193758e-05, "loss": 0.3883, "loss_nan_ranks": 0, "loss_rank_avg": 0.12176516652107239, "step": 855, "valid_targets_mean": 9651.8, "valid_targets_min": 330 }, { "epoch": 4.834586466165414, "grad_norm": 0.2161101557758461, "learning_rate": 1.0654037505067474e-05, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.11943140625953674, "step": 860, "valid_targets_mean": 9051.2, "valid_targets_min": 263 }, { "epoch": 4.862781954887218, "grad_norm": 0.23013476486288856, "learning_rate": 1.0407194939608388e-05, "loss": 0.3912, "loss_nan_ranks": 0, "loss_rank_avg": 0.13007856905460358, "step": 865, "valid_targets_mean": 8389.8, "valid_targets_min": 1988 }, { "epoch": 4.890977443609023, "grad_norm": 0.21572479500745287, "learning_rate": 1.016223588029598e-05, "loss": 0.3902, "loss_nan_ranks": 0, "loss_rank_avg": 0.13947170972824097, "step": 870, "valid_targets_mean": 10650.0, "valid_targets_min": 4889 }, { "epoch": 4.919172932330827, "grad_norm": 0.22701666134084075, "learning_rate": 9.919208423791327e-06, "loss": 0.3869, "loss_nan_ranks": 0, "loss_rank_avg": 0.11517477035522461, "step": 875, "valid_targets_mean": 8416.4, "valid_targets_min": 665 }, { "epoch": 4.947368421052632, "grad_norm": 0.20121129270758675, "learning_rate": 9.678160287493586e-06, "loss": 0.3923, "loss_nan_ranks": 0, "loss_rank_avg": 0.13425880670547485, "step": 880, "valid_targets_mean": 10285.5, "valid_targets_min": 6432 }, { "epoch": 4.975563909774436, "grad_norm": 0.2037395492075607, "learning_rate": 9.439138800170873e-06, "loss": 0.3849, "loss_nan_ranks": 0, "loss_rank_avg": 0.13961973786354065, "step": 885, "valid_targets_mean": 9947.3, "valid_targets_min": 3588 }, { "epoch": 5.0, "grad_norm": 0.3346947281102825, "learning_rate": 9.202190892667482e-06, "loss": 0.3942, "loss_nan_ranks": 0, "loss_rank_avg": 0.3897612690925598, "step": 890, "valid_targets_mean": 9228.0, "valid_targets_min": 264 }, { "epoch": 5.0281954887218046, "grad_norm": 0.21302247364187424, "learning_rate": 8.9673630886892e-06, "loss": 0.3772, "loss_nan_ranks": 0, "loss_rank_avg": 0.1213647872209549, "step": 895, "valid_targets_mean": 9319.2, "valid_targets_min": 3102 }, { "epoch": 5.056390977443609, "grad_norm": 0.21587342614420138, "learning_rate": 8.734701495668564e-06, "loss": 0.3911, "loss_nan_ranks": 0, "loss_rank_avg": 0.12353166937828064, "step": 900, "valid_targets_mean": 8606.1, "valid_targets_min": 3015 }, { "epoch": 5.084586466165414, "grad_norm": 0.2054472136286052, "learning_rate": 8.504251795711865e-06, "loss": 0.3771, "loss_nan_ranks": 0, "loss_rank_avg": 0.12194749712944031, "step": 905, "valid_targets_mean": 9603.5, "valid_targets_min": 5439 }, { "epoch": 5.112781954887218, "grad_norm": 0.20120803043734226, "learning_rate": 8.276059236629704e-06, "loss": 0.3792, "loss_nan_ranks": 0, "loss_rank_avg": 0.14248129725456238, "step": 910, "valid_targets_mean": 10743.0, "valid_targets_min": 6817 }, { "epoch": 5.140977443609023, "grad_norm": 0.19670861515459628, "learning_rate": 8.050168623052737e-06, "loss": 0.3792, "loss_nan_ranks": 0, "loss_rank_avg": 0.12006880342960358, "step": 915, "valid_targets_mean": 9209.8, "valid_targets_min": 2766 }, { "epoch": 5.169172932330827, "grad_norm": 0.20191000822437544, "learning_rate": 7.826624307634478e-06, "loss": 0.3846, "loss_nan_ranks": 0, "loss_rank_avg": 0.13062483072280884, "step": 920, "valid_targets_mean": 9041.1, "valid_targets_min": 323 }, { "epoch": 5.197368421052632, "grad_norm": 0.2397344582787734, "learning_rate": 7.605470182342862e-06, "loss": 0.3804, "loss_nan_ranks": 0, "loss_rank_avg": 0.13111959397792816, "step": 925, "valid_targets_mean": 9180.5, "valid_targets_min": 3764 }, { "epoch": 5.225563909774436, "grad_norm": 0.19522323823550103, "learning_rate": 7.386749669842246e-06, "loss": 0.3847, "loss_nan_ranks": 0, "loss_rank_avg": 0.12215429544448853, "step": 930, "valid_targets_mean": 10079.3, "valid_targets_min": 588 }, { "epoch": 5.253759398496241, "grad_norm": 0.20319852194089968, "learning_rate": 7.170505714967551e-06, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.13277199864387512, "step": 935, "valid_targets_mean": 9177.5, "valid_targets_min": 4229 }, { "epoch": 5.2819548872180455, "grad_norm": 0.2204976318873647, "learning_rate": 6.956780776292211e-06, "loss": 0.3883, "loss_nan_ranks": 0, "loss_rank_avg": 0.12151244282722473, "step": 940, "valid_targets_mean": 9185.8, "valid_targets_min": 3386 }, { "epoch": 5.31015037593985, "grad_norm": 0.21028831630622308, "learning_rate": 6.7456168177916494e-06, "loss": 0.3815, "loss_nan_ranks": 0, "loss_rank_avg": 0.13375839591026306, "step": 945, "valid_targets_mean": 9805.7, "valid_targets_min": 6152 }, { "epoch": 5.338345864661654, "grad_norm": 0.2137812404469613, "learning_rate": 6.537055300603796e-06, "loss": 0.3851, "loss_nan_ranks": 0, "loss_rank_avg": 0.12712323665618896, "step": 950, "valid_targets_mean": 9146.8, "valid_targets_min": 4794 }, { "epoch": 5.366541353383458, "grad_norm": 0.20464250361296388, "learning_rate": 6.331137174888382e-06, "loss": 0.3842, "loss_nan_ranks": 0, "loss_rank_avg": 0.1389198750257492, "step": 955, "valid_targets_mean": 10260.5, "valid_targets_min": 6000 }, { "epoch": 5.394736842105263, "grad_norm": 0.23219865944151416, "learning_rate": 6.127902871786573e-06, "loss": 0.3832, "loss_nan_ranks": 0, "loss_rank_avg": 0.130233034491539, "step": 960, "valid_targets_mean": 8265.2, "valid_targets_min": 3281 }, { "epoch": 5.422932330827067, "grad_norm": 0.19786930591243593, "learning_rate": 5.927392295482468e-06, "loss": 0.3805, "loss_nan_ranks": 0, "loss_rank_avg": 0.13488087058067322, "step": 965, "valid_targets_mean": 11112.7, "valid_targets_min": 314 }, { "epoch": 5.451127819548872, "grad_norm": 0.20387542461587269, "learning_rate": 5.729644815368076e-06, "loss": 0.3892, "loss_nan_ranks": 0, "loss_rank_avg": 0.13123825192451477, "step": 970, "valid_targets_mean": 10086.3, "valid_targets_min": 2633 }, { "epoch": 5.4793233082706765, "grad_norm": 0.2101922545961769, "learning_rate": 5.534699258313314e-06, "loss": 0.3765, "loss_nan_ranks": 0, "loss_rank_avg": 0.12793463468551636, "step": 975, "valid_targets_mean": 8901.5, "valid_targets_min": 3394 }, { "epoch": 5.507518796992481, "grad_norm": 0.20752634763736152, "learning_rate": 5.342593901042532e-06, "loss": 0.3842, "loss_nan_ranks": 0, "loss_rank_avg": 0.14083635807037354, "step": 980, "valid_targets_mean": 9925.1, "valid_targets_min": 4286 }, { "epoch": 5.535714285714286, "grad_norm": 0.35335285262089283, "learning_rate": 5.15336646261903e-06, "loss": 0.388, "loss_nan_ranks": 0, "loss_rank_avg": 0.12494904547929764, "step": 985, "valid_targets_mean": 9939.7, "valid_targets_min": 1785 }, { "epoch": 5.56390977443609, "grad_norm": 0.19133182123598222, "learning_rate": 4.9670540970390636e-06, "loss": 0.3811, "loss_nan_ranks": 0, "loss_rank_avg": 0.1396777480840683, "step": 990, "valid_targets_mean": 10060.9, "valid_targets_min": 1024 }, { "epoch": 5.592105263157895, "grad_norm": 0.1964296720328508, "learning_rate": 4.783693385936841e-06, "loss": 0.3848, "loss_nan_ranks": 0, "loss_rank_avg": 0.13299846649169922, "step": 995, "valid_targets_mean": 9927.0, "valid_targets_min": 4932 }, { "epoch": 5.620300751879699, "grad_norm": 0.2292044326812199, "learning_rate": 4.603320331401873e-06, "loss": 0.3774, "loss_nan_ranks": 0, "loss_rank_avg": 0.13259446620941162, "step": 1000, "valid_targets_mean": 9544.0, "valid_targets_min": 3814 }, { "epoch": 5.648496240601504, "grad_norm": 0.19384689052380707, "learning_rate": 4.425970348910118e-06, "loss": 0.3829, "loss_nan_ranks": 0, "loss_rank_avg": 0.12481251358985901, "step": 1005, "valid_targets_mean": 9298.8, "valid_targets_min": 4568 }, { "epoch": 5.676691729323308, "grad_norm": 0.241070665980004, "learning_rate": 4.251678260370282e-06, "loss": 0.3871, "loss_nan_ranks": 0, "loss_rank_avg": 0.1310911476612091, "step": 1010, "valid_targets_mean": 9981.9, "valid_targets_min": 1687 }, { "epoch": 5.704887218045113, "grad_norm": 0.20849322427978206, "learning_rate": 4.080478287286711e-06, "loss": 0.3776, "loss_nan_ranks": 0, "loss_rank_avg": 0.14320482313632965, "step": 1015, "valid_targets_mean": 10985.3, "valid_targets_min": 6200 }, { "epoch": 5.7330827067669174, "grad_norm": 0.2046835056413381, "learning_rate": 3.912404044040146e-06, "loss": 0.3825, "loss_nan_ranks": 0, "loss_rank_avg": 0.12832486629486084, "step": 1020, "valid_targets_mean": 9840.4, "valid_targets_min": 5349 }, { "epoch": 5.761278195488722, "grad_norm": 0.209012305730083, "learning_rate": 3.747488531287662e-06, "loss": 0.3803, "loss_nan_ranks": 0, "loss_rank_avg": 0.1327711045742035, "step": 1025, "valid_targets_mean": 9742.9, "valid_targets_min": 2831 }, { "epoch": 5.7894736842105265, "grad_norm": 0.19674967780451014, "learning_rate": 3.58576412948316e-06, "loss": 0.3835, "loss_nan_ranks": 0, "loss_rank_avg": 0.13531413674354553, "step": 1030, "valid_targets_mean": 10704.5, "valid_targets_min": 5899 }, { "epoch": 5.817669172932331, "grad_norm": 0.24741818557781142, "learning_rate": 3.4272625925195667e-06, "loss": 0.3844, "loss_nan_ranks": 0, "loss_rank_avg": 0.1324344128370285, "step": 1035, "valid_targets_mean": 10045.7, "valid_targets_min": 3420 }, { "epoch": 5.845864661654136, "grad_norm": 0.20550211199403412, "learning_rate": 3.2720150414941366e-06, "loss": 0.3809, "loss_nan_ranks": 0, "loss_rank_avg": 0.14488618075847626, "step": 1040, "valid_targets_mean": 10401.9, "valid_targets_min": 267 }, { "epoch": 5.87406015037594, "grad_norm": 0.1945930539190225, "learning_rate": 3.120051958597916e-06, "loss": 0.3835, "loss_nan_ranks": 0, "loss_rank_avg": 0.115653857588768, "step": 1045, "valid_targets_mean": 8594.2, "valid_targets_min": 4221 }, { "epoch": 5.902255639097744, "grad_norm": 0.19145738360862058, "learning_rate": 2.971403181130734e-06, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.12772971391677856, "step": 1050, "valid_targets_mean": 10064.6, "valid_targets_min": 289 }, { "epoch": 5.930451127819548, "grad_norm": 0.19566345119018624, "learning_rate": 2.8260978956427388e-06, "loss": 0.386, "loss_nan_ranks": 0, "loss_rank_avg": 0.1241394504904747, "step": 1055, "valid_targets_mean": 9185.3, "valid_targets_min": 3536 }, { "epoch": 5.958646616541353, "grad_norm": 0.21202924343158872, "learning_rate": 2.6841646322037827e-06, "loss": 0.3805, "loss_nan_ranks": 0, "loss_rank_avg": 0.12480293959379196, "step": 1060, "valid_targets_mean": 9387.4, "valid_targets_min": 3979 }, { "epoch": 5.9868421052631575, "grad_norm": 0.20254745835885732, "learning_rate": 2.5456312588016285e-06, "loss": 0.3826, "loss_nan_ranks": 0, "loss_rank_avg": 0.1360769122838974, "step": 1065, "valid_targets_mean": 9964.2, "valid_targets_min": 5163 }, { "epoch": 6.011278195488722, "grad_norm": 0.19454756606987048, "learning_rate": 2.410524975870221e-06, "loss": 0.374, "loss_nan_ranks": 0, "loss_rank_avg": 0.12437275052070618, "step": 1070, "valid_targets_mean": 9493.0, "valid_targets_min": 3644 }, { "epoch": 6.0394736842105265, "grad_norm": 0.20219767320500046, "learning_rate": 2.2788723109489675e-06, "loss": 0.38, "loss_nan_ranks": 0, "loss_rank_avg": 0.13244961202144623, "step": 1075, "valid_targets_mean": 8873.8, "valid_targets_min": 2542 }, { "epoch": 6.067669172932331, "grad_norm": 0.19808173432092233, "learning_rate": 2.1506991134742017e-06, "loss": 0.3803, "loss_nan_ranks": 0, "loss_rank_avg": 0.11605939269065857, "step": 1080, "valid_targets_mean": 9045.4, "valid_targets_min": 2608 }, { "epoch": 6.095864661654136, "grad_norm": 0.1993158153091752, "learning_rate": 2.026030549703735e-06, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.1352459192276001, "step": 1085, "valid_targets_mean": 9781.2, "valid_targets_min": 4811 }, { "epoch": 6.12406015037594, "grad_norm": 0.19751305555869267, "learning_rate": 1.9048910977755675e-06, "loss": 0.3819, "loss_nan_ranks": 0, "loss_rank_avg": 0.11429759114980698, "step": 1090, "valid_targets_mean": 8647.2, "valid_targets_min": 1578 }, { "epoch": 6.152255639097745, "grad_norm": 0.18493304636707505, "learning_rate": 1.7873045429017356e-06, "loss": 0.3803, "loss_nan_ranks": 0, "loss_rank_avg": 0.12613776326179504, "step": 1095, "valid_targets_mean": 9668.5, "valid_targets_min": 3259 }, { "epoch": 6.180451127819548, "grad_norm": 0.18776161177206288, "learning_rate": 1.6732939726981645e-06, "loss": 0.3842, "loss_nan_ranks": 0, "loss_rank_avg": 0.11943809688091278, "step": 1100, "valid_targets_mean": 9483.2, "valid_targets_min": 2189 }, { "epoch": 6.208646616541353, "grad_norm": 0.18838448317833534, "learning_rate": 1.5628817726515166e-06, "loss": 0.3792, "loss_nan_ranks": 0, "loss_rank_avg": 0.12597958743572235, "step": 1105, "valid_targets_mean": 9460.0, "valid_targets_min": 4144 }, { "epoch": 6.2368421052631575, "grad_norm": 0.2375705493410105, "learning_rate": 1.4560896217239017e-06, "loss": 0.388, "loss_nan_ranks": 0, "loss_rank_avg": 0.11778394877910614, "step": 1110, "valid_targets_mean": 8584.2, "valid_targets_min": 4502 }, { "epoch": 6.265037593984962, "grad_norm": 0.18438660001412205, "learning_rate": 1.3529384880963092e-06, "loss": 0.3864, "loss_nan_ranks": 0, "loss_rank_avg": 0.12522190809249878, "step": 1115, "valid_targets_mean": 9539.2, "valid_targets_min": 4219 }, { "epoch": 6.293233082706767, "grad_norm": 0.1938567399119474, "learning_rate": 1.2534486250515943e-06, "loss": 0.3805, "loss_nan_ranks": 0, "loss_rank_avg": 0.12083841115236282, "step": 1120, "valid_targets_mean": 9289.3, "valid_targets_min": 5185 }, { "epoch": 6.321428571428571, "grad_norm": 0.18207638742287655, "learning_rate": 1.1576395669978212e-06, "loss": 0.3777, "loss_nan_ranks": 0, "loss_rank_avg": 0.12208961695432663, "step": 1125, "valid_targets_mean": 9382.1, "valid_targets_min": 2742 }, { "epoch": 6.349624060150376, "grad_norm": 0.19649914276382552, "learning_rate": 1.0655301256327788e-06, "loss": 0.3809, "loss_nan_ranks": 0, "loss_rank_avg": 0.13968250155448914, "step": 1130, "valid_targets_mean": 9572.9, "valid_targets_min": 3557 }, { "epoch": 6.37781954887218, "grad_norm": 0.1867429869043491, "learning_rate": 9.771383862503847e-07, "loss": 0.3794, "loss_nan_ranks": 0, "loss_rank_avg": 0.13156625628471375, "step": 1135, "valid_targets_mean": 9659.4, "valid_targets_min": 4569 }, { "epoch": 6.406015037593985, "grad_norm": 0.19929157079790816, "learning_rate": 8.924817041897072e-07, "loss": 0.3782, "loss_nan_ranks": 0, "loss_rank_avg": 0.12250533699989319, "step": 1140, "valid_targets_mean": 9121.6, "valid_targets_min": 924 }, { "epoch": 6.434210526315789, "grad_norm": 0.2013351979879708, "learning_rate": 8.115767014273213e-07, "loss": 0.3722, "loss_nan_ranks": 0, "loss_rank_avg": 0.128327414393425, "step": 1145, "valid_targets_mean": 10044.7, "valid_targets_min": 5153 }, { "epoch": 6.462406015037594, "grad_norm": 0.23209535963825284, "learning_rate": 7.344392633136555e-07, "loss": 0.3797, "loss_nan_ranks": 0, "loss_rank_avg": 0.12083439528942108, "step": 1150, "valid_targets_mean": 8392.7, "valid_targets_min": 2634 }, { "epoch": 6.4906015037593985, "grad_norm": 0.1840825719493825, "learning_rate": 6.610845354539796e-07, "loss": 0.3818, "loss_nan_ranks": 0, "loss_rank_avg": 0.1267203688621521, "step": 1155, "valid_targets_mean": 9207.0, "valid_targets_min": 3549 }, { "epoch": 6.518796992481203, "grad_norm": 0.2134352841117307, "learning_rate": 5.91526920734613e-07, "loss": 0.3764, "loss_nan_ranks": 0, "loss_rank_avg": 0.12298408150672913, "step": 1160, "valid_targets_mean": 9080.2, "valid_targets_min": 3904 }, { "epoch": 6.546992481203008, "grad_norm": 0.19169087509874685, "learning_rate": 5.257800764949972e-07, "loss": 0.3819, "loss_nan_ranks": 0, "loss_rank_avg": 0.12543432414531708, "step": 1165, "valid_targets_mean": 10044.2, "valid_targets_min": 3035 }, { "epoch": 6.575187969924812, "grad_norm": 0.19277136539646209, "learning_rate": 4.6385691184611894e-07, "loss": 0.3784, "loss_nan_ranks": 0, "loss_rank_avg": 0.11640805006027222, "step": 1170, "valid_targets_mean": 8799.6, "valid_targets_min": 1095 }, { "epoch": 6.603383458646617, "grad_norm": 0.20398736100512838, "learning_rate": 4.057695851358823e-07, "loss": 0.3756, "loss_nan_ranks": 0, "loss_rank_avg": 0.13924311101436615, "step": 1175, "valid_targets_mean": 9870.4, "valid_targets_min": 4946 }, { "epoch": 6.631578947368421, "grad_norm": 0.18409054912360828, "learning_rate": 3.5152950156184475e-07, "loss": 0.3767, "loss_nan_ranks": 0, "loss_rank_avg": 0.12219169735908508, "step": 1180, "valid_targets_mean": 10114.9, "valid_targets_min": 4133 }, { "epoch": 6.659774436090226, "grad_norm": 0.18569484256245025, "learning_rate": 3.0114731093187743e-07, "loss": 0.38, "loss_nan_ranks": 0, "loss_rank_avg": 0.12492775917053223, "step": 1185, "valid_targets_mean": 9793.9, "valid_targets_min": 3441 }, { "epoch": 6.68796992481203, "grad_norm": 0.18747732627298463, "learning_rate": 2.5463290557310895e-07, "loss": 0.3867, "loss_nan_ranks": 0, "loss_rank_avg": 0.14373329281806946, "step": 1190, "valid_targets_mean": 10796.1, "valid_targets_min": 5625 }, { "epoch": 6.716165413533835, "grad_norm": 0.21713007804547255, "learning_rate": 2.1199541838961003e-07, "loss": 0.3802, "loss_nan_ranks": 0, "loss_rank_avg": 0.11859285831451416, "step": 1195, "valid_targets_mean": 8973.0, "valid_targets_min": 1970 }, { "epoch": 6.7443609022556394, "grad_norm": 0.1977847982916674, "learning_rate": 1.7324322106919033e-07, "loss": 0.3772, "loss_nan_ranks": 0, "loss_rank_avg": 0.11961568892002106, "step": 1200, "valid_targets_mean": 8944.6, "valid_targets_min": 2968 }, { "epoch": 6.772556390977444, "grad_norm": 0.19875337528932926, "learning_rate": 1.3838392243965548e-07, "loss": 0.381, "loss_nan_ranks": 0, "loss_rank_avg": 0.1357158124446869, "step": 1205, "valid_targets_mean": 9618.5, "valid_targets_min": 4026 }, { "epoch": 6.8007518796992485, "grad_norm": 0.18307191543836435, "learning_rate": 1.0742436697483761e-07, "loss": 0.3807, "loss_nan_ranks": 0, "loss_rank_avg": 0.1133594959974289, "step": 1210, "valid_targets_mean": 9182.8, "valid_targets_min": 3276 }, { "epoch": 6.828947368421053, "grad_norm": 0.18809350835162797, "learning_rate": 8.03706334507215e-08, "loss": 0.385, "loss_nan_ranks": 0, "loss_rank_avg": 0.13438230752944946, "step": 1215, "valid_targets_mean": 9513.1, "valid_targets_min": 2890 }, { "epoch": 6.857142857142857, "grad_norm": 0.20793246089025288, "learning_rate": 5.72280337518949e-08, "loss": 0.3771, "loss_nan_ranks": 0, "loss_rank_avg": 0.13449817895889282, "step": 1220, "valid_targets_mean": 10282.8, "valid_targets_min": 3864 }, { "epoch": 6.885338345864661, "grad_norm": 0.19353297540771533, "learning_rate": 3.8001111828593897e-08, "loss": 0.3788, "loss_nan_ranks": 0, "loss_rank_avg": 0.12465780973434448, "step": 1225, "valid_targets_mean": 9802.5, "valid_targets_min": 5736 }, { "epoch": 6.913533834586466, "grad_norm": 0.1963382310528581, "learning_rate": 2.2693642804505477e-08, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.13435246050357819, "step": 1230, "valid_targets_mean": 10365.5, "valid_targets_min": 3981 }, { "epoch": 6.94172932330827, "grad_norm": 0.19442832123497505, "learning_rate": 1.1308632235547123e-08, "loss": 0.3796, "loss_nan_ranks": 0, "loss_rank_avg": 0.1150025874376297, "step": 1235, "valid_targets_mean": 8373.7, "valid_targets_min": 2958 }, { "epoch": 6.969924812030075, "grad_norm": 0.20224428523142735, "learning_rate": 3.848315519729973e-09, "loss": 0.3829, "loss_nan_ranks": 0, "loss_rank_avg": 0.1252399981021881, "step": 1240, "valid_targets_mean": 8950.3, "valid_targets_min": 4169 }, { "epoch": 6.9981203007518795, "grad_norm": 0.1923211103930577, "learning_rate": 3.1415745826102185e-10, "loss": 0.3839, "loss_nan_ranks": 0, "loss_rank_avg": 0.14353609085083008, "step": 1245, "valid_targets_mean": 9890.2, "valid_targets_min": 4361 }, { "epoch": 7.0, "step": 1246, "total_flos": 5.085080059986117e+18, "train_loss": 0.0, "train_runtime": 1.1558, "train_samples_per_second": 103019.289, "train_steps_per_second": 1078.038 } ], "logging_steps": 5, "max_steps": 1246, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.085080059986117e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }