{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 1281, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0273224043715847, "grad_norm": 11.838881491183738, "learning_rate": 1.2403100775193799e-06, "loss": 0.781, "loss_nan_ranks": 0, "loss_rank_avg": 0.2679702043533325, "step": 5, "valid_targets_mean": 8483.3, "valid_targets_min": 3529 }, { "epoch": 0.0546448087431694, "grad_norm": 8.917193900897011, "learning_rate": 2.790697674418605e-06, "loss": 0.7665, "loss_nan_ranks": 0, "loss_rank_avg": 0.2544458508491516, "step": 10, "valid_targets_mean": 9028.7, "valid_targets_min": 3229 }, { "epoch": 0.08196721311475409, "grad_norm": 4.5763398940842155, "learning_rate": 4.34108527131783e-06, "loss": 0.7206, "loss_nan_ranks": 0, "loss_rank_avg": 0.2399713397026062, "step": 15, "valid_targets_mean": 8973.2, "valid_targets_min": 2531 }, { "epoch": 0.1092896174863388, "grad_norm": 2.0138887147015327, "learning_rate": 5.891472868217055e-06, "loss": 0.6551, "loss_nan_ranks": 0, "loss_rank_avg": 0.21829384565353394, "step": 20, "valid_targets_mean": 9968.4, "valid_targets_min": 3738 }, { "epoch": 0.1366120218579235, "grad_norm": 1.2798621873265643, "learning_rate": 7.44186046511628e-06, "loss": 0.6161, "loss_nan_ranks": 0, "loss_rank_avg": 0.1876898854970932, "step": 25, "valid_targets_mean": 8451.4, "valid_targets_min": 824 }, { "epoch": 0.16393442622950818, "grad_norm": 1.0982561722021733, "learning_rate": 8.992248062015505e-06, "loss": 0.5942, "loss_nan_ranks": 0, "loss_rank_avg": 0.19325317442417145, "step": 30, "valid_targets_mean": 7975.0, "valid_targets_min": 2809 }, { "epoch": 0.1912568306010929, "grad_norm": 0.7393796649528086, "learning_rate": 1.0542635658914731e-05, "loss": 0.5691, "loss_nan_ranks": 0, "loss_rank_avg": 0.21067436039447784, "step": 35, "valid_targets_mean": 9765.5, "valid_targets_min": 4534 }, { "epoch": 0.2185792349726776, "grad_norm": 0.5772669792435342, "learning_rate": 1.2093023255813954e-05, "loss": 0.5515, "loss_nan_ranks": 0, "loss_rank_avg": 0.1745697259902954, "step": 40, "valid_targets_mean": 9180.5, "valid_targets_min": 3882 }, { "epoch": 0.2459016393442623, "grad_norm": 0.49439644059594706, "learning_rate": 1.3643410852713179e-05, "loss": 0.5309, "loss_nan_ranks": 0, "loss_rank_avg": 0.17998377978801727, "step": 45, "valid_targets_mean": 9357.6, "valid_targets_min": 4159 }, { "epoch": 0.273224043715847, "grad_norm": 0.3884582710415025, "learning_rate": 1.5193798449612405e-05, "loss": 0.5012, "loss_nan_ranks": 0, "loss_rank_avg": 0.14882785081863403, "step": 50, "valid_targets_mean": 7960.4, "valid_targets_min": 1379 }, { "epoch": 0.3005464480874317, "grad_norm": 0.3526692098911082, "learning_rate": 1.674418604651163e-05, "loss": 0.5, "loss_nan_ranks": 0, "loss_rank_avg": 0.15850183367729187, "step": 55, "valid_targets_mean": 8110.5, "valid_targets_min": 3249 }, { "epoch": 0.32786885245901637, "grad_norm": 0.31239085933982574, "learning_rate": 1.8294573643410854e-05, "loss": 0.473, "loss_nan_ranks": 0, "loss_rank_avg": 0.14768633246421814, "step": 60, "valid_targets_mean": 9303.1, "valid_targets_min": 4146 }, { "epoch": 0.3551912568306011, "grad_norm": 0.2501662208549285, "learning_rate": 1.9844961240310078e-05, "loss": 0.4613, "loss_nan_ranks": 0, "loss_rank_avg": 0.1516689956188202, "step": 65, "valid_targets_mean": 8677.5, "valid_targets_min": 3997 }, { "epoch": 0.3825136612021858, "grad_norm": 0.2594417146172949, "learning_rate": 2.1395348837209303e-05, "loss": 0.4435, "loss_nan_ranks": 0, "loss_rank_avg": 0.1571815013885498, "step": 70, "valid_targets_mean": 9267.3, "valid_targets_min": 3503 }, { "epoch": 0.4098360655737705, "grad_norm": 0.24083691213954445, "learning_rate": 2.294573643410853e-05, "loss": 0.447, "loss_nan_ranks": 0, "loss_rank_avg": 0.14124611020088196, "step": 75, "valid_targets_mean": 8758.0, "valid_targets_min": 3576 }, { "epoch": 0.4371584699453552, "grad_norm": 0.2508353728645926, "learning_rate": 2.449612403100775e-05, "loss": 0.4181, "loss_nan_ranks": 0, "loss_rank_avg": 0.13334429264068604, "step": 80, "valid_targets_mean": 8603.5, "valid_targets_min": 2661 }, { "epoch": 0.4644808743169399, "grad_norm": 0.2379197029399351, "learning_rate": 2.604651162790698e-05, "loss": 0.4213, "loss_nan_ranks": 0, "loss_rank_avg": 0.13828732073307037, "step": 85, "valid_targets_mean": 9615.9, "valid_targets_min": 2674 }, { "epoch": 0.4918032786885246, "grad_norm": 0.22995401239255409, "learning_rate": 2.7596899224806204e-05, "loss": 0.4103, "loss_nan_ranks": 0, "loss_rank_avg": 0.1267620027065277, "step": 90, "valid_targets_mean": 8508.2, "valid_targets_min": 1611 }, { "epoch": 0.5191256830601093, "grad_norm": 0.21968035219511445, "learning_rate": 2.914728682170543e-05, "loss": 0.3985, "loss_nan_ranks": 0, "loss_rank_avg": 0.1344222128391266, "step": 95, "valid_targets_mean": 9016.9, "valid_targets_min": 2853 }, { "epoch": 0.546448087431694, "grad_norm": 0.2291396621839033, "learning_rate": 3.0697674418604656e-05, "loss": 0.3942, "loss_nan_ranks": 0, "loss_rank_avg": 0.12746687233448029, "step": 100, "valid_targets_mean": 9609.2, "valid_targets_min": 3162 }, { "epoch": 0.5737704918032787, "grad_norm": 0.26633772490859486, "learning_rate": 3.224806201550388e-05, "loss": 0.3951, "loss_nan_ranks": 0, "loss_rank_avg": 0.12758472561836243, "step": 105, "valid_targets_mean": 9060.2, "valid_targets_min": 3599 }, { "epoch": 0.6010928961748634, "grad_norm": 0.21028182137462473, "learning_rate": 3.37984496124031e-05, "loss": 0.3886, "loss_nan_ranks": 0, "loss_rank_avg": 0.11767546087503433, "step": 110, "valid_targets_mean": 8864.8, "valid_targets_min": 3255 }, { "epoch": 0.6284153005464481, "grad_norm": 0.23885654048701244, "learning_rate": 3.5348837209302326e-05, "loss": 0.3856, "loss_nan_ranks": 0, "loss_rank_avg": 0.1476028561592102, "step": 115, "valid_targets_mean": 10285.3, "valid_targets_min": 5453 }, { "epoch": 0.6557377049180327, "grad_norm": 0.2303960817635363, "learning_rate": 3.6899224806201554e-05, "loss": 0.3767, "loss_nan_ranks": 0, "loss_rank_avg": 0.12939713895320892, "step": 120, "valid_targets_mean": 9426.7, "valid_targets_min": 1574 }, { "epoch": 0.6830601092896175, "grad_norm": 0.22422648576742915, "learning_rate": 3.844961240310078e-05, "loss": 0.3824, "loss_nan_ranks": 0, "loss_rank_avg": 0.12232644855976105, "step": 125, "valid_targets_mean": 9503.8, "valid_targets_min": 2188 }, { "epoch": 0.7103825136612022, "grad_norm": 0.22671595678179757, "learning_rate": 4e-05, "loss": 0.3683, "loss_nan_ranks": 0, "loss_rank_avg": 0.12635521590709686, "step": 130, "valid_targets_mean": 9434.6, "valid_targets_min": 4111 }, { "epoch": 0.7377049180327869, "grad_norm": 0.20637831279045626, "learning_rate": 3.9998140791624865e-05, "loss": 0.3741, "loss_nan_ranks": 0, "loss_rank_avg": 0.11574558913707733, "step": 135, "valid_targets_mean": 8746.7, "valid_targets_min": 1626 }, { "epoch": 0.7650273224043715, "grad_norm": 0.22119347463803593, "learning_rate": 3.999256351216504e-05, "loss": 0.3706, "loss_nan_ranks": 0, "loss_rank_avg": 0.11761431396007538, "step": 140, "valid_targets_mean": 9328.4, "valid_targets_min": 285 }, { "epoch": 0.7923497267759563, "grad_norm": 0.2529631456141331, "learning_rate": 3.9983269198552975e-05, "loss": 0.3671, "loss_nan_ranks": 0, "loss_rank_avg": 0.12752871215343475, "step": 145, "valid_targets_mean": 10542.8, "valid_targets_min": 4902 }, { "epoch": 0.819672131147541, "grad_norm": 0.27077929233544257, "learning_rate": 3.9970259578795265e-05, "loss": 0.3662, "loss_nan_ranks": 0, "loss_rank_avg": 0.12587572634220123, "step": 150, "valid_targets_mean": 9893.7, "valid_targets_min": 4229 }, { "epoch": 0.8469945355191257, "grad_norm": 0.24792011083795368, "learning_rate": 3.99535370716513e-05, "loss": 0.3593, "loss_nan_ranks": 0, "loss_rank_avg": 0.11843468248844147, "step": 155, "valid_targets_mean": 9644.2, "valid_targets_min": 3843 }, { "epoch": 0.8743169398907104, "grad_norm": 0.25208723026736696, "learning_rate": 3.993310478618361e-05, "loss": 0.364, "loss_nan_ranks": 0, "loss_rank_avg": 0.12046317011117935, "step": 160, "valid_targets_mean": 9617.7, "valid_targets_min": 3083 }, { "epoch": 0.9016393442622951, "grad_norm": 0.5699971833507397, "learning_rate": 3.990896652117983e-05, "loss": 0.3672, "loss_nan_ranks": 0, "loss_rank_avg": 0.10490091145038605, "step": 165, "valid_targets_mean": 9126.5, "valid_targets_min": 3756 }, { "epoch": 0.9289617486338798, "grad_norm": 0.21915062353617268, "learning_rate": 3.988112676444639e-05, "loss": 0.3617, "loss_nan_ranks": 0, "loss_rank_avg": 0.10891340672969818, "step": 170, "valid_targets_mean": 7968.0, "valid_targets_min": 1920 }, { "epoch": 0.9562841530054644, "grad_norm": 0.22162783285385126, "learning_rate": 3.9849590691974206e-05, "loss": 0.3591, "loss_nan_ranks": 0, "loss_rank_avg": 0.1189221441745758, "step": 175, "valid_targets_mean": 9948.0, "valid_targets_min": 1449 }, { "epoch": 0.9836065573770492, "grad_norm": 0.2505497323569372, "learning_rate": 3.981436416697625e-05, "loss": 0.3534, "loss_nan_ranks": 0, "loss_rank_avg": 0.13000932335853577, "step": 180, "valid_targets_mean": 9891.6, "valid_targets_min": 3077 }, { "epoch": 1.010928961748634, "grad_norm": 0.257548615241039, "learning_rate": 3.977545373879759e-05, "loss": 0.3574, "loss_nan_ranks": 0, "loss_rank_avg": 0.11166360974311829, "step": 185, "valid_targets_mean": 8307.2, "valid_targets_min": 2528 }, { "epoch": 1.0382513661202186, "grad_norm": 0.2350098965649614, "learning_rate": 3.9732866641697586e-05, "loss": 0.3478, "loss_nan_ranks": 0, "loss_rank_avg": 0.10175259411334991, "step": 190, "valid_targets_mean": 7673.5, "valid_targets_min": 921 }, { "epoch": 1.0655737704918034, "grad_norm": 0.25048777204583766, "learning_rate": 3.968661079350501e-05, "loss": 0.3517, "loss_nan_ranks": 0, "loss_rank_avg": 0.11078484356403351, "step": 195, "valid_targets_mean": 7534.5, "valid_targets_min": 2569 }, { "epoch": 1.092896174863388, "grad_norm": 0.29042532002341653, "learning_rate": 3.963669479414591e-05, "loss": 0.3487, "loss_nan_ranks": 0, "loss_rank_avg": 0.10936549305915833, "step": 200, "valid_targets_mean": 9098.6, "valid_targets_min": 3617 }, { "epoch": 1.1202185792349726, "grad_norm": 0.24753762424635495, "learning_rate": 3.958312792404468e-05, "loss": 0.3446, "loss_nan_ranks": 0, "loss_rank_avg": 0.11017704755067825, "step": 205, "valid_targets_mean": 8941.7, "valid_targets_min": 3491 }, { "epoch": 1.1475409836065573, "grad_norm": 0.2857582899316236, "learning_rate": 3.952592014239867e-05, "loss": 0.3477, "loss_nan_ranks": 0, "loss_rank_avg": 0.11123620718717575, "step": 210, "valid_targets_mean": 8961.5, "valid_targets_min": 3218 }, { "epoch": 1.174863387978142, "grad_norm": 0.3107975974617015, "learning_rate": 3.946508208532656e-05, "loss": 0.3508, "loss_nan_ranks": 0, "loss_rank_avg": 0.10983406007289886, "step": 215, "valid_targets_mean": 9042.6, "valid_targets_min": 3869 }, { "epoch": 1.2021857923497268, "grad_norm": 0.2794395978672758, "learning_rate": 3.940062506389089e-05, "loss": 0.3504, "loss_nan_ranks": 0, "loss_rank_avg": 0.108359694480896, "step": 220, "valid_targets_mean": 8741.7, "valid_targets_min": 1611 }, { "epoch": 1.2295081967213115, "grad_norm": 0.2681446844893584, "learning_rate": 3.9332561061995036e-05, "loss": 0.3513, "loss_nan_ranks": 0, "loss_rank_avg": 0.11848342418670654, "step": 225, "valid_targets_mean": 9721.8, "valid_targets_min": 4370 }, { "epoch": 1.2568306010928962, "grad_norm": 0.2298534429968579, "learning_rate": 3.926090273415526e-05, "loss": 0.3414, "loss_nan_ranks": 0, "loss_rank_avg": 0.10455862432718277, "step": 230, "valid_targets_mean": 7769.4, "valid_targets_min": 930 }, { "epoch": 1.2841530054644807, "grad_norm": 0.22827152947020307, "learning_rate": 3.918566340314788e-05, "loss": 0.3433, "loss_nan_ranks": 0, "loss_rank_avg": 0.10215024650096893, "step": 235, "valid_targets_mean": 8790.8, "valid_targets_min": 4165 }, { "epoch": 1.3114754098360657, "grad_norm": 0.2488097440722295, "learning_rate": 3.910685705753233e-05, "loss": 0.3472, "loss_nan_ranks": 0, "loss_rank_avg": 0.11619973182678223, "step": 240, "valid_targets_mean": 9364.5, "valid_targets_min": 2827 }, { "epoch": 1.3387978142076502, "grad_norm": 0.22057735394385433, "learning_rate": 3.9024498349050385e-05, "loss": 0.3477, "loss_nan_ranks": 0, "loss_rank_avg": 0.11552197486162186, "step": 245, "valid_targets_mean": 9655.0, "valid_targets_min": 4307 }, { "epoch": 1.366120218579235, "grad_norm": 0.2163271712853194, "learning_rate": 3.893860258990212e-05, "loss": 0.3324, "loss_nan_ranks": 0, "loss_rank_avg": 0.1090204268693924, "step": 250, "valid_targets_mean": 9703.5, "valid_targets_min": 3165 }, { "epoch": 1.3934426229508197, "grad_norm": 0.2528209867857862, "learning_rate": 3.8849185749898996e-05, "loss": 0.3367, "loss_nan_ranks": 0, "loss_rank_avg": 0.11540880799293518, "step": 255, "valid_targets_mean": 10452.7, "valid_targets_min": 4897 }, { "epoch": 1.4207650273224044, "grad_norm": 0.22333938650940646, "learning_rate": 3.87562644534948e-05, "loss": 0.3382, "loss_nan_ranks": 0, "loss_rank_avg": 0.11605552583932877, "step": 260, "valid_targets_mean": 8834.8, "valid_targets_min": 852 }, { "epoch": 1.4480874316939891, "grad_norm": 0.2670327230642673, "learning_rate": 3.865985597669478e-05, "loss": 0.3427, "loss_nan_ranks": 0, "loss_rank_avg": 0.11774078011512756, "step": 265, "valid_targets_mean": 9915.6, "valid_targets_min": 1283 }, { "epoch": 1.4754098360655736, "grad_norm": 0.24992505310393578, "learning_rate": 3.855997824384369e-05, "loss": 0.3433, "loss_nan_ranks": 0, "loss_rank_avg": 0.11916451156139374, "step": 270, "valid_targets_mean": 9151.8, "valid_targets_min": 4312 }, { "epoch": 1.5027322404371586, "grad_norm": 0.23459272261084146, "learning_rate": 3.845664982429328e-05, "loss": 0.3363, "loss_nan_ranks": 0, "loss_rank_avg": 0.11957293748855591, "step": 275, "valid_targets_mean": 9786.7, "valid_targets_min": 3164 }, { "epoch": 1.530054644808743, "grad_norm": 0.23828026929857207, "learning_rate": 3.834988992894983e-05, "loss": 0.3319, "loss_nan_ranks": 0, "loss_rank_avg": 0.12325535714626312, "step": 280, "valid_targets_mean": 9396.9, "valid_targets_min": 4421 }, { "epoch": 1.5573770491803278, "grad_norm": 0.21959387205650174, "learning_rate": 3.823971840670251e-05, "loss": 0.3387, "loss_nan_ranks": 0, "loss_rank_avg": 0.12241716682910919, "step": 285, "valid_targets_mean": 9773.9, "valid_targets_min": 3191 }, { "epoch": 1.5846994535519126, "grad_norm": 0.22362204727522375, "learning_rate": 3.812615574073301e-05, "loss": 0.3377, "loss_nan_ranks": 0, "loss_rank_avg": 0.10839522629976273, "step": 290, "valid_targets_mean": 9549.2, "valid_targets_min": 3643 }, { "epoch": 1.6120218579234973, "grad_norm": 0.23343607016117385, "learning_rate": 3.800922304470728e-05, "loss": 0.3313, "loss_nan_ranks": 0, "loss_rank_avg": 0.10157692432403564, "step": 295, "valid_targets_mean": 8760.2, "valid_targets_min": 2346 }, { "epoch": 1.639344262295082, "grad_norm": 0.25421982858320236, "learning_rate": 3.7888942058850105e-05, "loss": 0.3359, "loss_nan_ranks": 0, "loss_rank_avg": 0.11778868734836578, "step": 300, "valid_targets_mean": 9254.8, "valid_targets_min": 3701 }, { "epoch": 1.6666666666666665, "grad_norm": 0.24398684968303413, "learning_rate": 3.7765335145903124e-05, "loss": 0.3301, "loss_nan_ranks": 0, "loss_rank_avg": 0.10742858797311783, "step": 305, "valid_targets_mean": 10601.6, "valid_targets_min": 4584 }, { "epoch": 1.6939890710382515, "grad_norm": 0.22746398932508854, "learning_rate": 3.76384252869671e-05, "loss": 0.3322, "loss_nan_ranks": 0, "loss_rank_avg": 0.1208631843328476, "step": 310, "valid_targets_mean": 9742.0, "valid_targets_min": 3459 }, { "epoch": 1.721311475409836, "grad_norm": 0.24480565463344894, "learning_rate": 3.750823607722931e-05, "loss": 0.3285, "loss_nan_ranks": 0, "loss_rank_avg": 0.11582465469837189, "step": 315, "valid_targets_mean": 9165.2, "valid_targets_min": 3364 }, { "epoch": 1.748633879781421, "grad_norm": 0.2180718409308529, "learning_rate": 3.737479172157665e-05, "loss": 0.3357, "loss_nan_ranks": 0, "loss_rank_avg": 0.1275629997253418, "step": 320, "valid_targets_mean": 9666.4, "valid_targets_min": 4633 }, { "epoch": 1.7759562841530054, "grad_norm": 0.24489899725053552, "learning_rate": 3.723811703009549e-05, "loss": 0.3308, "loss_nan_ranks": 0, "loss_rank_avg": 0.10959871858358383, "step": 325, "valid_targets_mean": 8244.2, "valid_targets_min": 3566 }, { "epoch": 1.8032786885245902, "grad_norm": 0.2479273797839025, "learning_rate": 3.709823741345894e-05, "loss": 0.331, "loss_nan_ranks": 0, "loss_rank_avg": 0.11131682991981506, "step": 330, "valid_targets_mean": 8796.8, "valid_targets_min": 2477 }, { "epoch": 1.830601092896175, "grad_norm": 0.2571862418094793, "learning_rate": 3.695517887820247e-05, "loss": 0.3274, "loss_nan_ranks": 0, "loss_rank_avg": 0.10547453165054321, "step": 335, "valid_targets_mean": 8258.8, "valid_targets_min": 1715 }, { "epoch": 1.8579234972677594, "grad_norm": 0.22348674772376723, "learning_rate": 3.680896802188876e-05, "loss": 0.33, "loss_nan_ranks": 0, "loss_rank_avg": 0.1058754026889801, "step": 340, "valid_targets_mean": 8889.8, "valid_targets_min": 913 }, { "epoch": 1.8852459016393444, "grad_norm": 0.21028274581991213, "learning_rate": 3.66596320281627e-05, "loss": 0.3296, "loss_nan_ranks": 0, "loss_rank_avg": 0.12007634341716766, "step": 345, "valid_targets_mean": 10142.2, "valid_targets_min": 3243 }, { "epoch": 1.9125683060109289, "grad_norm": 0.22986592321070748, "learning_rate": 3.6507198661697276e-05, "loss": 0.3338, "loss_nan_ranks": 0, "loss_rank_avg": 0.11267776787281036, "step": 350, "valid_targets_mean": 8925.9, "valid_targets_min": 2910 }, { "epoch": 1.9398907103825138, "grad_norm": 0.24096406367512213, "learning_rate": 3.635169626303168e-05, "loss": 0.3307, "loss_nan_ranks": 0, "loss_rank_avg": 0.10832738876342773, "step": 355, "valid_targets_mean": 8725.2, "valid_targets_min": 3893 }, { "epoch": 1.9672131147540983, "grad_norm": 0.2754776858828639, "learning_rate": 3.619315374330208e-05, "loss": 0.3345, "loss_nan_ranks": 0, "loss_rank_avg": 0.11519007384777069, "step": 360, "valid_targets_mean": 9125.4, "valid_targets_min": 1016 }, { "epoch": 1.994535519125683, "grad_norm": 0.2486552522027151, "learning_rate": 3.603160057886655e-05, "loss": 0.3334, "loss_nan_ranks": 0, "loss_rank_avg": 0.11450847238302231, "step": 365, "valid_targets_mean": 8982.2, "valid_targets_min": 3333 }, { "epoch": 2.021857923497268, "grad_norm": 0.2743213274153223, "learning_rate": 3.586706680582471e-05, "loss": 0.3253, "loss_nan_ranks": 0, "loss_rank_avg": 0.10886304080486298, "step": 370, "valid_targets_mean": 9614.8, "valid_targets_min": 3756 }, { "epoch": 2.0491803278688523, "grad_norm": 0.255653642834224, "learning_rate": 3.569958301443344e-05, "loss": 0.321, "loss_nan_ranks": 0, "loss_rank_avg": 0.11270833015441895, "step": 375, "valid_targets_mean": 9813.9, "valid_targets_min": 2054 }, { "epoch": 2.0765027322404372, "grad_norm": 0.24133265109850416, "learning_rate": 3.552918034341952e-05, "loss": 0.3195, "loss_nan_ranks": 0, "loss_rank_avg": 0.11231839656829834, "step": 380, "valid_targets_mean": 8981.4, "valid_targets_min": 2826 }, { "epoch": 2.1038251366120218, "grad_norm": 0.25526011562795836, "learning_rate": 3.5355890474190244e-05, "loss": 0.3288, "loss_nan_ranks": 0, "loss_rank_avg": 0.11535529792308807, "step": 385, "valid_targets_mean": 9212.4, "valid_targets_min": 4995 }, { "epoch": 2.1311475409836067, "grad_norm": 0.23604329145357902, "learning_rate": 3.517974562494324e-05, "loss": 0.3231, "loss_nan_ranks": 0, "loss_rank_avg": 0.10826494544744492, "step": 390, "valid_targets_mean": 8674.5, "valid_targets_min": 717 }, { "epoch": 2.158469945355191, "grad_norm": 0.21848145690843088, "learning_rate": 3.5000778544676404e-05, "loss": 0.3254, "loss_nan_ranks": 0, "loss_rank_avg": 0.11187595129013062, "step": 395, "valid_targets_mean": 8702.6, "valid_targets_min": 359 }, { "epoch": 2.185792349726776, "grad_norm": 0.24660363230345203, "learning_rate": 3.4819022507099184e-05, "loss": 0.3217, "loss_nan_ranks": 0, "loss_rank_avg": 0.1158638447523117, "step": 400, "valid_targets_mean": 9505.6, "valid_targets_min": 4652 }, { "epoch": 2.2131147540983607, "grad_norm": 0.2317494188303323, "learning_rate": 3.463451130444631e-05, "loss": 0.324, "loss_nan_ranks": 0, "loss_rank_avg": 0.10151409357786179, "step": 405, "valid_targets_mean": 8673.2, "valid_targets_min": 3371 }, { "epoch": 2.240437158469945, "grad_norm": 0.2558065717810365, "learning_rate": 3.444727924119511e-05, "loss": 0.328, "loss_nan_ranks": 0, "loss_rank_avg": 0.10935921221971512, "step": 410, "valid_targets_mean": 8613.8, "valid_targets_min": 1834 }, { "epoch": 2.26775956284153, "grad_norm": 0.2468684240893415, "learning_rate": 3.42573611276876e-05, "loss": 0.3169, "loss_nan_ranks": 0, "loss_rank_avg": 0.09871050715446472, "step": 415, "valid_targets_mean": 8523.5, "valid_targets_min": 4438 }, { "epoch": 2.2950819672131146, "grad_norm": 0.2606641720946758, "learning_rate": 3.4064792273658494e-05, "loss": 0.3226, "loss_nan_ranks": 0, "loss_rank_avg": 0.09632217884063721, "step": 420, "valid_targets_mean": 7952.3, "valid_targets_min": 576 }, { "epoch": 2.3224043715846996, "grad_norm": 0.2349015992186721, "learning_rate": 3.386960848167041e-05, "loss": 0.3197, "loss_nan_ranks": 0, "loss_rank_avg": 0.12064424157142639, "step": 425, "valid_targets_mean": 8627.5, "valid_targets_min": 2693 }, { "epoch": 2.349726775956284, "grad_norm": 0.26046775499998737, "learning_rate": 3.367184604045743e-05, "loss": 0.3222, "loss_nan_ranks": 0, "loss_rank_avg": 0.10175183415412903, "step": 430, "valid_targets_mean": 8524.4, "valid_targets_min": 3249 }, { "epoch": 2.3770491803278686, "grad_norm": 0.22651301464400855, "learning_rate": 3.347154171817825e-05, "loss": 0.3248, "loss_nan_ranks": 0, "loss_rank_avg": 0.11612209677696228, "step": 435, "valid_targets_mean": 9728.2, "valid_targets_min": 3810 }, { "epoch": 2.4043715846994536, "grad_norm": 0.23039268184631387, "learning_rate": 3.3268732755580226e-05, "loss": 0.318, "loss_nan_ranks": 0, "loss_rank_avg": 0.11132186651229858, "step": 440, "valid_targets_mean": 8813.7, "valid_targets_min": 2476 }, { "epoch": 2.431693989071038, "grad_norm": 0.23764518989398273, "learning_rate": 3.306345685907553e-05, "loss": 0.3132, "loss_nan_ranks": 0, "loss_rank_avg": 0.10579263418912888, "step": 445, "valid_targets_mean": 8756.8, "valid_targets_min": 2697 }, { "epoch": 2.459016393442623, "grad_norm": 0.23123022720630637, "learning_rate": 3.285575219373079e-05, "loss": 0.3212, "loss_nan_ranks": 0, "loss_rank_avg": 0.1140381246805191, "step": 450, "valid_targets_mean": 9343.9, "valid_targets_min": 2043 }, { "epoch": 2.4863387978142075, "grad_norm": 0.269360392744887, "learning_rate": 3.264565737617132e-05, "loss": 0.3169, "loss_nan_ranks": 0, "loss_rank_avg": 0.10570000857114792, "step": 455, "valid_targets_mean": 9540.1, "valid_targets_min": 4028 }, { "epoch": 2.5136612021857925, "grad_norm": 0.221114564777979, "learning_rate": 3.243321146740155e-05, "loss": 0.3269, "loss_nan_ranks": 0, "loss_rank_avg": 0.12460845708847046, "step": 460, "valid_targets_mean": 9913.7, "valid_targets_min": 2636 }, { "epoch": 2.540983606557377, "grad_norm": 0.2233016882583844, "learning_rate": 3.2218453965542785e-05, "loss": 0.3254, "loss_nan_ranks": 0, "loss_rank_avg": 0.107123464345932, "step": 465, "valid_targets_mean": 9036.3, "valid_targets_min": 2631 }, { "epoch": 2.5683060109289615, "grad_norm": 0.2159132253652805, "learning_rate": 3.2001424798489625e-05, "loss": 0.3179, "loss_nan_ranks": 0, "loss_rank_avg": 0.10757862031459808, "step": 470, "valid_targets_mean": 8646.6, "valid_targets_min": 1410 }, { "epoch": 2.5956284153005464, "grad_norm": 0.24917088168724738, "learning_rate": 3.1782164316486566e-05, "loss": 0.3152, "loss_nan_ranks": 0, "loss_rank_avg": 0.10982252657413483, "step": 475, "valid_targets_mean": 9668.7, "valid_targets_min": 1508 }, { "epoch": 2.6229508196721314, "grad_norm": 0.2174015848024412, "learning_rate": 3.156071328462607e-05, "loss": 0.3165, "loss_nan_ranks": 0, "loss_rank_avg": 0.09340976923704147, "step": 480, "valid_targets_mean": 9737.8, "valid_targets_min": 3792 }, { "epoch": 2.650273224043716, "grad_norm": 0.2512049654033023, "learning_rate": 3.1337112875269436e-05, "loss": 0.3167, "loss_nan_ranks": 0, "loss_rank_avg": 0.09841690212488174, "step": 485, "valid_targets_mean": 8864.9, "valid_targets_min": 2941 }, { "epoch": 2.6775956284153004, "grad_norm": 0.22880792088164945, "learning_rate": 3.111140466039205e-05, "loss": 0.32, "loss_nan_ranks": 0, "loss_rank_avg": 0.10708227008581161, "step": 490, "valid_targets_mean": 8706.9, "valid_targets_min": 2747 }, { "epoch": 2.7049180327868854, "grad_norm": 0.231439018967597, "learning_rate": 3.088363060385424e-05, "loss": 0.3131, "loss_nan_ranks": 0, "loss_rank_avg": 0.11007743328809738, "step": 495, "valid_targets_mean": 10188.8, "valid_targets_min": 3932 }, { "epoch": 2.73224043715847, "grad_norm": 0.2428291380653805, "learning_rate": 3.065383305359938e-05, "loss": 0.3155, "loss_nan_ranks": 0, "loss_rank_avg": 0.10607362538576126, "step": 500, "valid_targets_mean": 8751.4, "valid_targets_min": 2661 }, { "epoch": 2.7595628415300544, "grad_norm": 0.2113160568789913, "learning_rate": 3.0422054733780474e-05, "loss": 0.3214, "loss_nan_ranks": 0, "loss_rank_avg": 0.11031060665845871, "step": 505, "valid_targets_mean": 9535.6, "valid_targets_min": 4219 }, { "epoch": 2.7868852459016393, "grad_norm": 0.23720201082771936, "learning_rate": 3.018833873681684e-05, "loss": 0.3203, "loss_nan_ranks": 0, "loss_rank_avg": 0.10088831186294556, "step": 510, "valid_targets_mean": 8321.9, "valid_targets_min": 3472 }, { "epoch": 2.8142076502732243, "grad_norm": 0.27037560406468397, "learning_rate": 2.9952728515382383e-05, "loss": 0.3125, "loss_nan_ranks": 0, "loss_rank_avg": 0.1012275442481041, "step": 515, "valid_targets_mean": 8525.2, "valid_targets_min": 930 }, { "epoch": 2.841530054644809, "grad_norm": 0.21901703489991992, "learning_rate": 2.9715267874326805e-05, "loss": 0.3167, "loss_nan_ranks": 0, "loss_rank_avg": 0.11215992271900177, "step": 520, "valid_targets_mean": 9853.0, "valid_targets_min": 1782 }, { "epoch": 2.8688524590163933, "grad_norm": 0.2473747830382745, "learning_rate": 2.947600096253136e-05, "loss": 0.3169, "loss_nan_ranks": 0, "loss_rank_avg": 0.0938570499420166, "step": 525, "valid_targets_mean": 8734.0, "valid_targets_min": 3715 }, { "epoch": 2.8961748633879782, "grad_norm": 0.19923680438401953, "learning_rate": 2.9234972264700687e-05, "loss": 0.3138, "loss_nan_ranks": 0, "loss_rank_avg": 0.10463187098503113, "step": 530, "valid_targets_mean": 8936.3, "valid_targets_min": 2301 }, { "epoch": 2.9234972677595628, "grad_norm": 0.22922414429489327, "learning_rate": 2.8992226593092135e-05, "loss": 0.3133, "loss_nan_ranks": 0, "loss_rank_avg": 0.11466294527053833, "step": 535, "valid_targets_mean": 8984.4, "valid_targets_min": 3255 }, { "epoch": 2.9508196721311473, "grad_norm": 0.20989994462632877, "learning_rate": 2.874780907918429e-05, "loss": 0.3165, "loss_nan_ranks": 0, "loss_rank_avg": 0.09710144996643066, "step": 540, "valid_targets_mean": 8905.9, "valid_targets_min": 3163 }, { "epoch": 2.978142076502732, "grad_norm": 0.21839827056447508, "learning_rate": 2.8501765165286025e-05, "loss": 0.3196, "loss_nan_ranks": 0, "loss_rank_avg": 0.10432031750679016, "step": 545, "valid_targets_mean": 9281.0, "valid_targets_min": 2322 }, { "epoch": 3.0054644808743167, "grad_norm": 0.2285305181088268, "learning_rate": 2.8254140596087897e-05, "loss": 0.3094, "loss_nan_ranks": 0, "loss_rank_avg": 0.10178789496421814, "step": 550, "valid_targets_mean": 8931.3, "valid_targets_min": 3180 }, { "epoch": 3.0327868852459017, "grad_norm": 0.2017808641425933, "learning_rate": 2.8004981410157187e-05, "loss": 0.3079, "loss_nan_ranks": 0, "loss_rank_avg": 0.09892808645963669, "step": 555, "valid_targets_mean": 9654.6, "valid_targets_min": 2883 }, { "epoch": 3.060109289617486, "grad_norm": 0.2251907804199061, "learning_rate": 2.775433393137841e-05, "loss": 0.3004, "loss_nan_ranks": 0, "loss_rank_avg": 0.10582572966814041, "step": 560, "valid_targets_mean": 8835.2, "valid_targets_min": 2457 }, { "epoch": 3.087431693989071, "grad_norm": 0.2516272437175909, "learning_rate": 2.750224476034076e-05, "loss": 0.3089, "loss_nan_ranks": 0, "loss_rank_avg": 0.1124773770570755, "step": 565, "valid_targets_mean": 9312.9, "valid_targets_min": 2875 }, { "epoch": 3.1147540983606556, "grad_norm": 0.2227184002299704, "learning_rate": 2.7248760765674033e-05, "loss": 0.3132, "loss_nan_ranks": 0, "loss_rank_avg": 0.10076034069061279, "step": 570, "valid_targets_mean": 8707.6, "valid_targets_min": 2296 }, { "epoch": 3.1420765027322406, "grad_norm": 0.22416125947761548, "learning_rate": 2.699392907533482e-05, "loss": 0.3057, "loss_nan_ranks": 0, "loss_rank_avg": 0.11026563495397568, "step": 575, "valid_targets_mean": 9723.8, "valid_targets_min": 3465 }, { "epoch": 3.169398907103825, "grad_norm": 0.20899809066246997, "learning_rate": 2.6737797067844403e-05, "loss": 0.3131, "loss_nan_ranks": 0, "loss_rank_avg": 0.10523172467947006, "step": 580, "valid_targets_mean": 9560.2, "valid_targets_min": 4534 }, { "epoch": 3.19672131147541, "grad_norm": 0.23222838408930938, "learning_rate": 2.6480412363480138e-05, "loss": 0.3096, "loss_nan_ranks": 0, "loss_rank_avg": 0.10207949578762054, "step": 585, "valid_targets_mean": 9779.5, "valid_targets_min": 4601 }, { "epoch": 3.2240437158469946, "grad_norm": 0.2802069274859931, "learning_rate": 2.6221822815421817e-05, "loss": 0.3069, "loss_nan_ranks": 0, "loss_rank_avg": 0.10540831089019775, "step": 590, "valid_targets_mean": 10070.7, "valid_targets_min": 2866 }, { "epoch": 3.251366120218579, "grad_norm": 0.20542978202801448, "learning_rate": 2.5962076500854804e-05, "loss": 0.3108, "loss_nan_ranks": 0, "loss_rank_avg": 0.08749028295278549, "step": 595, "valid_targets_mean": 8740.9, "valid_targets_min": 2098 }, { "epoch": 3.278688524590164, "grad_norm": 0.22733842428820036, "learning_rate": 2.570122171203142e-05, "loss": 0.3088, "loss_nan_ranks": 0, "loss_rank_avg": 0.09117378294467926, "step": 600, "valid_targets_mean": 8412.1, "valid_targets_min": 805 }, { "epoch": 3.3060109289617485, "grad_norm": 0.2438261017999824, "learning_rate": 2.5439306947292485e-05, "loss": 0.3072, "loss_nan_ranks": 0, "loss_rank_avg": 0.11073037981987, "step": 605, "valid_targets_mean": 9375.6, "valid_targets_min": 2173 }, { "epoch": 3.3333333333333335, "grad_norm": 0.24465291986437376, "learning_rate": 2.5176380902050418e-05, "loss": 0.3192, "loss_nan_ranks": 0, "loss_rank_avg": 0.09698040783405304, "step": 610, "valid_targets_mean": 8386.6, "valid_targets_min": 1156 }, { "epoch": 3.360655737704918, "grad_norm": 0.2008109008659842, "learning_rate": 2.4912492459735752e-05, "loss": 0.3062, "loss_nan_ranks": 0, "loss_rank_avg": 0.09698103368282318, "step": 615, "valid_targets_mean": 9220.1, "valid_targets_min": 4023 }, { "epoch": 3.387978142076503, "grad_norm": 0.20970173880663623, "learning_rate": 2.4647690682708695e-05, "loss": 0.3092, "loss_nan_ranks": 0, "loss_rank_avg": 0.10502799600362778, "step": 620, "valid_targets_mean": 10270.1, "valid_targets_min": 4413 }, { "epoch": 3.4153005464480874, "grad_norm": 0.20616431756721573, "learning_rate": 2.4382024803137396e-05, "loss": 0.3114, "loss_nan_ranks": 0, "loss_rank_avg": 0.10608991235494614, "step": 625, "valid_targets_mean": 9512.1, "valid_targets_min": 3351 }, { "epoch": 3.442622950819672, "grad_norm": 0.20966320270309535, "learning_rate": 2.41155442138447e-05, "loss": 0.3066, "loss_nan_ranks": 0, "loss_rank_avg": 0.09832080453634262, "step": 630, "valid_targets_mean": 8836.7, "valid_targets_min": 2827 }, { "epoch": 3.469945355191257, "grad_norm": 0.2143240783515177, "learning_rate": 2.384829845912494e-05, "loss": 0.3131, "loss_nan_ranks": 0, "loss_rank_avg": 0.09457945823669434, "step": 635, "valid_targets_mean": 9319.6, "valid_targets_min": 2697 }, { "epoch": 3.4972677595628414, "grad_norm": 0.217793398021425, "learning_rate": 2.3580337225532663e-05, "loss": 0.3071, "loss_nan_ranks": 0, "loss_rank_avg": 0.09785357862710953, "step": 640, "valid_targets_mean": 8962.5, "valid_targets_min": 2043 }, { "epoch": 3.5245901639344264, "grad_norm": 0.21567616833440192, "learning_rate": 2.331171033264482e-05, "loss": 0.3139, "loss_nan_ranks": 0, "loss_rank_avg": 0.0948730856180191, "step": 645, "valid_targets_mean": 8423.4, "valid_targets_min": 3658 }, { "epoch": 3.551912568306011, "grad_norm": 0.21559528731790625, "learning_rate": 2.3042467723798335e-05, "loss": 0.3081, "loss_nan_ranks": 0, "loss_rank_avg": 0.10408905148506165, "step": 650, "valid_targets_mean": 9092.1, "valid_targets_min": 2668 }, { "epoch": 3.579234972677596, "grad_norm": 0.211833304339463, "learning_rate": 2.2772659456804537e-05, "loss": 0.3129, "loss_nan_ranks": 0, "loss_rank_avg": 0.10551073402166367, "step": 655, "valid_targets_mean": 9483.7, "valid_targets_min": 3771 }, { "epoch": 3.6065573770491803, "grad_norm": 0.22483771277803868, "learning_rate": 2.2502335694642388e-05, "loss": 0.3107, "loss_nan_ranks": 0, "loss_rank_avg": 0.10551051795482635, "step": 660, "valid_targets_mean": 8905.8, "valid_targets_min": 3549 }, { "epoch": 3.633879781420765, "grad_norm": 0.22386943756040195, "learning_rate": 2.223154669613215e-05, "loss": 0.3085, "loss_nan_ranks": 0, "loss_rank_avg": 0.10307562351226807, "step": 665, "valid_targets_mean": 8859.0, "valid_targets_min": 3300 }, { "epoch": 3.66120218579235, "grad_norm": 0.22170142391966322, "learning_rate": 2.196034280659122e-05, "loss": 0.3104, "loss_nan_ranks": 0, "loss_rank_avg": 0.09553318470716476, "step": 670, "valid_targets_mean": 8777.2, "valid_targets_min": 2879 }, { "epoch": 3.6885245901639343, "grad_norm": 0.2142352891381397, "learning_rate": 2.1688774448473863e-05, "loss": 0.3109, "loss_nan_ranks": 0, "loss_rank_avg": 0.10052451491355896, "step": 675, "valid_targets_mean": 8826.8, "valid_targets_min": 4146 }, { "epoch": 3.7158469945355193, "grad_norm": 0.19977901434675086, "learning_rate": 2.1416892111996685e-05, "loss": 0.3029, "loss_nan_ranks": 0, "loss_rank_avg": 0.11022960394620895, "step": 680, "valid_targets_mean": 9945.0, "valid_targets_min": 3549 }, { "epoch": 3.7431693989071038, "grad_norm": 0.193299048595288, "learning_rate": 2.114474634575138e-05, "loss": 0.3104, "loss_nan_ranks": 0, "loss_rank_avg": 0.10461431741714478, "step": 685, "valid_targets_mean": 8731.8, "valid_targets_min": 2403 }, { "epoch": 3.7704918032786887, "grad_norm": 0.20778558224455732, "learning_rate": 2.0872387747306725e-05, "loss": 0.3151, "loss_nan_ranks": 0, "loss_rank_avg": 0.10199467837810516, "step": 690, "valid_targets_mean": 9486.6, "valid_targets_min": 3255 }, { "epoch": 3.797814207650273, "grad_norm": 0.23682061658623274, "learning_rate": 2.0599866953801456e-05, "loss": 0.3037, "loss_nan_ranks": 0, "loss_rank_avg": 0.09984984993934631, "step": 695, "valid_targets_mean": 9708.8, "valid_targets_min": 1938 }, { "epoch": 3.8251366120218577, "grad_norm": 0.235770071313055, "learning_rate": 2.0327234632529738e-05, "loss": 0.3125, "loss_nan_ranks": 0, "loss_rank_avg": 0.11001993715763092, "step": 700, "valid_targets_mean": 9621.1, "valid_targets_min": 4455 }, { "epoch": 3.8524590163934427, "grad_norm": 0.24380201762655782, "learning_rate": 2.005454147152108e-05, "loss": 0.3088, "loss_nan_ranks": 0, "loss_rank_avg": 0.10252358019351959, "step": 705, "valid_targets_mean": 8781.7, "valid_targets_min": 2964 }, { "epoch": 3.879781420765027, "grad_norm": 0.22538655705455618, "learning_rate": 1.9781838170116357e-05, "loss": 0.3014, "loss_nan_ranks": 0, "loss_rank_avg": 0.09685234725475311, "step": 710, "valid_targets_mean": 8708.4, "valid_targets_min": 3186 }, { "epoch": 3.907103825136612, "grad_norm": 0.2225369904461113, "learning_rate": 1.950917542954176e-05, "loss": 0.31, "loss_nan_ranks": 0, "loss_rank_avg": 0.1139606386423111, "step": 715, "valid_targets_mean": 9864.2, "valid_targets_min": 3034 }, { "epoch": 3.9344262295081966, "grad_norm": 0.1900714759599998, "learning_rate": 1.923660394348237e-05, "loss": 0.3022, "loss_nan_ranks": 0, "loss_rank_avg": 0.09253083169460297, "step": 720, "valid_targets_mean": 8268.0, "valid_targets_min": 2846 }, { "epoch": 3.9617486338797816, "grad_norm": 0.19618089341199454, "learning_rate": 1.8964174388657167e-05, "loss": 0.312, "loss_nan_ranks": 0, "loss_rank_avg": 0.09711985290050507, "step": 725, "valid_targets_mean": 9442.0, "valid_targets_min": 4287 }, { "epoch": 3.989071038251366, "grad_norm": 0.21451682228357283, "learning_rate": 1.869193741539714e-05, "loss": 0.306, "loss_nan_ranks": 0, "loss_rank_avg": 0.11297638714313507, "step": 730, "valid_targets_mean": 9649.7, "valid_targets_min": 2839 }, { "epoch": 4.016393442622951, "grad_norm": 0.2267764588190383, "learning_rate": 1.8419943638228362e-05, "loss": 0.3005, "loss_nan_ranks": 0, "loss_rank_avg": 0.10513165593147278, "step": 735, "valid_targets_mean": 9610.8, "valid_targets_min": 3783 }, { "epoch": 4.043715846994536, "grad_norm": 0.23188280374229425, "learning_rate": 1.8148243626461693e-05, "loss": 0.3077, "loss_nan_ranks": 0, "loss_rank_avg": 0.10651808232069016, "step": 740, "valid_targets_mean": 9377.2, "valid_targets_min": 2556 }, { "epoch": 4.0710382513661205, "grad_norm": 0.20234257345492324, "learning_rate": 1.7876887894790856e-05, "loss": 0.3051, "loss_nan_ranks": 0, "loss_rank_avg": 0.10677113384008408, "step": 745, "valid_targets_mean": 9526.9, "valid_targets_min": 1232 }, { "epoch": 4.098360655737705, "grad_norm": 0.2006664192357114, "learning_rate": 1.7605926893900755e-05, "loss": 0.3073, "loss_nan_ranks": 0, "loss_rank_avg": 0.10224565118551254, "step": 750, "valid_targets_mean": 9564.0, "valid_targets_min": 3879 }, { "epoch": 4.1256830601092895, "grad_norm": 0.21611930448024275, "learning_rate": 1.7335411001087604e-05, "loss": 0.3029, "loss_nan_ranks": 0, "loss_rank_avg": 0.10021790862083435, "step": 755, "valid_targets_mean": 9495.2, "valid_targets_min": 4249 }, { "epoch": 4.1530054644808745, "grad_norm": 0.20492543825613885, "learning_rate": 1.7065390510892767e-05, "loss": 0.3062, "loss_nan_ranks": 0, "loss_rank_avg": 0.1090320274233818, "step": 760, "valid_targets_mean": 9487.8, "valid_targets_min": 2368 }, { "epoch": 4.180327868852459, "grad_norm": 0.22303909905490066, "learning_rate": 1.6795915625751916e-05, "loss": 0.3033, "loss_nan_ranks": 0, "loss_rank_avg": 0.09251723438501358, "step": 765, "valid_targets_mean": 7996.1, "valid_targets_min": 2941 }, { "epoch": 4.2076502732240435, "grad_norm": 0.216240174021842, "learning_rate": 1.6527036446661396e-05, "loss": 0.2986, "loss_nan_ranks": 0, "loss_rank_avg": 0.09408397972583771, "step": 770, "valid_targets_mean": 9731.0, "valid_targets_min": 3576 }, { "epoch": 4.2349726775956285, "grad_norm": 0.21817938152151142, "learning_rate": 1.625880296386336e-05, "loss": 0.3002, "loss_nan_ranks": 0, "loss_rank_avg": 0.09434144198894501, "step": 775, "valid_targets_mean": 9463.3, "valid_targets_min": 4429 }, { "epoch": 4.262295081967213, "grad_norm": 0.2117746616975632, "learning_rate": 1.599126504755159e-05, "loss": 0.3031, "loss_nan_ranks": 0, "loss_rank_avg": 0.09988536685705185, "step": 780, "valid_targets_mean": 8893.2, "valid_targets_min": 1144 }, { "epoch": 4.2896174863387975, "grad_norm": 0.24386065924604794, "learning_rate": 1.5724472438599554e-05, "loss": 0.2994, "loss_nan_ranks": 0, "loss_rank_avg": 0.09023170173168182, "step": 785, "valid_targets_mean": 8590.4, "valid_targets_min": 2235 }, { "epoch": 4.316939890710382, "grad_norm": 0.21134859055315403, "learning_rate": 1.545847473931254e-05, "loss": 0.3035, "loss_nan_ranks": 0, "loss_rank_avg": 0.10283560305833817, "step": 790, "valid_targets_mean": 10174.0, "valid_targets_min": 3954 }, { "epoch": 4.344262295081967, "grad_norm": 0.20065533277407502, "learning_rate": 1.5193321404205583e-05, "loss": 0.3026, "loss_nan_ranks": 0, "loss_rank_avg": 0.10268115997314453, "step": 795, "valid_targets_mean": 9324.8, "valid_targets_min": 1283 }, { "epoch": 4.371584699453552, "grad_norm": 0.215674808813511, "learning_rate": 1.4929061730808813e-05, "loss": 0.3026, "loss_nan_ranks": 0, "loss_rank_avg": 0.09143080562353134, "step": 800, "valid_targets_mean": 9008.8, "valid_targets_min": 2476 }, { "epoch": 4.398907103825136, "grad_norm": 0.19754230475541815, "learning_rate": 1.4665744850502035e-05, "loss": 0.2991, "loss_nan_ranks": 0, "loss_rank_avg": 0.09778804332017899, "step": 805, "valid_targets_mean": 8793.2, "valid_targets_min": 917 }, { "epoch": 4.426229508196721, "grad_norm": 0.21173287315948128, "learning_rate": 1.4403419719380161e-05, "loss": 0.3026, "loss_nan_ranks": 0, "loss_rank_avg": 0.10597346723079681, "step": 810, "valid_targets_mean": 8464.4, "valid_targets_min": 4000 }, { "epoch": 4.453551912568306, "grad_norm": 0.21557820949214415, "learning_rate": 1.4142135109151273e-05, "loss": 0.3018, "loss_nan_ranks": 0, "loss_rank_avg": 0.10664163529872894, "step": 815, "valid_targets_mean": 8985.9, "valid_targets_min": 3113 }, { "epoch": 4.48087431693989, "grad_norm": 0.2057707972831818, "learning_rate": 1.388193959806893e-05, "loss": 0.3029, "loss_nan_ranks": 0, "loss_rank_avg": 0.100834421813488, "step": 820, "valid_targets_mean": 9252.4, "valid_targets_min": 3107 }, { "epoch": 4.508196721311475, "grad_norm": 0.2035704407094551, "learning_rate": 1.3622881561900476e-05, "loss": 0.3017, "loss_nan_ranks": 0, "loss_rank_avg": 0.09915086627006531, "step": 825, "valid_targets_mean": 9511.0, "valid_targets_min": 2668 }, { "epoch": 4.53551912568306, "grad_norm": 0.19575235441533922, "learning_rate": 1.3365009164932964e-05, "loss": 0.3091, "loss_nan_ranks": 0, "loss_rank_avg": 0.10887756943702698, "step": 830, "valid_targets_mean": 9509.0, "valid_targets_min": 3946 }, { "epoch": 4.562841530054644, "grad_norm": 0.19655396398761169, "learning_rate": 1.3108370351018393e-05, "loss": 0.2992, "loss_nan_ranks": 0, "loss_rank_avg": 0.09594590961933136, "step": 835, "valid_targets_mean": 9079.8, "valid_targets_min": 3052 }, { "epoch": 4.590163934426229, "grad_norm": 0.21253925854859518, "learning_rate": 1.285301283466e-05, "loss": 0.303, "loss_nan_ranks": 0, "loss_rank_avg": 0.0905645489692688, "step": 840, "valid_targets_mean": 8182.4, "valid_targets_min": 3218 }, { "epoch": 4.617486338797814, "grad_norm": 0.20072810587848328, "learning_rate": 1.2598984092141083e-05, "loss": 0.3033, "loss_nan_ranks": 0, "loss_rank_avg": 0.10311625897884369, "step": 845, "valid_targets_mean": 9788.3, "valid_targets_min": 3318 }, { "epoch": 4.644808743169399, "grad_norm": 0.18665016397140438, "learning_rate": 1.2346331352698206e-05, "loss": 0.2985, "loss_nan_ranks": 0, "loss_rank_avg": 0.09972083568572998, "step": 850, "valid_targets_mean": 9216.6, "valid_targets_min": 3940 }, { "epoch": 4.672131147540983, "grad_norm": 0.19938029887487937, "learning_rate": 1.2095101589740291e-05, "loss": 0.3015, "loss_nan_ranks": 0, "loss_rank_avg": 0.10425819456577301, "step": 855, "valid_targets_mean": 9457.9, "valid_targets_min": 3127 }, { "epoch": 4.699453551912568, "grad_norm": 0.1909538138649328, "learning_rate": 1.1845341512115267e-05, "loss": 0.3034, "loss_nan_ranks": 0, "loss_rank_avg": 0.09648244082927704, "step": 860, "valid_targets_mean": 9526.7, "valid_targets_min": 1217 }, { "epoch": 4.726775956284153, "grad_norm": 0.20046945627414886, "learning_rate": 1.1597097555425954e-05, "loss": 0.3059, "loss_nan_ranks": 0, "loss_rank_avg": 0.1106850728392601, "step": 865, "valid_targets_mean": 9296.9, "valid_targets_min": 1941 }, { "epoch": 4.754098360655737, "grad_norm": 0.19632506560486584, "learning_rate": 1.1350415873396673e-05, "loss": 0.298, "loss_nan_ranks": 0, "loss_rank_avg": 0.11077627539634705, "step": 870, "valid_targets_mean": 10079.2, "valid_targets_min": 3742 }, { "epoch": 4.781420765027322, "grad_norm": 0.20226350645324653, "learning_rate": 1.1105342329292368e-05, "loss": 0.2986, "loss_nan_ranks": 0, "loss_rank_avg": 0.09841288626194, "step": 875, "valid_targets_mean": 9524.3, "valid_targets_min": 2640 }, { "epoch": 4.808743169398907, "grad_norm": 0.19833226467815507, "learning_rate": 1.0861922487391588e-05, "loss": 0.3036, "loss_nan_ranks": 0, "loss_rank_avg": 0.11468029022216797, "step": 880, "valid_targets_mean": 9712.4, "valid_targets_min": 2192 }, { "epoch": 4.836065573770492, "grad_norm": 0.2078497374363878, "learning_rate": 1.0620201604515225e-05, "loss": 0.2988, "loss_nan_ranks": 0, "loss_rank_avg": 0.09936510026454926, "step": 885, "valid_targets_mean": 9141.4, "valid_targets_min": 3946 }, { "epoch": 4.863387978142076, "grad_norm": 0.2088859014000221, "learning_rate": 1.0380224621612252e-05, "loss": 0.3092, "loss_nan_ranks": 0, "loss_rank_avg": 0.09573344886302948, "step": 890, "valid_targets_mean": 8703.2, "valid_targets_min": 3200 }, { "epoch": 4.890710382513661, "grad_norm": 0.20305305414716004, "learning_rate": 1.0142036155404322e-05, "loss": 0.2958, "loss_nan_ranks": 0, "loss_rank_avg": 0.10266251862049103, "step": 895, "valid_targets_mean": 9156.1, "valid_targets_min": 2608 }, { "epoch": 4.918032786885246, "grad_norm": 0.19630418224220209, "learning_rate": 9.905680490090557e-06, "loss": 0.3014, "loss_nan_ranks": 0, "loss_rank_avg": 0.09524133801460266, "step": 900, "valid_targets_mean": 8565.4, "valid_targets_min": 3327 }, { "epoch": 4.945355191256831, "grad_norm": 0.20168250280220565, "learning_rate": 9.671201569114213e-06, "loss": 0.3004, "loss_nan_ranks": 0, "loss_rank_avg": 0.1022343784570694, "step": 905, "valid_targets_mean": 9478.6, "valid_targets_min": 3999 }, { "epoch": 4.972677595628415, "grad_norm": 0.18721802046864403, "learning_rate": 9.438642986992641e-06, "loss": 0.3017, "loss_nan_ranks": 0, "loss_rank_avg": 0.09467854350805283, "step": 910, "valid_targets_mean": 9398.7, "valid_targets_min": 3953 }, { "epoch": 5.0, "grad_norm": 0.1849748779265062, "learning_rate": 9.20804798121221e-06, "loss": 0.3005, "loss_nan_ranks": 0, "loss_rank_avg": 0.10355669260025024, "step": 915, "valid_targets_mean": 10328.0, "valid_targets_min": 5001 }, { "epoch": 5.027322404371585, "grad_norm": 0.19962321315407866, "learning_rate": 8.979459424189525e-06, "loss": 0.3005, "loss_nan_ranks": 0, "loss_rank_avg": 0.09472991526126862, "step": 920, "valid_targets_mean": 9011.2, "valid_targets_min": 3075 }, { "epoch": 5.054644808743169, "grad_norm": 0.20809601692737512, "learning_rate": 8.752919815300541e-06, "loss": 0.3, "loss_nan_ranks": 0, "loss_rank_avg": 0.09876327216625214, "step": 925, "valid_targets_mean": 8703.9, "valid_targets_min": 3837 }, { "epoch": 5.081967213114754, "grad_norm": 0.19153145876651814, "learning_rate": 8.528471272979083e-06, "loss": 0.3006, "loss_nan_ranks": 0, "loss_rank_avg": 0.0934215560555458, "step": 930, "valid_targets_mean": 8727.2, "valid_targets_min": 1647 }, { "epoch": 5.109289617486339, "grad_norm": 0.19078481475641737, "learning_rate": 8.30615552688611e-06, "loss": 0.3012, "loss_nan_ranks": 0, "loss_rank_avg": 0.10409128665924072, "step": 935, "valid_targets_mean": 9045.9, "valid_targets_min": 3195 }, { "epoch": 5.136612021857924, "grad_norm": 0.1771089600396969, "learning_rate": 8.086013910151334e-06, "loss": 0.2989, "loss_nan_ranks": 0, "loss_rank_avg": 0.09420415759086609, "step": 940, "valid_targets_mean": 9303.5, "valid_targets_min": 1773 }, { "epoch": 5.163934426229508, "grad_norm": 0.2047670944434993, "learning_rate": 7.868087351688508e-06, "loss": 0.2981, "loss_nan_ranks": 0, "loss_rank_avg": 0.10569000244140625, "step": 945, "valid_targets_mean": 9336.9, "valid_targets_min": 3503 }, { "epoch": 5.191256830601093, "grad_norm": 0.19039028840148223, "learning_rate": 7.652416368585904e-06, "loss": 0.3008, "loss_nan_ranks": 0, "loss_rank_avg": 0.09627662599086761, "step": 950, "valid_targets_mean": 8417.0, "valid_targets_min": 4164 }, { "epoch": 5.218579234972678, "grad_norm": 0.18347981206285033, "learning_rate": 7.4390410585733176e-06, "loss": 0.3038, "loss_nan_ranks": 0, "loss_rank_avg": 0.10283610969781876, "step": 955, "valid_targets_mean": 10466.1, "valid_targets_min": 3068 }, { "epoch": 5.245901639344262, "grad_norm": 0.1879595722998192, "learning_rate": 7.228001092567094e-06, "loss": 0.2949, "loss_nan_ranks": 0, "loss_rank_avg": 0.10533443093299866, "step": 960, "valid_targets_mean": 9295.6, "valid_targets_min": 3928 }, { "epoch": 5.273224043715847, "grad_norm": 0.192338487969101, "learning_rate": 7.01933570729447e-06, "loss": 0.2972, "loss_nan_ranks": 0, "loss_rank_avg": 0.10413585603237152, "step": 965, "valid_targets_mean": 9177.1, "valid_targets_min": 2608 }, { "epoch": 5.300546448087432, "grad_norm": 0.18316019445161066, "learning_rate": 6.8130836979986236e-06, "loss": 0.3004, "loss_nan_ranks": 0, "loss_rank_avg": 0.09926620125770569, "step": 970, "valid_targets_mean": 9234.4, "valid_targets_min": 898 }, { "epoch": 5.327868852459017, "grad_norm": 0.1938556846923826, "learning_rate": 6.609283411225873e-06, "loss": 0.3033, "loss_nan_ranks": 0, "loss_rank_avg": 0.1027819961309433, "step": 975, "valid_targets_mean": 10174.5, "valid_targets_min": 5253 }, { "epoch": 5.355191256830601, "grad_norm": 0.2002549610033798, "learning_rate": 6.407972737696211e-06, "loss": 0.2984, "loss_nan_ranks": 0, "loss_rank_avg": 0.10788215696811676, "step": 980, "valid_targets_mean": 9056.6, "valid_targets_min": 1809 }, { "epoch": 5.382513661202186, "grad_norm": 0.23628923762267479, "learning_rate": 6.209189105258661e-06, "loss": 0.3033, "loss_nan_ranks": 0, "loss_rank_avg": 0.10605168342590332, "step": 985, "valid_targets_mean": 9745.0, "valid_targets_min": 335 }, { "epoch": 5.409836065573771, "grad_norm": 0.20266745514688106, "learning_rate": 6.012969471932657e-06, "loss": 0.2998, "loss_nan_ranks": 0, "loss_rank_avg": 0.08208920806646347, "step": 990, "valid_targets_mean": 7854.0, "valid_targets_min": 3258 }, { "epoch": 5.437158469945355, "grad_norm": 0.19564425846701008, "learning_rate": 5.819350319036765e-06, "loss": 0.2918, "loss_nan_ranks": 0, "loss_rank_avg": 0.09307222813367844, "step": 995, "valid_targets_mean": 9187.5, "valid_targets_min": 4142 }, { "epoch": 5.46448087431694, "grad_norm": 0.18755944297837682, "learning_rate": 5.628367644406039e-06, "loss": 0.2944, "loss_nan_ranks": 0, "loss_rank_avg": 0.10457966476678848, "step": 1000, "valid_targets_mean": 10211.1, "valid_targets_min": 2593 }, { "epoch": 5.491803278688525, "grad_norm": 0.17901385962822888, "learning_rate": 5.440056955699304e-06, "loss": 0.2939, "loss_nan_ranks": 0, "loss_rank_avg": 0.10055485367774963, "step": 1005, "valid_targets_mean": 9671.6, "valid_targets_min": 3367 }, { "epoch": 5.51912568306011, "grad_norm": 0.1847063215491466, "learning_rate": 5.254453263797521e-06, "loss": 0.2983, "loss_nan_ranks": 0, "loss_rank_avg": 0.10656660050153732, "step": 1010, "valid_targets_mean": 9140.8, "valid_targets_min": 2622 }, { "epoch": 5.546448087431694, "grad_norm": 0.19112949612412253, "learning_rate": 5.0715910762945245e-06, "loss": 0.3021, "loss_nan_ranks": 0, "loss_rank_avg": 0.09959493577480316, "step": 1015, "valid_targets_mean": 9179.6, "valid_targets_min": 3364 }, { "epoch": 5.573770491803279, "grad_norm": 0.18311052180054005, "learning_rate": 4.8915043910813745e-06, "loss": 0.2891, "loss_nan_ranks": 0, "loss_rank_avg": 0.08283189684152603, "step": 1020, "valid_targets_mean": 8305.5, "valid_targets_min": 3385 }, { "epoch": 5.601092896174864, "grad_norm": 0.20148162135858644, "learning_rate": 4.7142266900254006e-06, "loss": 0.2968, "loss_nan_ranks": 0, "loss_rank_avg": 0.09707757830619812, "step": 1025, "valid_targets_mean": 8936.8, "valid_targets_min": 1156 }, { "epoch": 5.628415300546449, "grad_norm": 0.20074382989705658, "learning_rate": 4.53979093274526e-06, "loss": 0.2994, "loss_nan_ranks": 0, "loss_rank_avg": 0.0964854508638382, "step": 1030, "valid_targets_mean": 8863.9, "valid_targets_min": 2173 }, { "epoch": 5.655737704918033, "grad_norm": 0.1885114584658996, "learning_rate": 4.3682295504830474e-06, "loss": 0.3026, "loss_nan_ranks": 0, "loss_rank_avg": 0.09963271021842957, "step": 1035, "valid_targets_mean": 9199.4, "valid_targets_min": 1574 }, { "epoch": 5.683060109289618, "grad_norm": 0.18296985933463858, "learning_rate": 4.199574440074623e-06, "loss": 0.2976, "loss_nan_ranks": 0, "loss_rank_avg": 0.10074016451835632, "step": 1040, "valid_targets_mean": 10138.1, "valid_targets_min": 857 }, { "epoch": 5.7103825136612025, "grad_norm": 0.2863001675152787, "learning_rate": 4.033856958019371e-06, "loss": 0.2997, "loss_nan_ranks": 0, "loss_rank_avg": 0.09942857921123505, "step": 1045, "valid_targets_mean": 9787.4, "valid_targets_min": 3371 }, { "epoch": 5.737704918032787, "grad_norm": 0.1898147998476062, "learning_rate": 3.8711079146503474e-06, "loss": 0.303, "loss_nan_ranks": 0, "loss_rank_avg": 0.10072211176156998, "step": 1050, "valid_targets_mean": 9318.2, "valid_targets_min": 4205 }, { "epoch": 5.7650273224043715, "grad_norm": 0.1919997166958646, "learning_rate": 3.7113575684060045e-06, "loss": 0.2935, "loss_nan_ranks": 0, "loss_rank_avg": 0.09942483901977539, "step": 1055, "valid_targets_mean": 8786.4, "valid_targets_min": 3289 }, { "epoch": 5.7923497267759565, "grad_norm": 0.18796578742686923, "learning_rate": 3.554635620204503e-06, "loss": 0.298, "loss_nan_ranks": 0, "loss_rank_avg": 0.10854838043451309, "step": 1060, "valid_targets_mean": 9333.5, "valid_targets_min": 2235 }, { "epoch": 5.8196721311475414, "grad_norm": 0.19241542356969288, "learning_rate": 3.400971207921706e-06, "loss": 0.2952, "loss_nan_ranks": 0, "loss_rank_avg": 0.10749229788780212, "step": 1065, "valid_targets_mean": 9327.5, "valid_targets_min": 3286 }, { "epoch": 5.8469945355191255, "grad_norm": 0.1767732898447703, "learning_rate": 3.2503929009738443e-06, "loss": 0.2973, "loss_nan_ranks": 0, "loss_rank_avg": 0.10737217217683792, "step": 1070, "valid_targets_mean": 10028.6, "valid_targets_min": 3630 }, { "epoch": 5.8743169398907105, "grad_norm": 0.18563482469756257, "learning_rate": 3.102928695005858e-06, "loss": 0.3014, "loss_nan_ranks": 0, "loss_rank_avg": 0.10022571682929993, "step": 1075, "valid_targets_mean": 9828.6, "valid_targets_min": 2734 }, { "epoch": 5.901639344262295, "grad_norm": 0.19161764885066354, "learning_rate": 2.9586060066864286e-06, "loss": 0.3016, "loss_nan_ranks": 0, "loss_rank_avg": 0.09793832898139954, "step": 1080, "valid_targets_mean": 8281.3, "valid_targets_min": 3716 }, { "epoch": 5.9289617486338795, "grad_norm": 0.18758187439096646, "learning_rate": 2.8174516686106334e-06, "loss": 0.2953, "loss_nan_ranks": 0, "loss_rank_avg": 0.09064612537622452, "step": 1085, "valid_targets_mean": 8252.4, "valid_targets_min": 3597 }, { "epoch": 5.956284153005464, "grad_norm": 0.17933861822945216, "learning_rate": 2.679491924311226e-06, "loss": 0.2939, "loss_nan_ranks": 0, "loss_rank_avg": 0.08484348654747009, "step": 1090, "valid_targets_mean": 8447.0, "valid_targets_min": 1606 }, { "epoch": 5.983606557377049, "grad_norm": 0.2184035657642095, "learning_rate": 2.5447524233794154e-06, "loss": 0.2952, "loss_nan_ranks": 0, "loss_rank_avg": 0.10270243883132935, "step": 1095, "valid_targets_mean": 9839.3, "valid_targets_min": 2240 }, { "epoch": 6.0109289617486334, "grad_norm": 0.18016930588695532, "learning_rate": 2.4132582166960594e-06, "loss": 0.2993, "loss_nan_ranks": 0, "loss_rank_avg": 0.09060963988304138, "step": 1100, "valid_targets_mean": 8204.4, "valid_targets_min": 4318 }, { "epoch": 6.038251366120218, "grad_norm": 0.1807505264729319, "learning_rate": 2.2850337517741926e-06, "loss": 0.2953, "loss_nan_ranks": 0, "loss_rank_avg": 0.11188945174217224, "step": 1105, "valid_targets_mean": 9868.6, "valid_targets_min": 2986 }, { "epoch": 6.065573770491803, "grad_norm": 0.17725550842049373, "learning_rate": 2.1601028682137184e-06, "loss": 0.2982, "loss_nan_ranks": 0, "loss_rank_avg": 0.10796324908733368, "step": 1110, "valid_targets_mean": 9903.1, "valid_targets_min": 4404 }, { "epoch": 6.092896174863388, "grad_norm": 0.17843385797969258, "learning_rate": 2.038488793269142e-06, "loss": 0.2962, "loss_nan_ranks": 0, "loss_rank_avg": 0.10193131864070892, "step": 1115, "valid_targets_mean": 9263.0, "valid_targets_min": 2531 }, { "epoch": 6.120218579234972, "grad_norm": 0.18922618634146468, "learning_rate": 1.9202141375311335e-06, "loss": 0.2987, "loss_nan_ranks": 0, "loss_rank_avg": 0.11177726089954376, "step": 1120, "valid_targets_mean": 10449.2, "valid_targets_min": 1942 }, { "epoch": 6.147540983606557, "grad_norm": 0.18531454529279406, "learning_rate": 1.8053008907227454e-06, "loss": 0.2971, "loss_nan_ranks": 0, "loss_rank_avg": 0.09427063167095184, "step": 1125, "valid_targets_mean": 8988.2, "valid_targets_min": 3419 }, { "epoch": 6.174863387978142, "grad_norm": 0.16935445242728722, "learning_rate": 1.6937704176110582e-06, "loss": 0.3047, "loss_nan_ranks": 0, "loss_rank_avg": 0.10389615595340729, "step": 1130, "valid_targets_mean": 9609.2, "valid_targets_min": 4159 }, { "epoch": 6.202185792349727, "grad_norm": 0.16871211622471122, "learning_rate": 1.5856434540350462e-06, "loss": 0.2888, "loss_nan_ranks": 0, "loss_rank_avg": 0.09071569889783859, "step": 1135, "valid_targets_mean": 9943.8, "valid_targets_min": 1810 }, { "epoch": 6.229508196721311, "grad_norm": 0.17256264539708166, "learning_rate": 1.4809401030503345e-06, "loss": 0.2958, "loss_nan_ranks": 0, "loss_rank_avg": 0.09398217499256134, "step": 1140, "valid_targets_mean": 9639.2, "valid_targets_min": 1414 }, { "epoch": 6.256830601092896, "grad_norm": 0.18438155928726757, "learning_rate": 1.3796798311916337e-06, "loss": 0.2961, "loss_nan_ranks": 0, "loss_rank_avg": 0.09930949658155441, "step": 1145, "valid_targets_mean": 8487.8, "valid_targets_min": 2400 }, { "epoch": 6.284153005464481, "grad_norm": 0.1997423930316252, "learning_rate": 1.2818814648534895e-06, "loss": 0.2962, "loss_nan_ranks": 0, "loss_rank_avg": 0.1077326089143753, "step": 1150, "valid_targets_mean": 9475.9, "valid_targets_min": 3065 }, { "epoch": 6.311475409836065, "grad_norm": 0.18503322678054349, "learning_rate": 1.187563186790075e-06, "loss": 0.2974, "loss_nan_ranks": 0, "loss_rank_avg": 0.10482992976903915, "step": 1155, "valid_targets_mean": 9660.8, "valid_targets_min": 785 }, { "epoch": 6.33879781420765, "grad_norm": 0.18384891461773237, "learning_rate": 1.0967425327346447e-06, "loss": 0.2923, "loss_nan_ranks": 0, "loss_rank_avg": 0.1072668582201004, "step": 1160, "valid_targets_mean": 10043.8, "valid_targets_min": 2948 }, { "epoch": 6.366120218579235, "grad_norm": 0.17679068550790888, "learning_rate": 1.0094363881392665e-06, "loss": 0.2925, "loss_nan_ranks": 0, "loss_rank_avg": 0.08984339237213135, "step": 1165, "valid_targets_mean": 9161.0, "valid_targets_min": 2769 }, { "epoch": 6.39344262295082, "grad_norm": 0.1832802026343294, "learning_rate": 9.256609850354636e-07, "loss": 0.2969, "loss_nan_ranks": 0, "loss_rank_avg": 0.08827763050794601, "step": 1170, "valid_targets_mean": 7520.1, "valid_targets_min": 1938 }, { "epoch": 6.420765027322404, "grad_norm": 0.1866939536850409, "learning_rate": 8.45431899016338e-07, "loss": 0.2994, "loss_nan_ranks": 0, "loss_rank_avg": 0.10817040503025055, "step": 1175, "valid_targets_mean": 9274.0, "valid_targets_min": 418 }, { "epoch": 6.448087431693989, "grad_norm": 0.1744012616260319, "learning_rate": 7.687640463407597e-07, "loss": 0.2936, "loss_nan_ranks": 0, "loss_rank_avg": 0.09306332468986511, "step": 1180, "valid_targets_mean": 8181.0, "valid_targets_min": 2941 }, { "epoch": 6.475409836065574, "grad_norm": 0.17390086899257662, "learning_rate": 6.956716811601106e-07, "loss": 0.2957, "loss_nan_ranks": 0, "loss_rank_avg": 0.09819996356964111, "step": 1185, "valid_targets_mean": 9636.3, "valid_targets_min": 3687 }, { "epoch": 6.502732240437158, "grad_norm": 0.18082724032868075, "learning_rate": 6.261683928681383e-07, "loss": 0.2948, "loss_nan_ranks": 0, "loss_rank_avg": 0.09933126717805862, "step": 1190, "valid_targets_mean": 9554.2, "valid_targets_min": 3068 }, { "epoch": 6.530054644808743, "grad_norm": 0.1835801622956054, "learning_rate": 5.602671035744123e-07, "loss": 0.2963, "loss_nan_ranks": 0, "loss_rank_avg": 0.0911635234951973, "step": 1195, "valid_targets_mean": 8480.4, "valid_targets_min": 2558 }, { "epoch": 6.557377049180328, "grad_norm": 0.1824726825862776, "learning_rate": 4.979800657018308e-07, "loss": 0.2996, "loss_nan_ranks": 0, "loss_rank_avg": 0.09349572658538818, "step": 1200, "valid_targets_mean": 9020.2, "valid_targets_min": 2332 }, { "epoch": 6.584699453551913, "grad_norm": 0.17047555364265193, "learning_rate": 4.393188597086395e-07, "loss": 0.29, "loss_nan_ranks": 0, "loss_rank_avg": 0.09120497107505798, "step": 1205, "valid_targets_mean": 8665.4, "valid_targets_min": 3232 }, { "epoch": 6.612021857923497, "grad_norm": 0.18297000199911517, "learning_rate": 3.842943919353914e-07, "loss": 0.2949, "loss_nan_ranks": 0, "loss_rank_avg": 0.09089861810207367, "step": 1210, "valid_targets_mean": 8642.9, "valid_targets_min": 3945 }, { "epoch": 6.639344262295082, "grad_norm": 0.17410787574033595, "learning_rate": 3.3291689257721526e-07, "loss": 0.3004, "loss_nan_ranks": 0, "loss_rank_avg": 0.09799309074878693, "step": 1215, "valid_targets_mean": 10470.1, "valid_targets_min": 2412 }, { "epoch": 6.666666666666667, "grad_norm": 0.1760736064792558, "learning_rate": 2.8519591378181944e-07, "loss": 0.2948, "loss_nan_ranks": 0, "loss_rank_avg": 0.09015963971614838, "step": 1220, "valid_targets_mean": 8567.0, "valid_targets_min": 3465 }, { "epoch": 6.693989071038251, "grad_norm": 0.17863351381213097, "learning_rate": 2.4114032787355246e-07, "loss": 0.3015, "loss_nan_ranks": 0, "loss_rank_avg": 0.1061282753944397, "step": 1225, "valid_targets_mean": 8916.6, "valid_targets_min": 816 }, { "epoch": 6.721311475409836, "grad_norm": 0.1734492997559578, "learning_rate": 2.0075832570384257e-07, "loss": 0.298, "loss_nan_ranks": 0, "loss_rank_avg": 0.09828498959541321, "step": 1230, "valid_targets_mean": 9364.9, "valid_targets_min": 3576 }, { "epoch": 6.748633879781421, "grad_norm": 0.1716812499454898, "learning_rate": 1.6405741512835137e-07, "loss": 0.2962, "loss_nan_ranks": 0, "loss_rank_avg": 0.1010083481669426, "step": 1235, "valid_targets_mean": 9363.5, "valid_targets_min": 2674 }, { "epoch": 6.775956284153006, "grad_norm": 0.17904530357170415, "learning_rate": 1.310444196111127e-07, "loss": 0.2995, "loss_nan_ranks": 0, "loss_rank_avg": 0.10142147541046143, "step": 1240, "valid_targets_mean": 9184.9, "valid_targets_min": 3464 }, { "epoch": 6.80327868852459, "grad_norm": 0.1737805584907467, "learning_rate": 1.0172547695590062e-07, "loss": 0.2992, "loss_nan_ranks": 0, "loss_rank_avg": 0.10017992556095123, "step": 1245, "valid_targets_mean": 10094.7, "valid_targets_min": 3627 }, { "epoch": 6.830601092896175, "grad_norm": 0.1845270392071439, "learning_rate": 7.61060381650891e-08, "loss": 0.2947, "loss_nan_ranks": 0, "loss_rank_avg": 0.10975556075572968, "step": 1250, "valid_targets_mean": 10162.3, "valid_targets_min": 3446 }, { "epoch": 6.85792349726776, "grad_norm": 0.175468702056559, "learning_rate": 5.4190866426195866e-08, "loss": 0.2945, "loss_nan_ranks": 0, "loss_rank_avg": 0.11438390612602234, "step": 1255, "valid_targets_mean": 10338.3, "valid_targets_min": 4181 }, { "epoch": 6.885245901639344, "grad_norm": 0.1695938568274096, "learning_rate": 3.59840362263042e-08, "loss": 0.2939, "loss_nan_ranks": 0, "loss_rank_avg": 0.09429032355546951, "step": 1260, "valid_targets_mean": 8700.3, "valid_targets_min": 2328 }, { "epoch": 6.912568306010929, "grad_norm": 0.18293122350836902, "learning_rate": 2.148893259453111e-08, "loss": 0.2965, "loss_nan_ranks": 0, "loss_rank_avg": 0.09766453504562378, "step": 1265, "valid_targets_mean": 9036.6, "valid_targets_min": 3738 }, { "epoch": 6.939890710382514, "grad_norm": 0.17737915360825743, "learning_rate": 1.070825047268631e-08, "loss": 0.2973, "loss_nan_ranks": 0, "loss_rank_avg": 0.09531472623348236, "step": 1270, "valid_targets_mean": 9068.0, "valid_targets_min": 3580 }, { "epoch": 6.967213114754099, "grad_norm": 0.18806311305133536, "learning_rate": 3.6439942142196815e-09, "loss": 0.3026, "loss_nan_ranks": 0, "loss_rank_avg": 0.10511855781078339, "step": 1275, "valid_targets_mean": 9262.5, "valid_targets_min": 2472 }, { "epoch": 6.994535519125683, "grad_norm": 0.18563493223557567, "learning_rate": 2.974772115682534e-10, "loss": 0.303, "loss_nan_ranks": 0, "loss_rank_avg": 0.09767770022153854, "step": 1280, "valid_targets_mean": 9584.3, "valid_targets_min": 3673 }, { "epoch": 7.0, "step": 1281, "total_flos": 5.272385024627311e+18, "train_loss": 0.0, "train_runtime": 1.2338, "train_samples_per_second": 99654.386, "train_steps_per_second": 1038.244 } ], "logging_steps": 5, "max_steps": 1281, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.272385024627311e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }